carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject carbondata git commit: [CARBONDATA-2675][32K] Support config long_string_columns when create datamap
Date Fri, 06 Jul 2018 06:40:08 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 61187115c -> 5159abfc3


[CARBONDATA-2675][32K] Support config long_string_columns when create datamap

Creating a datamap uses a select statement, but a long string column is typed as StringType
in the resulting dataframe when that column is selected. This PR allows setting the
long_string_columns property in dmproperties so that such columns keep their long string type.

This closes #2432


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5159abfc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5159abfc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5159abfc

Branch: refs/heads/master
Commit: 5159abfc312942b4796667e9d509eb5152cc0566
Parents: 6118711
Author: Manhua <kevinjmh@qq.com>
Authored: Sat Jun 30 10:53:41 2018 +0800
Committer: Jacky Li <jacky.likun@qq.com>
Committed: Fri Jul 6 14:39:53 2018 +0800

----------------------------------------------------------------------
 .../preaggregate/TestPreAggCreateCommand.scala  |  3 +-
 .../VarcharDataTypesBasicTestCase.scala         | 33 +++++++++++++++++++-
 .../datamap/PreAggregateDataMapProvider.java    |  5 ++-
 .../preaaggregate/PreAggregateTableHelper.scala | 20 ++++++++++++
 4 files changed, 58 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/5159abfc/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
index fb25141..ddfb9e7 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggCreateCommand.scala
@@ -274,7 +274,8 @@ class TestPreAggCreateCommand extends QueryTest with BeforeAndAfterAll
{
            | GROUP BY dob,name
        """.stripMargin)
     }
-    assert(e.getMessage.contains("Only 'path' and 'partitioning' dmproperties are allowed
for this datamap"))
+    assert(e.getMessage.contains("Only 'path', 'partitioning' and 'long_string_columns' dmproperties
"
+      + "are allowed for this datamap"))
     sql("DROP TABLE IF EXISTS maintabletime")
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5159abfc/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
index 9798178..cb7cd81 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/longstring/VarcharDataTypesBasicTestCase.scala
@@ -26,6 +26,8 @@ import org.apache.spark.sql.types.{IntegerType, StringType, StructField,
StructT
 import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.metadata.CarbonMetadata
+import org.apache.carbondata.core.metadata.datatype.DataTypes
 import org.apache.carbondata.core.util.CarbonProperties
 
 class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll
{
@@ -233,7 +235,36 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach
wi
       CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_DEFAULT)
   }
 
-  // ignore this test in CI, because it will need at least 4GB memory to run successfully
+  test("Create datamap with long string column selected") {
+    val datamapName = "pre_agg_dm"
+    prepareTable()
+    sql(
+      s"""
+         | CREATE DATAMAP $datamapName ON TABLE $longStringTable
+         | USING 'preaggregate'
+         | DMPROPERTIES('LONG_STRING_COLUMNS'='description, note')
+         | AS SELECT id,description,note,count(*) FROM $longStringTable
+         | GROUP BY id,description,note
+         |""".
+        stripMargin)
+
+    val parentTable = CarbonMetadata.getInstance().getCarbonTable("default", longStringTable)
+    assert(null != parentTable)
+    val dmSchemaList = parentTable.getTableInfo.getDataMapSchemaList
+    assert(dmSchemaList.size() == 1)
+    assert(dmSchemaList.get(0).getDataMapName.equalsIgnoreCase(datamapName))
+
+    val dmTableName = longStringTable + "_" + datamapName
+    val dmTable = CarbonMetadata.getInstance().getCarbonTable("default", dmTableName)
+    assert(null != dmTable)
+    assert(dmTable.getColumnByName(dmTableName.toLowerCase(), longStringTable + "_description").getDataType
+      == DataTypes.VARCHAR)
+    assert(dmTable.getColumnByName(dmTableName.toLowerCase(), longStringTable + "_note").getDataType
+      == DataTypes.VARCHAR)
+    sql(s"DROP DATAMAP IF EXISTS $datamapName ON TABLE $longStringTable")
+  }
+
+    // ignore this test in CI, because it will need at least 4GB memory to run successfully
   ignore("Exceed 2GB per column page for varchar datatype") {
     deleteFile(inputFile_2g_column_page)
     if (!new File(inputDir).exists()) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5159abfc/integration/spark2/src/main/java/org/apache/carbondata/datamap/PreAggregateDataMapProvider.java
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/java/org/apache/carbondata/datamap/PreAggregateDataMapProvider.java
b/integration/spark2/src/main/java/org/apache/carbondata/datamap/PreAggregateDataMapProvider.java
index 37d49e5..8226f22 100644
--- a/integration/spark2/src/main/java/org/apache/carbondata/datamap/PreAggregateDataMapProvider.java
+++ b/integration/spark2/src/main/java/org/apache/carbondata/datamap/PreAggregateDataMapProvider.java
@@ -22,6 +22,7 @@ import java.util.Map;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 import org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandException;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datamap.DataMapCatalog;
 import org.apache.carbondata.core.datamap.DataMapProvider;
 import org.apache.carbondata.core.datamap.dev.DataMapFactory;
@@ -63,9 +64,11 @@ public class PreAggregateDataMapProvider extends DataMapProvider {
       properties.remove(DataMapProperty.DEFERRED_REBUILD);
       properties.remove(DataMapProperty.PATH);
       properties.remove(DataMapProperty.PARTITIONING);
+      properties.remove(CarbonCommonConstants.LONG_STRING_COLUMNS);
       if (properties.size() > 0) {
         throw new MalformedDataMapCommandException(
-                "Only 'path' and 'partitioning' dmproperties are allowed for this datamap");
+                "Only 'path', 'partitioning' and 'long_string_columns' dmproperties " +
+                "are allowed for this datamap");
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/5159abfc/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
index 7d57be5..c37ba89 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/preaaggregate/PreAggregateTableHelper.scala
@@ -106,6 +106,26 @@ case class PreAggregateTableHelper(
     tableProperties.put(CarbonCommonConstants.FLAT_FOLDER,
       parentTable.getTableInfo.getFactTable.getTableProperties.asScala.getOrElse(
         CarbonCommonConstants.FLAT_FOLDER, CarbonCommonConstants.DEFAULT_FLAT_FOLDER))
+
+    // Datamap table name and columns are automatically added prefix with parent table name
+    // in carbon. For convenient, users can type column names same as the ones in select
statement
+    // when config dmproperties, and here we update column names with prefix.
+    val longStringColumn = tableProperties.get(CarbonCommonConstants.LONG_STRING_COLUMNS)
+    if (longStringColumn != None) {
+      val fieldNames = fields.map(_.column)
+      val newLongStringColumn = longStringColumn.get.split(",").map(_.trim).map{ colName
=>
+        val newColName = parentTable.getTableName.toLowerCase() + "_" + colName
+        if (!fieldNames.contains(newColName)) {
+          throw new MalformedDataMapCommandException(
+            CarbonCommonConstants.LONG_STRING_COLUMNS.toUpperCase() + ":" + colName
+              + " does not in datamap")
+        }
+        newColName
+      }
+      tableProperties.put(CarbonCommonConstants.LONG_STRING_COLUMNS,
+        newLongStringColumn.mkString(","))
+    }
+
     // inherit the local dictionary properties of main parent table
     tableProperties
       .put(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE,


Mime
View raw message