carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [carbondata] branch master updated: [CARBONDATA-3644] Support Configuration of Complex Delimiters in Carbon Properties
Date Fri, 03 Jan 2020 01:12:48 GMT
This is an automated email from the ASF dual-hosted git repository.

jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new b3e6e80  [CARBONDATA-3644] Support Configuration of Complex Delimiters in Carbon Properties
b3e6e80 is described below

commit b3e6e801bfd5222a42b8f1c29f7150560732c29a
Author: h00424960 <haoxingjun@huawei.com>
AuthorDate: Tue Dec 31 17:35:11 2019 +0800

    [CARBONDATA-3644] Support Configuration of Complex Delimiters in Carbon Properties
    
    Why is this PR needed?
    
    Modification reason: When inserting into a carbon table by selecting from a parquet table,
if a binary column contains '\001' (for example 'col1\001col2'), the content before '\001'
is truncated because '\001' is the complex delimiter. The problem is that the complex delimiter
cannot be configured in the insert flow, which needs to be improved.
    
    What changes were proposed in this PR?
    
    Modification content: We add configuration properties for the complex delimiters to
CarbonProperties; these properties are loaded by LoadOption.
    
    Does this PR introduce any user interface change?
    
    No
    
    Is any new testcase added?
    
    Yes
    
    This closes #3552
---
 .../core/constants/CarbonCommonConstants.java      | 24 ++++++++++++++++++++++
 .../complexType/TestComplexDataType.scala          | 21 +++++++++++++++++++
 .../command/management/CarbonLoadDataCommand.scala |  3 ---
 .../processing/loading/model/LoadOption.java       | 18 +++++++++++++---
 4 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 9ad276c..10b2d89 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -146,6 +146,30 @@ public final class CarbonCommonConstants {
   public static final String DEFAULT_COMPRESSOR = "snappy";
 
   /**
+   * the level 1 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_1 = "carbon.complex.delimiter.level.1";
+
+  /**
+   * the level 2 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_2 = "carbon.complex.delimiter.level.2";
+
+  /**
+   * the level 3 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_3 = "carbon.complex.delimiter.level.3";
+
+  /**
+   * the level 4 complex delimiter
+   */
+  @CarbonProperty
+  public static final String COMPLEX_DELIMITERS_LEVEL_4 = "carbon.complex.delimiter.level.4";
+
+  /**
    * ZOOKEEPER_ENABLE_LOCK if this is set to true then zookeeper
    * will be used to handle locking
    * mechanism of carbon
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 2dbae36..32a5d92 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -42,6 +42,8 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll {
   override def beforeAll(): Unit = {
     sql("DROP TABLE IF EXISTS table1")
     sql("DROP TABLE IF EXISTS test")
+    sql("DROP TABLE IF EXISTS datatype_struct_carbondata")
+    sql("DROP TABLE IF EXISTS datatype_struct_parquet")
   }
 
   override def afterAll(): Unit = {
@@ -54,6 +56,8 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll {
         CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, badRecordAction)
+    CarbonProperties.getInstance()
+      .removeProperty(CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1)
   }
 
   test("test Projection PushDown for Struct - Integer type") {
@@ -1064,6 +1068,23 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll
{
     sql("drop table if exists hive_table")
   }
 
+  test("test when insert select from a parquet table with an struct with binary and custom
complex delimiter") {
+    var carbonProperties = CarbonProperties.getInstance()
+    carbonProperties.addProperty(CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1, "#")
+
+    sql("create table datatype_struct_parquet(price struct<a:binary>) stored as parquet")
+    sql("insert into table datatype_struct_parquet values(named_struct('a', 'col1\001col2'))")
+    sql("create table datatype_struct_carbondata(price struct<a:binary>) stored as
carbondata")
+    sql("insert into datatype_struct_carbondata select * from datatype_struct_parquet")
+    checkAnswer(
+      sql("SELECT * FROM datatype_struct_carbondata"),
+      sql("SELECT * FROM datatype_struct_parquet"))
+    sql("DROP TABLE IF EXISTS datatype_struct_carbondata")
+    sql("DROP TABLE IF EXISTS datatype_struct_parquet")
+
+    carbonProperties.removeProperty(CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1)
+  }
+
   test("[CARBONDATA-3527] Fix 'String length cannot exceed 32000 characters' issue when load
data with 'GLOBAL_SORT' from csv files which include big complex type data") {
     val tableName = "complexdata3_table"
     sql(s"drop table if exists ${tableName}")
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 7a853b9..0309e91 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -169,9 +169,6 @@ case class CarbonLoadDataCommand(
     val carbonLoadModel = new CarbonLoadModel()
     val tableProperties = table.getTableInfo.getFactTable.getTableProperties
     val optionsFinal = LoadOption.fillOptionWithDefaultValue(options.asJava)
-    optionsFinal
-      .put("complex_delimiter_level_4",
-        ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value())
 
     /**
     * Priority of sort_scope assignment :
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
index b0206bc..7915fdd 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
@@ -107,15 +107,27 @@ public class LoadOption {
 
     optionsFinal.put("complex_delimiter_level_1",
         Maps.getOrDefault(options, "complex_delimiter_level_1",
-            ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_1.value()));
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_1,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_1.value())));
 
     optionsFinal.put("complex_delimiter_level_2",
         Maps.getOrDefault(options, "complex_delimiter_level_2",
-            ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_2.value()));
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_2,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_2.value())));
 
     optionsFinal.put("complex_delimiter_level_3",
         Maps.getOrDefault(options, "complex_delimiter_level_3",
-            ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_3.value()));
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_3,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_3.value())));
+
+    optionsFinal.put("complex_delimiter_level_4",
+        Maps.getOrDefault(options, "complex_delimiter_level_4",
+            CarbonProperties.getInstance().getProperty(
+                CarbonCommonConstants.COMPLEX_DELIMITERS_LEVEL_4,
+                ComplexDelimitersEnum.COMPLEX_DELIMITERS_LEVEL_4.value())));
 
     optionsFinal.put(
         "dateformat",


Mime
View raw message