carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject carbondata git commit: [CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK
Date Thu, 10 May 2018 15:19:39 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master ff5166ef7 -> 35a7b5e9a


[CARBONDATA-2443][SDK] Multi level complex type support for AVRO based SDK

Problem:
Inferring the schema of a complex type with a boolean array type fails when the store was
created using the SDK writer.

Analysis:
When we create an external table and infer the schema from a store created using the SDK writer,
the operation fails because of a complex type field with a boolean array dataType. This is because,
during schema creation by the SDK writer, a child with the column name val is added for array type
children.
During parsing, the logic to append the parent name to the child column name is missing for the
boolean type, which causes this problem.

Solution:
Handle the parsing for the boolean type.

This closes #2294


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/35a7b5e9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/35a7b5e9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/35a7b5e9

Branch: refs/heads/master
Commit: 35a7b5e9af5cabe85794274e07cab9a6bbbbc53f
Parents: ff5166e
Author: manishgupta88 <tomanishgupta18@gmail.com>
Authored: Thu May 10 17:09:17 2018 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Thu May 10 20:49:28 2018 +0530

----------------------------------------------------------------------
 .../schema/table/TableSchemaBuilder.java        | 33 ++++++++------------
 .../schema/table/TableSchemaBuilderSuite.java   | 13 +++++---
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala |  5 +++
 .../sdk/file/CarbonWriterBuilder.java           | 13 ++++++--
 4 files changed, 36 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index ca082e1..b078400 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants;
@@ -114,12 +115,12 @@ public class TableSchemaBuilder {
     this.sortColumns = sortColumns;
   }
 
-  public ColumnSchema addColumn(StructField field, boolean isSortColumn) {
-    return addColumn(field, null, isSortColumn, false);
+  public ColumnSchema addColumn(StructField field, AtomicInteger valIndex, boolean isSortColumn)
{
+    return addColumn(field, null, valIndex, isSortColumn, false);
   }
 
-  private ColumnSchema addColumn(StructField field, String parentName, boolean isSortColumn,
-      boolean isComplexChild) {
+  private ColumnSchema addColumn(StructField field, String parentName, AtomicInteger valIndex,
+      boolean isSortColumn, boolean isComplexChild) {
     Objects.requireNonNull(field);
     checkRepeatColumnName(field);
     ColumnSchema newColumn = new ColumnSchema();
@@ -184,33 +185,25 @@ public class TableSchemaBuilder {
     if (field.getDataType().isComplexType()) {
       String parentFieldName = newColumn.getColumnName();
       if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
-        String colName = getColNameForArray(parentFieldName);
-        addColumn(new StructField(colName,
-            ((ArrayType) field.getDataType()).getElementType()), field.getFieldName(), false,
true);
+        String colName = getColNameForArray(valIndex);
+        addColumn(new StructField(colName, ((ArrayType) field.getDataType()).getElementType()),
+            field.getFieldName(), valIndex, false, true);
       } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT")
           && ((StructType) field.getDataType()).getFields().size() > 0) {
         // This field has children.
         List<StructField> fields = ((StructType) field.getDataType()).getFields();
         for (int i = 0; i < fields.size(); i++) {
-          addColumn(fields.get(i), parentFieldName, false, true);
+          addColumn(fields.get(i), parentFieldName, valIndex, false, true);
         }
       }
     }
     return newColumn;
   }
 
-  private String getColNameForArray(String parentFieldName) {
-    if (!parentFieldName.endsWith(".val")) {
-      return "val";
-    } else {
-      String[] splits = parentFieldName.split("val");
-      if (splits.length == 1) {
-        return "val" + 1;
-      } else {
-        return "val" + (Integer.parseInt(parentFieldName
-            .substring(parentFieldName.lastIndexOf("val") + 3, parentFieldName.length()))
+ 1);
-      }
-    }
+  private String getColNameForArray(AtomicInteger valIndex) {
+    String colName = "val" + valIndex.get();
+    valIndex.incrementAndGet();
+    return colName;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilderSuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilderSuite.java
b/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilderSuite.java
index e9dce94..48e5d1b 100644
--- a/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilderSuite.java
+++ b/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilderSuite.java
@@ -19,6 +19,7 @@ package org.apache.carbondata.core.metadata.schema.table;
 
 import java.util.Arrays;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.datatype.StructField;
@@ -32,15 +33,16 @@ public class TableSchemaBuilderSuite {
   @Test(expected = NullPointerException.class)
   public void testNullField() {
     TableSchemaBuilder builder = TableSchema.builder();
-    builder.addColumn(null, true);
+    builder.addColumn(null, new AtomicInteger(0), true);
   }
 
   @Test
   public void testBuilder() {
     TableSchemaBuilder builder = TableSchema.builder();
-    ColumnSchema columnSchema = builder.addColumn(new StructField("a", DataTypes.INT), true);
+    ColumnSchema columnSchema =
+        builder.addColumn(new StructField("a", DataTypes.INT), new AtomicInteger(0), true);
     builder.setSortColumns(Arrays.asList(columnSchema));
-    builder.addColumn(new StructField("b", DataTypes.DOUBLE), false);
+    builder.addColumn(new StructField("b", DataTypes.DOUBLE), new AtomicInteger(0), false);
     TableSchema schema = builder.build();
     Assert.assertEquals(2, schema.getListOfColumns().size());
     List<ColumnSchema> columns = schema.getListOfColumns();
@@ -51,9 +53,10 @@ public class TableSchemaBuilderSuite {
   @Test(expected = IllegalArgumentException.class)
   public void testRepeatedColumn() {
     TableSchemaBuilder builder = TableSchema.builder();
-    ColumnSchema columnSchema = builder.addColumn(new StructField("a", DataTypes.INT), true);
+    ColumnSchema columnSchema =
+        builder.addColumn(new StructField("a", DataTypes.INT), new AtomicInteger(0), true);
     builder.setSortColumns(Arrays.asList(columnSchema));
-    builder.addColumn(new StructField("a", DataTypes.DOUBLE), false);
+    builder.addColumn(new StructField("a", DataTypes.DOUBLE), new AtomicInteger(0), false);
     TableSchema schema = builder.build();
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 890f8fc..9bc5597 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -1147,6 +1147,9 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser
{
       case "decimal" => Field(field.column, Some("Decimal"), field.name, Some(null), field.parent,
         field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
         field.columnComment)
+      case "boolean" => Field(field.column, Some("Boolean"), field.name, Some(null), field.parent,
+        field.storeType, field.schemaOrdinal, field.precision, field.scale, field.rawSchema,
+        field.columnComment)
       // checking if the nested data type contains the child type as decimal(10,0),
       // if it is present then extracting the precision and scale. resetting the data type
       // with Decimal.
@@ -1214,6 +1217,8 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser
{
       case "Decimal" => Field(parentName + "." + field.column, Some("Decimal"),
         Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName,
         field.storeType, field.schemaOrdinal, field.precision, field.scale)
+      case "Boolean" => Field(parentName + "." + field.column, Some("Boolean"),
+        Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
       case _ => field
     }
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/35a7b5e9/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
----------------------------------------------------------------------
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
index 76a46d0..f541dbb 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.carbondata.common.annotations.InterfaceAudience;
 import org.apache.carbondata.common.annotations.InterfaceStability;
@@ -426,6 +427,10 @@ public class CarbonWriterBuilder {
 
   private void buildTableSchema(Field[] fields, TableSchemaBuilder tableSchemaBuilder,
       List<String> sortColumnsList, ColumnSchema[] sortColumnsSchemaList) {
+    // a counter which will be used in case of complex array type. This valIndex will be
assigned
+    // to child of complex array type in the order val1, val2 so that each array type child
is
+    // differentiated to any level
+    AtomicInteger valIndex = new AtomicInteger(0);
     for (Field field : fields) {
       if (null != field) {
         int isSortColumn = sortColumnsList.indexOf(field.getFieldName());
@@ -443,7 +448,8 @@ public class CarbonWriterBuilder {
             // Loop through the inner columns and for a StructData
             DataType complexType =
                 DataTypes.createArrayType(field.getChildren().get(0).getDataType());
-            tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), complexType),
false);
+            tableSchemaBuilder
+                .addColumn(new StructField(field.getFieldName(), complexType), valIndex,
false);
           } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT")) {
             // Loop through the inner columns and for a StructData
             List<StructField> structFieldsArray =
@@ -453,12 +459,13 @@ public class CarbonWriterBuilder {
                   .add(new StructField(childFld.getFieldName(), childFld.getDataType()));
             }
             DataType complexType = DataTypes.createStructType(structFieldsArray);
-            tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), complexType),
false);
+            tableSchemaBuilder
+                .addColumn(new StructField(field.getFieldName(), complexType), valIndex,
false);
           }
         } else {
           ColumnSchema columnSchema = tableSchemaBuilder
               .addColumn(new StructField(field.getFieldName(), field.getDataType()),
-                  isSortColumn > -1);
+                  valIndex, isSortColumn > -1);
           columnSchema.setSortColumn(true);
           if (isSortColumn > -1) {
             sortColumnsSchemaList[isSortColumn] = columnSchema;


Mime
View raw message