carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [carbondata] 12/41: [CARBONDATA-3301]Fix inserting null values to Array<date> columns in carbon file format data load
Date Tue, 02 Apr 2019 02:41:32 GMT
This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 6fa9bd2720536956bebb2a3ff37b25946efec545
Author: akashrn5 <akashrn25@gmail.com>
AuthorDate: Fri Feb 22 17:11:57 2019 +0530

    [CARBONDATA-3301]Fix inserting null values to Array<date> columns in carbon file
format data load
    
    Problem:
    When carbon datasource table contains columns like complex column with Array<date> or Array<timestamp>
and data is inserted and queried, it gives null data for those columns.
    
    Solution:
    In file format case, before the actual load we get the internal row object from spark,
and we convert the internal row to a carbondata-understandable object, so that the object for date
will be of Integer type. While inserting data, only the Long case was handled and we were passing
this integer value to parse in SimpleDateFormat, which throws an exception and we were inserting
null. So handled for Integer: in this case, directly assign the surrogate key with this value.
    
    This closes #3133
---
 .../sql/carbondata/datasource/SparkCarbonDataSourceTest.scala | 11 +++++++++++
 .../carbondata/processing/datatypes/PrimitiveDataType.java    |  7 ++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
index fa37548..d25e675 100644
--- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
+++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
@@ -1760,6 +1760,16 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll
{
     spark.sql("drop table if exists fileformat_drop_hive")
   }
 
+  test("test complexdatype for date and timestamp datatype") {
+    spark.sql("drop table if exists fileformat_date")
+    spark.sql("drop table if exists fileformat_date_hive")
+    spark.sql("create table fileformat_date_hive(name string, age int, dob array<date>,
joinTime array<timestamp>) using parquet")
+    spark.sql("create table fileformat_date(name string, age int, dob array<date>,
joinTime array<timestamp>) using carbon")
+    spark.sql("insert into fileformat_date_hive select 'joey', 32, array('1994-04-06','1887-05-06'),
array('1994-04-06 00:00:05','1887-05-06 00:00:08')")
+    spark.sql("insert into fileformat_date select 'joey', 32, array('1994-04-06','1887-05-06'),
array('1994-04-06 00:00:05','1887-05-06 00:00:08')")
+    checkAnswer(spark.sql("select * from fileformat_date_hive"), spark.sql("select * from
fileformat_date"))
+  }
+
   test("validate the columns not present in schema") {
     spark.sql("drop table if exists validate")
     spark.sql("create table validate (name string, age int, address string) using carbon
options('inverted_index'='abc')")
@@ -1785,5 +1795,6 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll
{
     spark.sql("drop table if exists par_table")
     spark.sql("drop table if exists sdkout")
     spark.sql("drop table if exists validate")
+    spark.sql("drop table if exists fileformat_date")
   }
 }
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
index cfbaa11..18dc89d 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -344,7 +344,7 @@ public class PrimitiveDataType implements GenericDataType<Object>
{
             byte[] value = null;
             if (isDirectDictionary) {
               int surrogateKey;
-              if (!(input instanceof Long)) {
+              if (!(input instanceof Long) && !(input instanceof Integer)) {
                 SimpleDateFormat parser = new SimpleDateFormat(getDateFormat(carbonDimension));
                 parser.parse(parsedValue);
               }
@@ -353,6 +353,11 @@ public class PrimitiveDataType implements GenericDataType<Object>
{
               // using dictionaryGenerator.
               if (dictionaryGenerator instanceof DirectDictionary && input instanceof
Long) {
                 surrogateKey = ((DirectDictionary) dictionaryGenerator).generateKey((long)
input);
+              } else if (dictionaryGenerator instanceof DirectDictionary
+                  && input instanceof Integer) {
+                // In case of file format, for complex type date or time type, input data
comes as a
+                // Integer object, so just assign the surrogate key with the input object
value
+                surrogateKey = (int) input;
               } else {
                 surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue);
               }


Mime
View raw message