carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [carbondata] 04/22: [CARBONDATA-3334] fixed multiple segment file issue for partition
Date Thu, 16 May 2019 19:05:37 GMT
This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit d1bb3a0e1d9da21ba8493f4845f5c404b8eae56b
Author: kunal642 <kunalkapoor642@gmail.com>
AuthorDate: Thu Mar 28 14:33:45 2019 +0530

    [CARBONDATA-3334] fixed multiple segment file issue for partition
    
    Problem:
    During partition load, while writing merge index files, the FactTimestamp in the load model
is being changed to the current timestamp, due to which a new file with a mergeindex entry is written.
    
    Solution:
    Set a new timestamp if the FactTimestamp in the load model is 0L (meaning nothing is set).
    
    This closes #3167
---
 .../standardpartition/StandardPartitionTableLoadingTestCase.scala | 8 ++++++++
 .../sql/execution/command/management/CarbonLoadDataCommand.scala  | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
index 059dd2b..bee118a 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
@@ -496,6 +496,13 @@ class StandardPartitionTableLoadingTestCase extends QueryTest with BeforeAndAfte
     }
   }
 
+  test("test number of segment files should not be more than 1 per segment") {
+    sql("drop table if exists new_par")
+    sql("create table new_par(a string) partitioned by ( b int) stored by 'carbondata'")
+    sql("insert into new_par select 'k',1")
+    assert(new File(s"$storeLocation/new_par/Metadata/segments/").listFiles().size == 1)
+  }
+
 
 
   def restoreData(dblocation: String, tableName: String) = {
@@ -556,6 +563,7 @@ class StandardPartitionTableLoadingTestCase extends QueryTest with BeforeAndAfte
     sql("drop table if exists emp1")
     sql("drop table if exists restorepartition")
     sql("drop table if exists casesensitivepartition")
+    sql("drop table if exists new_par")
   }
 
 }
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 0c8a1df..b4ef1f0 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -805,6 +805,8 @@ case class CarbonLoadDataCommand(
       }
       if (updateModel.isDefined) {
         carbonLoadModel.setFactTimeStamp(updateModel.get.updatedTimeStamp)
+      } else if (carbonLoadModel.getFactTimeStamp == 0L) {
+        carbonLoadModel.setFactTimeStamp(System.currentTimeMillis())
       }
       // Create and add the segment to the tablestatus.
       CarbonLoaderUtil.readAndUpdateLoadProgressInTableMeta(carbonLoadModel, isOverwriteTable)
@@ -869,7 +871,6 @@ case class CarbonLoadDataCommand(
       }
     }
     try {
-      carbonLoadModel.setFactTimeStamp(System.currentTimeMillis())
       val compactedSegments = new util.ArrayList[String]()
       // Trigger auto compaction
       CarbonDataRDDFactory.handleSegmentMerging(


Mime
View raw message