carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [carbondata] 19/41: [CARBONDATA-3313] count(*) is not invalidating the invalid segments cache
Date Tue, 02 Apr 2019 02:41:39 GMT
This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 79d91fe3c469a3cfe7b017c64a9f0812d0f80ce8
Author: dhatchayani <dhatcha.official@gmail.com>
AuthorDate: Tue Mar 12 14:36:54 2019 +0530

    [CARBONDATA-3313] count(*) is not invalidating the invalid segments cache
    
    Problem:
    If any segment is deleted, the next query has to clear/invalidate the datamap cache for
those invalid segments. However, count(*) did not consider the invalid segments when clearing the
datamap cache.
    
    Solution:
    In the count(*) flow, before clearing the datamap cache, check for the invalid segments
of that table and add them to the list of segments whose cache entries should be cleaned.
    
    This closes #3144
---
 .../hadoop/api/CarbonTableInputFormat.java         |  2 ++
 .../sql/commands/TestCarbonShowCacheCommand.scala  | 23 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index c56b1db..281143b 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -617,6 +617,8 @@ public class CarbonTableInputFormat<T> extends CarbonInputFormat<T>
{
         toBeCleanedSegments.add(eachSegment);
       }
     }
+    // remove entry in the segment index if there are invalid segments
+    toBeCleanedSegments.addAll(allSegments.getInvalidSegments());
     if (toBeCleanedSegments.size() > 0) {
       DataMapStoreManager.getInstance()
           .clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()),
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
index e999fc7..69c5f7e 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
@@ -110,6 +110,28 @@ class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll
{
     sql("select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname").collect()
   }
 
+  test("test drop cache invalidation in case of invalid segments"){
+    sql(s"CREATE TABLE empTable(empno int, empname String, designation String, " +
+        s"doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, " +
+        s"deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp," +
+        s"attendance int, utilization int, salary int) stored by 'carbondata'")
+    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
+    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
+    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
+    sql("select count(*) from empTable").show()
+    var showCache = sql("SHOW METACACHE on table empTable").collect()
+    assert(showCache(0).get(2).toString.equalsIgnoreCase("3/3 index files cached"))
+    sql("delete from table empTable where segment.id in(0)").show()
+    // check whether count(*) query invalidates the cache for the invalid segments
+    sql("select count(*) from empTable").show()
+    showCache = sql("SHOW METACACHE on table empTable").collect()
+    assert(showCache(0).get(2).toString.equalsIgnoreCase("2/2 index files cached"))
+    sql("delete from table empTable where segment.id in(1)").show()
+    // check whether select * query invalidates the cache for the invalid segments
+    sql("select * from empTable").show()
+    showCache = sql("SHOW METACACHE on table empTable").collect()
+    assert(showCache(0).get(2).toString.equalsIgnoreCase("1/1 index files cached"))
+  }
 
   override protected def afterAll(): Unit = {
     sql("use default").collect()
@@ -122,6 +144,7 @@ class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll
{
     sql("DROP TABLE IF EXISTS cache_db.cache_3")
     sql("DROP TABLE IF EXISTS default.cache_4")
     sql("DROP TABLE IF EXISTS default.cache_5")
+    sql("DROP TABLE IF EXISTS empTable")
   }
 
   test("show cache") {


Mime
View raw message