carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kunalkap...@apache.org
Subject [carbondata] branch master updated: [CARBONDATA-3508] Support CG datamap pruning fallback while querying
Date Wed, 11 Sep 2019 10:02:40 GMT
This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e788be  [CARBONDATA-3508] Support CG datamap pruning fallback while querying
6e788be is described below

commit 6e788bea897fa5f265a4849214a139204f2fb10a
Author: shivamasn <shivamasn17@gmail.com>
AuthorDate: Thu Aug 29 11:49:41 2019 +0530

    [CARBONDATA-3508] Support CG datamap pruning fallback while querying
    
    Problem: A select query fails when a CG datamap is dropped concurrently
    while the query is filtering on the column on which the datamap was created.
    
    Solution: Catch the exception if CG datamap blocklet pruning fails, and
    fall back to using only the blocklets pruned by the default datamap.
    
    This closes #3369
---
 .../core/indexstore/BlockletDataMapIndexStore.java |  2 +-
 .../statusmanager/SegmentUpdateStatusManager.java  |  6 ++--
 .../datamap/bloom/BloomCoarseGrainDataMap.java     |  2 +-
 .../carbondata/hadoop/api/CarbonInputFormat.java   | 32 ++++++++++++++--------
 4 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
index 32ee9cb..fd549e0 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
@@ -80,7 +80,7 @@ public class BlockletDataMapIndexStore
     return get(identifierWrapper, null);
   }
 
-  private BlockletDataMapIndexWrapper get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
+  public BlockletDataMapIndexWrapper get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper,
      Map<String, Map<String, BlockMetaInfo>> segInfoCache) throws IOException {
     TableBlockIndexUniqueIdentifier identifier =
         identifierWrapper.getTableBlockIndexUniqueIdentifier();
diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
index f7083dc..bc794f4 100644
--- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
@@ -27,8 +27,10 @@ import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.carbondata.common.logging.LogServiceFactory;
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
@@ -790,8 +792,8 @@ public class SegmentUpdateStatusManager {
     final long deltaEndTimestamp =
         getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
 
-    List<CarbonFile> files =
-        new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+    Set<CarbonFile> files =
+        new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
 
     for (CarbonFile eachFile : allSegmentFiles) {
       String fileName = eachFile.getName();
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index fea48c3..f931353 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -232,7 +232,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
       LOGGER.warn(String.format("HitBlocklets is empty in bloom filter prune method. " +
               "bloomQueryModels size is %d, filterShards size if %d",
               bloomQueryModels.size(), filteredShard.size()));
-      return null;
+      return new ArrayList<>();
     }
     return new ArrayList<>(hitBlocklets);
   }
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index ac9e11e..45041e4 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -573,19 +573,29 @@ m filterExpression
       if (cgDataMapExprWrapper != null) {
         // Prune segments from already pruned blocklets
         DataMapUtil.pruneSegments(segmentIds, prunedBlocklets);
-        List<ExtendedBlocklet> cgPrunedBlocklets;
+        List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
+        boolean isCGPruneFallback = false;
         // Again prune with CG datamap.
-        if (distributedCG && dataMapJob != null) {
-          cgPrunedBlocklets = DataMapUtil
-              .executeDataMapJob(carbonTable, filter.getResolver(), dataMapJob, partitionsToPrune,
-                  segmentIds, invalidSegments, DataMapLevel.CG, true, new ArrayList<String>());
-        } else {
-          cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, partitionsToPrune);
+        try {
+          if (distributedCG && dataMapJob != null) {
+            cgPrunedBlocklets = DataMapUtil
+                .executeDataMapJob(carbonTable, filter.getResolver(), dataMapJob, partitionsToPrune,
+                    segmentIds, invalidSegments, DataMapLevel.CG, true, new ArrayList<String>());
+          } else {
+            cgPrunedBlocklets = cgDataMapExprWrapper.prune(segmentIds, partitionsToPrune);
+          }
+        } catch (Exception e) {
+          isCGPruneFallback = true;
+          LOG.error("CG datamap pruning failed.", e);
+        }
+        // If isCGPruneFallback = true, it means that CG datamap pruning failed,
+        // hence no need to do intersect and simply pass the prunedBlocklets from default datamap
+        if (!isCGPruneFallback) {
+          // since index datamap prune in segment scope,
+          // the result need to intersect with previous pruned result
+          prunedBlocklets =
+              intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
         }
-        // since index datamap prune in segment scope,
-        // the result need to intersect with previous pruned result
-        prunedBlocklets =
-            intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
         if (ExplainCollector.enabled()) {
           ExplainCollector.recordCGDataMapPruning(
               DataMapWrapperSimpleInfo.fromDataMapWrapper(cgDataMapExprWrapper),


Mime
View raw message