carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject carbondata git commit: [CARBONDATA-2902][DataMap] Fix showing negative pruning result for explain command
Date Mon, 03 Sep 2018 15:13:59 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 257672790 -> 3cbabcde0


[CARBONDATA-2902][DataMap] Fix showing negative pruning result for explain command

For a legacy store, print no pruning info.
For cache_level = block, count the blocklets in the hit blocks and print the skipped blocklet info.
For cache_level = blocklet, count the hit blocklets and print the skipped blocklet info.

This closes #2676


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3cbabcde
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3cbabcde
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3cbabcde

Branch: refs/heads/master
Commit: 3cbabcde079d7c9179545727af8fc25cffee7a89
Parents: 2576727
Author: Manhua <kevinjmh@qq.com>
Authored: Mon Sep 3 11:41:12 2018 +0800
Committer: Jacky Li <jacky.likun@qq.com>
Committed: Mon Sep 3 23:13:45 2018 +0800

----------------------------------------------------------------------
 .../indexstore/blockletindex/BlockDataMap.java  | 59 +++++++++++++----
 .../blockletindex/BlockletDataMap.java          | 28 +++++++-
 .../blockletindex/BlockletDataMapFactory.java   |  3 +-
 .../core/profiler/ExplainCollector.java         | 12 +++-
 .../core/profiler/TablePruningInfo.java         | 67 ++++++++++----------
 .../core/util/BlockletDataMapUtil.java          | 13 ++--
 .../hadoop/api/CarbonInputFormat.java           |  6 +-
 7 files changed, 125 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
index 0875e75..01c8670 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
@@ -18,9 +18,7 @@ package org.apache.carbondata.core.indexstore.blockletindex;
 
 import java.io.*;
 import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.BitSet;
-import java.util.List;
+import java.util.*;
 
 import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
@@ -544,6 +542,34 @@ public class BlockDataMap extends CoarseGrainDataMap
         .getSegmentPropertiesWrapper(segmentPropertiesIndex).getMinMaxCacheColumns();
   }
 
+  /**
+   * for CACHE_LEVEL=BLOCK, each entry in memoryDMStore is for a block
+   * if data is not legacy store, we can get blocklet count from taskSummaryDMStore
+   */
+  protected short getBlockletNumOfEntry(int index) {
+    if (isLegacyStore) {
+      // dummy value
+      return 0;
+    } else {
+      return ByteBuffer.wrap(getBlockletRowCountForEachBlock()).getShort(index);
+    }
+  }
+
+  // get total blocklet number in this datamap
+  protected int getTotalBlocklets() {
+    if (isLegacyStore) {
+      // dummy value
+      return 0;
+    } else {
+      ByteBuffer byteBuffer = ByteBuffer.wrap(getBlockletRowCountForEachBlock());
+      int sum = 0;
+      while (byteBuffer.hasRemaining()) {
+        sum += byteBuffer.getShort();
+      }
+      return sum;
+    }
+  }
+
   private List<Blocklet> prune(FilterResolverIntf filterExp) {
     if (memoryDMStore.getRowCount() == 0) {
       return new ArrayList<>();
@@ -551,26 +577,27 @@ public class BlockDataMap extends CoarseGrainDataMap
     List<Blocklet> blocklets = new ArrayList<>();
     CarbonRowSchema[] schema = getFileFooterEntrySchema();
     String filePath = getFilePath();
-    int numBlocklets = 0;
+    int numEntries = memoryDMStore.getRowCount();
+    int totalBlocklets = getTotalBlocklets();
+    int hitBlocklets = 0;
     if (filterExp == null) {
-      numBlocklets = memoryDMStore.getRowCount();
-      for (int i = 0; i < numBlocklets; i++) {
+      for (int i = 0; i < numEntries; i++) {
         DataMapRow safeRow = memoryDMStore.getDataMapRow(schema, i).convertToSafeRow();
         blocklets.add(createBlocklet(safeRow, getFileNameWithFilePath(safeRow, filePath),
             getBlockletId(safeRow), false));
       }
+      hitBlocklets = totalBlocklets;
     } else {
       // Remove B-tree jump logic as start and end key prepared is not
       // correct for old store scenarios
-      int startIndex = 0;
-      numBlocklets = memoryDMStore.getRowCount();
+      int entryIndex = 0;
       FilterExecuter filterExecuter = FilterUtil
           .getFilterExecuterTree(filterExp, getSegmentProperties(), null, getMinMaxCacheColumns());
       // flag to be used for deciding whether use min/max in executor pruning for BlockletDataMap
       boolean useMinMaxForPruning = useMinMaxForExecutorPruning(filterExp);
       // min and max for executor pruning
-      while (startIndex < numBlocklets) {
-        DataMapRow safeRow = memoryDMStore.getDataMapRow(schema, startIndex).convertToSafeRow();
+      while (entryIndex < numEntries) {
+        DataMapRow safeRow = memoryDMStore.getDataMapRow(schema, entryIndex).convertToSafeRow();
         String fileName = getFileNameWithFilePath(safeRow, filePath);
         short blockletId = getBlockletId(safeRow);
         boolean isValid =
@@ -578,11 +605,19 @@ public class BlockDataMap extends CoarseGrainDataMap
                 getMinMaxValue(safeRow, MIN_VALUES_INDEX), fileName, blockletId);
         if (isValid) {
           blocklets.add(createBlocklet(safeRow, fileName, blockletId, useMinMaxForPruning));
+          hitBlocklets += getBlockletNumOfEntry(entryIndex);
         }
-        startIndex++;
+        entryIndex++;
       }
     }
-    ExplainCollector.addTotalBlocklets(numBlocklets);
+
+    if (isLegacyStore) {
+      ExplainCollector.setShowPruningInfo(false);
+    } else {
+      ExplainCollector.setShowPruningInfo(true);
+      ExplainCollector.addTotalBlocklets(totalBlocklets);
+      ExplainCollector.addDefaultDataMapPruningHit(hitBlocklets);
+    }
     return blocklets;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
index d4d1cbb..683657a 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -48,7 +48,8 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
 
   private static final long serialVersionUID = -2170289352240810993L;
 
-  @Override public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
+  @Override
+  public void init(DataMapModel dataMapModel) throws IOException, MemoryException {
     super.init(dataMapModel);
   }
 
@@ -60,6 +61,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
    * @throws IOException
    * @throws MemoryException
    */
+  @Override
   protected DataMapRowImpl loadMetadata(CarbonRowSchema[] taskSummarySchema,
       SegmentProperties segmentProperties, BlockletDataMapModel blockletDataMapInfo,
       List<DataFileFooter> indexInfo) throws IOException, MemoryException {
@@ -72,6 +74,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
     }
   }
 
+  @Override
   protected CarbonRowSchema[] getTaskSummarySchema() {
     if (isLegacyStore) {
       return super.getTaskSummarySchema();
@@ -86,6 +89,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
     }
   }
 
+  @Override
   protected CarbonRowSchema[] getFileFooterEntrySchema() {
     if (isLegacyStore) {
       return super.getFileFooterEntrySchema();
@@ -203,6 +207,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
     return summaryRow;
   }
 
+  @Override
   public ExtendedBlocklet getDetailedBlocklet(String blockletId) {
     if (isLegacyStore) {
       return super.getDetailedBlocklet(blockletId);
@@ -216,6 +221,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
         false);
   }
 
+  @Override
   protected short getBlockletId(DataMapRow dataMapRow) {
     if (isLegacyStore) {
       return super.getBlockletId(dataMapRow);
@@ -223,6 +229,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
     return dataMapRow.getShort(BLOCKLET_ID_INDEX);
   }
 
+  @Override
   protected ExtendedBlocklet createBlocklet(DataMapRow row, String fileName, short blockletId,
       boolean useMinMaxForPruning) {
     if (isLegacyStore) {
@@ -238,4 +245,23 @@ public class BlockletDataMap extends BlockDataMap implements Serializable {
     return blocklet;
   }
 
+  @Override
+  protected short getBlockletNumOfEntry(int index) {
+    if (isLegacyStore) {
+      return super.getBlockletNumOfEntry(index);
+    } else {
+      //in blocklet datamap, each entry contains info of one blocklet
+      return 1;
+    }
+  }
+
+  @Override
+  protected int getTotalBlocklets() {
+    if (isLegacyStore) {
+      return super.getTotalBlocklets();
+    } else {
+      return memoryDMStore.getRowCount();
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
index 61d93f7..f36afa0 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
@@ -97,8 +97,7 @@ public class BlockletDataMapFactory extends CoarseGrainDataMapFactory
    * @return
    */
   public static DataMap createDataMap(CarbonTable carbonTable) {
-    boolean cacheLevelBlock =
-        BlockletDataMapUtil.isCacheLevelBlock(carbonTable, CACHE_LEVEL_BLOCKLET);
+    boolean cacheLevelBlock = BlockletDataMapUtil.isCacheLevelBlock(carbonTable);
     if (cacheLevelBlock) {
       // case1: when CACHE_LEVEL = BLOCK
       return new BlockDataMap();

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java b/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java
index 02e45f6..a86937c 100644
--- a/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java
@@ -94,11 +94,17 @@ public class ExplainCollector {
     }
   }
 
-  public static void recordDefaultDataMapPruning(DataMapWrapperSimpleInfo dataMapWrapperSimpleInfo,
-      int numBlocklets) {
+  public static void setShowPruningInfo(boolean showPruningInfo) {
+    if (enabled()) {
+      TablePruningInfo scan = getCurrentTablePruningInfo();
+      scan.setShowPruningInfo(showPruningInfo);
+    }
+  }
+
+  public static void addDefaultDataMapPruningHit(int numBlocklets) {
     if (enabled()) {
       TablePruningInfo scan = getCurrentTablePruningInfo();
-      scan.setNumBlockletsAfterDefaultPruning(dataMapWrapperSimpleInfo, numBlocklets);
+      scan.addNumBlockletsAfterDefaultPruning(numBlocklets);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/core/src/main/java/org/apache/carbondata/core/profiler/TablePruningInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/profiler/TablePruningInfo.java b/core/src/main/java/org/apache/carbondata/core/profiler/TablePruningInfo.java
index 651af9d..187540c 100644
--- a/core/src/main/java/org/apache/carbondata/core/profiler/TablePruningInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/profiler/TablePruningInfo.java
@@ -28,9 +28,9 @@ public class TablePruningInfo {
 
   private int totalBlocklets;
   private String filterStatement;
+  private boolean showPruningInfo;
 
-  private DataMapWrapperSimpleInfo defaultDataMap;
-  private int numBlockletsAfterDefaultPruning;
+  private int numBlockletsAfterDefaultPruning = 0;
 
   private DataMapWrapperSimpleInfo cgDataMap;
   private int numBlockletsAfterCGPruning;
@@ -46,10 +46,12 @@ public class TablePruningInfo {
     this.filterStatement = filterStatement;
   }
 
-  void setNumBlockletsAfterDefaultPruning(DataMapWrapperSimpleInfo dataMapWrapperSimpleInfo,
-      int numBlocklets) {
-    this.defaultDataMap = dataMapWrapperSimpleInfo;
-    this.numBlockletsAfterDefaultPruning = numBlocklets;
+  void setShowPruningInfo(boolean showPruningInfo) {
+    this.showPruningInfo = showPruningInfo;
+  }
+
+  void addNumBlockletsAfterDefaultPruning(int numBlocklets) {
+    this.numBlockletsAfterDefaultPruning += numBlocklets;
   }
 
   void setNumBlockletsAfterCGPruning(DataMapWrapperSimpleInfo dataMapWrapperSimpleInfo,
@@ -66,37 +68,38 @@ public class TablePruningInfo {
 
   @Override
   public String toString() {
-    StringBuilder builder = new StringBuilder();
-    builder
-        .append(" - total blocklets: ").append(totalBlocklets).append("\n")
-        .append(" - filter: ").append(filterStatement).append("\n");
-    if (defaultDataMap != null) {
+    if (showPruningInfo) {
+      StringBuilder builder = new StringBuilder();
+      builder
+          .append(" - total blocklets: ").append(totalBlocklets).append("\n")
+          .append(" - filter: ").append(filterStatement).append("\n");
       int skipBlocklets = totalBlocklets - numBlockletsAfterDefaultPruning;
       builder
           .append(" - pruned by Main DataMap").append("\n")
           .append("    - skipped blocklets: ").append(skipBlocklets).append("\n");
-    }
-    if (cgDataMap != null) {
-      int skipBlocklets = numBlockletsAfterDefaultPruning - numBlockletsAfterCGPruning;
-      builder
-          .append(" - pruned by CG DataMap").append("\n")
-          .append("    - name: ").append(cgDataMap.getDataMapWrapperName()).append("\n")
-          .append("    - provider: ").append(cgDataMap.getDataMapWrapperProvider()).append("\n")
-          .append("    - skipped blocklets: ").append(skipBlocklets).append("\n");
-    }
-    if (fgDataMap != null) {
-      int skipBlocklets;
-      if (numBlockletsAfterCGPruning != 0) {
-        skipBlocklets = numBlockletsAfterCGPruning - numBlockletsAfterFGPruning;
-      } else {
-        skipBlocklets = numBlockletsAfterDefaultPruning - numBlockletsAfterFGPruning;
+      if (cgDataMap != null) {
+        skipBlocklets = numBlockletsAfterDefaultPruning - numBlockletsAfterCGPruning;
+        builder
+            .append(" - pruned by CG DataMap").append("\n")
+            .append("    - name: ").append(cgDataMap.getDataMapWrapperName()).append("\n")
+            .append("    - provider: ").append(cgDataMap.getDataMapWrapperProvider()).append("\n")
+            .append("    - skipped blocklets: ").append(skipBlocklets).append("\n");
       }
-      builder
-          .append(" - pruned by FG DataMap").append("\n")
-          .append("    - name: ").append(fgDataMap.getDataMapWrapperName()).append("\n")
-          .append("    - provider: ").append(fgDataMap.getDataMapWrapperProvider()).append("\n")
-          .append("    - skipped blocklets: ").append(skipBlocklets).append("\n");
+      if (fgDataMap != null) {
+        if (numBlockletsAfterCGPruning != 0) {
+          skipBlocklets = numBlockletsAfterCGPruning - numBlockletsAfterFGPruning;
+        } else {
+          skipBlocklets = numBlockletsAfterDefaultPruning - numBlockletsAfterFGPruning;
+        }
+        builder
+            .append(" - pruned by FG DataMap").append("\n")
+            .append("    - name: ").append(fgDataMap.getDataMapWrapperName()).append("\n")
+            .append("    - provider: ").append(fgDataMap.getDataMapWrapperProvider()).append("\n")
+            .append("    - skipped blocklets: ").append(skipBlocklets).append("\n");
+      }
+      return builder.toString();
+    } else {
+      return "";
     }
-    return builder.toString();
   }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java b/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java
index 404b426..6ede653 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/BlockletDataMapUtil.java
@@ -45,6 +45,7 @@ import org.apache.carbondata.core.indexstore.BlockMetaInfo;
 import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier;
 import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper;
 import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapDistributable;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory;
 import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
 import org.apache.carbondata.core.metadata.datatype.DataType;
@@ -236,18 +237,14 @@ public class BlockletDataMapUtil {
 
   /**
    * Method to check if CACHE_LEVEL is set to BLOCK or BLOCKLET
-   *
-   * @param carbonTable
-   * @param cacheLevelBlocklet
-   * @return
    */
-  public static boolean isCacheLevelBlock(CarbonTable carbonTable, String cacheLevelBlocklet) {
+  public static boolean isCacheLevelBlock(CarbonTable carbonTable) {
     String cacheLevel = carbonTable.getTableInfo().getFactTable().getTableProperties()
         .get(CarbonCommonConstants.CACHE_LEVEL);
-    if (!cacheLevelBlocklet.equals(cacheLevel)) {
-      return true;
+    if (BlockletDataMapFactory.CACHE_LEVEL_BLOCKLET.equals(cacheLevel)) {
+      return false;
     }
-    return false;
+    return true;
   }
 
   private static boolean isSameColumnSchemaList(List<ColumnSchema> indexFileColumnList,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cbabcde/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
index 84c5d18..bcbbb10 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java
@@ -37,7 +37,6 @@ import org.apache.carbondata.core.datamap.dev.expr.DataMapWrapperSimpleInfo;
 import org.apache.carbondata.core.exception.InvalidConfigurationException;
 import org.apache.carbondata.core.indexstore.ExtendedBlocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;
-import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 import org.apache.carbondata.core.metadata.schema.PartitionInfo;
@@ -467,8 +466,6 @@ m filterExpression
     List<ExtendedBlocklet> prunedBlocklets =
         dataMapExprWrapper.prune(segmentIds, partitionsToPrune);
 
-    ExplainCollector.recordDefaultDataMapPruning(
-        DataMapWrapperSimpleInfo.fromDataMapWrapper(dataMapExprWrapper), prunedBlocklets.size());
     if (prunedBlocklets.size() == 0) {
       return prunedBlocklets;
     }
@@ -523,8 +520,7 @@ m filterExpression
       List<ExtendedBlocklet> previousDataMapPrunedBlocklets,
       List<ExtendedBlocklet> otherDataMapPrunedBlocklets) {
     List<ExtendedBlocklet> prunedBlocklets = null;
-    if (BlockletDataMapUtil.isCacheLevelBlock(
-        carbonTable, BlockletDataMapFactory.CACHE_LEVEL_BLOCKLET)) {
+    if (BlockletDataMapUtil.isCacheLevelBlock(carbonTable)) {
       prunedBlocklets = new ArrayList<>();
       for (ExtendedBlocklet otherBlocklet : otherDataMapPrunedBlocklets) {
         if (previousDataMapPrunedBlocklets.contains(otherBlocklet)) {


Mime
View raw message