carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [30/50] [abbrv] carbondata git commit: [CARBONDATA-2649] Fixed arrayIndexOutOfBoundException while loading Blocklet DataMap after alter add column operation
Date Wed, 18 Jul 2018 02:20:03 GMT
[CARBONDATA-2649] Fixed arrayIndexOutOfBoundException while loading Blocklet DataMap after
alter add column operation

Things done as part of this PR

Fixed arrayIndexOutOfBoundException while loading Blocklet DataMap after alter add column
operation
Problem:
Array Index out of bound exception was thrown after alter add column operation.

Analysis:
After alter add column operation if COLUMN_META_CACHE is set on the newly added columns, then
on executing select query on the data loaded before alter operation threw exception. This
was because minMaxCache caching columns were fetched irrespective of the segmentProperties.
Data loaded before alter add column operation will not have the newly added columns in its
columnSchemaList and hence can throw exception if non existent column are not removed from
min/max column cache. Solution:
Fetch the min/max cache columns based on segmentProperties

This closes #2510


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8e789571
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8e789571
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8e789571

Branch: refs/heads/carbonstore
Commit: 8e7895715753f13964887688fdf6e59d3dca5ed8
Parents: 7341907
Author: m00258959 <manish.gupta@huawei.com>
Authored: Mon Jul 16 12:26:41 2018 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Mon Jul 16 20:02:01 2018 +0530

----------------------------------------------------------------------
 .../block/SegmentPropertiesAndSchemaHolder.java | 14 ++++++----
 .../indexstore/BlockletDataMapIndexStore.java   |  3 ++-
 .../indexstore/blockletindex/BlockDataMap.java  | 10 +++----
 .../blockletindex/BlockletDataMap.java          |  2 +-
 .../blockletindex/BlockletDataMapModel.java     |  9 -------
 .../core/metadata/schema/table/CarbonTable.java | 28 ++++++++++++++------
 ...ithColumnMetCacheAndCacheLevelProperty.scala | 11 ++++++++
 7 files changed, 48 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
index e094076..bb7ff0d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
@@ -108,7 +108,7 @@ public class SegmentPropertiesAndSchemaHolder {
             this.segmentPropWrapperToSegmentSetMap.get(segmentPropertiesWrapper);
         if (null == segmentIdSetAndIndexWrapper) {
           // create new segmentProperties
-          segmentPropertiesWrapper.initSegmentProperties();
+          segmentPropertiesWrapper.initSegmentProperties(carbonTable);
           int segmentPropertiesIndex = segmentPropertiesIndexCounter.incrementAndGet();
           indexToSegmentPropertiesWrapperMapping
               .put(segmentPropertiesIndex, segmentPropertiesWrapper);
@@ -216,8 +216,11 @@ public class SegmentPropertiesAndSchemaHolder {
    *
    * @param segmentId
    * @param segmentPropertiesIndex
+   * @param clearSegmentWrapperFromMap flag to specify whether to clear segmentPropertiesWrapper
+   *                                   from Map if all the segment's using it have become
stale
    */
-  public void invalidate(String segmentId, int segmentPropertiesIndex) {
+  public void invalidate(String segmentId, int segmentPropertiesIndex,
+      boolean clearSegmentWrapperFromMap) {
     SegmentPropertiesWrapper segmentPropertiesWrapper =
         indexToSegmentPropertiesWrapperMapping.get(segmentPropertiesIndex);
     if (null != segmentPropertiesWrapper) {
@@ -230,7 +233,8 @@ public class SegmentPropertiesAndSchemaHolder {
       // if after removal of given SegmentId, the segmentIdSet becomes empty that means this
       // segmentPropertiesWrapper is not getting used at all. In that case this object can
be
       // removed from all the holders
-      if (segmentIdAndSegmentPropertiesIndexWrapper.segmentIdSet.isEmpty()) {
+      if (clearSegmentWrapperFromMap && segmentIdAndSegmentPropertiesIndexWrapper.segmentIdSet
+          .isEmpty()) {
         indexToSegmentPropertiesWrapperMapping.remove(segmentPropertiesIndex);
         segmentPropWrapperToSegmentSetMap.remove(segmentPropertiesWrapper);
       }
@@ -254,11 +258,11 @@ public class SegmentPropertiesAndSchemaHolder {
       this.tableIdentifier = carbonTable.getAbsoluteTableIdentifier();
       this.columnsInTable = columnsInTable;
       this.columnCardinality = columnCardinality;
-      this.minMaxCacheColumns = carbonTable.getMinMaxCacheColumns();
     }
 
-    public void initSegmentProperties() {
+    public void initSegmentProperties(CarbonTable carbonTable) {
       segmentProperties = new SegmentProperties(columnsInTable, columnCardinality);
+      this.minMaxCacheColumns = carbonTable.getMinMaxCacheColumns(segmentProperties);
     }
 
     @Override public boolean equals(Object obj) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
index 3918f3e..1501e6d 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
@@ -199,7 +199,8 @@ public class BlockletDataMapIndexStore
         // as segmentId will be same for all the dataMaps and segmentProperties cache is
         // maintained at segment level so it need to be called only once for clearing
         SegmentPropertiesAndSchemaHolder.getInstance()
-            .invalidate(segmentId, dataMaps.get(0).getSegmentPropertiesIndex());
+            .invalidate(segmentId, dataMaps.get(0).getSegmentPropertiesIndex(),
+                tableSegmentUniqueIdentifierWrapper.isAddTableBlockToUnsafe());
       }
     }
     lruCache.remove(tableSegmentUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier()

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
index f8126cc..7baba89 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java
@@ -224,7 +224,7 @@ public class BlockDataMap extends CoarseGrainDataMap
         byte[][] updatedMaxValues =
             CarbonUtil.updateMinMaxValues(fileFooter, maxValues, minValues, false);
         summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, fileFooter, segmentProperties,
-            blockletDataMapInfo.getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow,
+            getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow,
             blockMetaInfo, updatedMinValues, updatedMaxValues);
       }
     }
@@ -286,8 +286,8 @@ public class BlockDataMap extends CoarseGrainDataMap
           TableBlockInfo previousBlockInfo =
               previousDataFileFooter.getBlockInfo().getTableBlockInfo();
           summaryRow = loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter,
-              segmentProperties, blockletDataMapInfo.getMinMaxCacheColumns(),
-              previousBlockInfo.getFilePath(), summaryRow,
+              segmentProperties, getMinMaxCacheColumns(), previousBlockInfo.getFilePath(),
+              summaryRow,
               blockletDataMapInfo.getBlockMetaInfoMap().get(previousBlockInfo.getFilePath()),
               blockMinValues, blockMaxValues);
           // flag to check whether last file footer entry is different from previous entry.
@@ -311,7 +311,7 @@ public class BlockDataMap extends CoarseGrainDataMap
     if (isLastFileFooterEntryNeedToBeAdded) {
       summaryRow =
           loadToUnsafeBlock(schema, taskSummarySchema, previousDataFileFooter, segmentProperties,
-              blockletDataMapInfo.getMinMaxCacheColumns(),
+              getMinMaxCacheColumns(),
               previousDataFileFooter.getBlockInfo().getTableBlockInfo().getFilePath(), summaryRow,
               blockletDataMapInfo.getBlockMetaInfoMap()
                   .get(previousDataFileFooter.getBlockInfo().getTableBlockInfo().getFilePath()),
@@ -530,7 +530,7 @@ public class BlockDataMap extends CoarseGrainDataMap
     return false;
   }
 
-  private List<CarbonColumn> getMinMaxCacheColumns() {
+  protected List<CarbonColumn> getMinMaxCacheColumns() {
     return SegmentPropertiesAndSchemaHolder.getInstance()
         .getSegmentPropertiesWrapper(segmentPropertiesIndex).getMinMaxCacheColumns();
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
index bbe37c0..6a05442 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -126,7 +126,7 @@ public class BlockletDataMap extends BlockDataMap implements Serializable
{
           relativeBlockletId = 0;
         }
         summaryRow = loadToUnsafe(schema, taskSummarySchema, fileFooter, segmentProperties,
-            blockletDataMapInfo.getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow,
+            getMinMaxCacheColumns(), blockInfo.getFilePath(), summaryRow,
             blockMetaInfo, relativeBlockletId);
         // this is done because relative blocklet id need to be incremented based on the
         // total number of blocklets

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapModel.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapModel.java
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapModel.java
index 7516204..180c812 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapModel.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapModel.java
@@ -16,13 +16,11 @@
  */
 package org.apache.carbondata.core.indexstore.blockletindex;
 
-import java.util.List;
 import java.util.Map;
 
 import org.apache.carbondata.core.datamap.dev.DataMapModel;
 import org.apache.carbondata.core.indexstore.BlockMetaInfo;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 
 /**
  * It is the model object to keep the information to build or initialize BlockletDataMap.
@@ -35,8 +33,6 @@ public class BlockletDataMapModel extends DataMapModel {
 
   private CarbonTable carbonTable;
 
-  private List<CarbonColumn> minMaxCacheColumns;
-
   private String segmentId;
 
   private boolean addToUnsafe = true;
@@ -48,7 +44,6 @@ public class BlockletDataMapModel extends DataMapModel {
     this.blockMetaInfoMap = blockMetaInfoMap;
     this.segmentId = segmentId;
     this.carbonTable = carbonTable;
-    this.minMaxCacheColumns = carbonTable.getMinMaxCacheColumns();
   }
 
   public BlockletDataMapModel(CarbonTable carbonTable, String filePath,
@@ -77,8 +72,4 @@ public class BlockletDataMapModel extends DataMapModel {
   public CarbonTable getCarbonTable() {
     return carbonTable;
   }
-
-  public List<CarbonColumn> getMinMaxCacheColumns() {
-    return minMaxCacheColumns;
-  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index ffdd6b3..71256d4 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -34,6 +34,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datamap.DataMapStoreManager;
 import org.apache.carbondata.core.datamap.TableDataMap;
 import org.apache.carbondata.core.datamap.dev.DataMapFactory;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
@@ -1195,11 +1196,14 @@ public class CarbonTable implements Serializable {
 
   /**
    * Method to find get carbon columns for columns to be cached. It will fill dimension first
and
-   * then measures
+   * then measures based on the block segmentProperties.
+   * In alter add column scenarios it can happen that the newly added columns are being cached
+   * which do not exist in already loaded data. In those cases newly added columns should
not be
+   * cached for the already loaded data
    *
    * @return
    */
-  public List<CarbonColumn> getMinMaxCacheColumns() {
+  public List<CarbonColumn> getMinMaxCacheColumns(SegmentProperties segmentProperties)
{
     List<CarbonColumn> minMaxCachedColsList = null;
     String tableName = tableInfo.getFactTable().getTableName();
     String cacheColumns =
@@ -1215,12 +1219,16 @@ public class CarbonTable implements Serializable {
         CarbonDimension dimension = getDimensionByName(tableName, column);
         // if found in dimension then add to dimension else add to measures
         if (null != dimension) {
-          // first add normal dimensions and then complex dimensions
-          if (dimension.isComplex()) {
-            complexDimensions.add(dimension);
-            continue;
+          CarbonDimension dimensionFromCurrentBlock =
+              segmentProperties.getDimensionFromCurrentBlock(dimension);
+          if (null != dimensionFromCurrentBlock) {
+            // first add normal dimensions and then complex dimensions
+            if (dimensionFromCurrentBlock.isComplex()) {
+              complexDimensions.add(dimensionFromCurrentBlock);
+              continue;
+            }
+            minMaxCachedColsList.add(dimensionFromCurrentBlock);
           }
-          minMaxCachedColsList.add(dimension);
         } else {
           measureColumns.add(column);
         }
@@ -1231,7 +1239,11 @@ public class CarbonTable implements Serializable {
       for (String measureColumn : measureColumns) {
         CarbonMeasure measure = getMeasureByName(tableName, measureColumn);
         if (null != measure) {
-          minMaxCachedColsList.add(measure);
+          CarbonMeasure measureFromCurrentBlock =
+              segmentProperties.getMeasureFromCurrentBlock(measure.getColumnId());
+          if (null != measureFromCurrentBlock) {
+            minMaxCachedColsList.add(measureFromCurrentBlock);
+          }
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8e789571/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestQueryWithColumnMetCacheAndCacheLevelProperty.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestQueryWithColumnMetCacheAndCacheLevelProperty.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestQueryWithColumnMetCacheAndCacheLevelProperty.scala
index 3e1f188..af9930a 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestQueryWithColumnMetCacheAndCacheLevelProperty.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestQueryWithColumnMetCacheAndCacheLevelProperty.scala
@@ -257,4 +257,15 @@ class TestQueryWithColumnMetCacheAndCacheLevelProperty extends QueryTest
with Be
       Row(5))
   }
 
+  test("verify column caching with alter add column") {
+    sql("drop table if exists alter_add_column_min_max")
+    sql("create table alter_add_column_min_max (imei string,AMSize string,channelsId string,ActiveCountry
string, Activecity string,gamePointId double,deviceInformationId double,productionDate Timestamp,deliveryDate
timestamp,deliverycharge double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('table_blocksize'='1','COLUMN_META_CACHE'='AMSize','CACHE_LEVEL'='BLOCKLET')")
+    sql("insert into alter_add_column_min_max select '1AA1','8RAM size','4','Chinese','guangzhou',2738,1,'2014-07-01
12:07:28','2014-07-01 12:07:28',25")
+    sql("alter table alter_add_column_min_max add columns(age int, name string)")
+    sql("ALTER TABLE alter_add_column_min_max SET TBLPROPERTIES('COLUMN_META_CACHE'='age,name')")
+    sql("insert into alter_add_column_min_max select '1AA1','8RAM size','4','Chinese','guangzhou',2738,1,'2014-07-01
12:07:28','2014-07-01 12:07:28',25,29,'Rahul'")
+    checkAnswer(sql("select count(*) from alter_add_column_min_max where AMSize='8RAM size'"),
Row(2))
+    sql("drop table if exists alter_add_column_min_max")
+  }
+
 }


Mime
View raw message