carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kumarvisha...@apache.org
Subject [carbondata] branch master updated: [CARBONDATA-3515] Limit local dictionary size to 16MB and allow configuration.
Date Thu, 12 Sep 2019 07:30:33 GMT
This is an automated email from the ASF dual-hosted git repository.

kumarvishal09 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new da525ec  [CARBONDATA-3515] Limit local dictionary size to 16MB and allow configuration.
da525ec is described below

commit da525ece20f6606f8b2113ca32b7acb82f0698fd
Author: ajantha-bhat <ajanthabhat@gmail.com>
AuthorDate: Tue Sep 10 10:48:26 2019 +0530

    [CARBONDATA-3515] Limit local dictionary size to 16MB and allow configuration.
    
    problem: currently the local dictionary max size is 2GB. Because of this, for varchar columns
    or long string columns, the local dictionary can grow to 2GB in size. As the local dictionary
    is stored in the blocklet, the blocklet size can exceed 2GB even though the configured maximum
    blocklet size is 64MB. In some places, integer overflow happens during casting.
    
    solution: Limit the local dictionary size to 16MB and allow configuration. The default size is 4MB.
    
    This closes #3380
---
 .../core/constants/CarbonCommonConstants.java      | 11 ++++++
 .../dictionaryholder/MapBasedDictionaryStore.java  | 16 ++++++--
 .../carbondata/core/util/CarbonProperties.java     | 43 ++++++++++++++++++++++
 docs/configuration-parameters.md                   |  1 +
 4 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 67fa13f..ac77582 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1209,6 +1209,17 @@ public final class CarbonCommonConstants {
 
   public static final String CARBON_ENABLE_RANGE_COMPACTION_DEFAULT = "true";
 
+  @CarbonProperty
+  /**
+   * size based threshold for local dictionary in mb.
+   */
+  public static final String CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB =
+      "carbon.local.dictionary.size.threshold.inmb";
+
+  public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT = 4;
+
+  public static final int CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX = 16;
+
   //////////////////////////////////////////////////////////////////////////////////////////
   // Query parameter start here
   //////////////////////////////////////////////////////////////////////////////////////////
diff --git a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
index 7b8617a..0a50451 100644
--- a/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/localdictionary/dictionaryholder/MapBasedDictionaryStore.java
@@ -20,7 +20,9 @@ import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.carbondata.core.cache.dictionary.DictionaryByteArrayWrapper;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.localdictionary.exception.DictionaryThresholdReachedException;
+import org.apache.carbondata.core.util.CarbonProperties;
 
 /**
  * Map based dictionary holder class, it will use map to hold
@@ -51,6 +53,11 @@ public class MapBasedDictionaryStore implements DictionaryStore {
   private int dictionaryThreshold;
 
   /**
+   * dictionary threshold size in bytes
+   */
+  private long dictionarySizeThresholdInBytes;
+
+  /**
    * for checking threshold is reached or not
    */
   private boolean isThresholdReached;
@@ -62,6 +69,8 @@ public class MapBasedDictionaryStore implements DictionaryStore {
 
   public MapBasedDictionaryStore(int dictionaryThreshold) {
     this.dictionaryThreshold = dictionaryThreshold;
+    this.dictionarySizeThresholdInBytes = Integer.parseInt(CarbonProperties.getInstance()
+        .getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB))
<< 20;
     this.dictionary = new ConcurrentHashMap<>();
     this.referenceDictionaryArray = new DictionaryByteArrayWrapper[dictionaryThreshold];
   }
@@ -93,7 +102,7 @@ public class MapBasedDictionaryStore implements DictionaryStore {
           value = ++lastAssignValue;
           currentSize += data.length;
           // if new value is greater than threshold
-          if (value > dictionaryThreshold || currentSize >= Integer.MAX_VALUE) {
+          if (value > dictionaryThreshold || currentSize > dictionarySizeThresholdInBytes)
{
             // set the threshold boolean to true
             isThresholdReached = true;
             // throw exception
@@ -111,9 +120,10 @@ public class MapBasedDictionaryStore implements DictionaryStore {
 
   private void checkIfThresholdReached() throws DictionaryThresholdReachedException {
     if (isThresholdReached) {
-      if (currentSize >= Integer.MAX_VALUE) {
+      if (currentSize > dictionarySizeThresholdInBytes) {
         throw new DictionaryThresholdReachedException(
-            "Unable to generate dictionary. Dictionary Size crossed 2GB limit");
+            "Unable to generate dictionary. Dictionary Size crossed bytes: "
+                + dictionarySizeThresholdInBytes);
       } else {
         throw new DictionaryThresholdReachedException(
             "Unable to generate dictionary value. Dictionary threshold reached");
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
index adf4905..e4efc0b 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
@@ -202,6 +202,9 @@ public final class CarbonProperties {
       case CarbonCommonConstants.CARBON_INDEX_SERVER_SERIALIZATION_THRESHOLD:
         validateIndexServerSerializationThreshold();
         break;
+      case CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB:
+        validateAndGetLocalDictionarySizeThresholdInMB();
+        break;
       // TODO : Validation for carbon.lock.type should be handled for addProperty flow
       default:
         // none
@@ -268,6 +271,7 @@ public final class CarbonProperties {
     validateStringCharacterLimit();
     validateDetailQueryBatchSize();
     validateIndexServerSerializationThreshold();
+    validateAndGetLocalDictionarySizeThresholdInMB();
   }
 
   /**
@@ -1789,4 +1793,43 @@ public final class CarbonProperties {
       return !prefetchEnable.equalsIgnoreCase("false");
     }
   }
+
+  /**
+   * get local dictionary size threshold in mb.
+   */
+  private void validateAndGetLocalDictionarySizeThresholdInMB() {
+    String sizeStr = carbonProperties
+        .getProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB);
+    String defaultValue = Integer
+        .toString(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_DEFAULT);
+    if (sizeStr == null) {
+      carbonProperties
+          .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+              defaultValue);
+    } else {
+      try {
+        int size = Integer.parseInt(sizeStr);
+        if (size < 0 || size == 0
+            || size > CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB_MAX)
{
+          LOGGER.info("using default value of carbon.local.dictionary.size.threshold.inmb
= "
+              + defaultValue);
+          carbonProperties
+              .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+                  defaultValue);
+        } else {
+          LOGGER.info("using carbon.local.dictionary.size.threshold.inmb = " + size);
+          carbonProperties
+              .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+                  Integer.toString(size));
+        }
+      } catch (Exception ex) {
+        LOGGER.info(
+            "using default value of carbon.local.dictionary.size.threshold.inmb = " + defaultValue);
+        carbonProperties
+            .setProperty(CarbonCommonConstants.CARBON_LOCAL_DICTIONARY_SIZE_THRESHOLD_IN_MB,
+                defaultValue);
+      }
+    }
+  }
+
 }
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index da226ec..51017fe 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -96,6 +96,7 @@ This section provides the details of all the configurations required for
the Car
 | carbon.minmax.allowed.byte.count | 200 | CarbonData will write the min max values for string/varchar
types column using the byte count specified by this configuration. Max value is 1000 bytes(500
characters) and Min value is 10 bytes(5 characters). **NOTE:** This property is useful for
reducing the store size thereby improving the query performance but can lead to query degradation
if value is not configured properly. | |
 | carbon.merge.index.failure.throw.exception | true | It is used to configure whether or
not merge index failure should result in data load failure also. |
 | carbon.binary.decoder | None | Support configurable decode for loading. Two decoders supported:
base64 and hex |
+| carbon.local.dictionary.size.threshold.inmb | 4 | size based threshold for local dictionary
in MB, maximum allowed size is 16 MB. |
 
 ## Compaction Configuration
 


Mime
View raw message