carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kumarvisha...@apache.org
Subject [carbondata] branch master updated: [CARBONDATA-3345]A growing streaming ROW_V1 carbondata file would be ingored some InputSplits
Date Tue, 07 May 2019 06:56:58 GMT
This is an automated email from the ASF dual-hosted git repository.

kumarvishal09 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 0ab2412  [CARBONDATA-3345]A growing streaming ROW_V1 carbondata file would be ingored
some InputSplits
0ab2412 is described below

commit 0ab2412b2f403392a7a17ce20a2327a35f4b8dd0
Author: junyan-zg <275620888@qq.com>
AuthorDate: Wed Apr 24 22:46:51 2019 +0800

    [CARBONDATA-3345]A growing streaming ROW_V1 carbondata file would be ingored some InputSplits
    
    After looking at carbondata segments, when the file grows to more than 150 M (possibly
128M),
    Presto initiates a query by separating several small files, including those in ROW_V1
format.
    This bug causes some small files in ROW_V1 format to be ignored, resulting in inaccurate
queries.
    So for the carbondata ROW_V1 inputSplits MapKey(Java), I adjust concat 'carbonInput.getStart()'
to keeping the required inputSplit
    
    This closes #3186
---
 .../org/apache/carbondata/presto/impl/CarbonTableReader.java     | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
index 57d8d5e..7ffe053 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
@@ -46,6 +46,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
 import org.apache.carbondata.core.metadata.schema.table.TableInfo;
 import org.apache.carbondata.core.reader.ThriftReader;
 import org.apache.carbondata.core.scan.expression.Expression;
+import org.apache.carbondata.core.statusmanager.FileFormat;
 import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
 import org.apache.carbondata.core.util.CarbonProperties;
@@ -291,7 +292,13 @@ public class CarbonTableReader {
         // Use block distribution
         List<List<CarbonLocalInputSplit>> inputSplits = new ArrayList(
             result.stream().map(x -> (CarbonLocalInputSplit) x).collect(Collectors.groupingBy(
-                carbonInput -> carbonInput.getSegmentId().concat(carbonInput.getPath()))).values());
+                carbonInput -> {
+                  if (FileFormat.ROW_V1.equals(carbonInput.getFileFormat())) {
+                    return carbonInput.getSegmentId().concat(carbonInput.getPath())
+                      .concat(carbonInput.getStart() + "");
+                  }
+                  return carbonInput.getSegmentId().concat(carbonInput.getPath());
+                })).values());
         if (inputSplits != null) {
           for (int j = 0; j < inputSplits.size(); j++) {
             multiBlockSplitList.add(new CarbonLocalMultiBlockSplit(inputSplits.get(j),


Mime
View raw message