carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [carbondata] branch master updated: [CARBONDATA-3473] Fix data size calcution of the last column in CarbonCli
Date Sun, 22 Sep 2019 15:34:14 GMT
This is an automated email from the ASF dual-hosted git repository.

jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new f068926  [CARBONDATA-3473] Fix data size calcution of the last column in CarbonCli
f068926 is described below

commit f06892674a0553c4956cfbb6e9f1b29d8b2b1c42
Author: Manhua <kevinjmh@qq.com>
AuthorDate: Wed Jul 17 17:39:29 2019 +0800

    [CARBONDATA-3473] Fix data size calcution of the last column in CarbonCli
    
    When updating the data size of the last column chunk, the current code uses columnDataSize.add(fileSizeInBytes
- footerSizeInBytes - previousChunkOffset) for every blocklet. This leads to a wrong result
when calculating the data size of the last column, especially when a carbon data file has multiple
blocklets.
    
    In this PR, we fix this problem and modify the calculation by recording the end offset
of each blocklet.
    
    This closes #3330
---
 .../java/org/apache/carbondata/tool/DataFile.java  | 32 +++++++++++-----------
 .../org/apache/carbondata/tool/CarbonCliTest.java  |  6 ++--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
index e553a78..4ed3945 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
@@ -121,16 +121,21 @@ class DataFile {
     this.partNo = CarbonTablePath.DataFileUtil.getPartNo(fileName);
 
     // calculate blocklet size and column size
-    // first calculate the header size, it equals the offset of first
-    // column chunk in first blocklet
-    long headerSizeInBytes = footer.blocklet_info_list3.get(0).column_data_chunks_offsets.get(0);
-    long previousOffset = headerSizeInBytes;
-    for (BlockletInfo3 blockletInfo3 : footer.blocklet_info_list3) {
+    for (int j = 0; j < footer.getBlocklet_info_list3().size(); j++) {
+      // remark start and end offset of current blocklet for computing blocklet size
+      // and chunk data size of the last column
+      BlockletInfo3 blockletInfo3 = footer.blocklet_info_list3.get(j);
+      long blockletEndOffset;
+      if (j != footer.getBlocklet_info_list3().size() - 1) {
+        // use start offset of next blocklet as end offset of current blocklet
+        blockletEndOffset = footer.blocklet_info_list3.get(j + 1).column_data_chunks_offsets.get(j);
+      } else {
+        // use start offset of footer as end offset of current blocklet if it is the last
blocklet
+        blockletEndOffset = fileSizeInBytes - footerSizeInBytes;
+      }
       // calculate blocklet size in bytes
-      long blockletOffset = blockletInfo3.column_data_chunks_offsets.get(0);
-      blockletSizeInBytes.add(blockletOffset - previousOffset);
-      previousOffset = blockletOffset;
-
+      this.blockletSizeInBytes.add(
+              blockletEndOffset - blockletInfo3.column_data_chunks_offsets.get(0));
       // calculate column size in bytes for each column
       LinkedList<Long> columnDataSize = new LinkedList<>();
       LinkedList<Long> columnMetaSize = new LinkedList<>();
@@ -140,17 +145,12 @@ class DataFile {
         columnMetaSize.add(blockletInfo3.column_data_chunks_length.get(i).longValue());
         previousChunkOffset = blockletInfo3.column_data_chunks_offsets.get(i);
       }
-      // last column chunk data size
-      columnDataSize.add(fileSizeInBytes - footerSizeInBytes - previousChunkOffset);
+      // update chunk data size of the last column
+      columnDataSize.add(blockletEndOffset - previousChunkOffset);
       columnDataSize.removeFirst();
       this.columnDataSizeInBytes.add(columnDataSize);
       this.columnMetaSizeInBytes.add(columnMetaSize);
-
     }
-    // last blocklet size
-    blockletSizeInBytes.add(
-        fileSizeInBytes - footerSizeInBytes - headerSizeInBytes - previousOffset);
-    this.blockletSizeInBytes.removeFirst();
 
     assert (blockletSizeInBytes.size() == getNumBlocklets());
   }
diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index af8d51d..4d89777 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -234,11 +234,11 @@ public class CarbonCliTest {
 
     expectedOutput = buildLines(
         "BLK  BLKLT  Meta Size  Data Size  LocalDict  DictEntries  DictSize  AvgPageSize
 Min%  Max%   Min  Max      " ,
-        "0    0      3.36KB     5.14MB     false      0            0.0B      93.76KB    
 0.0   100.0  0    2999990  " ,
+        "0    0      3.36KB     2.57MB     false      0            0.0B      93.76KB    
 0.0   100.0  0    2999990  " ,
         "0    1      3.36KB     2.57MB     false      0            0.0B      93.76KB    
 0.0   100.0  1    2999992  " ,
-        "1    0      3.36KB     5.14MB     false      0            0.0B      93.76KB    
 0.0   100.0  3    2999994  " ,
+        "1    0      3.36KB     2.57MB     false      0            0.0B      93.76KB    
 0.0   100.0  3    2999994  " ,
         "1    1      3.36KB     2.57MB     false      0            0.0B      93.76KB    
 0.0   100.0  5    2999996  " ,
-        "2    0      3.36KB     4.06MB     false      0            0.0B      93.76KB    
 0.0   100.0  7    2999998  " ,
+        "2    0      3.36KB     2.57MB     false      0            0.0B      93.76KB    
 0.0   100.0  7    2999998  " ,
         "2    1      2.04KB     1.49MB     false      0            0.0B      89.62KB    
 0.0   100.0  9    2999999  ");
     Assert.assertTrue(output.contains(expectedOutput));
     Assert.assertTrue(output.contains("## version Details"));


Mime
View raw message