This is an automated email from the ASF dual-hosted git repository.
jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new f068926 [CARBONDATA-3473] Fix data size calculation of the last column in CarbonCli
f068926 is described below
commit f06892674a0553c4956cfbb6e9f1b29d8b2b1c42
Author: Manhua <kevinjmh@qq.com>
AuthorDate: Wed Jul 17 17:39:29 2019 +0800
[CARBONDATA-3473] Fix data size calculation of the last column in CarbonCli
When updating the chunk data size of the last column, the current code uses
columnDataSize.add(fileSizeInBytes - footerSizeInBytes - previousChunkOffset) for every
blocklet. This leads to a wrong result when calculating the data size of the last column,
especially when a carbon data file has multiple blocklets, because only the last blocklet
actually ends where the footer starts.
In this PR, we fix this problem by recording the end offset of each blocklet and using
it in the calculation.
This closes #3330
---
.../java/org/apache/carbondata/tool/DataFile.java | 32 +++++++++++-----------
.../org/apache/carbondata/tool/CarbonCliTest.java | 6 ++--
2 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
index e553a78..4ed3945 100644
--- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
+++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
@@ -121,16 +121,21 @@ class DataFile {
this.partNo = CarbonTablePath.DataFileUtil.getPartNo(fileName);
// calculate blocklet size and column size
- // first calculate the header size, it equals the offset of first
- // column chunk in first blocklet
- long headerSizeInBytes = footer.blocklet_info_list3.get(0).column_data_chunks_offsets.get(0);
- long previousOffset = headerSizeInBytes;
- for (BlockletInfo3 blockletInfo3 : footer.blocklet_info_list3) {
+ for (int j = 0; j < footer.getBlocklet_info_list3().size(); j++) {
+ // remark start and end offset of current blocklet for computing blocklet size
+ // and chunk data size of the last column
+ BlockletInfo3 blockletInfo3 = footer.blocklet_info_list3.get(j);
+ long blockletEndOffset;
+ if (j != footer.getBlocklet_info_list3().size() - 1) {
+ // end offset = start of next blocklet, i.e. offset of its first column chunk (index 0, not j)
+ blockletEndOffset = footer.blocklet_info_list3.get(j + 1).column_data_chunks_offsets.get(0);
+ } else {
+ // use start offset of footer as end offset of current blocklet if it is the last blocklet
+ blockletEndOffset = fileSizeInBytes - footerSizeInBytes;
+ }
// calculate blocklet size in bytes
- long blockletOffset = blockletInfo3.column_data_chunks_offsets.get(0);
- blockletSizeInBytes.add(blockletOffset - previousOffset);
- previousOffset = blockletOffset;
-
+ this.blockletSizeInBytes.add(
+ blockletEndOffset - blockletInfo3.column_data_chunks_offsets.get(0));
// calculate column size in bytes for each column
LinkedList<Long> columnDataSize = new LinkedList<>();
LinkedList<Long> columnMetaSize = new LinkedList<>();
@@ -140,17 +145,12 @@ class DataFile {
columnMetaSize.add(blockletInfo3.column_data_chunks_length.get(i).longValue());
previousChunkOffset = blockletInfo3.column_data_chunks_offsets.get(i);
}
- // last column chunk data size
- columnDataSize.add(fileSizeInBytes - footerSizeInBytes - previousChunkOffset);
+ // update chunk data size of the last column
+ columnDataSize.add(blockletEndOffset - previousChunkOffset);
columnDataSize.removeFirst();
this.columnDataSizeInBytes.add(columnDataSize);
this.columnMetaSizeInBytes.add(columnMetaSize);
-
}
- // last blocklet size
- blockletSizeInBytes.add(
- fileSizeInBytes - footerSizeInBytes - headerSizeInBytes - previousOffset);
- this.blockletSizeInBytes.removeFirst();
assert (blockletSizeInBytes.size() == getNumBlocklets());
}
diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index af8d51d..4d89777 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -234,11 +234,11 @@ public class CarbonCliTest {
expectedOutput = buildLines(
"BLK BLKLT Meta Size Data Size LocalDict DictEntries DictSize AvgPageSize
Min% Max% Min Max " ,
- "0 0 3.36KB 5.14MB false 0 0.0B 93.76KB
0.0 100.0 0 2999990 " ,
+ "0 0 3.36KB 2.57MB false 0 0.0B 93.76KB
0.0 100.0 0 2999990 " ,
"0 1 3.36KB 2.57MB false 0 0.0B 93.76KB
0.0 100.0 1 2999992 " ,
- "1 0 3.36KB 5.14MB false 0 0.0B 93.76KB
0.0 100.0 3 2999994 " ,
+ "1 0 3.36KB 2.57MB false 0 0.0B 93.76KB
0.0 100.0 3 2999994 " ,
"1 1 3.36KB 2.57MB false 0 0.0B 93.76KB
0.0 100.0 5 2999996 " ,
- "2 0 3.36KB 4.06MB false 0 0.0B 93.76KB
0.0 100.0 7 2999998 " ,
+ "2 0 3.36KB 2.57MB false 0 0.0B 93.76KB
0.0 100.0 7 2999998 " ,
"2 1 2.04KB 1.49MB false 0 0.0B 89.62KB
0.0 100.0 9 2999999 ");
Assert.assertTrue(output.contains(expectedOutput));
Assert.assertTrue(output.contains("## version Details"));
|