Repository: drill
Updated Branches:
refs/heads/master cb1a0236f -> ca53c2440
DRILL-4349: parquet reader returns wrong results when reading a nullable column that starts with a large number of nulls (>30k)
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/ca53c244
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/ca53c244
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/ca53c244
Branch: refs/heads/master
Commit: ca53c2440fb33e31220d11aee297fee67fc6bd6a
Parents: cb1a023
Author: adeneche <adeneche@gmail.com>
Authored: Wed Feb 3 15:42:22 2016 -0800
Committer: adeneche <adeneche@gmail.com>
Committed: Thu Feb 4 15:41:43 2016 -0800
----------------------------------------------------------------------
.../parquet/columnreaders/NullableColumnReader.java | 2 +-
.../exec/store/parquet2/TestDrillParquetReader.java | 13 +++++++++++++
.../src/test/resources/parquet2/4349.csv.gz | Bin 0 -> 202 bytes
3 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/ca53c244/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
index 4e52b70..2929eb2 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/NullableColumnReader.java
@@ -154,10 +154,10 @@ abstract class NullableColumnReader<V extends ValueVector> extends ColumnReader<
writeCount += runLength;
valuesReadInCurrentPass += runLength;
+ pageReader.readPosInBytes = readStartInBytes + readLength;
}
pageReader.valuesRead += recordsReadInThisIteration;
- pageReader.readPosInBytes = readStartInBytes + readLength;
totalValuesRead += runLength + nullRunLength;
http://git-wip-us.apache.org/repos/asf/drill/blob/ca53c244/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
index 05ca7fc..b18fd9d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet2/TestDrillParquetReader.java
@@ -71,4 +71,17 @@ public class TestDrillParquetReader extends BaseTestQuery {
public void testOptionalDecimal38() throws Exception {
testColumn("d38_opt");
}
+
+ @Test
+ public void test4349() throws Exception {
+ // start by creating a parquet file from the input csv file
+ runSQL("CREATE TABLE dfs_test.tmp.`4349` AS SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz`");
+
+ // querying the parquet file should return the same results found in the csv file
+ testBuilder()
+ .unOrdered()
+ .sqlQuery("SELECT * FROM dfs_test.tmp.`4349` WHERE id = 'b'")
+ .sqlBaselineQuery("SELECT columns[0] id, CAST(NULLIF(columns[1], '') AS DOUBLE) val FROM cp.`parquet2/4349.csv.gz` WHERE columns[0] = 'b'")
+ .go();
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/ca53c244/exec/java-exec/src/test/resources/parquet2/4349.csv.gz
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet2/4349.csv.gz b/exec/java-exec/src/test/resources/parquet2/4349.csv.gz
new file mode 100644
index 0000000..0729b0c
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet2/4349.csv.gz differ
|