This is an automated email from the ASF dual-hosted git repository.
boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit 3603f240922389939ca0498face0eb255448ea53
Author: Venkata Jyothsna Donapati <jyothsna.dvj@gmail.com>
AuthorDate: Fri May 3 15:21:59 2019 -0700
DRILL-7238: Fixed ConvertCountToDirectScan to handle non-existent columns
closes #1781
---
.../logical/ConvertCountToDirectScanRule.java | 6 +++-
.../logical/TestConvertCountToDirectScan.java | 37 ++++++++++++++++++++++
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
index 7375499..3a4e6ab 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
@@ -228,6 +228,7 @@ public class ConvertCountToDirectScanRule extends RelOptRule {
* 2. For COUNT(*) and COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count
* 3. For COUNT(nullable column), count = (total row count - column's null count)
* 4. Also count can not be calculated for parition columns.
+ * 5. For the columns that are not present in the Summary(Non-existent columns), the count = 0
*
* @param settings planner options
* @param metadataSummary metadata summary containing row counts and column counts
@@ -288,7 +289,10 @@ public class ConvertCountToDirectScanRule extends RelOptRule {
Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
- if (columnMetadata == null || columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
+ if (columnMetadata == null) {
+ // If the column doesn't exist in the table, row count is set to 0
+ cnt = 0;
+ } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
// if column stats is not available don't apply this rule, return empty counts
return ImmutableMap.of();
} else {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
index eaf9257..c35ab2d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
@@ -340,4 +340,41 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
test("drop table if exists %s", tableName);
}
}
+
+ @Test
+ public void testCountsWithNonExColumn() throws Exception {
+ test("use dfs.tmp");
+ String tableName = "parquet_table_counts_nonex";
+
+ try {
+ test(String.format("create table `%s/1` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+ test(String.format("create table `%s/2` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+ test(String.format("create table `%s/3` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+ test(String.format("create table `%s/4` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+
+ test("refresh table metadata %s", tableName);
+
+ String sql = String.format("select\n" +
+ "count(*) as star_count,\n" +
+ "count(col_int) as int_column_count,\n" +
+ "count(col_vrchr) as vrchr_column_count,\n" +
+ "count(non_existent) as non_existent\n" +
+ "from %s", tableName);
+
+ String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+ String recordReaderPattern = "DynamicPojoRecordReader";
+
+ testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern});
+
+ testBuilder()
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("star_count", "int_column_count", "vrchr_column_count", "non_existent")
+ .baselineValues(24L, 8L, 12L, 0L)
+ .go();
+
+ } finally {
+ test("drop table if exists %s", tableName);
+ }
+ }
}
|