drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [drill] 04/05: DRILL-7238: Fixed ConvertCountToDirectScan to handle non-existent columns
Date Thu, 09 May 2019 01:58:54 GMT
This is an automated email from the ASF dual-hosted git repository.

boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 3603f240922389939ca0498face0eb255448ea53
Author: Venkata Jyothsna Donapati <jyothsna.dvj@gmail.com>
AuthorDate: Fri May 3 15:21:59 2019 -0700

    DRILL-7238: Fixed ConvertCountToDirectScan to handle non-existent columns
    
    closes #1781
---
 .../logical/ConvertCountToDirectScanRule.java      |  6 +++-
 .../logical/TestConvertCountToDirectScan.java      | 37 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
index 7375499..3a4e6ab 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/ConvertCountToDirectScanRule.java
@@ -228,6 +228,7 @@ public class ConvertCountToDirectScanRule extends RelOptRule {
    *   2. For COUNT(*) and COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count
    *   3. For COUNT(nullable column), count = (total row count - column's null count)
    *   4. Also count can not be calculated for parition columns.
+   *   5. For the columns that are not present in the Summary(Non-existent columns), the count = 0
    *
    * @param settings planner options
    * @param metadataSummary metadata summary containing row counts and column counts
@@ -288,7 +289,10 @@ public class ConvertCountToDirectScanRule extends RelOptRule {
 
           Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
 
-         if (columnMetadata == null || columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
+          if (columnMetadata == null) {
+            // If the column doesn't exist in the table, row count is set to 0
+            cnt = 0;
+          } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
             // if column stats is not available don't apply this rule, return empty counts
             return ImmutableMap.of();
           } else {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
index eaf9257..c35ab2d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/planner/logical/TestConvertCountToDirectScan.java
@@ -340,4 +340,41 @@ public class TestConvertCountToDirectScan extends PlanTestBase {
       test("drop table if exists %s", tableName);
     }
   }
+
+  @Test
+  public void testCountsWithNonExColumn() throws Exception {
+    test("use dfs.tmp");
+    String tableName = "parquet_table_counts_nonex";
+
+    try {
+      test(String.format("create table `%s/1` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+      test(String.format("create table `%s/2` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+      test(String.format("create table `%s/3` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+      test(String.format("create table `%s/4` as select * from cp.`parquet/alltypes_optional.parquet`", tableName));
+
+      test("refresh table metadata %s", tableName);
+
+      String sql = String.format("select\n" +
+              "count(*) as star_count,\n" +
+              "count(col_int) as int_column_count,\n" +
+              "count(col_vrchr) as vrchr_column_count,\n" +
+              "count(non_existent) as non_existent\n" +
+              "from %s", tableName);
+
+      String usedMetaSummaryPattern = "usedMetadataSummaryFile = true";
+      String recordReaderPattern = "DynamicPojoRecordReader";
+
+      testPlanMatchingPatterns(sql, new String[]{usedMetaSummaryPattern, recordReaderPattern});
+
+      testBuilder()
+              .sqlQuery(sql)
+              .unOrdered()
+              .baselineColumns("star_count", "int_column_count", "vrchr_column_count", "non_existent")
+              .baselineValues(24L, 8L, 12L, 0L)
+              .go();
+
+    } finally {
+      test("drop table if exists %s", tableName);
+    }
+  }
 }


Mime
View raw message