drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gpa...@apache.org
Subject [drill] branch master updated: DRILL-7245: Cap NDV at row count after applying filters
Date Thu, 09 May 2019 06:37:45 GMT
This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 0fa2967  DRILL-7245: Cap NDV at row count after applying filters
0fa2967 is described below

commit 0fa29677aea131727459c23602080fdf307c9ae1
Author: Gautam Parai <gparai@maprtech.com>
AuthorDate: Tue May 7 17:44:44 2019 -0700

    DRILL-7245: Cap NDV at row count after applying filters
    
    closes #1786
---
 .../drill/exec/planner/cost/DrillRelMdDistinctRowCount.java   | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
index ae62449..d7f701e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
@@ -136,7 +136,7 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
    */
   private Double getDistinctRowCountInternal(TableScan scan, RelMetadataQuery mq, DrillTable table,
       ImmutableBitSet groupKey, RelDataType type, RexNode predicate) {
-    double selectivity, rowCount;
+    double selectivity, gbyColPredSel, rowCount;
     /* If predicate is present, determine its selectivity to estimate filtered rows.
      * Thereafter, compute the number of distinct rows.
      */
@@ -172,16 +172,17 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
         break;
       }
       estRowCnt *= ndv;
-      selectivity = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
+      gbyColPredSel = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
       /* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
        * select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
        * whereas NDV(b) will not.
        */
-      if (selectivity > 0) {
-        estRowCnt *= selectivity;
+      if (gbyColPredSel > 0) {
+        estRowCnt *= gbyColPredSel;
       }
     }
-    estRowCnt = Math.min(estRowCnt, rowCount);
+    // Estimated NDV should not exceed number of rows after applying the filters
+    estRowCnt = Math.min(estRowCnt, selectivity*rowCount);
     if (!allColsHaveNDV) {
       if (logger.isDebugEnabled()) {
         logger.debug(String.format("NDV not available for %s(%s). Using default rowcount for group-by %s",


Mime
View raw message