drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From timothyfar...@apache.org
Subject [drill] 02/03: DRILL-6474: Don't use TopN when order by and offset are used without a limit specified.
Date Thu, 14 Jun 2018 04:11:21 GMT
This is an automated email from the ASF dual-hosted git repository.

timothyfarkas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 98dbc3a222990703aebe983883779763e0cdc1e9
Author: Timothy Farkas <timothyfarkas@apache.org>
AuthorDate: Wed Jun 6 12:04:39 2018 -0700

    DRILL-6474: Don't use TopN when order by and offset are used without a limit specified.
    
    closes #1313
---
 .../exec/planner/physical/PushLimitToTopN.java     | 21 +++++++++----
 .../physical/impl/limit/TestLimitOperator.java     | 23 ++++++++++++++
 .../physical/impl/limit/TestLimitPlanning.java     | 32 ++++++++++++++++++++
 .../org/apache/drill/test/DrillTestWrapper.java    | 24 ++++++++++++---
 .../java/org/apache/drill/test/TestBuilder.java    | 35 ++++++++++------------
 5 files changed, 105 insertions(+), 30 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
index 66126ec..1053941 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/physical/PushLimitToTopN.java
@@ -32,19 +32,28 @@ public class PushLimitToTopN  extends Prule{
 
   @Override
   public boolean matches(RelOptRuleCall call) {
-    return PrelUtil.getPlannerSettings(call.getPlanner()).getOptions()
-      .getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
+    boolean topNEnabled = PrelUtil.getPlannerSettings(call.getPlanner()).getOptions().getOption(PlannerSettings.TOPN.getOptionName()).bool_val;
+
+    if (!topNEnabled) {
+      return false;
+    } else {
+      // If no limit is defined it doesn't make sense to use TopN since it could use unbounded
memory in this case.
+      // We should use the sort and limit operator in this case.
+      // This also fixes DRILL-6474
+      final LimitPrel limit = call.rel(0);
+      return limit.getFetch() != null;
+    }
   }
 
   @Override
   public void onMatch(RelOptRuleCall call) {
-    final LimitPrel limit = (LimitPrel) call.rel(0);
-    final SingleMergeExchangePrel smex = (SingleMergeExchangePrel) call.rel(1);
-    final SortPrel sort = (SortPrel) call.rel(2);
+    final LimitPrel limit = call.rel(0);
+    final SingleMergeExchangePrel smex = call.rel(1);
+    final SortPrel sort = call.rel(2);
 
     // First offset to include into results (inclusive). Null implies it is starting from
offset 0
     int offset = limit.getOffset() != null ? Math.max(0, RexLiteral.intValue(limit.getOffset()))
: 0;
-    int fetch = limit.getFetch() != null?  Math.max(0, RexLiteral.intValue(limit.getFetch()))
: 0;
+    int fetch = Math.max(0, RexLiteral.intValue(limit.getFetch()));
 
     final TopNPrel topN = new TopNPrel(limit.getCluster(), sort.getTraitSet(), sort.getInput(),
offset + fetch, sort.getCollation());
     final LimitPrel newLimit = new LimitPrel(limit.getCluster(), limit.getTraitSet(),
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
index 22c0013..7225edc 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitOperator.java
@@ -20,12 +20,35 @@ package org.apache.drill.exec.physical.impl.limit;
 import com.google.common.collect.Lists;
 import org.apache.drill.exec.physical.config.Limit;
 import org.apache.drill.exec.physical.unit.PhysicalOpUnitTestBase;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.drill.test.ClientFixture;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterFixtureBuilder;
+import org.junit.Rule;
 import org.junit.Test;
 
 import java.util.List;
 
 public class TestLimitOperator extends PhysicalOpUnitTestBase {
 
+  @Rule
+  public BaseDirTestWatcher baseDirTestWatcher = new BaseDirTestWatcher();
+
+  // DRILL-6474
+  @Test
+  public void testLimitIntegrationTest() throws Exception {
+    final ClusterFixtureBuilder builder = new ClusterFixtureBuilder(baseDirTestWatcher);
+
+    try (ClusterFixture clusterFixture = builder.build();
+         ClientFixture clientFixture = clusterFixture.clientFixture()) {
+      clientFixture.testBuilder()
+        .sqlQuery("select name_s10 from `mock`.`employees_100000` order by name_s10 offset
100")
+        .expectsNumRecords(99900)
+        .build()
+        .run();
+    }
+  }
+
   @Test
   public void testLimitMoreRecords() {
     Limit limitConf = new Limit(null, 0, 10);
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
new file mode 100644
index 0000000..3f5fee2
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/limit/TestLimitPlanning.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.physical.impl.limit;
+
+import org.apache.drill.PlanTestBase;
+import org.junit.Test;
+
+public class TestLimitPlanning extends PlanTestBase {
+
+  // DRILL-6474
+  @Test
+  public void dontPushdownIntoTopNWhenNoLimit() throws Exception {
+    String query = "select full_name from cp.`employee.json` order by full_name offset 10";
+
+    PlanTestBase.testPlanMatchingPatterns(query, new String[]{".*Sort\\(.*"}, new String[]{".*TopN\\(.*"});
+  }
+}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
index 0dfc1f7..051f4b3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/DrillTestWrapper.java
@@ -33,6 +33,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 
+import com.google.common.base.Preconditions;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
@@ -85,6 +86,7 @@ public class DrillTestWrapper {
 
   // Unit test doesn't expect any specific batch count
   public static final int EXPECTED_BATCH_COUNT_NOT_SET = -1;
+  public static final int EXPECTED_NUM_RECORDS_NOT_SET = - 1;
 
   // The motivation behind the TestBuilder was to provide a clean API for test writers. The
model is mostly designed to
   // prepare all of the components necessary for running the tests, before the TestWrapper
is initialized. There is however
@@ -119,11 +121,13 @@ public class DrillTestWrapper {
   private List<Map<String, Object>> baselineRecords;
 
   private int expectedNumBatches;
+  private int expectedNumRecords;
 
   public DrillTestWrapper(TestBuilder testBuilder, TestServices services, Object query, QueryType
queryType,
       String baselineOptionSettingQueries, String testOptionSettingQueries,
       QueryType baselineQueryType, boolean ordered, boolean highPerformanceComparison,
-      String[] baselineColumns, List<Map<String, Object>> baselineRecords, int
expectedNumBatches) {
+      String[] baselineColumns, List<Map<String, Object>> baselineRecords, int
expectedNumBatches,
+      int expectedNumRecords) {
     this.testBuilder = testBuilder;
     this.services = services;
     this.query = query;
@@ -136,6 +140,13 @@ public class DrillTestWrapper {
     this.baselineColumns = baselineColumns;
     this.baselineRecords = baselineRecords;
     this.expectedNumBatches = expectedNumBatches;
+    this.expectedNumRecords = expectedNumRecords;
+
+    Preconditions.checkArgument(!(baselineRecords != null && !ordered &&
highPerformanceComparison));
+    Preconditions.checkArgument((baselineRecords != null && expectedNumRecords ==
DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineRecords == null,
+      "Cannot define both baselineRecords and the expectedNumRecords.");
+    Preconditions.checkArgument((baselineQueryType != null && expectedNumRecords
== DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) || baselineQueryType == null,
+      "Cannot define both a baselineQueryType and the expectedNumRecords.");
   }
 
   public void run() throws Exception {
@@ -527,9 +538,14 @@ public class DrillTestWrapper {
       // If baseline data was not provided to the test builder directly, we must run a query
for the baseline, this includes
       // the cases where the baseline is stored in a file.
       if (baselineRecords == null) {
-        test(baselineOptionSettingQueries);
-        expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
-        addToMaterializedResults(expectedRecords, expected, loader);
+        if (expectedNumRecords != DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET) {
+          Assert.assertEquals(expectedNumRecords, actualRecords.size());
+          return;
+        } else {
+          test(baselineOptionSettingQueries);
+          expected = testRunAndReturn(baselineQueryType, testBuilder.getValidationQuery());
+          addToMaterializedResults(expectedRecords, expected, loader);
+        }
       } else {
         expectedRecords = baselineRecords;
       }
diff --git a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
index e40f86d..98a0a9a 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/test/TestBuilder.java
@@ -91,6 +91,7 @@ public class TestBuilder {
   private List<Map<String, Object>> baselineRecords;
 
   private int expectedNumBatches = DrillTestWrapper.EXPECTED_BATCH_COUNT_NOT_SET;
+  private int expectedNumRecords = DrillTestWrapper.EXPECTED_NUM_RECORDS_NOT_SET;
 
   public TestBuilder(TestServices services) {
     this.services = services;
@@ -127,12 +128,9 @@ public class TestBuilder {
     return this;
   }
 
-  public DrillTestWrapper build() throws Exception {
-    if ( ! ordered && highPerformanceComparison ) {
-      throw new Exception("High performance comparison only available for ordered checks,
to enforce this restriction, ordered() must be called first.");
-    }
+  public DrillTestWrapper build() {
     return new DrillTestWrapper(this, services, query, queryType, baselineOptionSettingQueries,
testOptionSettingQueries,
-        getValidationQueryType(), ordered, highPerformanceComparison, baselineColumns, baselineRecords,
expectedNumBatches);
+        getValidationQueryType(), ordered, highPerformanceComparison, baselineColumns, baselineRecords,
expectedNumBatches, expectedNumRecords);
   }
 
   public List<Pair<SchemaPath, TypeProtos.MajorType>> getExpectedSchema() {
@@ -248,17 +246,8 @@ public class TestBuilder {
     throw new RuntimeException("Must provide some kind of baseline, either a baseline file
or another query");
   }
 
-  protected UserBitShared.QueryType getValidationQueryType() throws Exception {
-    if (singleExplicitBaselineRecord()) {
-      return null;
-    }
-
-    if (ordered) {
-      // If there are no base line records or no baseline query then we will check to make
sure that the records are in ascending order
-      return null;
-    }
-
-    throw new RuntimeException("Must provide some kind of baseline, either a baseline file
or another query");
+  protected UserBitShared.QueryType getValidationQueryType() {
+    return null;
   }
 
   public JSONTestBuilder jsonBaselineFile(String filePath) {
@@ -329,6 +318,12 @@ public class TestBuilder {
     return this;
   }
 
+  public TestBuilder expectsNumRecords(int expectedNumRecords) {
+    this.expectedNumRecords = expectedNumRecords;
+    this.ordered = false;
+    return this;
+  }
+
   /**
    * This method is used to pass in a simple list of values for a single record verification
without
    * the need to create a CSV or JSON file to store the baseline.
@@ -544,7 +539,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return UserBitShared.QueryType.SQL;
     }
   }
@@ -577,7 +572,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return null;
     }
 
@@ -608,7 +603,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return UserBitShared.QueryType.SQL;
     }
 
@@ -639,7 +634,7 @@ public class TestBuilder {
     }
 
     @Override
-    protected UserBitShared.QueryType getValidationQueryType() throws Exception {
+    protected UserBitShared.QueryType getValidationQueryType() {
       return baselineQueryType;
     }
 

-- 
To stop receiving notification emails like this one, please contact
timothyfarkas@apache.org.

Mime
View raw message