This is an automated email from the ASF dual-hosted git repository. timothyfarkas pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git commit 0ac082589171c1e1b2216d6c4b2bbb8f713737de Author: Vlad Rozov AuthorDate: Wed Jun 6 15:03:42 2018 -0700 DRILL-6481: Refactor ParquetXXXPredicate classes closes #1312 --- .../exec/expr/stat/ParquetBooleanPredicates.java | 88 +++-- .../expr/stat/ParquetComparisonPredicates.java | 357 ++++++++------------- .../exec/expr/stat/ParquetFilterPredicate.java | 4 +- .../drill/exec/expr/stat/ParquetIsPredicates.java | 246 ++++++-------- .../exec/expr/stat/ParquetPredicatesHelper.java | 15 +- .../drill/exec/expr/stat/RangeExprEvaluator.java | 96 +++--- .../exec/store/parquet/ParquetFilterBuilder.java | 43 +-- .../exec/store/parquet/stat/ColumnStatistics.java | 8 +- 8 files changed, 360 insertions(+), 497 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java index e5de34f..b062f87 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java @@ -27,50 +27,72 @@ import java.util.List; /** * Boolean predicates for parquet filter pushdown. 
*/ -public class ParquetBooleanPredicates { - public static abstract class ParquetBooleanPredicate extends BooleanOperator implements ParquetFilterPredicate { - public ParquetBooleanPredicate(String name, List args, ExpressionPosition pos) { - super(name, args, pos); - } +public abstract class ParquetBooleanPredicates> extends BooleanOperator + implements ParquetFilterPredicate { - @Override - public T accept(ExprVisitor visitor, V value) throws E { - return visitor.visitBooleanOperator(this, value); - } + private ParquetBooleanPredicates(String name, List args, ExpressionPosition pos) { + super(name, args, pos); } - public static class AndPredicate extends ParquetBooleanPredicate { - public AndPredicate(String name, List args, ExpressionPosition pos) { - super(name, args, pos); - } + @Override + public T accept(ExprVisitor visitor, V value) throws E { + return visitor.visitBooleanOperator(this, value); + } - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - // "and" : as long as one branch is OK to drop, we can drop it. - for (LogicalExpression child : this) { - if (child instanceof ParquetFilterPredicate && ((ParquetFilterPredicate) child).canDrop(evaluator)) { - return true; + @SuppressWarnings("unchecked") + private static > LogicalExpression createAndPredicate( + String name, + List args, + ExpressionPosition pos + ) { + return new ParquetBooleanPredicates(name, args, pos) { + @Override + public boolean canDrop(RangeExprEvaluator evaluator) { + // "and" : as long as one branch is OK to drop, we can drop it. 
+ for (LogicalExpression child : this) { + if (child instanceof ParquetFilterPredicate && ((ParquetFilterPredicate)child).canDrop(evaluator)) { + return true; + } } + return false; } - return false; - } + }; } - public static class OrPredicate extends ParquetBooleanPredicate { - public OrPredicate(String name, List args, ExpressionPosition pos) { - super(name, args, pos); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - for (LogicalExpression child : this) { - // "long" : as long as one branch is NOT ok to drop, we can NOT drop it. - if (! ((ParquetFilterPredicate) child).canDrop(evaluator)) { - return false; + @SuppressWarnings("unchecked") + private static > LogicalExpression createOrPredicate( + String name, + List args, + ExpressionPosition pos + ) { + return new ParquetBooleanPredicates(name, args, pos) { + @Override + public boolean canDrop(RangeExprEvaluator evaluator) { + for (LogicalExpression child : this) { + // "or" : as long as one branch is NOT ok to drop, we can NOT drop it. 
+ if (!(child instanceof ParquetFilterPredicate) || !((ParquetFilterPredicate)child).canDrop(evaluator)) { + return false; + } } + return true; } + }; + } - return true; + public static > LogicalExpression createBooleanPredicate( + String function, + String name, + List args, + ExpressionPosition pos + ) { + switch (function) { + case "booleanOr": + return ParquetBooleanPredicates.createOrPredicate(name, args, pos); + case "booleanAnd": + return ParquetBooleanPredicates.createAndPredicate(name, args, pos); + default: + logger.warn("Unknown Boolean '{}' predicate.", function); + return null; } } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java index 5ba597c..3cb8877 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java @@ -20,274 +20,199 @@ package org.apache.drill.exec.expr.stat; import org.apache.drill.common.expression.LogicalExpression; import org.apache.drill.common.expression.LogicalExpressionBase; import org.apache.drill.common.expression.visitors.ExprVisitor; +import org.apache.drill.exec.expr.fn.FunctionGenerationHelper; import org.apache.parquet.column.statistics.Statistics; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.BiPredicate; + +import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isNullOrEmpty; +import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isAllNulls; /** * Comparison predicates for parquet filter pushdown. 
*/ -public class ParquetComparisonPredicates { - public static abstract class ParquetCompPredicate extends LogicalExpressionBase implements ParquetFilterPredicate { - protected final LogicalExpression left; - protected final LogicalExpression right; +public class ParquetComparisonPredicates> extends LogicalExpressionBase + implements ParquetFilterPredicate { + private final LogicalExpression left; + private final LogicalExpression right; + private final BiPredicate, Statistics> predicate; + + private ParquetComparisonPredicates( + LogicalExpression left, + LogicalExpression right, + BiPredicate, Statistics> predicate + ) { + super(left.getPosition()); + this.left = left; + this.right = right; + this.predicate = predicate; + } - public ParquetCompPredicate(LogicalExpression left, LogicalExpression right) { - super(left.getPosition()); - this.left = left; - this.right = right; + @Override + public Iterator iterator() { + final List args = new ArrayList<>(); + args.add(left); + args.add(right); + return args.iterator(); + } + + @Override + public T accept(ExprVisitor visitor, V value) throws E { + return visitor.visitUnknown(this, value); + } + + /** + * Semantics of canDrop() is very similar to what is implemented in Parquet library's + * {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and + * {@link org.apache.parquet.filter2.predicate.FilterPredicate} + * + * Main difference : + * 1. A RangeExprEvaluator is used to compute the min/max of an expression, such as CAST function + * of a column. CAST function could be explicitly added by Drill user (It's recommended to use CAST + * function after DRILL-4372, if user wants to reduce planning time for limit 0 query), or implicitly + * inserted by Drill, when the types of compare operands are not identical. Therefore, it's important + * to allow CAST function to appear in the filter predicate. + * 2. 
We do not require list of ColumnChunkMetaData to do the evaluation, while Parquet library's + * StatisticsFilter has such requirement. Drill's ParquetTableMetaData does not maintain ColumnChunkMetaData, + * making it impossible to directly use Parquet library's StatisticFilter in query planning time. + * 3. We allows both sides of comparison operator to be a min/max range. As such, we support + * expression_of(Column1) < expression_of(Column2), + * where Column1 and Column2 are from same parquet table. + */ + @Override + public boolean canDrop(RangeExprEvaluator evaluator) { + Statistics leftStat = left.accept(evaluator, null); + if (isNullOrEmpty(leftStat)) { + return false; } - @Override - public Iterator iterator() { - final List args = new ArrayList<>(); - args.add(left); - args.add(right); - return args.iterator(); + Statistics rightStat = right.accept(evaluator, null); + if (isNullOrEmpty(rightStat)) { + return false; } - @Override - public T accept(ExprVisitor visitor, V value) throws E { - return visitor.visitUnknown(this, value); + // if either side is ALL null, = is evaluated to UNKNOWN -> canDrop + if (isAllNulls(leftStat, evaluator.getRowCount()) || isAllNulls(rightStat, evaluator.getRowCount())) { + return true; } + return (leftStat.hasNonNullValue() && rightStat.hasNonNullValue()) && predicate.test(leftStat, rightStat); } /** * EQ (=) predicate */ - public static class EqualPredicate extends ParquetCompPredicate { - public EqualPredicate(LogicalExpression left, LogicalExpression right) { - super(left, right); - } - - /** - Semantics of canDrop() is very similar to what is implemented in Parquet library's - {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and - {@link org.apache.parquet.filter2.predicate.FilterPredicate} - - Main difference : - 1. A RangeExprEvaluator is used to compute the min/max of an expression, such as CAST function - of a column. 
CAST function could be explicitly added by Drill user (It's recommended to use CAST - function after DRILL-4372, if user wants to reduce planning time for limit 0 query), or implicitly - inserted by Drill, when the types of compare operands are not identical. Therefore, it's important - to allow CAST function to appear in the filter predicate. - 2. We do not require list of ColumnChunkMetaData to do the evaluation, while Parquet library's - StatisticsFilter has such requirement. Drill's ParquetTableMetaData does not maintain ColumnChunkMetaData, - making it impossible to directly use Parquet library's StatisticFilter in query planning time. - 3. We allows both sides of comparison operator to be a min/max range. As such, we support - expression_of(Column1) < expression_of(Column2), - where Column1 and Column2 are from same parquet table. - */ - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics leftStat = left.accept(evaluator, null); - Statistics rightStat = right.accept(evaluator, null); - - if (leftStat == null || - rightStat == null || - leftStat.isEmpty() || - rightStat.isEmpty()) { - return false; - } - - // if either side is ALL null, = is evaluated to UNKNOW -> canDrop - if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) || - ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) { - return true; - } - + private static > LogicalExpression createEqualPredicate( + LogicalExpression left, + LogicalExpression right + ) { + return new ParquetComparisonPredicates(left, right, (leftStat, rightStat) -> { // can drop when left's max < right's min, or right's max < left's min - if ( ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) < 0 - || rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) < 0)) { - return true; - } else { - return false; - } - } - - @Override - public String toString() { - return left.toString() + " = " + right.toString(); - } + final C leftMin = 
leftStat.genericGetMin(); + final C rightMin = rightStat.genericGetMin(); + return leftStat.genericGetMax().compareTo(rightMin) < 0 || rightStat.genericGetMax().compareTo(leftMin) < 0; + }) { + @Override + public String toString() { + return left + " = " + right; + } + }; } /** * GT (>) predicate. */ - public static class GTPredicate extends ParquetCompPredicate { - public GTPredicate(LogicalExpression left, LogicalExpression right) { - super(left, right); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics leftStat = left.accept(evaluator, null); - Statistics rightStat = right.accept(evaluator, null); - - if (leftStat == null || - rightStat == null || - leftStat.isEmpty() || - rightStat.isEmpty()) { - return false; - } - - // if either side is ALL null, = is evaluated to UNKNOW -> canDrop - if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) || - ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) { - return true; - } - + private static > LogicalExpression createGTPredicate( + LogicalExpression left, + LogicalExpression right + ) { + return new ParquetComparisonPredicates(left, right, (leftStat, rightStat) -> { // can drop when left's max <= right's min. - if ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) <= 0 ) { - return true; - } else { - return false; - } - } + final C rightMin = rightStat.genericGetMin(); + return leftStat.genericGetMax().compareTo(rightMin) <= 0; + }); } /** * GE (>=) predicate. 
*/ - public static class GEPredicate extends ParquetCompPredicate { - public GEPredicate(LogicalExpression left, LogicalExpression right) { - super(left, right); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics leftStat = left.accept(evaluator, null); - Statistics rightStat = right.accept(evaluator, null); - - if (leftStat == null || - rightStat == null || - leftStat.isEmpty() || - rightStat.isEmpty()) { - return false; - } - - // if either side is ALL null, = is evaluated to UNKNOW -> canDrop - if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) || - ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) { - return true; - } - + private static > LogicalExpression createGEPredicate( + LogicalExpression left, + LogicalExpression right + ) { + return new ParquetComparisonPredicates(left, right, (leftStat, rightStat) -> { // can drop when left's max < right's min. - if ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) < 0 ) { - return true; - } else { - return false; - } - } + final C rightMin = rightStat.genericGetMin(); + return leftStat.genericGetMax().compareTo(rightMin) < 0; + }); } /** * LT (<) predicate. 
*/ - public static class LTPredicate extends ParquetCompPredicate { - public LTPredicate(LogicalExpression left, LogicalExpression right) { - super(left, right); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics leftStat = left.accept(evaluator, null); - Statistics rightStat = right.accept(evaluator, null); - - if (leftStat == null || - rightStat == null || - leftStat.isEmpty() || - rightStat.isEmpty()) { - return false; - } - - // if either side is ALL null, = is evaluated to UNKNOW -> canDrop - if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) || - ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) { - return true; - } - + private static > LogicalExpression createLTPredicate( + LogicalExpression left, + LogicalExpression right + ) { + return new ParquetComparisonPredicates(left, right, (leftStat, rightStat) -> { // can drop when right's max <= left's min. - if ( rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) <= 0 ) { - return true; - } else { - return false; - } - } + final C leftMin = leftStat.genericGetMin(); + return rightStat.genericGetMax().compareTo(leftMin) <= 0; + }); } /** * LE (<=) predicate. 
*/ - public static class LEPredicate extends ParquetCompPredicate { - public LEPredicate(LogicalExpression left, LogicalExpression right) { - super(left, right); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics leftStat = left.accept(evaluator, null); - Statistics rightStat = right.accept(evaluator, null); - - if (leftStat == null || - rightStat == null || - leftStat.isEmpty() || - rightStat.isEmpty()) { - return false; - } - - // if either side is ALL null, = is evaluated to UNKNOW -> canDrop - if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) || - ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) { - return true; - } - + private static > LogicalExpression createLEPredicate( + LogicalExpression left, LogicalExpression right + ) { + return new ParquetComparisonPredicates(left, right, (leftStat, rightStat) -> { // can drop when right's max < left's min. - if ( rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) < 0 ) { - return true; - } else { - return false; - } - } + final C leftMin = leftStat.genericGetMin(); + return rightStat.genericGetMax().compareTo(leftMin) < 0; + }); } /** * NE (!=) predicate. 
*/ - public static class NEPredicate extends ParquetCompPredicate { - public NEPredicate(LogicalExpression left, LogicalExpression right) { - super(left, right); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics leftStat = left.accept(evaluator, null); - Statistics rightStat = right.accept(evaluator, null); - - if (leftStat == null || - rightStat == null || - leftStat.isEmpty() || - rightStat.isEmpty()) { - return false; - } - - // if either side is ALL null, comparison is evaluated to UNKNOW -> canDrop - if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) || - ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) { - return true; - } - + private static > LogicalExpression createNEPredicate( + LogicalExpression left, + LogicalExpression right + ) { + return new ParquetComparisonPredicates(left, right, (leftStat, rightStat) -> { // can drop when there is only one unique value. - if ( leftStat.genericGetMin().compareTo(leftStat.genericGetMax()) == 0 && - rightStat.genericGetMin().compareTo(rightStat.genericGetMax()) ==0 && - leftStat.genericGetMax().compareTo(rightStat.genericGetMax()) == 0) { - return true; - } else { - return false; - } + final C leftMax = leftStat.genericGetMax(); + final C rightMax = rightStat.genericGetMax(); + return leftStat.genericGetMin().compareTo(leftMax) == 0 && rightStat.genericGetMin().compareTo(rightMax) == 0 && + leftStat.genericGetMax().compareTo(rightMax) == 0; + }); + } + + public static > LogicalExpression createComparisonPredicate( + String function, + LogicalExpression left, + LogicalExpression right + ) { + switch (function) { + case FunctionGenerationHelper.EQ: + return ParquetComparisonPredicates.createEqualPredicate(left, right); + case FunctionGenerationHelper.GT: + return ParquetComparisonPredicates.createGTPredicate(left, right); + case FunctionGenerationHelper.GE: + return ParquetComparisonPredicates.createGEPredicate(left, right); + case 
FunctionGenerationHelper.LT: + return ParquetComparisonPredicates.createLTPredicate(left, right); + case FunctionGenerationHelper.LE: + return ParquetComparisonPredicates.createLEPredicate(left, right); + case FunctionGenerationHelper.NE: + return ParquetComparisonPredicates.createNEPredicate(left, right); + default: + return null; } } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java index 898dc71..1b7e9e5 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java @@ -17,6 +17,6 @@ */ package org.apache.drill.exec.expr.stat; -public interface ParquetFilterPredicate { - boolean canDrop(RangeExprEvaluator evaluator); +public interface ParquetFilterPredicate> { + boolean canDrop(RangeExprEvaluator evaluator); } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java index ef2b940..d83f389 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java @@ -22,207 +22,149 @@ import org.apache.drill.common.expression.LogicalExpressionBase; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.common.expression.TypedFieldExpr; import org.apache.drill.common.expression.visitors.ExprVisitor; +import org.apache.drill.exec.expr.fn.FunctionGenerationHelper; import org.apache.parquet.column.statistics.Statistics; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.function.BiPredicate; + +import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.hasNoNulls; +import 
static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isAllNulls; +import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isNullOrEmpty; /** * IS predicates for parquet filter pushdown. */ -public class ParquetIsPredicates { +public class ParquetIsPredicates > extends LogicalExpressionBase + implements ParquetFilterPredicate { - public static abstract class ParquetIsPredicate extends LogicalExpressionBase implements ParquetFilterPredicate { - protected final LogicalExpression expr; + private final LogicalExpression expr; + private final BiPredicate, RangeExprEvaluator> predicate; - public ParquetIsPredicate(LogicalExpression expr) { - super(expr.getPosition()); - this.expr = expr; - } + private ParquetIsPredicates(LogicalExpression expr, BiPredicate, RangeExprEvaluator> predicate) { + super(expr.getPosition()); + this.expr = expr; + this.predicate = predicate; + } - @Override - public Iterator iterator() { - final List args = new ArrayList<>(); - args.add(expr); - return args.iterator(); - } + @Override + public Iterator iterator() { + final List args = new ArrayList<>(); + args.add(expr); + return args.iterator(); + } - @Override - public T accept(ExprVisitor visitor, V value) throws E { - return visitor.visitUnknown(this, value); + @Override + public T accept(ExprVisitor visitor, V value) throws E { + return visitor.visitUnknown(this, value); + } + + @Override + public boolean canDrop(RangeExprEvaluator evaluator) { + Statistics exprStat = expr.accept(evaluator, null); + if (isNullOrEmpty(exprStat)) { + return false; } + + return predicate.test(exprStat, evaluator); } /** * IS NULL predicate. 
*/ - public static class IsNullPredicate extends ParquetIsPredicate { - private final boolean isArray; - - public IsNullPredicate(LogicalExpression expr) { - super(expr); - this.isArray = isArray(expr); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - - // for arrays we are not able to define exact number of nulls - // [1,2,3] vs [1,2] -> in second case 3 is absent and thus it's null but statistics shows no nulls - if (isArray) { - return false; - } - - Statistics exprStat = expr.accept(evaluator, null); - - if (!ParquetPredicatesHelper.hasStats(exprStat)) { + private static > LogicalExpression createIsNullPredicate(LogicalExpression expr) { + return new ParquetIsPredicates(expr, + //if there are no nulls -> canDrop + (exprStat, evaluator) -> hasNoNulls(exprStat)) { + private final boolean isArray = isArray(expr); + + private boolean isArray(LogicalExpression expression) { + if (expression instanceof TypedFieldExpr) { + TypedFieldExpr typedFieldExpr = (TypedFieldExpr) expression; + SchemaPath schemaPath = typedFieldExpr.getPath(); + return schemaPath.isArray(); + } return false; } - //if there are no nulls -> canDrop - if (!ParquetPredicatesHelper.hasNulls(exprStat)) { - return true; - } else { - return false; + @Override + public boolean canDrop(RangeExprEvaluator evaluator) { + // for arrays we are not able to define exact number of nulls + // [1,2,3] vs [1,2] -> in second case 3 is absent and thus it's null but statistics shows no nulls + return !isArray && super.canDrop(evaluator); } - } - - private boolean isArray(LogicalExpression expression) { - if (expression instanceof TypedFieldExpr) { - TypedFieldExpr typedFieldExpr = (TypedFieldExpr) expression; - SchemaPath schemaPath = typedFieldExpr.getPath(); - return schemaPath.isArray(); - } - return false; - } - + }; } /** * IS NOT NULL predicate. 
*/ - public static class IsNotNullPredicate extends ParquetIsPredicate { - public IsNotNullPredicate(LogicalExpression expr) { - super(expr); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics exprStat = expr.accept(evaluator, null); - - if (!ParquetPredicatesHelper.hasStats(exprStat)) { - return false; - } - - //if there are all nulls -> canDrop - if (ParquetPredicatesHelper.isAllNulls(exprStat, evaluator.getRowCount())) { - return true; - } else { - return false; - } - } + private static > LogicalExpression createIsNotNullPredicate(LogicalExpression expr) { + return new ParquetIsPredicates(expr, + //if there are all nulls -> canDrop + (exprStat, evaluator) -> isAllNulls(exprStat, evaluator.getRowCount()) + ); } /** * IS TRUE predicate. */ - public static class IsTruePredicate extends ParquetIsPredicate { - public IsTruePredicate(LogicalExpression expr) { - super(expr); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics exprStat = expr.accept(evaluator, null); - - if (!ParquetPredicatesHelper.hasStats(exprStat)) { - return false; - } - - //if max value is not true or if there are all nulls -> canDrop - if (exprStat.genericGetMax().compareTo(true) != 0 || - ParquetPredicatesHelper.isAllNulls(exprStat, evaluator.getRowCount())) { - return true; - } else { - return false; - } - } + private static LogicalExpression createIsTruePredicate(LogicalExpression expr) { + return new ParquetIsPredicates(expr, + //if max value is not true or if there are all nulls -> canDrop + (exprStat, evaluator) -> !exprStat.genericGetMax().equals(Boolean.TRUE) || isAllNulls(exprStat, evaluator.getRowCount()) + ); } /** * IS FALSE predicate. 
*/ - public static class IsFalsePredicate extends ParquetIsPredicate { - public IsFalsePredicate(LogicalExpression expr) { - super(expr); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics exprStat = expr.accept(evaluator, null); - - if (!ParquetPredicatesHelper.hasStats(exprStat)) { - return false; - } - - //if min value is not false or if there are all nulls -> canDrop - if (exprStat.genericGetMin().compareTo(false) != 0 || - ParquetPredicatesHelper.isAllNulls(exprStat, evaluator.getRowCount())) { - return true; - } else { - return false; - } - } + private static LogicalExpression createIsFalsePredicate(LogicalExpression expr) { + return new ParquetIsPredicates(expr, + //if min value is not false or if there are all nulls -> canDrop + (exprStat, evaluator) -> !exprStat.genericGetMin().equals(Boolean.FALSE) || isAllNulls(exprStat, evaluator.getRowCount()) + ); } /** * IS NOT TRUE predicate. */ - public static class IsNotTruePredicate extends ParquetIsPredicate { - public IsNotTruePredicate(LogicalExpression expr) { - super(expr); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics exprStat = expr.accept(evaluator, null); - - if (!ParquetPredicatesHelper.hasStats(exprStat)) { - return false; - } - - //if min value is not false or if there are no nulls -> canDrop - if (exprStat.genericGetMin().compareTo(false) != 0 && !ParquetPredicatesHelper.hasNulls(exprStat)) { - return true; - } else { - return false; - } - } + private static LogicalExpression createIsNotTruePredicate(LogicalExpression expr) { + return new ParquetIsPredicates(expr, + //if min value is not false or if there are no nulls -> canDrop + (exprStat, evaluator) -> !exprStat.genericGetMin().equals(Boolean.FALSE) && hasNoNulls(exprStat) + ); } /** * IS NOT FALSE predicate. 
*/ - public static class IsNotFalsePredicate extends ParquetIsPredicate { - public IsNotFalsePredicate(LogicalExpression expr) { - super(expr); - } - - @Override - public boolean canDrop(RangeExprEvaluator evaluator) { - Statistics exprStat = expr.accept(evaluator, null); - - if (!ParquetPredicatesHelper.hasStats(exprStat)) { - return false; - } + private static LogicalExpression createIsNotFalsePredicate(LogicalExpression expr) { + return new ParquetIsPredicates(expr, + //if max value is not true or if there are no nulls -> canDrop + (exprStat, evaluator) -> !exprStat.genericGetMax().equals(Boolean.TRUE) && hasNoNulls(exprStat) + ); + } - //if max value is not true or if there are no nulls -> canDrop - if (exprStat.genericGetMax().compareTo(true) != 0 && !ParquetPredicatesHelper.hasNulls(exprStat)) { - return true; - } else { - return false; - } + public static > LogicalExpression createIsPredicate(String function, LogicalExpression expr) { + switch (function) { + case FunctionGenerationHelper.IS_NULL: + return ParquetIsPredicates.createIsNullPredicate(expr); + case FunctionGenerationHelper.IS_NOT_NULL: + return ParquetIsPredicates.createIsNotNullPredicate(expr); + case FunctionGenerationHelper.IS_TRUE: + return createIsTruePredicate(expr); + case FunctionGenerationHelper.IS_NOT_TRUE: + return createIsNotTruePredicate(expr); + case FunctionGenerationHelper.IS_FALSE: + return createIsFalsePredicate(expr); + case FunctionGenerationHelper.IS_NOT_FALSE: + return createIsNotFalsePredicate(expr); + default: + logger.warn("Unhandled IS function. 
Function name: {}", function); + return null; } } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java index e83d393..7ff1036 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java @@ -22,15 +22,16 @@ import org.apache.parquet.column.statistics.Statistics; /** * Parquet predicates class helper for filter pushdown. */ -@SuppressWarnings("rawtypes") -public class ParquetPredicatesHelper { +class ParquetPredicatesHelper { + private ParquetPredicatesHelper() { + } /** * @param stat statistics object * @return true if the input stat object has valid statistics; false otherwise */ - public static boolean hasStats(Statistics stat) { - return stat != null && !stat.isEmpty(); + static boolean isNullOrEmpty(Statistics stat) { + return stat == null || stat.isEmpty(); } /** @@ -41,7 +42,7 @@ public class ParquetPredicatesHelper { * @return True if all rows are null in the parquet file * False if at least one row is not null. */ - public static boolean isAllNulls(Statistics stat, long rowCount) { + static boolean isAllNulls(Statistics stat, long rowCount) { return stat.getNumNulls() == rowCount; } @@ -52,8 +53,8 @@ public class ParquetPredicatesHelper { * @return True if the parquet file has nulls * False if the parquet file hasn't nulls. 
*/ - public static boolean hasNulls(Statistics stat) { - return stat.getNumNulls() > 0; + static boolean hasNoNulls(Statistics stat) { + return stat.getNumNulls() == 0; } } diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java index d2fa0cf..f127f0b 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java @@ -45,6 +45,8 @@ import org.apache.parquet.column.statistics.FloatStatistics; import org.apache.parquet.column.statistics.IntStatistics; import org.apache.parquet.column.statistics.LongStatistics; import org.apache.parquet.column.statistics.Statistics; +import org.apache.parquet.schema.PrimitiveType; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,13 +55,13 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; -public class RangeExprEvaluator extends AbstractExprVisitor { +public class RangeExprEvaluator> extends AbstractExprVisitor, Void, RuntimeException> { static final Logger logger = LoggerFactory.getLogger(RangeExprEvaluator.class); - private final Map columnStatMap; + private final Map> columnStatMap; private final long rowCount; - public RangeExprEvaluator(final Map columnStatMap, long rowCount) { + public RangeExprEvaluator(final Map> columnStatMap, long rowCount) { this.columnStatMap = columnStatMap; this.rowCount = rowCount; } @@ -69,70 +71,71 @@ public class RangeExprEvaluator extends AbstractExprVisitor visitUnknown(LogicalExpression e, Void value) throws RuntimeException { // do nothing for the unknown expression return null; } + @SuppressWarnings("unchecked") @Override - public Statistics visitTypedFieldExpr(TypedFieldExpr typedFieldExpr, Void value) throws RuntimeException { - final ColumnStatistics columnStatistics = 
columnStatMap.get(typedFieldExpr.getPath()); + public Statistics<T> visitTypedFieldExpr(TypedFieldExpr typedFieldExpr, Void value) throws RuntimeException { + final ColumnStatistics<T> columnStatistics = columnStatMap.get(typedFieldExpr.getPath()); if (columnStatistics != null) { return columnStatistics.getStatistics(); } else if (typedFieldExpr.getMajorType().equals(Types.OPTIONAL_INT)) { // field does not exist. - IntStatistics intStatistics = new IntStatistics(); - intStatistics.setNumNulls(rowCount); // all values are nulls - return intStatistics; + Statistics statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.INT32); + statistics.setNumNulls(rowCount); // all values are nulls + return statistics; } return null; } @Override - public Statistics visitIntConstant(ValueExpressions.IntExpression expr, Void value) throws RuntimeException { + public Statistics<T> visitIntConstant(ValueExpressions.IntExpression expr, Void value) throws RuntimeException { return getStatistics(expr.getInt()); } @Override - public Statistics visitBooleanConstant(ValueExpressions.BooleanExpression expr, Void value) throws RuntimeException { + public Statistics<T> visitBooleanConstant(ValueExpressions.BooleanExpression expr, Void value) throws RuntimeException { return getStatistics(expr.getBoolean()); } @Override - public Statistics visitLongConstant(ValueExpressions.LongExpression expr, Void value) throws RuntimeException { + public Statistics<T> visitLongConstant(ValueExpressions.LongExpression expr, Void value) throws RuntimeException { return getStatistics(expr.getLong()); } @Override - public Statistics visitFloatConstant(ValueExpressions.FloatExpression expr, Void value) throws RuntimeException { + public Statistics<T> visitFloatConstant(ValueExpressions.FloatExpression expr, Void value) throws RuntimeException { return getStatistics(expr.getFloat()); } @Override - public Statistics visitDoubleConstant(ValueExpressions.DoubleExpression expr, Void value) throws RuntimeException 
{ + public Statistics<T> visitDoubleConstant(ValueExpressions.DoubleExpression expr, Void value) throws RuntimeException { return getStatistics(expr.getDouble()); } @Override - public Statistics visitDateConstant(ValueExpressions.DateExpression expr, Void value) throws RuntimeException { + public Statistics<T> visitDateConstant(ValueExpressions.DateExpression expr, Void value) throws RuntimeException { long dateInMillis = expr.getDate(); return getStatistics(dateInMillis); } @Override - public Statistics visitTimeStampConstant(ValueExpressions.TimeStampExpression tsExpr, Void value) throws RuntimeException { + public Statistics<T> visitTimeStampConstant(ValueExpressions.TimeStampExpression tsExpr, Void value) throws RuntimeException { long tsInMillis = tsExpr.getTimeStamp(); return getStatistics(tsInMillis); } @Override - public Statistics visitTimeConstant(ValueExpressions.TimeExpression timeExpr, Void value) throws RuntimeException { + public Statistics<T> visitTimeConstant(ValueExpressions.TimeExpression timeExpr, Void value) throws RuntimeException { int milliSeconds = timeExpr.getTime(); return getStatistics(milliSeconds); } @Override - public Statistics visitFunctionHolderExpression(FunctionHolderExpression holderExpr, Void value) throws RuntimeException { + public Statistics<T> visitFunctionHolderExpression(FunctionHolderExpression holderExpr, Void value) throws RuntimeException { FuncHolder funcHolder = holderExpr.getHolder(); if (! 
(funcHolder instanceof DrillSimpleFuncHolder)) { @@ -151,57 +154,62 @@ public class RangeExprEvaluator extends AbstractExprVisitor getStatistics(int value) { return getStatistics(value, value); } - private IntStatistics getStatistics(int min, int max) { - final IntStatistics intStatistics = new IntStatistics(); - intStatistics.setMinMax(min, max); - return intStatistics; + @SuppressWarnings("unchecked") + private Statistics getStatistics(int min, int max) { + final Statistics statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.INT32); + ((IntStatistics)statistics).setMinMax(min, max); + return statistics; } - private BooleanStatistics getStatistics(boolean value) { + private Statistics getStatistics(boolean value) { return getStatistics(value, value); } - private BooleanStatistics getStatistics(boolean min, boolean max) { - final BooleanStatistics booleanStatistics = new BooleanStatistics(); - booleanStatistics.setMinMax(min, max); - return booleanStatistics; + @SuppressWarnings("unchecked") + private Statistics getStatistics(boolean min, boolean max) { + Statistics statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.BOOLEAN); + ((BooleanStatistics)statistics).setMinMax(min, max); + return statistics; } - private LongStatistics getStatistics(long value) { + private Statistics getStatistics(long value) { return getStatistics(value, value); } - private LongStatistics getStatistics(long min, long max) { - final LongStatistics longStatistics = new LongStatistics(); - longStatistics.setMinMax(min, max); - return longStatistics; + @SuppressWarnings("unchecked") + private Statistics getStatistics(long min, long max) { + final Statistics statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.INT64); + ((LongStatistics)statistics).setMinMax(min, max); + return statistics; } - private DoubleStatistics getStatistics(double value) { + private Statistics getStatistics(double value) { return 
getStatistics(value, value); } - private DoubleStatistics getStatistics(double min, double max) { - final DoubleStatistics doubleStatistics = new DoubleStatistics(); - doubleStatistics.setMinMax(min, max); - return doubleStatistics; + @SuppressWarnings("unchecked") + private Statistics getStatistics(double min, double max) { + final Statistics statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.DOUBLE); + ((DoubleStatistics)statistics).setMinMax(min, max); + return statistics; } - private FloatStatistics getStatistics(float value) { + private Statistics getStatistics(float value) { return getStatistics(value, value); } - private FloatStatistics getStatistics(float min, float max) { - final FloatStatistics floatStatistics = new FloatStatistics(); - floatStatistics.setMinMax(min, max); - return floatStatistics; + @SuppressWarnings("unchecked") + private Statistics getStatistics(float min, float max) { + final Statistics statistics = Statistics.getStatsBasedOnType(PrimitiveType.PrimitiveTypeName.FLOAT); + ((FloatStatistics)statistics).setMinMax(min, max); + return statistics; } - private Statistics evalCastFunc(FunctionHolderExpression holderExpr, Statistics input) { + private Statistics evalCastFunc(FunctionHolderExpression holderExpr, Statistics input) { try { DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder) holderExpr.getHolder(); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java index a8e101d..b062bcc 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java @@ -159,7 +159,7 @@ public class ParquetFilterBuilder extends AbstractExprVisitor value) { @@ -302,23 +283,7 @@ public class ParquetFilterBuilder extends AbstractExprVisitor> { + private 
final Statistics<T> statistics; private final TypeProtos.MajorType majorType; - public ColumnStatistics(final Statistics statistics, final TypeProtos.MajorType majorType) { + public ColumnStatistics(final Statistics<T> statistics, final TypeProtos.MajorType majorType) { this.statistics = statistics; this.majorType = majorType; } - public Statistics getStatistics() { + public Statistics<T> getStatistics() { return this.statistics; } -- To stop receiving notification emails like this one, please contact timothyfarkas@apache.org.