carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kumarvisha...@apache.org
Subject carbondata git commit: [CARBONDATA-1887] block pruning not happening in carbon for ShortType and SmallIntType columns
Date Thu, 14 Dec 2017 13:48:18 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 714d05c12 -> 1675be671


[CARBONDATA-1887] block pruning not happening in carbon for ShortType and SmallIntType columns

Block pruning does not work for filters on short and smallint type columns. In PushedFilters
the predicate is wrapped in a CastExpr, as below. PushedFilters: [IsNotNull(dob), CastExpr((cast(dob#230 as Integer)
= 12))]. This is why the filter is getting pushed to Spark. CastExpressionOptimization should handle
removing the cast from the expression for smallint and short type columns.

This closes #1647


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1675be67
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1675be67
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1675be67

Branch: refs/heads/master
Commit: 1675be671c326d477d43d501466e1281b33ad7af
Parents: 714d05c
Author: mohammadshahidkhan <mohdshahidkhan1987@gmail.com>
Authored: Tue Dec 12 18:09:51 2017 +0530
Committer: kumarvishal <kumarvishal.1802@gmail.com>
Committed: Thu Dec 14 19:17:42 2017 +0530

----------------------------------------------------------------------
 .../execution/CastExpressionOptimization.scala  | 303 ++++++++++---------
 1 file changed, 164 insertions(+), 139 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/1675be67/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
index b5285f8..046e17d 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
@@ -26,8 +26,9 @@ import scala.collection.JavaConverters._
 import org.apache.spark.sql.catalyst.expressions.{Attribute, EmptyRow, EqualTo, Expression,
GreaterThan, GreaterThanOrEqual, In, LessThan, LessThanOrEqual, Literal, Not}
 import org.apache.spark.sql.CastExpr
 import org.apache.spark.sql.sources
-import org.apache.spark.sql.types.{DoubleType, IntegerType, StringType, TimestampType}
+import org.apache.spark.sql.types.{DoubleType, IntegerType, ShortType, StringType, TimestampType}
 import org.apache.spark.sql.CarbonExpressions.{MatchCast => Cast}
+import org.apache.spark.sql.sources.Filter
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
@@ -97,6 +98,20 @@ object CastExpressionOptimization {
     }
   }
 
+  def typeCastIntToShortList(list: Seq[Expression]): Seq[Expression] = {
+    val tempList = new util.ArrayList[Expression]()
+    list.foreach { value =>
+      val output = value.asInstanceOf[Integer].toShort
+      if (value.asInstanceOf[Integer].toShort.equals(output)) {
+        tempList.add(output.asInstanceOf[Expression])
+      }
+    }
+    if (tempList.size() != list.size) {
+      list
+    } else {
+      tempList.asScala
+    }
+  }
   /**
    * This routines tries to apply rules on Cast Filter Predicates and if the rules applied
and the
    * values can be toss back to native datatypes the cast is removed. Current two rules are
applied
@@ -116,73 +131,41 @@ object CastExpressionOptimization {
       case c@EqualTo(Cast(a: Attribute, _), Literal(v, t)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.EqualTo(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.EqualTo(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@EqualTo(Literal(v, t), Cast(a: Attribute, _)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.EqualTo(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.EqualTo(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@Not(EqualTo(Cast(a: Attribute, _), Literal(v, t))) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.Not(sources.EqualTo(a.name, value)))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.Not(sources.EqualTo(a.name, value)))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@Not(EqualTo(Literal(v, t), Cast(a: Attribute, _))) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.Not(sources.EqualTo(a.name, value)))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.Not(sources.EqualTo(a.name, value)))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@Not(In(Cast(a: Attribute, _), list)) =>
@@ -203,6 +186,14 @@ object CastExpressionOptimization {
             } else {
               Some(CastExpr(c))
             }
+          case i: ShortType if list.head.dataType.sameType(IntegerType) =>
+            val value = typeCastIntToShortList(list)
+            if (!value.equals(list)) {
+              val hSet = value.map(e => e.eval(EmptyRow))
+              Some(sources.Not(sources.In(a.name, hSet.toArray)))
+            } else {
+              Some(CastExpr(c))
+            }
           case _ => Some(CastExpr(c))
         }
       case c@In(Cast(a: Attribute, _), list) =>
@@ -223,114 +214,74 @@ object CastExpressionOptimization {
             } else {
               Some(CastExpr(c))
             }
+          case s: ShortType if list.head.dataType.sameType(IntegerType) =>
+            val value = typeCastIntToShortList(list)
+            if (!value.equals(list)) {
+              val hSet = value.map(e => e.eval(EmptyRow))
+              Some(sources.In(a.name, hSet.toArray))
+            } else {
+              Some(CastExpr(c))
+            }
           case _ => Some(CastExpr(c))
         }
       case c@GreaterThan(Cast(a: Attribute, _), Literal(v, t)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.GreaterThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.GreaterThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@GreaterThan(Literal(v, t), Cast(a: Attribute, _)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.LessThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.LessThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@LessThan(Cast(a: Attribute, _), Literal(v, t)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.LessThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.LessThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@LessThan(Literal(v, t), Cast(a: Attribute, _)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.GreaterThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.GreaterThan(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@GreaterThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.GreaterThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.GreaterThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@GreaterThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.LessThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.LessThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@LessThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
@@ -343,32 +294,106 @@ object CastExpressionOptimization {
               Some(CastExpr(c))
             }
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.LessThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
       case c@LessThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
         a.dataType match {
           case ts: TimestampType if t.sameType(StringType) =>
-            val value = typeCastStringToLong(v)
-            if (!value.equals(v)) {
-              Some(sources.GreaterThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForTimeStamp(v, c)
           case i: IntegerType if t.sameType(DoubleType) =>
-            val value = v.asInstanceOf[Double].toInt
-            if (value.toDouble.equals(v)) {
-              Some(sources.GreaterThanOrEqual(a.name, value))
-            } else {
-              Some(CastExpr(c))
-            }
+            updateFilterForInt(v, c)
+          case s: ShortType if t.sameType(IntegerType) =>
+            updateFilterForShort(v, c)
           case _ => Some(CastExpr(c))
         }
     }
   }
+
+  /**
+   * the method removes the cast for short type columns
+   * @param actualValue
+   * @param exp
+   * @return
+   */
+  def updateFilterForShort(actualValue: Any, exp: Expression): Option[sources.Filter] = {
+    val newValue = actualValue.asInstanceOf[Integer].toShort
+    if (newValue.toInt.equals(actualValue)) {
+      updateFilterBasedOnFilterType(exp, newValue)
+    } else {
+      Some(CastExpr(exp))
+    }
+  }
+
+  /**
+   * the method removes the cast for int type columns
+   *
+   * @param actualValue
+   * @param exp
+   * @return
+   */
+  def updateFilterForInt(actualValue: Any, exp: Expression): Option[sources.Filter] = {
+    val newValue = actualValue.asInstanceOf[Double].toInt
+    if (newValue.toDouble.equals(actualValue)) {
+      updateFilterBasedOnFilterType(exp, newValue)
+    } else {
+      Some(CastExpr(exp))
+    }
+  }
+
+  /**
+   * the method removes the cast for timestamp type columns
+   *
+   * @param actualValue
+   * @param exp
+   * @return
+   */
+  def updateFilterForTimeStamp(actualValue: Any, exp: Expression): Option[sources.Filter]
= {
+    val newValue = typeCastStringToLong(actualValue)
+    if (!newValue.equals(actualValue)) {
+      updateFilterBasedOnFilterType(exp, newValue)
+    } else {
+      Some(CastExpr(exp))
+    }
+  }
+
+  /**
+   * the method removes the cast for the respective filter type
+   *
+   * @param exp
+   * @param newValue
+   * @return
+   */
+  def updateFilterBasedOnFilterType(exp: Expression,
+      newValue: Any): Some[Filter with Product with Serializable] = {
+    exp match {
+      case c@EqualTo(Cast(a: Attribute, _), Literal(v, t)) =>
+        Some(sources.EqualTo(a.name, newValue))
+      case c@EqualTo(Literal(v, t), Cast(a: Attribute, _)) =>
+        Some(sources.EqualTo(a.name, newValue))
+      case c@Not(EqualTo(Cast(a: Attribute, _), Literal(v, t))) =>
+        Some(sources.Not(sources.EqualTo(a.name, newValue)))
+      case c@Not(EqualTo(Literal(v, t), Cast(a: Attribute, _))) =>
+        Some(sources.Not(sources.EqualTo(a.name, newValue)))
+      case GreaterThan(Cast(a: Attribute, _), Literal(v, t)) =>
+        Some(sources.GreaterThan(a.name, newValue))
+      case GreaterThan(Literal(v, t), Cast(a: Attribute, _)) =>
+        Some(sources.LessThan(a.name, newValue))
+      case c@LessThan(Cast(a: Attribute, _), Literal(v, t)) =>
+        Some(sources.LessThan(a.name, newValue))
+      case c@LessThan(Literal(v, t), Cast(a: Attribute, _)) =>
+        Some(sources.GreaterThan(a.name, newValue))
+      case c@GreaterThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
+        Some(sources.GreaterThanOrEqual(a.name, newValue))
+      case c@GreaterThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
+        Some(sources.LessThanOrEqual(a.name, newValue))
+      case c@LessThanOrEqual(Cast(a: Attribute, _), Literal(v, t)) =>
+        Some(sources.LessThanOrEqual(a.name, newValue))
+      case c@LessThanOrEqual(Literal(v, t), Cast(a: Attribute, _)) =>
+        Some(sources.GreaterThanOrEqual(a.name, newValue))
+      case _ => Some(CastExpr(exp))
+    }
+  }
 }


Mime
View raw message