flink-issues mailing list archives

From GitBox <...@apache.org>
Subject [GitHub] [flink] godfreyhe commented on a change in pull request #8499: [FLINK-12575] [table-planner-blink] Introduce planner rules to remove redundant shuffle and collation
Date Fri, 24 May 2019 07:20:26 GMT
godfreyhe commented on a change in pull request #8499: [FLINK-12575] [table-planner-blink] Introduce planner rules to remove redundant shuffle and collation
URL: https://github.com/apache/flink/pull/8499#discussion_r287241795
 
 

 ##########
 File path: flink-table/flink-table-planner-blink/src/main/scala/org/apache/flink/table/plan/nodes/physical/batch/BatchExecHashAggregate.scala
 ##########
 @@ -94,6 +99,53 @@ class BatchExecHashAggregate(
         isGlobal = true))
   }
 
+  override def satisfyTraits(requiredTraitSet: RelTraitSet): RelNode = {
+    val requiredDistribution = requiredTraitSet.getTrait(FlinkRelDistributionTraitDef.INSTANCE)
+    val canSatisfy = requiredDistribution.getType match {
+      case SINGLETON => grouping.length == 0
+      case HASH_DISTRIBUTED =>
+        val shuffleKeys = requiredDistribution.getKeys
+        val groupKeysList = ImmutableIntList.of(grouping.indices.toArray: _*)
+        if (requiredDistribution.requireStrict) {
+          shuffleKeys == groupKeysList
+        } else if (Util.startsWith(shuffleKeys, groupKeysList)) {
+          // If the required distribution is not strict, Hash[a] can satisfy Hash[a, b],
+          // so return true when shuffleKeys (Hash[a, b]) starts with groupKeys (Hash[a]).
+          true
+        } else {
+          // If partialKey is enabled, try to use partial key to satisfy the required distribution
+          val tableConfig = FlinkRelOptUtil.getTableConfigFromContext(this)
+          val partialKeyEnabled = tableConfig.getConf.getBoolean(
+            PlannerConfigOptions.SQL_OPTIMIZER_SHUFFLE_PARTIAL_KEY_ENABLED)
+          partialKeyEnabled && groupKeysList.containsAll(shuffleKeys)
+        }
+      case _ => false
+    }
+    if (!canSatisfy) {
+      return null
+    }
+
+    val inputRequiredDistribution = requiredDistribution.getType match {
+      case SINGLETON => requiredDistribution
+      case HASH_DISTRIBUTED =>
+        val shuffleKeys = requiredDistribution.getKeys
+        val groupKeysList = ImmutableIntList.of(grouping.indices.toArray: _*)
+        if (requiredDistribution.requireStrict) {
+          FlinkRelDistribution.hash(grouping, requireStrict = true)
+        } else if (Util.startsWith(shuffleKeys, groupKeysList)) {
+          // Hash[a] can satisfy Hash[a, b]
+          FlinkRelDistribution.hash(grouping, requireStrict = false)
+        } else {
+          // use partial key to satisfy the required distribution
+          FlinkRelDistribution.hash(shuffleKeys.map(grouping(_)).toArray, requireStrict = false)
 
 Review comment:
   `shuffleKeys.map(grouping(_))` converts aggregate output field indices to aggregate input field indices,
   e.g. aggregate input fields: a, b, c, d (input indices 0, 1, 2, 3)
   aggregate grouping: b, d (grouping = Array(1, 3))
   aggregate function: max(a)
   aggregate output fields: b, d, max(a)
   the shuffle keys are b, d on the output row, so shuffleKeys = Array(0, 1)
   mapping those through grouping gives the input hash keys 1, 3
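To make the index bookkeeping concrete, here is a minimal, self-contained Scala sketch; the values of grouping and shuffleKeys mirror the example above, while the object name and the println are purely illustrative and not PR code:

object ShuffleKeyMapping {
  def main(args: Array[String]): Unit = {
    // Aggregate input fields a, b, c, d have indices 0, 1, 2, 3.
    // Grouping on b and d means grouping holds input indices 1 and 3.
    val grouping: Array[Int] = Array(1, 3)

    // The aggregate output row is (b, d, max(a)). A required hash
    // distribution on (b, d) is expressed against that output row,
    // so its keys are the output indices 0 and 1.
    val shuffleKeys: Array[Int] = Array(0, 1)

    // Map output field indices back to input field indices, as the
    // PR does with shuffleKeys.map(grouping(_)).
    val inputHashKeys: Array[Int] = shuffleKeys.map(grouping(_))
    println(inputHashKeys.mkString(", ")) // prints: 1, 3
  }
}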
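The non-strict prefix rule in the hunk above ("Hash[a] can satisfy Hash[a, b]") can likewise be checked in isolation. In this hedged sketch, startsWith is a plain stand-in for Calcite's Util.startsWith, not the planner's actual code path:

object PrefixSatisfiability {
  // Stand-in for Calcite's Util.startsWith: true iff list begins
  // with all elements of prefix, in order.
  def startsWith(list: Seq[Int], prefix: Seq[Int]): Boolean =
    list.take(prefix.length) == prefix

  def main(args: Array[String]): Unit = {
    val groupKeys   = Seq(0)    // aggregate output is hashed on a
    val shuffleKeys = Seq(0, 1) // consumer requires Hash[a, b]

    // Rows with equal (a, b) necessarily have equal a, so data hashed
    // on a alone is already co-partitioned for (a, b): the existing
    // Hash[a] satisfies a non-strict Hash[a, b] requirement.
    assert(startsWith(shuffleKeys, groupKeys))

    // A strict requirement demands exactly the same key list, so the
    // prefix shortcut does not apply there.
    assert(shuffleKeys != groupKeys)
  }
}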

