spark-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Yin Huai (JIRA)" <j...@apache.org>
Subject [jira] [Created] (SPARK-6247) Certain self joins cannot be analyzed
Date Tue, 10 Mar 2015 17:13:38 GMT
Yin Huai created SPARK-6247:
-------------------------------

             Summary: Certain self joins cannot be analyzed
                 Key: SPARK-6247
                 URL: https://issues.apache.org/jira/browse/SPARK-6247
             Project: Spark
          Issue Type: Bug
          Components: SQL
            Reporter: Yin Huai


When you run the following code:
{code}
val df =
   (1 to 10)
      .map(i => (i, i.toDouble, i.toLong, i.toString, i.toString))
      .toDF("intCol", "doubleCol", "longCol", "stringCol1", "stringCol2")

df.registerTempTable("test")

sql(
  """
  |SELECT x.stringCol2, avg(y.intCol), sum(x.doubleCol)
  |FROM test x JOIN test y ON (x.stringCol1 = y.stringCol1)
  |GROUP BY x.stringCol2
  """.stripMargin).explain()
{code}

the following exception is thrown:
{code}
[info]   java.util.NoSuchElementException: next on empty iterator
[info]   at scala.collection.Iterator$$anon$2.next(Iterator.scala:39)
[info]   at scala.collection.Iterator$$anon$2.next(Iterator.scala:37)
[info]   at scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:64)
[info]   at scala.collection.IterableLike$class.head(IterableLike.scala:91)
[info]   at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$head(ArrayBuffer.scala:47)
[info]   at scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:120)
[info]   at scala.collection.mutable.ArrayBuffer.head(ArrayBuffer.scala:47)
[info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:247)
[info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:197)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250)
[info]   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:50)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:249)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:263)
[info]   at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
[info]   at scala.collection.Iterator$class.foreach(Iterator.scala:727)
[info]   at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
[info]   at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
[info]   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
[info]   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
[info]   at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
[info]   at scala.collection.AbstractIterator.to(Iterator.scala:1157)
[info]   at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
[info]   at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
[info]   at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
[info]   at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:292)
[info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:247)
[info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:197)
[info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:196)
[info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
[info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
[info]   at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
[info]   at scala.collection.immutable.List.foldLeft(List.scala:84)
[info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
[info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
[info]   at scala.collection.immutable.List.foreach(List.scala:318)
[info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
[info]   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1071)
[info]   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1071)
[info]   at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1069)
[info]   at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
[info]   at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
[info]   at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:915)
[info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply$mcV$sp(SparkSqlSerializer2Suite.scala:66)
[info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply(SparkSqlSerializer2Suite.scala:48)
[info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply(SparkSqlSerializer2Suite.scala:48)
[info]   at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
[info]   at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
[info]   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
[info]   at org.scalatest.Transformer.apply(Transformer.scala:22)
[info]   at org.scalatest.Transformer.apply(Transformer.scala:20)
[info]   at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
[info]   at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
[info]   at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
[info]   at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
[info]   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
[info]   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
[info]   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
[info]   at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
[info]   at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
[info]   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
[info]   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
[info]   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
[info]   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
[info]   at scala.collection.immutable.List.foreach(List.scala:318)
[info]   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
[info]   at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
[info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
[info]   at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
[info]   at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
[info]   at org.scalatest.Suite$class.run(Suite.scala:1424)
[info]   at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
[info]   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
[info]   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
[info]   at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
[info]   at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
[info]   at org.scalatest.FunSuite.run(FunSuite.scala:1555)
[info]   at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:462)
[info]   at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:671)
[info]   at sbt.ForkMain$Run$2.call(ForkMain.java:294)
[info]   at sbt.ForkMain$Run$2.call(ForkMain.java:284)
[info]   at java.util.concurrent.FutureTask.run(FutureTask.java:262)
[info]   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
[info]   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
[info]   at java.lang.Thread.run(Thread.java:745)
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org


Mime
View raw message