spark-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Aviral Agarwal <aviral12...@gmail.com>
Subject Fwd: [SparkSQL] Project using NamedExpression
Date Wed, 22 Mar 2017 03:51:25 GMT
Hi guys,

I want to transform Rows using NamedExpressions.

Below is the code snippet that I am using:


/**
 * Projects every row of `dataFrame` through a list of SQL expression strings
 * and returns the projected rows as an RDD of `UnsafeRow`.
 *
 * Steps, as written:
 *  1. Each string is parsed with `SqlParser.parseExpression`, wrapped in a
 *     `Column`, and turned into a named expression via `.named`.
 *  2. The DataFrame's `logicalPlan.output` is taken as the input schema.
 *  3. For each partition an `UnsafeProjection` is created from (1) and (2)
 *     and applied to every row.
 *
 * NOTE(review): per the stack trace reported with this snippet, the call to
 * `UnsafeProjection.create` fails with
 * `UnsupportedOperationException: Cannot evaluate expression: 'a`, thrown from
 * `UnresolvedAttribute.genCode`. Nothing in this method resolves the parsed
 * expressions, so they presumably need to be resolved against the DataFrame's
 * plan before a projection can be generated -- TODO confirm.
 *
 * @param dataFrame         source of the rows and of the input attributes
 * @param selectExpressions SQL expression strings, one per output column
 * @return the RDD produced by `mapPartitions` over the projected rows
 */
def apply(dataFrame: DataFrame, selectExpressions:
java.util.List[String]): RDD[UnsafeRow] = {

    // Parse each expression string into a named expression.
    // NOTE(review): calling `.map` on a java.util.List relies on a
    // Java-to-Scala collection conversion being in scope; none is visible in
    // this snippet -- confirm which one is imported.
    // NOTE(review): the reported stack trace shows an UnresolvedAttribute ('a)
    // reaching code generation, so these expressions are still unresolved
    // when they are used below.
    val exprArray = selectExpressions.map(s =>
      Column(SqlParser.parseExpression(s)).named
    )

    // Output attributes of the DataFrame's logical plan, used as the
    // projection's input schema.
    val inputSchema = dataFrame.logicalPlan.output

    val transformedRDD = dataFrame.mapPartitions(
      iter => {
        // One projection is built per partition, inside the closure.
        // This is the call that appears in the reported stack trace
        // (UnsafeProjection.create -> ... -> UnresolvedAttribute.genCode).
        val project = UnsafeProjection.create(exprArray,inputSchema)
        iter.map{
          row =>
            // Rebuild each Row as an InternalRow from its raw field values.
            // NOTE(review): presumably the values are passed through without
            // conversion to Catalyst's internal representations -- verify
            // this is acceptable for the column types involved.
            project(InternalRow.fromSeq(row.toSeq))
        }
    })

    transformedRDD
  }


The problem is that the expression becomes unevaluable:

Caused by: java.lang.UnsupportedOperationException: Cannot evaluate
expression: 'a
        at org.apache.spark.sql.catalyst.expressions.Unevaluable$class.
genCode(Expression.scala:233)
        at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.g
enCode(unresolved.scala:53)
        at org.apache.spark.sql.catalyst.expressions.Expression$$anonfu
n$gen$2.apply(Expression.scala:106)
        at org.apache.spark.sql.catalyst.expressions.Expression$$anonfu
n$gen$2.apply(Expression.scala:102)
        at scala.Option.getOrElse(Option.scala:120)
        at org.apache.spark.sql.catalyst.expressions.Expression.gen(Exp
ression.scala:102)
        at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
text$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
        at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
text$$anonfun$generateExpressions$1.apply(CodeGenerator.scala:464)
        at scala.collection.TraversableLike$$anonfun$map$1.apply(
TraversableLike.scala:244)
        at scala.collection.TraversableLike$$anonfun$map$1.apply(
TraversableLike.scala:244)
        at scala.collection.mutable.ResizableArray$class.foreach(Resiza
bleArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.
scala:47)
        at scala.collection.TraversableLike$class.map(TraversableLike.
scala:244)
        at scala.collection.AbstractTraversable.map(Traversable.scala:105)
        at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenCon
text.generateExpressions(CodeGenerator.scala:464)
        at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.createCode(GenerateUnsafeProjection.scala:281)
        at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.create(GenerateUnsafeProjection.scala:324)
        at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.create(GenerateUnsafeProjection.scala:317)
        at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUn
safeProjection$.create(GenerateUnsafeProjection.scala:32)
        at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenera
tor.generate(CodeGenerator.scala:635)
        at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.
create(Projection.scala:125)
        at org.apache.spark.sql.catalyst.expressions.UnsafeProjection$.
create(Projection.scala:135)
        at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(
ScalaTransform.scala:31)
        at org.apache.spark.sql.ScalaTransform$$anonfun$3.apply(
ScalaTransform.scala:30)
        at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$
apply$20.apply(RDD.scala:710)
        at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$
apply$20.apply(RDD.scala:710)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsR
DD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.sca
la:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:89)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.
scala:214)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPool
Executor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoo
lExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)


This might be because the Expression is unresolved.

Any help would be appreciated.

Thanks and Regards,
Aviral Agarwal

Mime
View raw message