spark-dev mailing list archives

From Vadim Semenov <vadim.seme...@datadoghq.com>
Subject Re: RDD[internalRow] -> DataSet
Date Tue, 12 Dec 2017 15:55:18 GMT
Not possible through the public API, but you can add your own object to Spark's `org.apache.spark.sql` package in your project, which gives it access to the package-private methods:

package org.apache.spark.sql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.LogicalRDD
import org.apache.spark.sql.types.StructType

object DataFrameUtil {
  /**
    * Creates a DataFrame out of an RDD[InternalRow] that you can get
    * using `df.queryExecution.toRdd`.
    */
  def createFromInternalRows(sparkSession: SparkSession, schema: StructType,
      rdd: RDD[InternalRow]): DataFrame = {
    val logicalPlan = LogicalRDD(schema.toAttributes, rdd)(sparkSession)
    Dataset.ofRows(sparkSession, logicalPlan)
  }
}
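
A rough usage sketch (assuming Spark 2.x, an active SparkSession named `spark`, and illustrative variable names):

// Drop down to the RDD[InternalRow] backing a DataFrame and rebuild a
// DataFrame from it without going through Row encoding/decoding.
import org.apache.spark.sql.DataFrameUtil

val df = spark.range(10).toDF("id")

// queryExecution.toRdd exposes the underlying RDD[InternalRow]
val internalRdd = df.queryExecution.toRdd

// Rebuild a DataFrame from the internal rows and the original schema
val restored = DataFrameUtil.createFromInternalRows(spark, df.schema, internalRdd)
restored.show()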
