spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Gourav Sengupta <gourav.sengu...@gmail.com>
Subject closure issues: wholeTextFiles
Date Tue, 27 Mar 2018 22:58:39 GMT
Hi,

I can understand facing closure issues while executing this code:

========================================================================================

package spark

//this package is about understanding closures as mentioned in:
http://spark.apache.org/docs/latest/rdd-programming-guide.html#understanding-closures-


import org.apache.spark.sql.SparkSession


object understandClosures extends  App {

  var counter = 0

  //the error thrown is removed in case we use local[*] as master
  val sparkSession = SparkSession
    .builder
    .master("spark://Gouravs-iMac:7077")
    //.master("local[*]")
    .appName("test")
    .getOrCreate()


  val valueRDD = sparkSession.sparkContext.parallelize(1 until 1000000)

  println(valueRDD.count())

  valueRDD.foreach(x => counter += x)

  //but even if we use the master as local[*] the total appears as
some random number as -1234435435
  println("the value is " + counter.toString())


  sparkSession.close()

}


========================================================================================


Can anyone explain me why am I facing closure issue while executing this
code?

package spark

import org.apache.spark.sql.SparkSession
// Import this utility for working with URLs. Unlike Java the
semicolon ';' is not required.
import java.net.URL
// Use {...} to provide a list of things to import, when you don't
want to import everything
// in a package and you don't want to write a separate line for each type.
import java.io.{File, BufferedInputStream, BufferedOutputStream,
FileOutputStream}




object justenoughTest extends App {

  val sparkSession = SparkSession
    .builder
    .master("spark://Gouravs-iMac:7077")
    //.master("local[*]")
    .appName("test")
    .getOrCreate()

  println(sparkSession.version)

  println("Spark version:      " + sparkSession.version)
  println("Spark master:       " + sparkSession.sparkContext.master)
  println("Running 'locally'?: " + sparkSession.sparkContext.isLocal)

  val pathSeparator = File.separator

  // The target directory, which we'll now create, if necessary.
  val shakespeare = new
File("/Users/gouravsengupta/Development/data/shakespeare")

  println(sparkSession.version)

  //val fileContents =
sparkSession.read.text("file:///Users/gouravsengupta/Development/data/shakespeare/")
  //val fileContents = sparkSession.read.text(shakespeare.toString)
  val fileContents =
sparkSession.sparkContext.wholeTextFiles(shakespeare.toString)
  println(fileContents.count())

  //I am facing  the closure issues below

  val testThis = fileContents.foreach(x => "printing value" + x._1)


sparkSession.close()

}


Regards,
Gourav Sengupta

Mime
View raw message