Hi,
I wrote a small Spark application to generate some random data. It works fine when I use "local[n]", but when I use "mesos://..." the vals of the outer object that are referenced by the function I pass to RDD.foreach come out as zero on the executors.

import java.io._
import math.rint
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

object DataGen extends App {

  val nClusters = 10
  val nCols = 10000
  val nRows = 10000
  val rgen = new util.Random

  System.setProperty("spark.executor.uri", "hdfs://1b/spark/spark-0.8.0-incubating.tar.gz")
  System.setProperty("spark.mesos.coarse", "true")

  val sc = new SparkContext("mesos://10.0.1.128:5050", "Data Generator",
    "/home/yuzr/spark/spark-0.8.0-incubating",
    List("/home/yuzr/datagen/DataGen-assembly-0.1.jar"))

  val clusters = sc.parallelize(1 to nClusters)
  val nRowsInCluster = nRows / nClusters
  println("nRowsInCluster=" + nRowsInCluster)  // prints 1000 in the Spark driver

  clusters foreach { x => writePart(x, nRowsInCluster) }
  // clusters foreach writePart  <-- had this originally

  def writePart(nCluster: Int, nRowsInCluster: Int): Unit = {
    val partFile = "/tmp/y" + nCluster + ".txt"
    val partWriter = new java.io.PrintWriter(partFile)
    ...
    println("Cluster #" + nCluster)              // prints 1 to 10
    println("nRowsInCluster=" + nRowsInCluster)  // prints 0 ??
    ...
    partWriter.close()
  }
}
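
For what it's worth, one guess I have is that "extends App" is the culprit: vals in an App body are initialized by delayedInit, which only runs when main() is invoked on the driver, so when the closure is deserialized on a Mesos executor the object's fields could still be at their JVM defaults (0). Below is a sketch of the restructuring I am considering (untested on the cluster): move the body into an explicit main so that nRowsInCluster is a plain local that the closure captures by value.

import org.apache.spark.SparkContext

object DataGen {

  def main(args: Array[String]): Unit = {
    val nClusters = 10
    val nRows = 10000
    // A local of main, not a field set by delayedInit, so it is
    // captured by value when the closure is serialized.
    val nRowsInCluster = nRows / nClusters

    System.setProperty("spark.executor.uri", "hdfs://1b/spark/spark-0.8.0-incubating.tar.gz")
    System.setProperty("spark.mesos.coarse", "true")
    val sc = new SparkContext("mesos://10.0.1.128:5050", "Data Generator",
      "/home/yuzr/spark/spark-0.8.0-incubating",
      List("/home/yuzr/datagen/DataGen-assembly-0.1.jar"))

    sc.parallelize(1 to nClusters) foreach { x => writePart(x, nRowsInCluster) }
    sc.stop()
  }

  // Same writePart as above, minus the elided row-generation code;
  // it runs on the executors and should now see nRowsInCluster = 1000.
  def writePart(nCluster: Int, nRowsInCluster: Int): Unit = {
    val partWriter = new java.io.PrintWriter("/tmp/y" + nCluster + ".txt")
    println("Cluster #" + nCluster)
    println("nRowsInCluster=" + nRowsInCluster)
    partWriter.close()
  }
}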


Is that the likely cause, or am I doing something else wrong?

Mohit.