Ryan Williams created SPARK-22328:
-------------------------------------
Summary: ClosureCleaner misses referenced superclass fields, gives them null
values
Key: SPARK-22328
URL: https://issues.apache.org/jira/browse/SPARK-22328
Project: Spark
Issue Type: Bug
Components: Spark Core
Affects Versions: 2.2.0
Reporter: Ryan Williams
[Runnable repro here|https://github.com/ryan-williams/spark-bugs/tree/closure]:
Superclass with some fields:
{code}
abstract class App extends Serializable {
// SparkContext stub
@transient lazy val sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local[4]").set("spark.ui.showConsoleProgress",
"false"))
// These fields get missed by the ClosureCleaner in some situations
val n1 = 111
val s1 = "aaa"
// Simple scaffolding to exercise passing a closure to RDD.foreach in subclasses
def rdd = sc.parallelize(1 to 1)
def run(name: String): Unit = {
print(s"$name:\t")
body()
sc.stop()
}
def body(): Unit
}
{code}
Running a simple Spark job with various instantiations of this class:
{code}
object Main {
/** [[App]]s generated this way will not correctly detect references to [[App.n1]] in Spark
closures */
val fn = () ⇒ new App {
val n2 = 222
val s2 = "bbb"
def body(): Unit = rdd.foreach { _ ⇒ println(s"$n1, $n2, $s1, $s2") }
}
/** Doesn't serialize closures correctly */
val app1 = fn()
/** Works fine */
val app2 =
new App {
val n2 = 222
val s2 = "bbb"
def body(): Unit = rdd.foreach { _ ⇒ println(s"$n1, $n2, $s1, $s2") }
}
/** [[App]]s created this way also work fine */
def makeApp(): App =
new App {
val n2 = 222
val s2 = "bbb"
def body(): Unit = rdd.foreach { _ ⇒ println(s"$n1, $n2, $s1, $s2") }
}
val app3 = makeApp() // ok
val fn2 = () ⇒ makeApp() // ok
def main(args: Array[String]): Unit = {
fn().run("fn") // bad: n1 → 0, s1 → null
app1.run("app1") // bad: n1 → 0, s1 → null
app2.run("app2") // ok
app3.run("app3") // ok
fn2().run("fn2") // ok
}
}
{code}
Build + Run:
{code}
$ sbt run
…
fn: 0, 222, null, bbb
app1: 0, 222, null, bbb
app2: 111, 222, aaa, bbb
app3: 111, 222, aaa, bbb
fn2: 111, 222, aaa, bbb
{code}
The first two versions have {{0}} and {{null}}, resp., for the {{A.n1}} and {{A.s1}} fields.
Something about this syntax causes the problem:
{code}
() => new App { … }
{code}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org
|