spark-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Alexander Pivovarov <apivova...@gmail.com>
Subject Spark fails after 6000s because of akka
Date Sun, 20 Dec 2015 18:42:57 GMT
I run Spark 1.5.2 on YARN (EMR)

I noticed that my long running jobs always failed after 1h 40 min  (6000s)
with the exceptions below.

Then I found that Spark has spark.akka.heartbeat.pauses=6000s by default

I changed the settings to the following and it solve my issue.

"spark.akka.heartbeat.pauses": "60000s",
"spark.akka.heartbeat.interval": "10000s"



RROR ErrorMonitor - Uncaught fatal error from thread
[sparkDriver-akka.remote.default-remote-dispatcher-6] shutting down
ActorSystem [sparkDriver]
java.lang.OutOfMemoryError: Java heap space
	at com.google.protobuf.ByteString.copyFrom(ByteString.java:192)
	at com.google.protobuf.ByteString.copyFrom(ByteString.java:204)
	at akka.remote.MessageSerializer$.serialize(MessageSerializer.scala:36)
	at akka.remote.EndpointWriter$$anonfun$serializeMessage$1.apply(Endpoint.scala:843)
	at akka.remote.EndpointWriter$$anonfun$serializeMessage$1.apply(Endpoint.scala:843)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
	at akka.remote.EndpointWriter.serializeMessage(Endpoint.scala:842)
	at akka.remote.EndpointWriter.writeSend(Endpoint.scala:743)
	at akka.remote.EndpointWriter$$anonfun$2.applyOrElse(Endpoint.scala:718)
	at akka.actor.Actor$class.aroundReceive(Actor.scala:467)
	at akka.remote.EndpointActor.aroundReceive(Endpoint.scala:411)
	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
	at akka.actor.ActorCell.invoke(ActorCell.scala:487)
	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
	at akka.dispatch.Mailbox.run(Mailbox.scala:220)
	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
ERROR ActorSystemImpl - Uncaught fatal error from thread
[sparkDriver-akka.remote.default-remote-dispatcher-6] shutting down
ActorSystem [sparkDriver]
java.lang.OutOfMemoryError: Java heap space
	at com.google.protobuf.ByteString.copyFrom(ByteString.java:192)
	at com.google.protobuf.ByteString.copyFrom(ByteString.java:204)
	at akka.remote.MessageSerializer$.serialize(MessageSerializer.scala:36)
	at akka.remote.EndpointWriter$$anonfun$serializeMessage$1.apply(Endpoint.scala:843)
	at akka.remote.EndpointWriter$$anonfun$serializeMessage$1.apply(Endpoint.scala:843)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
	at akka.remote.EndpointWriter.serializeMessage(Endpoint.scala:842)
	at akka.remote.EndpointWriter.writeSend(Endpoint.scala:743)
	at akka.remote.EndpointWriter$$anonfun$2.applyOrElse(Endpoint.scala:718)
	at akka.actor.Actor$class.aroundReceive(Actor.scala:467)
	at akka.remote.EndpointActor.aroundReceive(Endpoint.scala:411)
	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
	at akka.actor.ActorCell.invoke(ActorCell.scala:487)
	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
	at akka.dispatch.Mailbox.run(Mailbox.scala:220)
	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
ERROR ActorSystemImpl - Uncaught fatal error from thread
[sparkDriver-akka.remote.default-remote-dispatcher-5] shutting down
ActorSystem [sparkDriver]
java.lang.OutOfMemoryError: Java heap space
	at java.util.Arrays.copyOf(Arrays.java:2271)
	at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:118)
	at java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
	at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:153)
	at java.io.ObjectOutputStream$BlockDataOutputStream.drain(ObjectOutputStream.java:1876)
	at java.io.ObjectOutputStream$BlockDataOutputStream.setBlockDataMode(ObjectOutputStream.java:1785)
	at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1188)
	at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347)
	at akka.serialization.JavaSerializer$$anonfun$toBinary$1.apply$mcV$sp(Serializer.scala:129)
	at akka.serialization.JavaSerializer$$anonfun$toBinary$1.apply(Serializer.scala:129)
	at akka.serialization.JavaSerializer$$anonfun$toBinary$1.apply(Serializer.scala:129)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
	at akka.serialization.JavaSerializer.toBinary(Serializer.scala:129)
	at akka.remote.MessageSerializer$.serialize(MessageSerializer.scala:36)
	at akka.remote.EndpointWriter$$anonfun$serializeMessage$1.apply(Endpoint.scala:843)
	at akka.remote.EndpointWriter$$anonfun$serializeMessage$1.apply(Endpoint.scala:843)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:57)
	at akka.remote.EndpointWriter.serializeMessage(Endpoint.scala:842)
	at akka.remote.EndpointWriter.writeSend(Endpoint.scala:743)
	at akka.remote.EndpointWriter$$anonfun$2.applyOrElse(Endpoint.scala:718)
	at akka.actor.Actor$class.aroundReceive(Actor.scala:467)
	at akka.remote.EndpointActor.aroundReceive(Endpoint.scala:411)
	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
	at akka.actor.ActorCell.invoke(ActorCell.scala:487)
	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
	at akka.dispatch.Mailbox.run(Mailbox.scala:220)
	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
	at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
ERROR ActorSystemImpl - Uncaught fatal error from thread
[sparkDriver-akka.remote.default-remote-dispatcher-6] shutting down
ActorSystem [sparkDriver]
java.lang.OutOfMemoryError: Java heap space
	at com.google.protobuf.AbstractMessageLite.toByteArray(AbstractMessageLite.java:62)
	at akka.remote.transport.AkkaPduProtobufCodec$.constructMessage(AkkaPduCodec.scala:138)
	at akka.remote.EndpointWriter.writeSend(Endpoint.scala:740)
	at akka.remote.EndpointWriter$$anonfun$2.applyOrElse(Endpoint.scala:718)
	at akka.actor.Actor$class.aroundReceive(Actor.scala:467)
	at akka.remote.EndpointActor.aroundReceive(Endpoint.scala:411)
	at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
	at akka.actor.ActorCell.invoke(ActorCell.scala:487)
	at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
	at akka.dispatch.Mailbox.run(Mailbox.scala:220)
	at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
	at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
	at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
	at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)

at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)

Mime
View raw message