I'm running an ETL process that joins table1 with other tables (loaded from CSV files), one table at a time (for example table1 with table2, table1 with table3, and so on). Each join result is written into a PostgreSQL instance via JDBC.
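
For context, each iteration boils down to something like the sketch below. This is a simplified reconstruction: only sparkdf2database and the JDBC properties appear in my real script, while the connection URL, key column and join type are placeholders.

# Simplified sketch of one iteration (URL, key column and join type
# are placeholders, not the real values).
table1 = spark.read.csv("table1.csv", header=True, inferSchema=True)
table2 = spark.read.csv("table2.csv", header=True, inferSchema=True)

def sparkdf2database(df, dest, mode):
    # Write the DataFrame into PostgreSQL over JDBC.
    df.write.jdbc(
        url="jdbc:postgresql://localhost/mydb",  # placeholder
        table=dest,
        mode=mode,
        properties={
            "disableColumnSanitiser": "true",
            "reWriteBatchedInserts": "true",
        },
    )

joined = table1.join(table2, on="key", how="leftsemi")  # placeholder join
sparkdf2database(joined, "myschema.table2_out", "append")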

The entire process runs successfully if I use only table2, table3 and table4. If I add table5, table6 and table7, the process still runs successfully for table5, table6 and table7, but as soon as it reaches table2 it starts printing a lot of messages like this:

16/10/27 17:33:47 WARN TaskMemoryManager: Failed to allocate a page (33554432 bytes), try again.
16/10/27 17:33:47 WARN TaskMemoryManager: Failed to allocate a page (33554432 bytes), try again.
16/10/27 17:33:47 WARN TaskMemoryManager: Failed to allocate a page (33554432 bytes), try again.
...
16/10/27 17:33:47 WARN TaskMemoryManager: Failed to allocate a page (33554432 bytes), try again.
...
Traceback (most recent call last):
  File "/Volumes/Data/www/beaver/tmp/ETL_Spark/etl.py", line 1200, in <module>
    sparkdf2database(flusso['sparkdf'], schema + "." + postgresql_tabella, "append")
  File "/Volumes/Data/www/beaver/tmp/ETL_Spark/etl.py", line 144, in sparkdf2database
    properties={"ApplicationName":info["nome"] + " - Scrittura della tabella " + dest, "disableColumnSanitiser":"true", "reWriteBatchedInserts":"true"}
  File "/Volumes/Data/www/beaver/tmp/ETL_Spark/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 762, in jdbc
  File "/Volumes/Data/www/beaver/tmp/ETL_Spark/spark/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py", line 1133, in __call__
  File "/Volumes/Data/www/beaver/tmp/ETL_Spark/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
  File "/Volumes/Data/www/beaver/tmp/ETL_Spark/spark/python/lib/py4j-0.10.3-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o301.jdbc.
: org.apache.spark.SparkException: Exception thrown in awaitResult:
        at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:194)
        at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.doExecuteBroadcast(BroadcastExchangeExec.scala:120)
        at org.apache.spark.sql.execution.InputAdapter.doExecuteBroadcast(WholeStageCodegenExec.scala:229)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeBroadcast$1.apply(SparkPlan.scala:125)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeBroadcast$1.apply(SparkPlan.scala:125)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.SparkPlan.executeBroadcast(SparkPlan.scala:124)
        at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.prepareBroadcast(BroadcastHashJoinExec.scala:98)
        at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.codegenSemi(BroadcastHashJoinExec.scala:318)
        at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doConsume(BroadcastHashJoinExec.scala:84)
        at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:153)
        at org.apache.spark.sql.execution.FilterExec.consume(basicPhysicalOperators.scala:79)
        at org.apache.spark.sql.execution.FilterExec.doConsume(basicPhysicalOperators.scala:194)
        at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:153)
        at org.apache.spark.sql.execution.RowDataSourceScanExec.consume(ExistingRDD.scala:150)
        at org.apache.spark.sql.execution.RowDataSourceScanExec.doProduce(ExistingRDD.scala:217)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:83)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.RowDataSourceScanExec.produce(ExistingRDD.scala:150)
        at org.apache.spark.sql.execution.FilterExec.doProduce(basicPhysicalOperators.scala:113)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:83)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.FilterExec.produce(basicPhysicalOperators.scala:79)
        at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doProduce(BroadcastHashJoinExec.scala:77)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:83)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.produce(BroadcastHashJoinExec.scala:38)
        at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:40)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:83)
        at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:78)
        at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:30)
        at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:309)
        at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:347)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:114)
        at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:88)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:114)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:86)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:86)
        at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2357)
        at org.apache.spark.sql.Dataset.rdd(Dataset.scala:2354)
        at org.apache.spark.sql.Dataset$$anonfun$foreachPartition$1.apply$mcV$sp(Dataset.scala:2127)
        at org.apache.spark.sql.Dataset$$anonfun$foreachPartition$1.apply(Dataset.scala:2127)
        at org.apache.spark.sql.Dataset$$anonfun$foreachPartition$1.apply(Dataset.scala:2127)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
        at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2546)
        at org.apache.spark.sql.Dataset.foreachPartition(Dataset.scala:2126)
        at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.saveTable(JdbcUtils.scala:299)
        at org.apache.spark.sql.DataFrameWriter.jdbc(DataFrameWriter.scala:441)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
        at py4j.Gateway.invoke(Gateway.java:280)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:214)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.util.concurrent.TimeoutException: Futures timed out after [300 seconds]
        at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
        at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
        at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:190)
        at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53)
        at scala.concurrent.Await$.result(package.scala:190)
        at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:190)
        ... 86 more



With smaller datasets the entire process runs without any problem. What does this mean and how can I solve the issue?
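
From the trace it looks like the failure happens while preparing a broadcast hash join, and the 300-second limit seems to match the default of spark.sql.broadcastTimeout. Below is a sketch of the settings I'm considering, under that assumption (the values are illustrative, and I haven't verified that they fix the problem):

from pyspark.sql import SparkSession

# Assumption: the timeout comes from the broadcast exchange in the plan.
spark = (
    SparkSession.builder
    .appName("ETL_Spark")
    # Raise the broadcast timeout above its 300 s default.
    .config("spark.sql.broadcastTimeout", "1200")
    # Or disable automatic broadcast joins (-1) so Spark falls back to
    # sort-merge joins for the larger tables.
    .config("spark.sql.autoBroadcastJoinThreshold", "-1")
    .getOrCreate()
)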

Thank you
Pietro
