spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jay <jayadeep.jayara...@gmail.com>
Subject Re: Reg:- Py4JError in Windows 10 with Spark
Date Wed, 06 Jun 2018 14:32:50 GMT
Are you running this in local mode or cluster mode? If you are running in
cluster mode, have you ensured that numpy is present on all nodes?

On Tue 5 Jun, 2018, 2:43 AM @Nandan@, <nandanpriyadarshi298@gmail.com>
wrote:

> Hi,
> I am getting this error:
>
> ---------------------------------------------------------------------------
> Py4JError Traceback (most recent call last)
> <ipython-input-2-a208cdba2c46> in <module>()
> 3 TOTAL = 1000000
> 4 dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in range(
> TOTAL)]).cache()
> ----> 5 print("Number of random points:", dots.count())
> 6
> 7 stats = dots.stats()
> C:\opt\spark\python\pyspark\rdd.py in count(self)
> 1039 3
> 1040 """
> -> 1041 return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
> 1042
> 1043 def stats(self):
> C:\opt\spark\python\pyspark\rdd.py in sum(self)
> 1030 6.0
> 1031 """
> -> 1032 return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add
> )
> 1033
> 1034 def count(self):
> C:\opt\spark\python\pyspark\rdd.py in fold(self, zeroValue, op)
> 904 # zeroValue provided to each partition is unique from the one provided
> 905 # to the final reduce call
> --> 906 vals = self.mapPartitions(func).collect()
> 907 return reduce(op, vals, zeroValue)
> 908
> C:\opt\spark\python\pyspark\rdd.py in collect(self)
> 807 """
> 808 with SCCallSiteSync(self.context) as css:
> --> 809 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
> 810 return list(_load_from_socket(port, self._jrdd_deserializer))
> 811
> C:\opt\spark\python\pyspark\rdd.py in _jrdd(self)
> 2453
> 2454 wrapped_func = _wrap_function(self.ctx, self.func,
> self._prev_jrdd_deserializer,
> -> 2455 self._jrdd_deserializer, profiler)
> 2456 python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(),
> wrapped_func,
> 2457 self.preservesPartitioning)
> C:\opt\spark\python\pyspark\rdd.py in _wrap_function(sc, func,
> deserializer, serializer, profiler)
> 2388 pickled_command, broadcast_vars, env, includes =
> _prepare_for_python_RDD(sc, command)
> 2389 return sc._jvm.PythonFunction(bytearray(pickled_command), env,
> includes, sc.pythonExec,
> -> 2390 sc.pythonVer, broadcast_vars, sc._javaAccumulator)
> 2391
> 2392
> C:\ProgramData\Anaconda3\lib\site-packages\py4j\java_gateway.py in
> __call__(self, *args)
> 1426 answer = self._gateway_client.send_command(command)
> 1427 return_value = get_return_value(
> -> 1428 answer, self._gateway_client, None, self._fqn)
> 1429
> 1430 for temp_arg in temp_args:
> C:\opt\spark\python\pyspark\sql\utils.py in deco(*a, **kw)
> 61 def deco(*a, **kw):
> 62 try:
> ---> 63 return f(*a, **kw)
> 64 except py4j.protocol.Py4JJavaError as e:
> 65 s = e.java_exception.toString()
> C:\ProgramData\Anaconda3\lib\site-packages\py4j\protocol.py in
> get_return_value(answer, gateway_client, target_id, name)
> 322 raise Py4JError(
> 323 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
> --> 324 format(target_id, ".", name, value))
> 325 else:
> 326 raise Py4JError(
> Py4JError: An error occurred while calling
> None.org.apache.spark.api.python.PythonFunction. Trace:
> py4j.Py4JException: Constructor
> org.apache.spark.api.python.PythonFunction([class [B, class
> java.util.HashMap, class java.util.ArrayList, class java.lang.String, class
> java.lang.String, class java.util.ArrayList, class
> org.apache.spark.api.python.PythonAccumulatorV2]) does not exist
> at
> py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:179)
> at
> py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:196)
> at py4j.Gateway.invoke(Gateway.java:235)
> at
> py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
> at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
> at py4j.GatewayConnection.run(GatewayConnection.java:214)
> at java.lang.Thread.run(Thread.java:745)
>
>
> I installed Spark 2.0.2 on Windows 10, and the code is as below:
>
>> sc = SparkContext.getOrCreate()
>> sc
>> import numpy as np
>> TOTAL = 1000000
>> dots = sc.parallelize([2.0 * np.random.random(2) - 1.0 for i in
>> range(TOTAL)]).cache()
>> print("Number of random points:", dots.count())
>> stats = dots.stats()
>> print('Mean:', stats.mean())
>> print('stdev:', stats.stdev())
>
>
> I am getting this error when I run the NumPy code.
> Please tell me what's wrong with it.
>

Mime
View raw message