spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Andrew Holway <andrew.hol...@otternetworks.de>
Subject Re: createDataFrame causing a strange error.
Date Sun, 27 Nov 2016 19:33:32 GMT
I get a slightly different error when not specifying a schema:

Traceback (most recent call last):
  File "/home/centos/fun-functions/spark-parrallel-read-from-s3/tick.py",
line 61, in <module>
    df = sqlContext.createDataFrame(foo)
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/context.py",
line 299, in createDataFrame
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py",
line 520, in createDataFrame
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py",
line 360, in _createFromRDD
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py",
line 331, in _inferSchema
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line
1328, in first
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line
1310, in take
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/context.py",
line 941, in runJob
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line
2403, in _jrdd
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line
2336, in _wrap_function
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line
2315, in _prepare_for_python_RDD
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/serializers.py",
line 428, in dumps
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 657, in dumps
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 107, in dump
  File "/usr/lib64/python2.7/pickle.py", line 224, in dump
    self.save(obj)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 562, in save_tuple
    save(element)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 204, in save_function
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 241, in save_function_tuple
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple
    save(element)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 600, in save_list
    self._batch_appends(iter(obj))
  File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends
    save(x)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 204, in save_function
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 241, in save_function_tuple
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple
    save(element)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 600, in save_list
    self._batch_appends(iter(obj))
  File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends
    save(x)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 204, in save_function
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 241, in save_function_tuple
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple
    save(element)
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 600, in save_list
    self._batch_appends(iter(obj))
  File "/usr/lib64/python2.7/pickle.py", line 636, in _batch_appends
    save(tmp[0])
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 198, in save_function
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
line 246, in save_function_tuple
  File "/usr/lib64/python2.7/pickle.py", line 286, in save
    f(self, obj) # Call unbound method with explicit self
  File "/usr/lib64/python2.7/pickle.py", line 649, in save_dict
    self._batch_setitems(obj.iteritems())
  File "/usr/lib64/python2.7/pickle.py", line 681, in _batch_setitems
    save(v)
  File "/usr/lib64/python2.7/pickle.py", line 306, in save
    rv = reduce(self.proto)
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py",
line 933, in __call__
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/utils.py",
line 63, in deco
  File
"/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py",
line 316, in get_return_value
py4j.protocol.Py4JError: An error occurred while calling
o33.__getnewargs__. Trace:
py4j.Py4JException: Method __getnewargs__([]) does not exist
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
at py4j.Gateway.invoke(Gateway.java:272)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:211)
at java.lang.Thread.run(Thread.java:745)


On Sun, Nov 27, 2016 at 8:32 PM, Andrew Holway <
andrew.holway@otternetworks.de> wrote:

> Hi,
>
> Can anyone tell me what is causing this error?
> Spark 2.0.0
> Python 2.7.5
>
> df = sqlContext.createDataFrame(foo, schema)
> https://gist.github.com/mooperd/368e3453c29694c8b2c038d6b7b4413a
>
> Traceback (most recent call last):
>   File "/home/centos/fun-functions/spark-parrallel-read-from-s3/tick.py",
> line 61, in <module>
>     df = sqlContext.createDataFrame(foo, schema)
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/context.py",
> line 299, in createDataFrame
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py",
> line 523, in createDataFrame
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py",
> line 2220, in _to_java_object_rdd
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py",
> line 2403, in _jrdd
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py",
> line 2336, in _wrap_function
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py",
> line 2315, in _prepare_for_python_RDD
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/serializers.py",
> line 428, in dumps
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 657, in dumps
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 107, in dump
>   File "/usr/lib64/python2.7/pickle.py", line 224, in dump
>     self.save(obj)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 562, in save_tuple
>     save(element)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 204, in save_function
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 241, in save_function_tuple
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple
>     save(element)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 600, in save_list
>     self._batch_appends(iter(obj))
>   File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends
>     save(x)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 204, in save_function
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 241, in save_function_tuple
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple
>     save(element)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 600, in save_list
>     self._batch_appends(iter(obj))
>   File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends
>     save(x)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 204, in save_function
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 241, in save_function_tuple
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple
>     save(element)
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 600, in save_list
>     self._batch_appends(iter(obj))
>   File "/usr/lib64/python2.7/pickle.py", line 636, in _batch_appends
>     save(tmp[0])
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 198, in save_function
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py",
> line 246, in save_function_tuple
>   File "/usr/lib64/python2.7/pickle.py", line 286, in save
>     f(self, obj) # Call unbound method with explicit self
>   File "/usr/lib64/python2.7/pickle.py", line 649, in save_dict
>     self._batch_setitems(obj.iteritems())
>   File "/usr/lib64/python2.7/pickle.py", line 681, in _batch_setitems
>     save(v)
>   File "/usr/lib64/python2.7/pickle.py", line 306, in save
>     rv = reduce(self.proto)
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py",
> line 933, in __call__
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/utils.py",
> line 63, in deco
>   File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py",
> line 316, in get_return_value
> py4j.protocol.Py4JError: An error occurred while calling
> o33.__getnewargs__. Trace:
> py4j.Py4JException: Method __getnewargs__([]) does not exist
> at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
> at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
> at py4j.Gateway.invoke(Gateway.java:272)
> at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
> at py4j.commands.CallCommand.execute(CallCommand.java:79)
> at py4j.GatewayConnection.run(GatewayConnection.java:211)
> at java.lang.Thread.run(Thread.java:745)
>
>
>
> --
> Otter Networks UG
> http://otternetworks.de
> Gotenstraße 17
> 10829 Berlin
>



-- 
Otter Networks UG
http://otternetworks.de
Gotenstraße 17
10829 Berlin

Mime
View raw message