spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Selvam Raman <sel...@gmail.com>
Subject Pyspark UDF/map fucntion throws pickling exception
Date Thu, 15 Feb 2018 11:44:07 GMT
import spacy

nlp = spacy.load('en')



def getPhrases(content):
    phrases = []
    doc = nlp(str(content))
    for chunks in doc.noun_chunks:
        phrases.append(chunks.text)
    return phrases

the above function will retrieve the noun phrases from the content and
return list of phrases.


def f(x) : print(x)


description = xmlData.filter(col("dcterms:description").isNotNull()).select(col("dcterms:description").alias("desc"))

description.rdd.flatMap(lambda row: getPhrases(row.desc)).foreach(f)

when i am trying to access getphrases i am getting below exception

"""if islambda(obj) or obj.__code__.co_filename == '<stdin>' or themodule
is None:
AttributeError: 'builtin_function_or_method' object has no attribute
'__code__' """

Full stack trace is below

Traceback (most recent call last):
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 148, in dump
    return Pickler.dump(self, obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 409, in dump
    self.save(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 751, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
    save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
    self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
    save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
    save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
    self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
    save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
    save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
    self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
    save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
    save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
    self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 805, in _batch_appends
    save(x)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 255, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 292, in save_function_tuple
    save((code, closure, base_globals))
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 736, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 781, in save_list
    self._batch_appends(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 808, in _batch_appends
    save(tmp[0])
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 249, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 297, in save_function_tuple
    save(f_globals)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 821, in save_dict
    self._batch_setitems(obj.items())
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 852, in _batch_setitems
    save(v)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 249, in save_function
    self.save_function_tuple(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 297, in save_function_tuple
    save(f_globals)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 821, in save_dict
    self._batch_setitems(obj.items())
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 852, in _batch_setitems
    save(v)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 521, in save
    self.save_reduce(obj=obj, *rv)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 600, in save_reduce
    save(state)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 821, in save_dict
    self._batch_setitems(obj.items())
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 847, in _batch_setitems
    save(v)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 521, in save
    self.save_reduce(obj=obj, *rv)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 582, in save_reduce
    save(args)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 751, in save_tuple
    save(element)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/pickle.py",
line 476, in save
    f(self, obj) # Call unbound method with explicit self
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 368, in save_builtin_function
    return self.save_function(obj)
  File
"/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pyspark/cloudpickle.py",
line 247, in save_function
    if islambda(obj) or obj.__code__.co_filename == '<stdin>' or themodule
is None:
AttributeError: 'builtin_function_or_method' object has no attribute
'__code__'



please help me.



-- 
Selvam Raman
"லஞ்சம் தவிர்த்து நெஞ்சம் நிமிர்த்து"

Mime
View raw message