spark-user mailing list archives

From Jörn Franke <jornfra...@gmail.com>
Subject Re: How to read remote HDFS from Spark using username?
Date Wed, 03 Oct 2018 07:44:45 GMT
Looks like a firewall issue.
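
A quick way to confirm: check from the client machine whether the NameNode port is reachable at all. A minimal sketch in plain Python follows; the host and port are taken from the stack trace below, and a "Connection refused" here is the same failure Spark reports.

# Reachability check for the remote NameNode (host/port from the stack
# trace below). "Connection refused" means the problem is at the network
# layer (EC2 security group, firewall, or a NameNode bound to localhost),
# not in Spark or HDFS authentication.
import socket

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
    sock.settimeout(5)
    try:
        sock.connect(("35.154.242.76", 9000))
        print("NameNode port 9000 is reachable")
    except OSError as exc:
        print(f"Cannot connect: {exc}")

If this fails, check the EC2 security group for inbound traffic on port 9000, and verify the NameNode is not bound only to localhost (fs.defaultFS in the remote core-site.xml).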

> On 03.10.2018, at 09:34, Aakash Basu <aakash.spark.raj@gmail.com> wrote:
> 
> The stack trace is below:
> 
>> ---------------------------------------------------------------------------
>> Py4JJavaError Traceback (most recent call last)
>> <ipython-input-50-554fc507e3b2> in <module>()
>> ----> 1 df = spark.read.load("hdfs://35.154.242.76:9000/auto-ml/projects/auto-ml-test__8503cdc4-21fc-4fae-87c1-5b879cafff71/data/breast-cancer-wisconsin.csv")
>> /opt/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)
>>  164 self.options(**options)
>>  165 if isinstance(path, basestring):
>> --> 166 return self._df(self._jreader.load(path))
>>  167 elif path is not None:
>>  168 if type(path) != list:
>> /opt/spark/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py in __call__(self, *args)
>>  1158 answer = self.gateway_client.send_command(command)
>>  1159 return_value = get_return_value(
>> -> 1160 answer, self.gateway_client, self.target_id, self.name)
>>  1161 
>>  1162 for temp_arg in temp_args:
>> /opt/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
>>  61 def deco(*a, **kw):
>>  62 try:
>> ---> 63 return f(*a, **kw)
>>  64 except py4j.protocol.Py4JJavaError as e:
>>  65 s = e.java_exception.toString()
>> /opt/spark/python/lib/py4j-0.10.6-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
>>  318 raise Py4JJavaError(
>>  319 "An error occurred while calling {0}{1}{2}.\n".
>> --> 320 format(target_id, ".", name), value)
>>  321 else:
>>  322 raise Py4JError(
>> Py4JJavaError: An error occurred while calling o244.load.
>> : java.net.ConnectException: Call From Sandeeps-MacBook-Pro.local/192.168.50.188 to ec2-35-154-242-76.ap-south-1.compute.amazonaws.com:9000 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
>> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
>> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
>> 	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>> 	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
>> 	at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>> 	at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>> 	at org.apache.hadoop.ipc.Client.call(Client.java:1479)
>> 	at org.apache.hadoop.ipc.Client.call(Client.java:1412)
>> 	at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>> 	at com.sun.proxy.$Proxy17.getFileInfo(Unknown Source)
>> 	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:771)
>> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> 	at java.lang.reflect.Method.invoke(Method.java:498)
>> 	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
>> 	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>> 	at com.sun.proxy.$Proxy18.getFileInfo(Unknown Source)
>> 	at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:2108)
>> 	at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1305)
>> 	at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1301)
>> 	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>> 	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1317)
>> 	at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1426)
>> 	at org.apache.spark.sql.execution.datasources.DataSource$.org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary(DataSource.scala:714)
>> 	at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:389)
>> 	at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(DataSource.scala:389)
>> 	at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>> 	at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
>> 	at scala.collection.immutable.List.foreach(List.scala:381)
>> 	at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
>> 	at scala.collection.immutable.List.flatMap(List.scala:344)
>> 	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:388)
>> 	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
>> 	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
>> 	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:174)
>> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>> 	at java.lang.reflect.Method.invoke(Method.java:498)
>> 	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>> 	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>> 	at py4j.Gateway.invoke(Gateway.java:282)
>> 	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>> 	at py4j.commands.CallCommand.execute(CallCommand.java:79)
>> 	at py4j.GatewayConnection.run(GatewayConnection.java:214)
>> 	at java.lang.Thread.run(Thread.java:745)
>> Caused by: java.net.ConnectException: Connection refused
>> 	at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>> 	at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>> 	at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>> 	at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>> 	at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>> 	at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>> 	at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:712)
>> 	at org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:375)
>> 	at org.apache.hadoop.ipc.Client.getConnection(Client.java:1528)
>> 	at org.apache.hadoop.ipc.Client.call(Client.java:1451)
>> 	... 40 more
>> 
> 
>> On Wed, Oct 3, 2018 at 12:32 PM Aakash Basu <aakash.spark.raj@gmail.com> wrote:
>> Hi,
>> 
>> I have to read data stored in the HDFS of a different machine, and it needs to be accessed through Spark.
>> 
>> How do I do that? Using the full HDFS address along with the port doesn't seem to work.
>> 
>> Has anyone done this before?
>> 
>> Thanks,
>> AB.
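
For reference, a minimal sketch of the kind of read being attempted, assuming the cluster uses Hadoop's simple (non-Kerberos) authentication, where the client picks the HDFS username from the HADOOP_USER_NAME environment variable. "hdfsuser" is a hypothetical placeholder; the path is the one from the stack trace above.

import os

# Under simple auth, Hadoop takes the client identity from HADOOP_USER_NAME;
# it must be set before the SparkSession (and its JVM) starts.
os.environ["HADOOP_USER_NAME"] = "hdfsuser"  # hypothetical remote user

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("remote-hdfs-read").getOrCreate()

# Full hdfs://host:port URI, exactly as in the failing call; note the
# explicit format, since load() defaults to parquet.
df = spark.read.load(
    "hdfs://35.154.242.76:9000/auto-ml/projects/"
    "auto-ml-test__8503cdc4-21fc-4fae-87c1-5b879cafff71/"
    "data/breast-cancer-wisconsin.csv",
    format="csv",
    header=True,
)

The URI form itself is correct, so a "Connection refused" against it points at connectivity (firewall or security group), not at the username.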
