spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Nirmal Fernando <nir...@wso2.com>
Subject Re: path to hdfs
Date Mon, 08 Jun 2015 10:54:16 GMT
The HDFS path should use the hdfs:// scheme rather than file://, which
refers to the local filesystem. It should be something like:
hdfs://127.0.0.1:8020/user/cloudera/inputs/

On Mon, Jun 8, 2015 at 4:15 PM, Pa Rö <paul.roewer1990@googlemail.com>
wrote:

> hello,
>
> i submit my spark job with the following parameters:
>
> ./spark-1.1.0-bin-hadoop2.4/bin/spark-submit \
>   --class mgm.tp.bigdata.ma_spark.SparkMain \
>   --master spark://quickstart.cloudera:7077 \
>   ma-spark.jar \
>   1000
>
> and get the following exception:
>
> java.io.IOException: Mkdirs failed to create file:/
> 127.0.0.1:8020/user/cloudera/outputs/output_spark
>     at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:438)
>     at
> org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:906)
>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:887)
>     at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:784)
>     at mgm.tp.bigdata.ma_spark.Helper.writeCenterHistory(Helper.java:35)
>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:100)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative
> path in absolute URI: 127.0.0.1:8020
>     at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>     at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>     at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>     at
> org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>     at
> org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>     at
> org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>     at org.apache.spark.rdd.RDD.count(RDD.scala:904)
>     at org.apache.spark.rdd.RDD.takeSample(RDD.scala:401)
>     at
> org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:426)
>     at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>     at
> org.apache.spark.api.java.JavaRDDLike$class.takeSample(JavaRDDLike.scala:422)
>     at org.apache.spark.api.java.JavaRDD.takeSample(JavaRDD.scala:32)
>     at mgm.tp.bigdata.ma_spark.SparkMain.kmeans(SparkMain.java:123)
>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:102)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.net.URISyntaxException: Relative path in absolute URI:
> 127.0.0.1:8020
>     at java.net.URI.checkPath(URI.java:1804)
>     at java.net.URI.<init>(URI.java:752)
>     at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>     ... 43 more
> Exception in thread "main" java.lang.IllegalArgumentException:
> java.net.URISyntaxException: Relative path in absolute URI: 127.0.0.1:8020
>     at org.apache.hadoop.fs.Path.initialize(Path.java:206)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:172)
>     at org.apache.hadoop.fs.Path.<init>(Path.java:94)
>     at org.apache.hadoop.fs.Globber.glob(Globber.java:211)
>     at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1642)
>     at
> org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:257)
>     at
> org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
>     at
> org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:304)
>     at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:179)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.FilteredRDD.getPartitions(FilteredRDD.scala:29)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:204)
>     at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:202)
>     at scala.Option.getOrElse(Option.scala:120)
>     at org.apache.spark.rdd.RDD.partitions(RDD.scala:202)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:1135)
>     at org.apache.spark.rdd.RDD.foreach(RDD.scala:759)
>     at
> org.apache.spark.api.java.JavaRDDLike$class.foreach(JavaRDDLike.scala:297)
>     at org.apache.spark.api.java.JavaPairRDD.foreach(JavaPairRDD.scala:44)
>     at mgm.tp.bigdata.ma_spark.SparkMain.saveResults(SparkMain.java:216)
>     at mgm.tp.bigdata.ma_spark.SparkMain.main(SparkMain.java:108)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>     at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.net.URISyntaxException: Relative path in absolute URI:
> 127.0.0.1:8020
>     at java.net.URI.checkPath(URI.java:1804)
>     at java.net.URI.<init>(URI.java:752)
>     at org.apache.hadoop.fs.Path.initialize(Path.java:203)
>     ... 45 more
>
> i set my path like:
> file:///127.0.0.1:8020/user/cloudera/inputs/
> (namenode of hadoop)
>
> how i must set the path to hdfs??
>
> best regards,
> paul
>



-- 

Thanks & regards,
Nirmal

Associate Technical Lead - Data Technologies Team, WSO2 Inc.
Mobile: +94715779733
Blog: http://nirmalfdo.blogspot.com/

Mime
View raw message