spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Reinis Vicups <sp...@orbit-x.de>
Subject Spark 1.1.0: weird spark-shell behavior
Date Mon, 01 Dec 2014 09:50:19 GMT
Hello,

I have two weird effects when working with spark-shell:


1. This code, when executed in spark-shell, causes the exception below. At the
same time, it works perfectly when submitted with spark-submit:

import org.apache.hadoop.hbase.{HConstants, HBaseConfiguration}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.mahout.math.VectorWritable
import com.google.common.io.ByteStreams
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext.rddToPairRDDFunctions
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

val hConf = HBaseConfiguration.create()
hConf.set("hbase.defaults.for.version.skip", "true")
hConf.set("hbase.defaults.for.version", "0.98.6-cdh5.2.0")
hConf.set(HConstants.ZOOKEEPER_QUORUM, "myserv")
hConf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181")
hConf.set(TableInputFormat.INPUT_TABLE, "MyNS:MyTable")
val rdd = sc.newAPIHadoopRDD(hConf, classOf[TableInputFormat], 
classOf[ImmutableBytesWritable], classOf[Result])
rdd.count()

--- Exception ---

14/12/01 10:45:24 ERROR ExecutorUncaughtExceptionHandler: Uncaught 
exception in thread Thread[Executor task launch worker-0,5,main]
  java.lang.ExceptionInInitializerError
     at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:197)
     at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:159)
     at 
org.apache.hadoop.hbase.mapreduce.TableInputFormat.setConf(TableInputFormat.java:101)
     at 
org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:113)
     at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:104)
     at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:66)
     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
     at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
     at org.apache.spark.scheduler.Task.run(Task.scala:54)
     at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:180)
     at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
     at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
     at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: hbase-default.xml file seems to 
be for and old version of HBase (null), this version is 0.98.6-cdh5.2.0
     at 
org.apache.hadoop.hbase.HBaseConfiguration.checkDefaultsVersion(HBaseConfiguration.java:73)
     at 
org.apache.hadoop.hbase.HBaseConfiguration.addHbaseResources(HBaseConfiguration.java:105)
     at 
org.apache.hadoop.hbase.HBaseConfiguration.create(HBaseConfiguration.java:116)
     at 
org.apache.hadoop.hbase.client.HConnectionManager.<clinit>(HConnectionManager.java:222)
     ... 14 more

We have already checked most of the trivial stuff with class paths and
existence of tables and column groups, enabled HBase-specific settings to
avoid the version checking, and so on. It appears that the supplied HBase
configuration is completely ignored by the context. We tried to solve this
issue by instantiating our own SparkContext and encountered the second
weird effect:

2. When attempting to instantiate our own SparkContext, we get the exception
below:

// imports block
...

val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext(conf)

--- Exception ---

2014-12-01 10:42:24,966 WARN  o.e.j.u.c.AbstractLifeCycle - FAILED 
SelectChannelConnector@0.0.0.0:4040: java.net.BindException: Die Adresse 
wird bereits verwendet
java.net.BindException: Die Adresse wird bereits verwendet
         at sun.nio.ch.Net.bind0(Native Method)
         at sun.nio.ch.Net.bind(Net.java:444)
         at sun.nio.ch.Net.bind(Net.java:436)
         at 
sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:214)
         at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
         at 
org.eclipse.jetty.server.nio.SelectChannelConnector.open(SelectChannelConnector.java:187)
         at 
org.eclipse.jetty.server.AbstractConnector.doStart(AbstractConnector.java:316)
         at 
org.eclipse.jetty.server.nio.SelectChannelConnector.doStart(SelectChannelConnector.java:265)
         at 
org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
         at org.eclipse.jetty.server.Server.doStart(Server.java:293)
         at 
org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
         at 
org.apache.spark.ui.JettyUtils$.org$apache$spark$ui$JettyUtils$$connect$1(JettyUtils.scala:199)
         at 
org.apache.spark.ui.JettyUtils$$anonfun$4.apply(JettyUtils.scala:209)
         at 
org.apache.spark.ui.JettyUtils$$anonfun$4.apply(JettyUtils.scala:209)
         at 
org.apache.spark.util.Utils$$anonfun$startServiceOnPort$1.apply$mcVI$sp(Utils.scala:1449)
         at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
         at 
org.apache.spark.util.Utils$.startServiceOnPort(Utils.scala:1445)
         at 
org.apache.spark.ui.JettyUtils$.startJettyServer(JettyUtils.scala:209)
         at org.apache.spark.ui.WebUI.bind(WebUI.scala:102)
         at org.apache.spark.SparkContext.<init>(SparkContext.scala:224)
         at $line22.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:24)
         at $line22.$read$$iwC$$iwC$$iwC.<init>(<console>:29)
         at $line22.$read$$iwC$$iwC.<init>(<console>:31)
         at $line22.$read$$iwC.<init>(<console>:33)
         at $line22.$read.<init>(<console>:35)
         at $line22.$read$.<init>(<console>:39)
         at $line22.$read$.<clinit>(<console>)
         at $line22.$eval$.<init>(<console>:7)
         at $line22.$eval$.<clinit>(<console>)
         at $line22.$eval.$print(<console>)
         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
         at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
         at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
         at java.lang.reflect.Method.invoke(Method.java:606)
         at 
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:846)
         at 
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1119)
         at 
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:672)
         at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:703)
         at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:667)
         at 
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:819)
         at 
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:864)
         at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:776)
         at 
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:619)
         at 
org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:627)
         at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:632)
         at 
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:959)
         at 
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:907)
         at 
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:907)
         at 
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
         at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:907)
         at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1002)
         at org.apache.spark.repl.Main$.main(Main.scala:31)
         at org.apache.spark.repl.Main.main(Main.scala)
         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
         at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
         at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
         at java.lang.reflect.Method.invoke(Method.java:606)
         at 
org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:331)
         at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
         at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

Has anyone already encountered these things? I recall that in earlier
spark-shell versions there was no issue with instantiating our own Spark
contexts. Is this new to Spark 1.1.0?

Thank you for your help and kind regards
reinis
|

Mime
View raw message