spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n0rb3rt <jnmee...@gmail.com>
Subject Re: Class not found in Kafka-Stream due to multi-thread without correct ClassLoader?
Date Wed, 14 May 2014 16:47:41 GMT
Any resolution to this?

I'm new to Spark and have had success running an application locally, but I'm
hitting this same error when submitting it to a standalone cluster.  I'm not
using Kafka streaming in this case, just parsing proto messages wrapped in
an Avro object file.  I have read all the threads about protobuf version
incompatibilities and tried about every combination of protoc and
protobuf-java that was mentioned, to no avail.

Here's some of the relevant code.

/**
 * Loads `Person` and `System` protobuf messages from Avro files and exposes
 * them as RDDs.
 *
 * @param master Spark master URL handed to `SparkConf.setMaster`
 * @param path   prefix to which the file names `people.avro` and
 *               `systems.avro` are appended verbatim (no separator is added)
 */
class Sparcules(master: String, path: String) {
  val sc: SparkContext = new SparkContext(
    new SparkConf()
      .setMaster(master)
      .setAppName("Sparcules")
  )

  val people: RDD[Person] =
    readAvro[Person](sc, s"${path}people.avro", Person.getDefaultInstance)
  val systems: RDD[System] =
    readAvro[System](sc, s"${path}systems.avro", System.getDefaultInstance)

  /**
   * Reads an Avro file whose keys are byte buffers and parses each key as a
   * protobuf message.
   *
   * @param sparkContext context used to open the file
   * @param path         location of the Avro file
   * @param proto        message instance whose builder type is used to parse
   *                     each record; it is captured by the map closure
   * @tparam T           message type of the resulting RDD; the parsed message
   *                     is cast to `T` unchecked, so `proto` must actually
   *                     build a `T`
   * @return an RDD with one parsed message per Avro record
   */
  def readAvro[T: ClassTag](sparkContext: SparkContext, path: String, proto: Message): RDD[T] = {
    val records = sparkContext.newAPIHadoopFile[
      AvroKey[ByteBuffer],
      NullWritable,
      AvroKeyInputFormat[ByteBuffer]](path)
    records.map { case (key, _) =>
      // ByteBuffer.array exposes the whole backing array regardless of the
      // buffer's position/limit/offset. Copy only the readable window so that
      // no bytes outside the record reach the protobuf parser.
      // NOTE(review): presumably the Avro reader may reuse or over-allocate
      // the buffer between records — confirm against the Avro version in use.
      val window = key.datum.duplicate() // leave the original's position alone
      val payload = new Array[Byte](window.remaining)
      window.get(payload)
      proto.newBuilderForType
        .mergeFrom(payload)
        .build
        .asInstanceOf[T]
    }
  }

  /**
   * Joins people with systems, matching `Person.getId` against
   * `System.getPersonId`, and drops the join key.
   *
   * @return one `(person, system)` pair per matching combination
   */
  def person_system: RDD[(Person, System)] = {
    val peopleById = people.map { p: Person => (p.getId, p) }
    val systemsByPersonId = systems.map { s: System => (s.getPersonId, s) }
    peopleById
      .join(systemsByPersonId)
      .map { case (_, matched) => matched }
  }

}

/** Command-line entry point that writes sorted (person name, system name) pairs. */
object People {

  /**
   * Builds a [[Sparcules]] from the first two arguments, sorts the joined
   * pairs by `(person.getName(0), system.getName(0))` in ascending order and
   * saves them as text under `person_system`.
   *
   * @param args `args(0)` is the Spark master URL, `args(1)` the path prefix
   *             of the Avro input files
   * @throws IllegalArgumentException if fewer than two arguments are given
   */
  def main(args: Array[String]): Unit = {
    require(args.length >= 2, "usage: People <master> <path-prefix>")
    val prc = new Sparcules(args(0), args(1))
    try {
      prc.person_system
        .map { case (person, system) =>
          // None is a placeholder value: sortByKey needs a key/value pair.
          ((person.getName(0), system.getName(0)), None)
        }
        .sortByKey(true)
        .map(_._1)
        .saveAsTextFile("person_system")
    } finally {
      // Release the context even when the job fails.
      prc.sc.stop()
    }
  }

}



--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Class-not-found-in-Kafka-Stream-due-to-multi-thread-without-correct-ClassLoader-tp2398p5713.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

Mime
View raw message