spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yh18190 <>
Subject Re: Need suggestions
Date Wed, 02 Apr 2014 21:03:29 GMT
Thanks for the response. Could you please look into my repo? Here, the Utils
class is the class in question. I cannot paste the entire code, that's why.

I have another class from which I would be calling the Utils class for the object.

package main.scala

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import com.codahale.jerkson.Json._
import scala.collection.JavaConversions._
import scala.collection.immutable._
import rtree._

object Utils {
    //Main project directory
  	val work_directory = "/Users/Meghana/Documents/workspace/assignment"
    //Location of spark home 
  	val spark_home = "/Users/Meghana/Downloads/spark-0.9.0-incubating"
  	//Location of Twitter data
	val data_home = "/Users/Meghana/Desktop/Twitter/Europe/2012/3/1"
	//CSV file that has the city data (presumably bounding boxes, judging by the name; original comment is cut off)
	val bbox_file = work_directory + "/cities_eu.csv"
	//Locations to store the intermediate data
	val intermediate_ucg_data = work_directory + "/ucg_int"
	val intermediate_rt_data = work_directory + "/rtc_int"
	val intermediate_wc_data = work_directory + "/wc_int"
	val intermediate_ucc_data = work_directory + "/ucc_int"
	//Create spark context
    val sc = new SparkContext("local", "Simple App", Utils.spark_home,
  	//RTree structure with key as the city name. 
  	//First initialize with an empty tree.
  	var rtree:RTree[String] = RTree.empty
    //default window size
  	var jumping_window_size:Integer = 1;
  	var sliding_window_size:Integer = 1;

    //We start from hour 1
	val initialHour:Integer = 1;
	//We calculate with this frequency (eg; every one hour)
	val calcFreq:Integer = 1;
    //Object representation of Tweet
  	//text -> Tweet text
  	//retweets -> number of re-tweets of the current tweet.
  	//country -> country where the tweet appeared
  	//city -> from which city the tweet has appeared
  	//hour -> hour of the tweet.
  	/** Object representation of a single tweet.
  	  *
  	  * @param user     user associated with the tweet (presumably the author; confirm against parseTweet)
  	  * @param text     tweet text
  	  * @param retweets number of re-tweets of the current tweet
  	  * @param country  country where the tweet appeared
  	  * @param city     city from which the tweet appeared
  	  * @param hour     hour of the tweet
  	  */
  	class Tweet(val user: String,
  	    val text: String,
  	    val retweets: Integer,
  	    val country: String,
  	    val city: String,
  	    val hour: Int) {

  	  /** Renders every field on its own line, each line terminated by a newline. */
  	  override def toString: String =
  	    s"User: $user\n" +
  	    s"Text: $text\n" +
  	    s"Retweets: $retweets\n" +
  	    s"Country: $country\n" +
  	    s"City: $city\n" +
  	    s"hour: $hour\n"
  	} // closes class Tweet; this brace was missing, so later members would have nested inside the class

    //Function to parse a line of string to a json object, and then create a Tweet instance
	def parseTweet(s: String): Tweet = {
	    //Parse the given json line from the twitter dataset to the key:value
		val tweet_details_map = parse[Map[String, Any]](s)
		//Extract tweet string from the given line
		val text:String = tweet_details_map.get("text").get.asInstanceOf[String]
		//Extract the user data from the tweet line.
		val user_details =
		//Extract the retweet count from the given tweet line.
		val retweets:Integer =

View this message in context:
Sent from the Apache Spark User List mailing list archive at

View raw message