spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yh18190 <yh18...@gmail.com>
Subject Re: Need suggestions
Date Wed, 02 Apr 2014 21:03:29 GMT
Hi,
Thanks for the response. Could you please look into my repo? The Utils class is
the class in question. I cannot paste the entire code here, which is why I am linking it.

I have another class from which I would be calling the Utils class for object
creation.

package main.scala

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import com.codahale.jerkson.Json._
import scala.collection.JavaConversions._
import scala.collection.immutable._
import scala.io.Source
import rtree._

object Utils {
    //Main project directory; the bounding-box file and the intermediate
    //data locations below are all built relative to it.
  	val work_directory = "/Users/Meghana/Documents/workspace/assignment"
    //Location of spark home (handed to the SparkContext created below)
  	val spark_home = "/Users/Meghana/Downloads/spark-0.9.0-incubating"
  	  
  	//Location of Twitter data
	val data_home = "/Users/Meghana/Desktop/Twitter/Europe/2012/3/1"
	//CSV file inside the project directory.
	//NOTE(review): the original comment was cut off; judging by the names this
	//presumably holds city bounding boxes -- confirm against the file contents.
	val bbox_file = work_directory + "/cities_eu.csv"
	  
	//Locations to store the intermediate data (one sub-directory of the
	//project directory per intermediate data set)
	val intermediate_ucg_data = work_directory + "/ucg_int"
	val intermediate_rt_data = work_directory + "/rtc_int"
	val intermediate_wc_data = work_directory + "/wc_int"
	val intermediate_ucc_data = work_directory + "/ucc_int"
	
	//Create spark context. It is constructed as part of this object's
	//initialization, using the "local" master, the application name
	//"Simple App", the spark home defined above and the project jar.
	//NOTE(review): the jar path is relative, so it presumably depends on the
	//working directory the application is launched from -- confirm.
    val sc = new SparkContext("local", "Simple App", Utils.spark_home,
	      List("target/scala-2.10/simple-project_2.10-1.0.jar"))
  	
  	//RTree structure with key as the city name. 
  	//First initialize with an empty tree; it is a var so it can be replaced later.
  	var rtree:RTree[String] = RTree.empty
  	
    //Default window sizes (both start at 1 and are reassignable).
    //NOTE(review): the unit is presumably hours, like initialHour below -- confirm.
  	var jumping_window_size:Integer = 1;
  	var sliding_window_size:Integer = 1;

  	
    //We start from hour 1
	val initialHour:Integer = 1;
	
	//We calculate with this frequency (e.g. every one hour)
	val calcFreq:Integer = 1;
	
    /**
     * Object representation of a tweet.
     *
     * @param user     user associated with the tweet (the exact source field is
     *                 not visible in this excerpt -- TODO confirm)
     * @param text     tweet text
     * @param retweets number of re-tweets of the current tweet
     * @param country  country where the tweet appeared
     * @param city     city from which the tweet appeared
     * @param hour     hour of the tweet
     */
    class Tweet(
        val user: String,
        val text: String,
        val retweets: Integer,
        val country: String,
        val city: String,
        val hour: Int) {

      /**
       * Renders every field on its own labelled line; each line, including
       * the last one, is terminated by a newline.
       */
      override def toString: String = {
        val labelledLines = List(
          s"User: $user",
          s"Text: $text",
          s"Retweets: $retweets",
          s"Country: $country",
          s"City: $city",
          s"hour: $hour"
        )
        // Empty prefix, newline between entries, newline after the last entry.
        labelledLines.mkString("", "\n", "\n")
      }
    }

    //Function to parse a line of string to a JSON object, and then create a Tweet instance
	def parseTweet(s: String): Tweet = {
	    //Parse the given JSON line from the Twitter dataset into a key:value map.
		val tweet_details_map = parse[Map[String, Any]](s)
		//Extract tweet string from the given line
		val text:String = tweet_details_map.get("text").get.asInstanceOf[String]
		//Extract the user data from the tweet line.
		val user_details =
tweet_details_map.get("user").get.asInstanceOf[java.util.LinkedHashMap[String,Any]]
		//Extract the retweet count from the given tweet line.
		val retweets:Integer =
tweet_details_map.get("retweet_count").get.asInstanceOf[Integer]

https://bitbucket.org/smartmetersproject/twitterdatasets1/src/c379405f1437a9eb4fc7fa0f3f9a2834e766ad2d/src/main/scala/Utils.scala?at=master



--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Need-suggestions-tp3650p3652.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

Mime
View raw message