toree-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chipsenkb...@apache.org
Subject [1/6] incubator-toree git commit: Added support for interpreters to share sqlcontext
Date Thu, 31 Mar 2016 14:13:11 GMT
Repository: incubator-toree
Updated Branches:
  refs/heads/master 7129ce222 -> f8f3022ae


Added support for interpreters to share sqlcontext


Project: http://git-wip-us.apache.org/repos/asf/incubator-toree/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-toree/commit/bbf28661
Tree: http://git-wip-us.apache.org/repos/asf/incubator-toree/tree/bbf28661
Diff: http://git-wip-us.apache.org/repos/asf/incubator-toree/diff/bbf28661

Branch: refs/heads/master
Commit: bbf2866123385691f969842d2cc86a546171f5dd
Parents: 7129ce2
Author: Gino Bustelo <lbustelo@apache.org>
Authored: Tue Mar 15 17:18:50 2016 -0500
Committer: Gino Bustelo <lbustelo@apache.org>
Committed: Wed Mar 30 11:42:41 2016 -0500

----------------------------------------------------------------------
 etc/examples/notebooks/cars.json                |  5 +
 etc/examples/notebooks/people.json              |  3 +
 etc/examples/notebooks/sqlcontext_sharing.ipynb | 96 ++++++++++++++++++++
 .../main/resources/PySpark/pyspark_runner.py    | 13 +--
 4 files changed, 111 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/bbf28661/etc/examples/notebooks/cars.json
----------------------------------------------------------------------
diff --git a/etc/examples/notebooks/cars.json b/etc/examples/notebooks/cars.json
new file mode 100644
index 0000000..5617879
--- /dev/null
+++ b/etc/examples/notebooks/cars.json
@@ -0,0 +1,5 @@
+{"manufacturer": "Porsche","model": "911","price": 135000,"wiki":"http://en.wikipedia.org/wiki/Porsche_997"}
+{"manufacturer": "Nissan","model": "GT-R","price": 80000,"wiki":"http://en.wikipedia.org/wiki/Nissan_Gt-r"}
+{"manufacturer": "BMW","model": "M3","price": 60500,"wiki":"http://en.wikipedia.org/wiki/Bmw_m3"}
+{"manufacturer": "Audi","model": "S5","price": 53000,"wiki":"http://en.wikipedia.org/wiki/Audi_S5#Audi_S5"}
+{"manufacturer": "Audi","model": "TT","price": 40000,"wiki":"http://en.wikipedia.org/wiki/Audi_TT"}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/bbf28661/etc/examples/notebooks/people.json
----------------------------------------------------------------------
diff --git a/etc/examples/notebooks/people.json b/etc/examples/notebooks/people.json
new file mode 100644
index 0000000..0168759
--- /dev/null
+++ b/etc/examples/notebooks/people.json
@@ -0,0 +1,3 @@
+{"name":"Michael"}
+{"name":"Andy", "age":30}
+{"name":"Justin", "age":19}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/bbf28661/etc/examples/notebooks/sqlcontext_sharing.ipynb
----------------------------------------------------------------------
diff --git a/etc/examples/notebooks/sqlcontext_sharing.ipynb b/etc/examples/notebooks/sqlcontext_sharing.ipynb
new file mode 100644
index 0000000..7213c19
--- /dev/null
+++ b/etc/examples/notebooks/sqlcontext_sharing.ipynb
@@ -0,0 +1,96 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create a DataFrame in Scala"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "val people = sqlContext.read.json(\"people.json\")\n",
+    "people.registerTempTable(\"people\")\n",
+    "people.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Read DataFrame in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%PySpark\n",
+    "people= sqlContext.table(\"people\")\n",
+    "people.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Create a DataFrame in Python"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "%%PySpark\n",
+    "cars = sqlContext.read.json(\"cars.json\")\n",
+    "cars.registerTempTable(\"cars\")\n",
+    "cars.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Read DataFrame in Scala"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "val cars = sqlContext.table(\"cars\")\n",
+    "cars.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Toree",
+   "language": "",
+   "name": "toree"
+  },
+  "language_info": {
+   "name": "scala"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}

http://git-wip-us.apache.org/repos/asf/incubator-toree/blob/bbf28661/pyspark-interpreter/src/main/resources/PySpark/pyspark_runner.py
----------------------------------------------------------------------
diff --git a/pyspark-interpreter/src/main/resources/PySpark/pyspark_runner.py b/pyspark-interpreter/src/main/resources/PySpark/pyspark_runner.py
index 22569ea..52cf927 100644
--- a/pyspark-interpreter/src/main/resources/PySpark/pyspark_runner.py
+++ b/pyspark-interpreter/src/main/resources/PySpark/pyspark_runner.py
@@ -69,12 +69,8 @@ java_import(gateway.jvm, "scala.Tuple2")
 
 
 sc = None
+sqlContext = None
 
-#jconf = bridge.sparkConf()
-#conf = SparkConf(_jvm = gateway.jvm, _jconf = jconf)
-#sc = SparkContext(jsc = jsc, gateway = gateway, conf = conf)
-#sqlc = SQLContext(sc, bridge.sqlContext())
-#sqlContext = sqlc
 kernel = bridge.kernel()
 
 class Logger(object):
@@ -122,11 +118,16 @@ while True :
 
     if sc is None:
       jsc = kernel.javaSparkContext()
-      if jsc != None:
+      if jsc is not None:
         jconf = kernel.sparkConf()
         conf = SparkConf(_jvm = gateway.jvm, _jconf = jconf)
         sc = SparkContext(jsc = jsc, gateway = gateway, conf = conf)
 
+    if sqlContext is None:
+      jsqlContext = kernel.sqlContext()
+      if jsqlContext is not None and sc is not None:
+        sqlContext = SQLContext(sc, sqlContext=jsqlContext)
+
     if final_code:
       compiled_code = compile(final_code, "<string>", "exec")
       #sc.setJobGroup(jobGroup, "Spark Kernel")


Mime
View raw message