carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ravipes...@apache.org
Subject [4/9] incubator-carbondata git commit: Create DataFrame example in example/spark2, read carbon data to dataframe
Date Fri, 17 Feb 2017 14:01:28 GMT
Create DataFrame example in example/spark2, read carbon data to dataframe

Create DataFrame example in example/spark2, read carbon data to dataframe

Create DataFrame example in example/spark2, read carbon data to dataframe

Create CarbonDataFrameExample in example/spark2

fix scalastyle

trigger travis ci


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ec4ec129
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ec4ec129
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ec4ec129

Branch: refs/heads/branch-1.0
Commit: ec4ec1294de330d85e1ea9b7422bb161c19905c3
Parents: b79ec78
Author: chenliang613 <chenliang613@huawei.com>
Authored: Wed Feb 8 00:06:29 2017 -0500
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Fri Feb 17 19:28:20 2017 +0530

----------------------------------------------------------------------
 .../examples/CarbonDataFrameExample.scala       | 89 ++++++++++++++++++++
 1 file changed, 89 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ec4ec129/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonDataFrameExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonDataFrameExample.scala
b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonDataFrameExample.scala
new file mode 100644
index 0000000..e4d1646
--- /dev/null
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonDataFrameExample.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.examples
+
+import java.io.File
+
+import org.apache.spark.sql.{SaveMode, SparkSession}
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+// scalastyle:off println
+object CarbonDataFrameExample {
+
+  /**
+   * Example: writes a Spark DataFrame into a CarbonData table, reads it
+   * back with an explicit schema, and runs a few DataFrame operations.
+   */
+  def main(args: Array[String]): Unit = {
+    // Resolve the project root from the classpath location of this class.
+    val rootPath = new File(this.getClass.getResource("/").getPath
+                            + "../../../..").getCanonicalPath
+    val storeLocation = s"$rootPath/examples/spark2/target/store"
+    val warehouse = s"$rootPath/examples/spark2/target/warehouse"
+    val metastoredb = s"$rootPath/examples/spark2/target"
+
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+
+    import org.apache.spark.sql.CarbonSession._
+    val spark = SparkSession
+      .builder()
+      .master("local")
+      .appName("CarbonDataFrameExample")
+      .config("spark.sql.warehouse.dir", warehouse)
+      .getOrCreateCarbonSession(storeLocation, metastoredb)
+
+    spark.sparkContext.setLogLevel("ERROR")
+
+    try {
+      // Build a small DataFrame: 100 rows of ("a", "b", n).
+      import spark.implicits._
+      val df = spark.sparkContext.parallelize(1 to 100)
+        .map(x => ("a", "b", x))
+        .toDF("c1", "c2", "number")
+
+      // Saves dataframe to carbondata file
+      df.write
+        .format("carbondata")
+        .option("tableName", "carbon_table")
+        .option("compress", "true")
+        .option("tempCSV", "false")
+        .mode(SaveMode.Overwrite)
+        .save()
+
+      spark.sql(""" SELECT * FROM carbon_table """).show()
+
+      // Specify schema for reading the table back as a DataFrame
+      import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
+      val customSchema = StructType(Array(
+        StructField("c1", StringType),
+        StructField("c2", StringType),
+        StructField("number", IntegerType)))
+
+      // Reads carbondata to dataframe
+      val carbondf = spark.read
+        .format("carbondata")
+        .schema(customSchema)
+        .option("tableName", "carbon_table")
+        .load()
+
+      // Dataframe operations
+      carbondf.printSchema()
+      carbondf.select($"c1", $"number" + 10).show()
+      carbondf.filter($"number" > 31).show()
+
+      spark.sql("DROP TABLE IF EXISTS carbon_table")
+    } finally {
+      // Release the SparkSession even if an example step fails.
+      spark.stop()
+    }
+  }
+}
+// scalastyle:on println


Mime
View raw message