carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chenliang...@apache.org
Subject carbondata git commit: [CARBONDATA-1779] GenericVectorizedReader
Date Fri, 24 Nov 2017 02:53:33 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master b0b7fc1a5 -> f1463ed13


[CARBONDATA-1779] GenericVectorizedReader

This closes #1538


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f1463ed1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f1463ed1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f1463ed1

Branch: refs/heads/master
Commit: f1463ed13bef7df637114ddd15d820e5b0cb0084
Parents: b0b7fc1
Author: Jin Zhou <xaprice@yeah.net>
Authored: Tue Nov 21 22:57:58 2017 +0800
Committer: chenliang613 <chenliang613@huawei.com>
Committed: Fri Nov 24 10:53:18 2017 +0800

----------------------------------------------------------------------
 .../examples/DataManagementExample.scala        | 106 +++++++++++++++++++
 1 file changed, 106 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f1463ed1/examples/spark2/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
new file mode 100644
index 0000000..1447a1a
--- /dev/null
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.examples
+
+import java.io.File
+
+object DataManagementExample {
+
+  // Walks through CarbonData segment management on a single table:
+  // create, load (5 segments), show/delete segments, major compaction,
+  // time-based segment deletion, and CLEAN FILES garbage collection.
+  def main(args: Array[String]) {
+    val spark = ExampleUtils.createCarbonSession("DataManagementExample")
+    spark.sparkContext.setLogLevel("WARN")
+
+    spark.sql("DROP TABLE IF EXISTS carbon_table")
+
+    // Create table
+    spark.sql(
+      s"""
+         | CREATE TABLE IF NOT EXISTS carbon_table(
+         | ID Int,
+         | date Date,
+         | country String,
+         | name String,
+         | phonetype String,
+         | serialname String,
+         | salary Int,
+         | floatField float
+         | ) STORED BY 'carbondata'
+       """.stripMargin)
+
+    // Resolve the sample CSV relative to the compiled classes directory
+    // (examples/spark2/target/classes -> repository root).
+    val rootPath = new File(this.getClass.getResource("/").getPath
+      + "../../../..").getCanonicalPath
+    val path = s"$rootPath/examples/spark2/src/main/resources/dataSample.csv"
+
+    // load data 5 times, each load of data is called a segment in CarbonData
+    // scalastyle:off
+    (1 to 5).foreach(_ => spark.sql(
+      s"""
+         | LOAD DATA LOCAL INPATH '$path'
+         | INTO TABLE carbon_table
+         | OPTIONS('HEADER'='true')
+       """.stripMargin))
+    // scalastyle:on
+
+    // show all segments, there will be 5 segments
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table").show()
+
+    // 50 rows loaded
+    spark.sql("SELECT count(*) FROM carbon_table").show()
+
+    // delete the first segment
+    spark.sql("DELETE FROM TABLE carbon_table WHERE SEGMENT.ID IN (0)")
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table").show()
+
+    // this query will be executed on last 4 segments, it should return 40 rows
+    spark.sql("SELECT count(*) FROM carbon_table").show()
+
+    // force a major compaction to compact all segments into one
+    spark.sql("ALTER TABLE carbon_table COMPACT 'MAJOR'")
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table").show()
+
+    // load again, add another 10 rows
+    spark.sql(
+      s"""
+         | LOAD DATA LOCAL INPATH '$path'
+         | INTO TABLE carbon_table
+         | OPTIONS('HEADER'='true')
+       """.stripMargin)
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table").show()
+
+    // this query will be executed on 2 segments, it should return 50 rows
+    spark.sql("SELECT count(*) FROM carbon_table").show()
+
+    // delete all segments whose loading time is before '2099-01-01 01:00:00'
+    // (the SQL below is a single statement; it was previously line-wrapped
+    // mid-string-literal by the mail archiver, which is invalid Scala)
+    spark.sql("DELETE FROM TABLE carbon_table WHERE SEGMENT.STARTTIME BEFORE '2099-01-01 01:00:00'")
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table ").show()
+
+    // this query will be executed on 0 segments, it should return 0 rows
+    spark.sql("SELECT count(*) FROM carbon_table").show()
+
+    // force clean up all 'MARKED_FOR_DELETE' and 'COMPACTED' segments immediately
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table").show()
+    spark.sql("CLEAN FILES FOR TABLE carbon_table")
+    spark.sql("SHOW SEGMENTS FOR TABLE carbon_table").show()
+
+    // Drop table
+    spark.sql("DROP TABLE IF EXISTS carbon_table")
+
+    spark.stop()
+  }
+
+}


Mime
View raw message