carbondata-commits mailing list archives

From ravipes...@apache.org
Subject [1/2] incubator-carbondata git commit: Fix wrong comments of load data in CarbonDataRDDFactory.scala
Date Sun, 15 Jan 2017 16:53:05 GMT
Repository: incubator-carbondata
Updated Branches:
  refs/heads/master e35001e18 -> 11c2b33c2


Fix wrong comments of load data in CarbonDataRDDFactory.scala

Also fix the Apache Thrift download address in the build instructions (build/README.md)


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/2ec55278
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/2ec55278
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/2ec55278

Branch: refs/heads/master
Commit: 2ec5527852ca2b92e1811f1e6c51b49b5056cbef
Parents: e35001e
Author: chenliang613 <chenliang613@huawei.com>
Authored: Fri Jan 13 17:12:55 2017 +0800
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Sun Jan 15 22:21:36 2017 +0530

----------------------------------------------------------------------
 build/README.md                                 |  4 ++--
 .../spark/rdd/CarbonDataLoadRDD.scala           |  8 ++++----
 .../spark/rdd/CarbonDataRDDFactory.scala        | 20 ++++++++------------
 .../execution/command/carbonTableSchema.scala   |  6 +++---
 .../spark/rdd/CarbonDataRDDFactory.scala        | 20 ++++++++------------
 5 files changed, 25 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2ec55278/build/README.md
----------------------------------------------------------------------
diff --git a/build/README.md b/build/README.md
index 9764cdc..a150a90 100644
--- a/build/README.md
+++ b/build/README.md
@@ -24,10 +24,10 @@
 * Git
 * [Apache Maven (Recommend version 3.3 or later)](https://maven.apache.org/download.cgi)
 * [Oracle Java 7 or 8](http://www.oracle.com/technetwork/java/javase/downloads/index.html)
-* [Apache Thrift 0.9.3](https://thrift.apache.org/download)
+* [Apache Thrift 0.9.3](http://archive.apache.org/dist/thrift/0.9.3/)
 
 ## Build release version
-Note:Need install Apache Thrift
+Note:Need install Apache Thrift 0.9.3
 ```
 mvn clean -DskipTests -Pbuild-with-format -Pspark-1.6 install
 ```

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2ec55278/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala
index 14a0930..ff3a174 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala
@@ -172,7 +172,7 @@ class SparkPartitionLoader(model: CarbonLoadModel,
  * @param carbonLoadModel       Carbon load model which contain the load info
  * @param storePath             The store location
  * @param kettleHomePath        The kettle home path
- * @param columinar             whether it is columinar
+ * @param columnar             whether it is columnar
  * @param loadCount             Current load count
  * @param tableCreationTime     Time of creating table
  * @param schemaLastUpdatedTime Time of last schema update
@@ -187,7 +187,7 @@ class DataFileLoaderRDD[K, V](
     carbonLoadModel: CarbonLoadModel,
     storePath: String,
     kettleHomePath: String,
-    columinar: Boolean,
+    columnar: Boolean,
     loadCount: Integer,
     tableCreationTime: Long,
     schemaLastUpdatedTime: Long,
@@ -475,7 +475,7 @@ class DataFileLoaderRDD[K, V](
  * @param carbonLoadModel
  * @param storePath
  * @param kettleHomePath
- * @param columinar
+ * @param columnar
  * @param loadCount
  * @param tableCreationTime
  * @param schemaLastUpdatedTime
@@ -489,7 +489,7 @@ class DataFrameLoaderRDD[K, V](
     carbonLoadModel: CarbonLoadModel,
     storePath: String,
     kettleHomePath: String,
-    columinar: Boolean,
+    columnar: Boolean,
     loadCount: Integer,
     tableCreationTime: Long,
     schemaLastUpdatedTime: Long,

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2ec55278/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
index cede7b1..7a7aa64 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
@@ -539,13 +539,14 @@ object CarbonDataRDDFactory {
           }
         } else {
           /*
-         * when data load handle by node partition
-         * 1)clone the hadoop configuration,and set the file path to the configuration
-         * 2)use NewHadoopRDD to get split,size:Math.max(minSize, Math.min(maxSize, blockSize))
-         * 3)use DummyLoadRDD to group blocks by host,and let spark balance the block location
-         * 4)DummyLoadRDD output (host,Array[BlockDetails])as the parameter to CarbonDataLoadRDD
-         *   which parititon by host
-         */
+           * when data load handle by node partition
+           * 1)clone the hadoop configuration,and set the file path to the configuration
+           * 2)use org.apache.hadoop.mapreduce.lib.input.TextInputFormat to get splits,size info
+           * 3)use CarbonLoaderUtil.nodeBlockMapping to get mapping info of node and block,
+           *   for locally writing carbondata files(one file one block) in nodes
+           * 4)use kettle: use DataFileLoaderRDD to load data and write to carbondata files
+           *   non kettle: use NewCarbonDataLoadRDD to load data and write to carbondata files
+           */
           val hadoopConfiguration = new Configuration(sqlContext.sparkContext.hadoopConfiguration)
           // FileUtils will skip file which is no csv, and return all file path which split by ','
           val filePaths = carbonLoadModel.getFactFilePath
@@ -559,11 +560,6 @@ object CarbonDataRDDFactory {
           CommonUtil.configSplitMaxSize(sqlContext.sparkContext, filePaths, hadoopConfiguration)
 
           val inputFormat = new org.apache.hadoop.mapreduce.lib.input.TextInputFormat
-          inputFormat match {
-            case configurable: Configurable =>
-              configurable.setConf(hadoopConfiguration)
-            case _ =>
-          }
           val jobContext = new Job(hadoopConfiguration)
           val rawSplits = inputFormat.getSplits(jobContext).toArray
           val blockList = rawSplits.map { inputSplit =>
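
To make the rewritten comment above concrete, here is a minimal standalone Scala sketch (not part of this commit) of steps 1)-3): build a Hadoop configuration, let the new-API TextInputFormat compute the input splits, and group them by preferred host. The object name SplitsByHost is illustrative only, and the grouping is a simplification of the balanced node-to-block assignment that CarbonLoaderUtil.nodeBlockMapping performs in the real load path.

```
import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit, TextInputFormat}

// Illustrative sketch: list TextInputFormat splits for the given CSV paths and group
// them by their first preferred host.
object SplitsByHost {
  def main(args: Array[String]): Unit = {
    // 1) a fresh Hadoop configuration (the real code clones sparkContext.hadoopConfiguration)
    val hadoopConfiguration = new Configuration()
    val jobContext = Job.getInstance(hadoopConfiguration)
    args.foreach(path => FileInputFormat.addInputPath(jobContext, new Path(path)))

    // 2) splits and their size info come from the new-API TextInputFormat
    val inputFormat = new TextInputFormat
    val rawSplits = inputFormat.getSplits(jobContext).asScala

    // 3) simplified host -> block grouping; CarbonLoaderUtil.nodeBlockMapping additionally
    //    balances the number of blocks per node, which this sketch does not attempt
    val blocksByHost = rawSplits
      .collect { case split: FileSplit =>
        val host = split.getLocations.headOption.getOrElse("localhost")
        host -> s"${split.getPath}:${split.getStart}+${split.getLength}"
      }
      .groupBy(_._1)

    blocksByHost.foreach { case (host, blocks) =>
      println(s"$host -> ${blocks.map(_._2).mkString(", ")}")
    }
  }
}
```

In the actual code path the per-host block lists then become the partitions consumed by DataFileLoaderRDD (kettle) or NewCarbonDataLoadRDD (non kettle), so each node writes carbondata files from locally resident blocks, as described in step 4).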

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2ec55278/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 0f70aae..33ac4e2 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -387,7 +387,7 @@ case class LoadTable(
                               relation.tableMeta.carbonTableIdentifier.getTableName + "/"
 
 
-      val columinar = sqlContext.getConf("carbon.is.columnar.storage", "true").toBoolean
+      val columnar = sqlContext.getConf("carbon.is.columnar.storage", "true").toBoolean
 
       // TODO It will be removed after kettle is removed.
       val useKettle = options.get("use_kettle") match {
@@ -534,7 +534,7 @@ case class LoadTable(
             carbonLoadModel,
             relation.tableMeta.storePath,
             kettleHomePath,
-            columinar,
+            columnar,
             partitionStatus,
             useKettle,
             result,
@@ -581,7 +581,7 @@ case class LoadTable(
             carbonLoadModel,
             relation.tableMeta.storePath,
             kettleHomePath,
-            columinar,
+            columnar,
             partitionStatus,
             useKettle,
             result,

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/2ec55278/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
index df06138..89b9d40 100644
--- a/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
+++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataRDDFactory.scala
@@ -562,13 +562,14 @@ object CarbonDataRDDFactory {
           }
         } else {
           /*
-         * when data load handle by node partition
-         * 1)clone the hadoop configuration,and set the file path to the configuration
-         * 2)use NewHadoopRDD to get split,size:Math.max(minSize, Math.min(maxSize, blockSize))
-         * 3)use DummyLoadRDD to group blocks by host,and let spark balance the block location
-         * 4)DummyLoadRDD output (host,Array[BlockDetails])as the parameter to CarbonDataLoadRDD
-         *   which parititon by host
-         */
+           * when data load handle by node partition
+           * 1)clone the hadoop configuration,and set the file path to the configuration
+           * 2)use org.apache.hadoop.mapreduce.lib.input.TextInputFormat to get splits,size info
+           * 3)use CarbonLoaderUtil.nodeBlockMapping to get mapping info of node and block,
+           *   for locally writing carbondata files(one file one block) in nodes
+           * 4)use kettle: use DataFileLoaderRDD to load data and write to carbondata files
+           *   non kettle: use NewCarbonDataLoadRDD to load data and write to carbondata files
+           */
           val hadoopConfiguration = new Configuration(sqlContext.sparkContext.hadoopConfiguration)
           // FileUtils will skip file which is no csv, and return all file path which split by ','
           val filePaths = carbonLoadModel.getFactFilePath
@@ -582,11 +583,6 @@ object CarbonDataRDDFactory {
           CommonUtil.configSplitMaxSize(sqlContext.sparkContext, filePaths, hadoopConfiguration)
 
           val inputFormat = new org.apache.hadoop.mapreduce.lib.input.TextInputFormat
-          inputFormat match {
-            case configurable: Configurable =>
-              configurable.setConf(hadoopConfiguration)
-            case _ =>
-          }
           val jobContext = new Job(hadoopConfiguration)
           val rawSplits = inputFormat.getSplits(jobContext).toArray
           val blockList = rawSplits.map { inputSplit =>

