carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jack...@apache.org
Subject [carbondata] branch master updated: [CARBONDATA-3627] C++ SDK support write data withSchemaFile
Date Wed, 15 Jan 2020 08:06:47 GMT
This is an automated email from the ASF dual-hosted git repository.

jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new f40b349  [CARBONDATA-3627] C++ SDK support write data withSchemaFile
f40b349 is described below

commit f40b34901a5c4d80734dd2ef28c2191e6c5d12c8
Author: xubo245 <601450868@qq.com>
AuthorDate: Mon Dec 23 00:47:51 2019 +0800

    [CARBONDATA-3627] C++ SDK support write data withSchemaFile
    
    C++ SDK supports writing data with withSchemaFile, which can be used to add a segment to a transactional table
    
    This closes #3526
---
 store/CSDK/src/CarbonWriter.cpp |  40 +++++++++++++++
 store/CSDK/src/CarbonWriter.h   |  13 +++++
 store/CSDK/test/main.cpp        | 105 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+)

diff --git a/store/CSDK/src/CarbonWriter.cpp b/store/CSDK/src/CarbonWriter.cpp
index 243e533..bad31d6 100644
--- a/store/CSDK/src/CarbonWriter.cpp
+++ b/store/CSDK/src/CarbonWriter.cpp
@@ -90,6 +90,12 @@ void CarbonWriter::sortBy(int argc, char **argv) {
     }
 }
 
+/**
+ * configure the schema with json style schema
+ *
+ * @param jsonSchema json style schema
+ * @return updated CarbonWriterBuilder
+ */
 void CarbonWriter::withCsvInput(char *jsonSchema) {
     if (jsonSchema == NULL) {
         throw std::runtime_error("jsonSchema parameter can't be NULL.");
@@ -110,6 +116,20 @@ void CarbonWriter::withCsvInput(char *jsonSchema) {
     }
 };
 
+/**
+ * configure the writer to accept csv input without passing a json schema
+ *
+ * Invokes the no-argument withCsvInput() of the java CarbonWriterBuilder and
+ * keeps the builder object returned by that call.
+ *
+ * @throws std::runtime_error if the java method can't be found
+ * @throws jthrowable if the java call raises an exception
+ */
+void CarbonWriter::withCsvInput() {
+    checkBuilder();
+    jclass builderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject);
+    jmethodID withCsvInputMethod = jniEnv->GetMethodID(
+        builderClass, "withCsvInput",
+        "()Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;");
+    if (withCsvInputMethod == NULL) {
+        throw std::runtime_error("Can't find the method in java: withCsvInput");
+    }
+    // The java method returns a CarbonWriterBuilder; it becomes the current builder.
+    carbonWriterBuilderObject = jniEnv->CallObjectMethod(carbonWriterBuilderObject,
+                                                         withCsvInputMethod);
+    if (jniEnv->ExceptionCheck()) {
+        // Hand the pending java exception to the caller as a jthrowable.
+        throw jniEnv->ExceptionOccurred();
+    }
+}
+
 void CarbonWriter::withHadoopConf(char *key, char *value) {
     if (key == NULL) {
         throw std::runtime_error("key parameter can't be NULL.");
@@ -271,6 +291,26 @@ void CarbonWriter::withBlockletSize(int blockletSize) {
     }
 }
 
+/**
+ * To set the path of carbon schema file
+ *
+ * @param schemaFilePath The path of carbon schema file, can't be NULL
+ * @throws std::runtime_error if schemaFilePath is NULL or the java method can't be found
+ * @throws jthrowable if creating the java string or the java call raises an exception
+ */
+void CarbonWriter::withSchemaFile(char *schemaFilePath) {
+    if (schemaFilePath == NULL) {
+        throw std::runtime_error("schemaFilePath parameter can't be NULL.");
+    }
+    checkBuilder();
+    jclass carbonWriterBuilderClass = jniEnv->GetObjectClass(carbonWriterBuilderObject);
+    jmethodID methodID = jniEnv->GetMethodID(carbonWriterBuilderClass, "withSchemaFile",
+                                             "(Ljava/lang/String;)Lorg/apache/carbondata/sdk/file/CarbonWriterBuilder;");
+    if (methodID == NULL) {
+        throw std::runtime_error("Can't find the method in java: withSchemaFile");
+    }
+    jstring jSchemaFilePath = jniEnv->NewStringUTF(schemaFilePath);
+    if (jSchemaFilePath == NULL) {
+        // NewStringUTF returns NULL with a java exception pending when the string
+        // can't be constructed; no further java call may be made in that state.
+        throw jniEnv->ExceptionOccurred();
+    }
+    jvalue args[1];
+    args[0].l = jSchemaFilePath;
+    carbonWriterBuilderObject = jniEnv->CallObjectMethodA(carbonWriterBuilderObject, methodID, args);
+    // The argument is not needed after the call; release the local reference so that
+    // repeated calls don't accumulate references. DeleteLocalRef may be called
+    // even while a java exception is pending.
+    jniEnv->DeleteLocalRef(jSchemaFilePath);
+    if (jniEnv->ExceptionCheck()) {
+        throw jniEnv->ExceptionOccurred();
+    }
+}
+
 void CarbonWriter::localDictionaryThreshold(int localDictionaryThreshold) {
     if (localDictionaryThreshold < 1) {
         throw std::runtime_error("localDictionaryThreshold parameter should be positive number.");
diff --git a/store/CSDK/src/CarbonWriter.h b/store/CSDK/src/CarbonWriter.h
index fdb0bc6..afb695f 100644
--- a/store/CSDK/src/CarbonWriter.h
+++ b/store/CSDK/src/CarbonWriter.h
@@ -96,6 +96,13 @@ public:
     void withCsvInput(char *jsonSchema);
 
     /**
+     * configure the schema
+     *
+     * @return updated CarbonWriterBuilder
+     */
+    void withCsvInput( );
+
+    /**
     * configure parameter, including ak,sk and endpoint
     *
     * @param key key word
@@ -188,6 +195,12 @@ public:
     void withBlockletSize(int blockletSize);
 
     /**
+     * To set the path of carbon schema file
+     * @param schemaFilePath The path of carbon schema file
+     */
+    void withSchemaFile(char *schemaFilePath);
+
+    /**
      * @param localDictionaryThreshold is localDictionaryThreshold, default is 10000
      * @return updated CarbonWriterBuilder
      */
diff --git a/store/CSDK/test/main.cpp b/store/CSDK/test/main.cpp
index 5128c4b..2e1b5e5 100644
--- a/store/CSDK/test/main.cpp
+++ b/store/CSDK/test/main.cpp
@@ -740,6 +740,110 @@ bool testWriteData(JNIEnv *env, char *path, int argc, char *argv[]) {
     }
 }
 
+/**
+ * test write data with a carbon schema file, then read the written data back
+ *
+ * @param env  jni env
+ * @param path file path the data is written to and read from
+ * @param argc argument counter
+ * @param argv argument vector; when argc > 3, argv[1], argv[2] and argv[3] are used as
+ *             the s3a access key, secret key and endpoint
+ * @return true if no java exception was caught, false otherwise
+ */
+bool testWriteDataWithSchemaFile(JNIEnv *env, char *path, int argc, char *argv[]) {
+
+    CarbonReader carbonReader;
+    CarbonWriter writer;
+    bool succeeded = true;
+    try {
+        writer.builder(env);
+        writer.outputPath(path);
+        writer.withCsvInput();
+        writer.withSchemaFile("../../../integration/spark-common/target/warehouse/add_segment_test/Metadata/schema");
+        writer.writtenBy("CSDK");
+        writer.taskNo(15541554.81);
+        writer.withThreadSafe(1);
+        writer.uniqueIdentifier(1549911814000000);
+        writer.withBlockSize(1);
+        writer.withBlockletSize(16);
+        writer.enableLocalDictionary(true);
+        writer.localDictionaryThreshold(10000);
+        if (argc > 3) {
+            writer.withHadoopConf("fs.s3a.access.key", argv[1]);
+            writer.withHadoopConf("fs.s3a.secret.key", argv[2]);
+            writer.withHadoopConf("fs.s3a.endpoint", argv[3]);
+        }
+        writer.build();
+
+        int rowNum = 10;
+        jclass objClass = env->FindClass("java/lang/String");
+        for (int i = 0; i < rowNum; ++i) {
+            // Buffers are large enough for any int; snprintf never writes past them.
+            char ctrInt[12];
+            snprintf(ctrInt, sizeof(ctrInt), "%d", i);
+            char a[24];
+            snprintf(a, sizeof(a), "robot%d", i);
+
+            // Values of one row, listed in the order of their array index.
+            const char *values[] = {
+                ctrInt, a, a, "2019-02-12 03:03:34",
+                ctrInt, a, ctrInt, a,
+                ctrInt, "2019-02-12 03:03:34", " 2019-03-02", ctrInt,
+                ctrInt, ctrInt
+            };
+            const int size = sizeof(values) / sizeof(values[0]);
+
+            jobjectArray arr = env->NewObjectArray(size, objClass, 0);
+            for (int col = 0; col < size; ++col) {
+                jobject field = env->NewStringUTF(values[col]);
+                env->SetObjectArrayElement(arr, col, field);
+                // The array holds its own reference, so the local one can be released
+                // right away; this keeps every field from leaking a reference per row.
+                env->DeleteLocalRef(field);
+            }
+
+            writer.write(arr);
+            env->DeleteLocalRef(arr);
+        }
+
+        carbonReader.builder(env, path);
+        carbonReader.build();
+        int i = 0;
+        int printNum = 10;
+        CarbonRow carbonRow(env);
+        while (carbonReader.hasNext()) {
+            jobject row = carbonReader.readNextRow();
+            i++;
+            carbonRow.setCarbonRow(row);
+            if (i < printNum) {
+                // Exactly one value is passed, so the format has exactly one conversion.
+                printf("%s\n", carbonRow.getString(1));
+            }
+            env->DeleteLocalRef(row);
+        }
+    } catch (jthrowable ex) {
+        env->ExceptionDescribe();
+        env->ExceptionClear();
+        succeeded = false;
+    }
+    // C++ has no finally block: these calls simply follow the try/catch and are
+    // reached on both the normal path and after a caught java exception.
+    carbonReader.close();
+    writer.close();
+    return succeeded;
+}
+
 void writeData(JNIEnv *env, CarbonWriter writer, int size, jclass objClass, char *stringField, short shortField) {
     jobjectArray arr = env->NewObjectArray(size, objClass, 0);
 
@@ -932,6 +1036,7 @@ int main(int argc, char *argv[]) {
         testValidateEscapeCharWithImproperValue(env, "./test");
         testWriteData(env, "./data", 1, argv);
         testWriteData(env, "./dataLoadOption", 1, argv);
+        testWriteDataWithSchemaFile(env, "./data122301", 1, argv);
         readFromLocalWithoutProjection(env, smallFilePath);
         readFromLocalWithProjection(env, smallFilePath);
         testWithTableProperty(env, "./dataProperty", 1, argv);


Mime
View raw message