carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From manishgupt...@apache.org
Subject carbondata git commit: [CARBONDATA-1893] Data load with multiple QUOTECHAR characters in syntax should fail
Date Sat, 16 Dec 2017 06:25:33 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 399db8f11 -> eb7cf54ef


[CARBONDATA-1893] Data load with multiple QUOTECHAR characters in syntax should fail

During a data load operation, if the quote character provided by the user has a length greater than 1,
then the data load should fail.

This closes #1653


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/eb7cf54e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/eb7cf54e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/eb7cf54e

Branch: refs/heads/master
Commit: eb7cf54ef352acf7c58372beddbedeb9698c48e6
Parents: 399db8f
Author: dhatchayani <dhatcha.official@gmail.com>
Authored: Wed Dec 13 16:54:49 2017 +0530
Committer: manishgupta88 <tomanishgupta18@gmail.com>
Committed: Sat Dec 16 11:58:40 2017 +0530

----------------------------------------------------------------------
 .../carbondata/core/enums/EscapeSequences.java  | 48 ++++++++++++
 .../src/test/resources/dataretention11.csv      | 13 ++++
 .../testsuite/dataload/TestLoadOptions.scala    | 80 ++++++++++++++++++++
 .../carbondata/spark/util/DataLoadingUtil.scala |  5 +-
 .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 26 ++++++-
 .../processing/util/CarbonLoaderUtil.java       | 35 ++++++++-
 6 files changed, 204 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb7cf54e/core/src/main/java/org/apache/carbondata/core/enums/EscapeSequences.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/enums/EscapeSequences.java b/core/src/main/java/org/apache/carbondata/core/enums/EscapeSequences.java
new file mode 100644
index 0000000..e1fcca7
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/enums/EscapeSequences.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.enums;
+
+public enum EscapeSequences {
+
+  NEW_LINE("\\n", '\n'), BACKSPACE("\\b", '\b'), TAB("\\t", '\t'), CARRIAGE_RETURN("\\r",
'\r');
+
+  /**
+   * name of the function
+   */
+  private String name;
+
+  /**
+   * unicode of the escapechar
+   */
+  private char escapeChar;
+
+
+  EscapeSequences(String name, char escapeChar) {
+    this.name = name;
+    this.escapeChar = escapeChar;
+  }
+
+  public String getName() {
+    return this.name;
+  }
+
+  public String getEscapeChar() {
+    return String.valueOf(this.escapeChar);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb7cf54e/integration/spark-common-test/src/test/resources/dataretention11.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/dataretention11.csv b/integration/spark-common-test/src/test/resources/dataretention11.csv
new file mode 100644
index 0000000..7cba62e
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/dataretention11.csv
@@ -0,0 +1,13 @@
+ID,date,country,name,phonetype,serialname,salary
+1,2015/7/23,ind,aaa1,phone197,"ASD69643
+a",15000
+2,2015/7/24,ind,aaa2,phone756,"ASD42892
+b",15001
+3,2015/7/25,ind,aaa3,phone1904,ASD37014,15002
+4,2015/7/26,ind,aaa4,phone2435,ASD66902,15003
+5,2015/7/27,ind,aaa5,phone2441,ASD90633,15004
+6,2015/7/28,ind,aaa6,phone294,ASD59961,15005
+7,2015/7/29,ind,aaa7,phone610,ASD14875,15006
+8,2015/7/30,ind,aaa8,phone1848,ASD57308,15007
+9,2015/7/18,ind,aaa9,phone706,ASD86717,15008
+10,2015/7/19,usa,aaa10,phone685,ASD30505,15009
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb7cf54e/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadOptions.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadOptions.scala
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadOptions.scala
new file mode 100644
index 0000000..d2c7e63
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadOptions.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.dataload
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
+
+class TestLoadOptions extends QueryTest with BeforeAndAfterAll{
+
+  override def beforeAll {
+    sql("drop table if exists TestLoadTableOptions")
+    sql("CREATE table TestLoadTableOptions (ID int, date String, country String, name String,"
+
+        "phonetype String, serialname String, salary int) stored by 'org.apache.carbondata.format'")
+  }
+
+  override def afterAll {
+    sql("drop table if exists TestLoadTableOptions")
+  }
+
+
+  test("test load data with more than one char in quotechar option") {
+    val errorMessage = intercept[MalformedCarbonCommandException] {
+      sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE " +
+          s"TestLoadTableOptions OPTIONS('QUOTECHAR'='\\\\')")
+    }.getMessage
+    assert(errorMessage.equals("QUOTECHAR cannot be more than one character."))
+  }
+
+  test("test load data with more than one char in commentchar option") {
+    val errorMessage = intercept[MalformedCarbonCommandException] {
+      sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE " +
+          s"TestLoadTableOptions OPTIONS('COMMENTCHAR'='##')")
+      assert(false)
+    }.getMessage
+    assert(errorMessage.equals("COMMENTCHAR cannot be more than one character."))
+  }
+
+  test("test load data with more than one char in escapechar option") {
+    val errorMessage = intercept[MalformedCarbonCommandException] {
+      sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE " +
+          s"TestLoadTableOptions OPTIONS('ESCAPECHAR'='\\\\')")
+      assert(false)
+    }.getMessage
+    assert(errorMessage.equals("ESCAPECHAR cannot be more than one character."))
+  }
+
+  test("test load data with invalid escape sequence in escapechar option") {
+    val errorMessage = intercept[MalformedCarbonCommandException] {
+      sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE " +
+          s"TestLoadTableOptions OPTIONS('ESCAPECHAR'='\\y')")
+    }.getMessage
+    assert(errorMessage.equals("ESCAPECHAR cannot be more than one character."))
+  }
+
+  test("test load data with with valid escape sequence in escapechar option") {
+    sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention11.csv' INTO TABLE " +
+        s"TestLoadTableOptions OPTIONS('ESCAPECHAR'='\\n')")
+    checkAnswer(sql("select * from TestLoadTableOptions where serialname='ASD69643a'"),
+      Row(1, "2015/7/23", "ind", "aaa1", "phone197", "ASD69643a", 15000))
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb7cf54e/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
index 69c9fe4..faba26d 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/DataLoadingUtil.scala
@@ -55,9 +55,12 @@ object DataLoadingUtil {
     optionsFinal.put("delimiter", options.getOrElse("delimiter", ","))
     optionsFinal.put("quotechar", options.getOrElse("quotechar", "\""))
     optionsFinal.put("fileheader", options.getOrElse("fileheader", ""))
-    optionsFinal.put("escapechar", options.getOrElse("escapechar", "\\"))
     optionsFinal.put("commentchar", options.getOrElse("commentchar", "#"))
     optionsFinal.put("columndict", options.getOrElse("columndict", null))
+
+    optionsFinal.put("escapechar",
+      CarbonLoaderUtil.getEscapeChar(options.getOrElse("escapechar", "\\")))
+
     optionsFinal.put(
       "serialization_null_format",
       options.getOrElse("serialization_null_format", "\\N"))

http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb7cf54e/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 9b3d969..1c0e58b 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -28,7 +28,6 @@ import scala.util.matching.Regex
 
 import org.apache.hadoop.hive.ql.lib.Node
 import org.apache.hadoop.hive.ql.parse._
-import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.util.CarbonException
@@ -42,6 +41,7 @@ import org.apache.carbondata.core.metadata.schema.PartitionInfo
 import org.apache.carbondata.core.metadata.schema.partition.PartitionType
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
+import org.apache.carbondata.processing.util.CarbonLoaderUtil
 import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
 import org.apache.carbondata.spark.util.{CommonUtil, DataTypeConverterUtil}
 
@@ -873,6 +873,30 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser
{
       throw new MalformedCarbonCommandException(errorMessage)
     }
 
+    // Validate QUOTECHAR length
+    if (options.exists(_._1.equalsIgnoreCase("QUOTECHAR"))) {
+      val quoteChar: String = options.get("quotechar").get.head._2
+      if (quoteChar.length > 1 ) {
+        throw new MalformedCarbonCommandException("QUOTECHAR cannot be more than one character.")
+      }
+    }
+
+    // Validate COMMENTCHAR length
+    if (options.exists(_._1.equalsIgnoreCase("COMMENTCHAR"))) {
+      val commentChar: String = options.get("commentchar").get.head._2
+      if (commentChar.length > 1) {
+        throw new MalformedCarbonCommandException("COMMENTCHAR cannot be more than one character.")
+      }
+    }
+
+    // Validate ESCAPECHAR length
+    if (options.exists(_._1.equalsIgnoreCase("ESCAPECHAR"))) {
+      val escapechar: String = options.get("escapechar").get.head._2
+      if (escapechar.length > 1 && !CarbonLoaderUtil.isValidEscapeSequence(escapechar))
{
+        throw new MalformedCarbonCommandException("ESCAPECHAR cannot be more than one character.")
+      }
+    }
+
     //  COLUMNDICT and ALL_DICTIONARY_PATH can not be used together.
     if (options.exists(_._1.equalsIgnoreCase("COLUMNDICT")) &&
         options.exists(_._1.equalsIgnoreCase("ALL_DICTIONARY_PATH"))) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb7cf54e/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
index 9e6a73e..6ac8307 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java
@@ -24,7 +24,17 @@ import java.net.InetAddress;
 import java.net.UnknownHostException;
 import java.nio.charset.Charset;
 import java.text.SimpleDateFormat;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
@@ -58,9 +68,11 @@ import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
 import org.apache.carbondata.processing.merger.NodeBlockRelation;
 import org.apache.carbondata.processing.merger.NodeMultiBlockRelation;
+import static org.apache.carbondata.core.enums.EscapeSequences.*;
 
 import com.google.gson.Gson;
 
+
 public final class CarbonLoaderUtil {
 
   private static final LogService LOGGER =
@@ -329,6 +341,27 @@ public final class CarbonLoaderUtil {
     return date;
   }
 
+  public static boolean isValidEscapeSequence(String escapeChar) {
+    return escapeChar.equalsIgnoreCase(NEW_LINE.getName()) ||
+        escapeChar.equalsIgnoreCase(CARRIAGE_RETURN.getName()) ||
+        escapeChar.equalsIgnoreCase(TAB.getName()) ||
+        escapeChar.equalsIgnoreCase(BACKSPACE.getName());
+  }
+
+  public static String getEscapeChar(String escapeCharacter) {
+    if (escapeCharacter.equalsIgnoreCase(NEW_LINE.getName())) {
+      return NEW_LINE.getEscapeChar();
+    } else if (escapeCharacter.equalsIgnoreCase(BACKSPACE.getName())) {
+      return BACKSPACE.getEscapeChar();
+    } else if (escapeCharacter.equalsIgnoreCase(TAB.getName())) {
+      return TAB.getEscapeChar();
+    } else if (escapeCharacter.equalsIgnoreCase(CARRIAGE_RETURN.getName())) {
+      return CARRIAGE_RETURN.getEscapeChar();
+    }
+    return escapeCharacter;
+  }
+
+
   public static Dictionary getDictionary(DictionaryColumnUniqueIdentifier columnIdentifier)
       throws IOException {
     Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache =


Mime
View raw message