DRILL-4919: Fix select count(1) / count(*) on csv with header
This closes #714
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34969583
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34969583
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34969583
Branch: refs/heads/master
Commit: 34969583bfab410c80cb14a1c20249f097d5f7a7
Parents: 535623b
Author: Arina Ielchiieva <arina.yelchiyeva@gmail.com>
Authored: Thu Dec 29 15:42:53 2016 +0000
Committer: Parth Chandra <parthc@apache.org>
Committed: Fri Jan 13 17:46:13 2017 -0800
----------------------------------------------------------------------
.../compliant/CompliantTextRecordReader.java | 18 +++++++++++++++-
.../drill/exec/store/text/TestCsvHeader.java | 22 ++++++++++++++++++--
2 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index d324270..ac4abb9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.easy.text.compliant;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.univocity.parsers.common.TextParsingException;
import io.netty.buffer.DrillBuf;
@@ -51,8 +52,12 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CompliantTextRecordReader.class);
private static final int MAX_RECORDS_PER_BATCH = 8096;
- static final int READ_BUFFER = 1024*1024;
+ private static final int READ_BUFFER = 1024*1024;
private static final int WHITE_SPACE_BUFFER = 64*1024;
+ // When no named column is required, ask SCAN to return a DEFAULT column.
+ // If such a column does not exist, it will be returned as a nullable-int column.
+ private static final List<SchemaPath> DEFAULT_NAMED_TEXT_COLS_TO_READ =
+ ImmutableList.of(SchemaPath.getSimplePath("_DEFAULT_COL_TO_READ_"));
// settings to be used while parsing
private TextParsingSettings settings;
@@ -89,8 +94,19 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
return super.isStarQuery();
}
+ /**
+ * Returns the list of default columns to read in place of an empty list of columns.
+ * For text files without headers, returns "columns[0]".
+ * Text files with headers do not support the columns syntax,
+ * so when header extraction is enabled, returns the fake named column "_DEFAULT_COL_TO_READ_".
+ *
+ * @return list of default columns to read
+ */
@Override
protected List<SchemaPath> getDefaultColumnsToRead() {
+ if (settings.isHeaderExtractionEnabled()) {
+ return DEFAULT_NAMED_TEXT_COLS_TO_READ;
+ }
return DEFAULT_TEXT_COLS_TO_READ;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
index a2e548b..cf54bb0 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
@@ -17,6 +17,7 @@
*/
package org.apache.drill.exec.store.text;
+import com.google.common.collect.Lists;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.TestBuilder;
import org.apache.drill.common.util.FileUtils;
@@ -24,14 +25,14 @@ import org.apache.drill.common.util.FileUtils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
+import java.util.List;
import org.junit.Before;
import org.junit.Test;
public class TestCsvHeader extends BaseTestQuery{
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCsvHeader.class);
- String root;
+ private String root;
@Before
public void initialize() throws Exception {
@@ -185,4 +186,21 @@ public class TestCsvHeader extends BaseTestQuery{
}
builder.go();
}
+
+ @Test
+ public void testCountOnCsvWithHeader() throws Exception {
+ final String query = "select count(%s) as cnt from %s.`%s`";
+ final List<Object> options = Lists.<Object>newArrayList("*", 1, "'A'");
+
+ for (Object option : options) {
+ testBuilder()
+ .sqlQuery(query, option, TEMP_SCHEMA, root)
+ .unOrdered()
+ .baselineColumns("cnt")
+ .baselineValues(4L)
+ .build()
+ .run();
+ }
+ }
+
}
|