orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject orc git commit: ORC-233 Allow `orc.include.columns` to be empty
Date Tue, 29 Aug 2017 17:07:09 GMT
Repository: orc
Updated Branches:
  refs/heads/branch-1.4 719ec5992 -> 5e767644f


ORC-233 Allow `orc.include.columns` to be empty

Fixes #160

Signed-off-by: Owen O'Malley <omalley@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/5e767644
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/5e767644
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/5e767644

Branch: refs/heads/branch-1.4
Commit: 5e767644f9a7531f42f3bb9fe84cc3887680a1e9
Parents: 719ec59
Author: Ajay Yadava <ajayyadava@apache.org>
Authored: Mon Aug 21 10:35:46 2017 -0700
Committer: Owen O'Malley <omalley@apache.org>
Committed: Tue Aug 29 10:06:55 2017 -0700

----------------------------------------------------------------------
 .../org/apache/orc/mapred/OrcInputFormat.java   |  7 +++-
 .../mapreduce/TestMapreduceOrcOutputFormat.java | 42 ++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/5e767644/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java b/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
index ef92fd1..9e011de 100644
--- a/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
+++ b/java/mapreduce/src/java/org/apache/orc/mapred/OrcInputFormat.java
@@ -22,6 +22,7 @@ import com.esotericsoftware.kryo.Kryo;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
@@ -30,7 +31,6 @@ import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.RecordReader;
 
-  
 import java.io.IOException;
 import java.util.List;
 
@@ -62,8 +62,13 @@ public class OrcInputFormat<V extends WritableComparable>
         schema.getCategory() != TypeDescription.Category.STRUCT) {
       return null;
     }
+
     boolean[] result = new boolean[schema.getMaximumId() + 1];
     result[0] = true;
+    if (StringUtils.isBlank(columnsStr)) {
+      return result;
+    }
+
     List<TypeDescription> types = schema.getChildren();
     for(String idString: columnsStr.split(",")) {
       TypeDescription type = types.get(Integer.parseInt(idString));

http://git-wip-us.apache.org/repos/asf/orc/blob/5e767644/java/mapreduce/src/test/org/apache/orc/mapreduce/TestMapreduceOrcOutputFormat.java
----------------------------------------------------------------------
diff --git a/java/mapreduce/src/test/org/apache/orc/mapreduce/TestMapreduceOrcOutputFormat.java
b/java/mapreduce/src/test/org/apache/orc/mapreduce/TestMapreduceOrcOutputFormat.java
index 27543c1..6a139ec 100644
--- a/java/mapreduce/src/test/org/apache/orc/mapreduce/TestMapreduceOrcOutputFormat.java
+++ b/java/mapreduce/src/test/org/apache/orc/mapreduce/TestMapreduceOrcOutputFormat.java
@@ -153,6 +153,48 @@ public class TestMapreduceOrcOutputFormat {
     assertEquals(false, reader.nextKeyValue());
   }
 
+  @Test
+  public void testColumnSelectionBlank() throws Exception {
+    String typeStr = "struct<i:int,j:int,k:int>";
+    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
+    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
+    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
+    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
+    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
+    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
+    OutputFormat<NullWritable, OrcStruct> outputFormat =
+        new OrcOutputFormat<OrcStruct>();
+    RecordWriter<NullWritable, OrcStruct> writer =
+        outputFormat.getRecordWriter(attemptContext);
+
+    // write 3000 rows, each holding three int columns (r, r * 2, r * 3)
+    TypeDescription type = TypeDescription.fromString(typeStr);
+    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
+    NullWritable nada = NullWritable.get();
+    for (int r = 0; r < 3000; ++r) {
+      row.setFieldValue(0, new IntWritable(r));
+      row.setFieldValue(1, new IntWritable(r * 2));
+      row.setFieldValue(2, new IntWritable(r * 3));
+      writer.write(nada, row);
+    }
+    writer.close(attemptContext);
+
+    conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "");
+    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"),
+        0, 1000000, new String[0]);
+    RecordReader<NullWritable, OrcStruct> reader =
+        new OrcInputFormat<OrcStruct>().createRecordReader(split,
+            attemptContext);
+    // a blank include list selects no columns: all 3000 rows are read, every field is null
+    for (int r = 0; r < 3000; ++r) {
+      assertEquals(true, reader.nextKeyValue());
+      row = reader.getCurrentValue();
+      assertEquals(null, row.getFieldValue(0));
+      assertEquals(null, row.getFieldValue(1));
+      assertEquals(null, row.getFieldValue(2));
+    }
+    assertEquals(false, reader.nextKeyValue());
+  }
 
   /**
    * Make sure that the writer ignores the OrcKey


Mime
View raw message