orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject orc git commit: ORC-162. Handle 0 byte files as empty ORC files. [Forced Update!]
Date Tue, 29 Aug 2017 20:44:50 GMT
Repository: orc
Updated Branches:
  refs/heads/orc-162 3d5561cc4 -> 0a2fb7065 (forced update)


ORC-162. Handle 0 byte files as empty ORC files.

Signed-off-by: Owen O'Malley <omalley@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/0a2fb706
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/0a2fb706
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/0a2fb706

Branch: refs/heads/orc-162
Commit: 0a2fb7065d48c5fa47705b8252d019c8489ab4d5
Parents: 1b30e32
Author: Owen O'Malley <omalley@apache.org>
Authored: Tue Aug 29 11:07:39 2017 -0700
Committer: Owen O'Malley <omalley@apache.org>
Committed: Tue Aug 29 13:34:38 2017 -0700

----------------------------------------------------------------------
 examples/zero.orc                               |  0
 .../java/org/apache/orc/TypeDescription.java    | 11 +++--
 .../java/org/apache/orc/impl/ReaderImpl.java    | 44 ++++++++++++++++++--
 .../src/test/org/apache/orc/TestReader.java     |  5 ++-
 .../test/org/apache/orc/TestVectorOrcFile.java  | 26 +++++++++++-
 5 files changed, 77 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/0a2fb706/examples/zero.orc
----------------------------------------------------------------------
diff --git a/examples/zero.orc b/examples/zero.orc
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/orc/blob/0a2fb706/java/core/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index b0857f0..7f4d241 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -315,12 +315,17 @@ public class TypeDescription
 
   static void parseStruct(TypeDescription type, StringPosition source) {
     requireChar(source, '<');
-    do {
+    boolean needComma = false;
+    while (!consumeChar(source, '>')) {
+      if (needComma) {
+        requireChar(source, ',');
+      } else {
+        needComma = true;
+      }
       String fieldName = parseName(source);
       requireChar(source, ':');
       type.addField(fieldName, parseType(source));
-    } while (consumeChar(source, ','));
-    requireChar(source, '>');
+    }
   }
 
   static TypeDescription parseType(StringPosition source) {

http://git-wip-us.apache.org/repos/asf/orc/blob/0a2fb706/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 15d3c3f..130048b 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -491,6 +491,40 @@ public class ReaderImpl implements Reader {
     return new OrcTail(fileTailBuilder.build(), buffer.slice(), modificationTime);
   }
 
+  /**
+   * Build a virtual OrcTail for empty files.
+   * @return a new OrcTail
+   */
+  OrcTail buildEmptyTail() {
+    OrcProto.PostScript.Builder postscript = OrcProto.PostScript.newBuilder();
+    OrcFile.Version version = OrcFile.Version.CURRENT;
+    postscript.setMagic(OrcFile.MAGIC)
+        .setCompression(OrcProto.CompressionKind.NONE)
+        .setFooterLength(0)
+        .addVersion(version.getMajor())
+        .addVersion(version.getMinor())
+        .setMetadataLength(0)
+        .setWriterVersion(OrcFile.CURRENT_WRITER.getId());
+
+    // Use a struct with no fields
+    OrcProto.Type.Builder struct = OrcProto.Type.newBuilder();
+    struct.setKind(OrcProto.Type.Kind.STRUCT);
+
+    OrcProto.Footer.Builder footer = OrcProto.Footer.newBuilder();
+    footer.setHeaderLength(0)
+          .setContentLength(0)
+          .addTypes(struct)
+          .setNumberOfRows(0)
+          .setRowIndexStride(0);
+
+    OrcProto.FileTail.Builder result = OrcProto.FileTail.newBuilder();
+    result.setFooter(footer);
+    result.setPostscript(postscript);
+    result.setFileLength(0);
+    result.setPostscriptLength(0);
+    return new OrcTail(result.build(), null);
+  }
+
   protected OrcTail extractFileTail(FileSystem fs, Path path,
       long maxFileLength) throws IOException {
     FSDataInputStream file = fs.open(path);
@@ -509,9 +543,13 @@ public class ReaderImpl implements Reader {
         size = maxFileLength;
         modificationTime = -1;
       }
-      // Anything lesser than MAGIC header cannot be valid (valid ORC file is actually around 45 bytes, this is
-      // more conservative)
-      if (size <= OrcFile.MAGIC.length()) {
+      if (size == 0) {
+        // Hive often creates empty files (including ORC) and has an
+        // optimization to create a 0 byte file as an empty ORC file.
+        return buildEmptyTail();
+      } else if (size <= OrcFile.MAGIC.length()) {
+        // Anything smaller than MAGIC header cannot be valid (valid ORC files
+	// are actually around 40 bytes, this is more conservative)
         throw new FileFormatException("Not a valid ORC file " + path
           + " (maxFileLength= " + maxFileLength + ")");
       }

http://git-wip-us.apache.org/repos/asf/orc/blob/0a2fb706/java/core/src/test/org/apache/orc/TestReader.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestReader.java b/java/core/src/test/org/apache/orc/TestReader.java
index fc4ebdc..61fd6e2 100644
--- a/java/core/src/test/org/apache/orc/TestReader.java
+++ b/java/core/src/test/org/apache/orc/TestReader.java
@@ -49,13 +49,14 @@ public class TestReader {
     fs.delete(testFilePath, false);
   }
 
-  @Test(expected=FileFormatException.class)
+  @Test
   public void testReadZeroLengthFile() throws Exception {
     FSDataOutputStream fout = fs.create(testFilePath);
     fout.close();
     assertEquals(0, fs.getFileStatus(testFilePath).getLen());
-    OrcFile.createReader(testFilePath,
+    Reader reader = OrcFile.createReader(testFilePath,
         OrcFile.readerOptions(conf).filesystem(fs));
+    assertEquals(0, reader.getNumberOfRows());
   }
 
   @Test(expected=FileFormatException.class)

http://git-wip-us.apache.org/repos/asf/orc/blob/0a2fb706/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 9373216..6fe132b 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -3250,4 +3250,28 @@ public class TestVectorOrcFile {
     assertEquals(fromString("baz"), reader.getMetadataValue("c"));
     assertEquals(fromString("bat"), reader.getMetadataValue("d"));
   }
+
+  Path exampleDir = new Path(System.getProperty("example.dir",
+      "../../examples/"));
+
+  @Test
+  public void testZeroByteOrcFile() throws Exception {
+    Path zeroFile = new Path(exampleDir, "zero.orc");
+    Reader reader = OrcFile.createReader(zeroFile, OrcFile.readerOptions(conf));
+    assertEquals(0, reader.getNumberOfRows());
+    assertEquals("struct<>", reader.getSchema().toString());
+    assertEquals(CompressionKind.NONE, reader.getCompressionKind());
+    assertEquals(0, reader.getRawDataSize());
+    assertEquals(0, reader.getRowIndexStride());
+    assertEquals(0, reader.getCompressionSize());
+    assertEquals(0, reader.getMetadataSize());
+    assertEquals(OrcFile.Version.CURRENT, reader.getFileVersion());
+    assertEquals(0, reader.getStripes().size());
+    assertEquals(0, reader.getStatistics().length);
+    assertEquals(0, reader.getMetadataKeys().size());
+    assertEquals(OrcFile.CURRENT_WRITER, reader.getWriterVersion());
+    VectorizedRowBatch batch =
+        TypeDescription.fromString("struct<>").createRowBatch();
+    assertEquals(false, reader.rows().nextBatch(batch));
+  }
 }


Mime
View raw message