orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject orc git commit: ORC-88. Add --raw mode for orc-metadata to see the raw protobuf structures of the file tail. (omalley)
Date Thu, 04 Aug 2016 15:42:55 GMT
Repository: orc
Updated Branches:
  refs/heads/master 14b818adc -> 3bad3dfba


ORC-88. Add --raw mode for orc-metadata to see the raw protobuf
structures of the file tail. (omalley)

Fixes #50.

Signed-off-by: Owen O'Malley <omalley@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/3bad3dfb
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/3bad3dfb
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/3bad3dfb

Branch: refs/heads/master
Commit: 3bad3dfba6270119e55b2c9aeb529abcc4179b9e
Parents: 14b818a
Author: Owen O'Malley <omalley@apache.org>
Authored: Mon Aug 1 16:31:13 2016 -0700
Committer: Owen O'Malley <omalley@apache.org>
Committed: Thu Aug 4 08:26:20 2016 -0700

----------------------------------------------------------------------
 README.md                      |  12 ++-
 tools/src/FileMetadata.cc      |  31 +++++-
 tools/test/CMakeLists.txt      |   1 +
 tools/test/TestFileMetadata.cc | 207 ++++++++++++++++++++++++++++++++++++
 4 files changed, 247 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/3bad3dfb/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 72a121c..a6072b3 100644
--- a/README.md
+++ b/README.md
@@ -21,8 +21,18 @@ format. The C++ and Java libraries are completely independent of each
 other and will each read all versions of ORC files.
 
 The current build status:
-* Apache ORC master <a href="https://travis-ci.org/apache/orc/branches">
+* Master branch <a href="https://travis-ci.org/apache/orc/branches">
 ![master build status](https://travis-ci.org/apache/orc.svg?branch=master)</a>
+* <a href="https://travis-ci.org/apache/orc/pull_requests">Pull Requests</a>
+
+The subdirectories are:
+* c++ - the c++ reader
+* docker - docker scripts to build and test on various linuxes
+* examples - various ORC example files that are used to test compatibility
+* java - the java reader and writer
+* proto - the protocol buffer definition for the ORC metadata
+* site - the website and documentation
+* tools - the c++ tools for reading and inspecting ORC files
 
 ### Building
 

http://git-wip-us.apache.org/repos/asf/orc/blob/3bad3dfb/tools/src/FileMetadata.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index c292ebd..68ca208 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -25,6 +25,8 @@
 
 #include "orc/OrcFile.hh"
 #include "Adaptor.hh"
+#include "Exceptions.hh"
+#include "wrap/orc-proto-wrapper.hh"
 
 void printStripeInformation(std::ostream& out,
                             uint64_t index,
@@ -77,6 +79,19 @@ void printStripeInformation(std::ostream& out,
   out << "\n    }";
 }
 
+void printRawTail(std::ostream& out,
+                  const char*filename) {
+  out << "Raw file tail: " << filename << "\n";
+  std::unique_ptr<orc::Reader> reader =
+    orc::createReader(orc::readLocalFile(filename), orc::ReaderOptions());
+  // Parse the file tail from the serialized one.
+  orc::proto::FileTail tail;
+  if (!tail.ParseFromString(reader->getSerializedFileTail())) {
+    throw orc::ParseError("Failed to parse the file tail from string");
+  }
+  out << tail.DebugString();
+}
+
 void printMetadata(std::ostream & out, const char*filename, bool verbose) {
   std::unique_ptr<orc::Reader> reader =
     orc::createReader(orc::readLocalFile(filename), orc::ReaderOptions());
@@ -136,14 +151,16 @@ void printMetadata(std::ostream & out, const char*filename, bool verbose) {
 int main(int argc, char* argv[]) {
   static struct option longOptions[] = {
     {"help", no_argument, ORC_NULLPTR, 'h'},
+    {"raw", no_argument, ORC_NULLPTR, 'r'},
     {"verbose", no_argument, ORC_NULLPTR, 'v'},
     {ORC_NULLPTR, 0, ORC_NULLPTR, 0}
   };
   bool helpFlag = false;
   bool verboseFlag = false;
+  bool rawFlag = false;
   int opt;
   do {
-    opt = getopt_long(argc, argv, "hv", longOptions, ORC_NULLPTR);
+    opt = getopt_long(argc, argv, "hrv", longOptions, ORC_NULLPTR);
     switch (opt) {
     case '?':
     case 'h':
@@ -153,6 +170,9 @@ int main(int argc, char* argv[]) {
     case 'v':
       verboseFlag = true;
       break;
+    case 'r':
+      rawFlag = true;
+      break;
     }
   } while (opt != -1);
   argc -= optind;
@@ -160,12 +180,17 @@ int main(int argc, char* argv[]) {
 
   if (argc < 1 || helpFlag) {
     std::cerr
-      << "Usage: orc-metadata [-h] [--help] [-v] [--verbose] <filename>\n";
+      << "Usage: orc-metadata [-h] [--help] [-r] [--raw] [-v] [--verbose]"
+      << " <filename>\n";
     exit(1);
   } else {
     for(int i=0; i < argc; ++i) {
       try {
-        printMetadata(std::cout, argv[i], verboseFlag);
+        if (rawFlag) {
+          printRawTail(std::cout, argv[i]);
+        } else {
+          printMetadata(std::cout, argv[i], verboseFlag);
+        }
       } catch (std::exception& ex) {
         std::cerr << "Caught exception in " << argv[i]
                   << ": " << ex.what() << "\n";

http://git-wip-us.apache.org/repos/asf/orc/blob/3bad3dfb/tools/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/tools/test/CMakeLists.txt b/tools/test/CMakeLists.txt
index 0669e67..121f7f7 100644
--- a/tools/test/CMakeLists.txt
+++ b/tools/test/CMakeLists.txt
@@ -26,6 +26,7 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
 add_executable (tool-test
   gzip.cc
   TestReader.cc
+  TestFileMetadata.cc
   TestFileScan.cc
   ToolTest.cc
 )

http://git-wip-us.apache.org/repos/asf/orc/blob/3bad3dfb/tools/test/TestFileMetadata.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
new file mode 100644
index 0000000..4b3a341
--- /dev/null
+++ b/tools/test/TestFileMetadata.cc
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/OrcFile.hh"
+
+#include "Adaptor.hh"
+#include "ToolTest.hh"
+
+#include "wrap/gmock.h"
+#include "wrap/gtest-wrapper.h"
+
+TEST (TestFileMetadata, testRaw) {
+  const std::string pgm = findProgram("tools/src/orc-metadata");
+  const std::string file = findExample("orc_split_elim.orc");
+  const std::string expected =
+    "Raw file tail: " + file + "\n"
+    "postscript {\n"
+    "  footerLength: 288\n"
+    "  compression: NONE\n"
+    "  version: 0\n"
+    "  version: 12\n"
+    "  metadataLength: 526\n"
+    "  magic: \"ORC\"\n"
+    "}\n"
+    "footer {\n"
+    "  headerLength: 3\n"
+    "  contentLength: 245568\n"
+    "  stripes {\n"
+    "    offset: 3\n"
+    "    indexLength: 137\n"
+    "    dataLength: 45282\n"
+    "    footerLength: 149\n"
+    "    numberOfRows: 5000\n"
+    "  }\n"
+    "  stripes {\n"
+    "    offset: 45571\n"
+    "    indexLength: 137\n"
+    "    dataLength: 45282\n"
+    "    footerLength: 149\n"
+    "    numberOfRows: 5000\n"
+    "  }\n"
+    "  stripes {\n"
+    "    offset: 91139\n"
+    "    indexLength: 137\n"
+    "    dataLength: 45282\n"
+    "    footerLength: 149\n"
+    "    numberOfRows: 5000\n"
+    "  }\n"
+    "  stripes {\n"
+    "    offset: 136707\n"
+    "    indexLength: 138\n"
+    "    dataLength: 45283\n"
+    "    footerLength: 149\n"
+    "    numberOfRows: 5000\n"
+    "  }\n"
+    "  stripes {\n"
+    "    offset: 200000\n"
+    "    indexLength: 137\n"
+    "    dataLength: 45282\n"
+    "    footerLength: 149\n"
+    "    numberOfRows: 5000\n"
+    "  }\n"
+    "  types {\n"
+    "    kind: STRUCT\n"
+    "    subtypes: 1\n"
+    "    subtypes: 2\n"
+    "    subtypes: 3\n"
+    "    subtypes: 4\n"
+    "    subtypes: 5\n"
+    "    fieldNames: \"userid\"\n"
+    "    fieldNames: \"string1\"\n"
+    "    fieldNames: \"subtype\"\n"
+    "    fieldNames: \"decimal1\"\n"
+    "    fieldNames: \"ts\"\n"
+    "  }\n"
+    "  types {\n"
+    "    kind: LONG\n"
+    "  }\n"
+    "  types {\n"
+    "    kind: STRING\n"
+    "  }\n"
+    "  types {\n"
+    "    kind: DOUBLE\n"
+    "  }\n"
+    "  types {\n"
+    "    kind: DECIMAL\n"
+    "  }\n"
+    "  types {\n"
+    "    kind: TIMESTAMP\n"
+    "  }\n"
+    "  numberOfRows: 25000\n"
+    "  statistics {\n"
+    "    numberOfValues: 25000\n"
+    "  }\n"
+    "  statistics {\n"
+    "    numberOfValues: 25000\n"
+    "    intStatistics {\n"
+    "      minimum: 2\n"
+    "      maximum: 100\n"
+    "      sum: 2499619\n"
+    "    }\n"
+    "  }\n"
+    "  statistics {\n"
+    "    numberOfValues: 25000\n"
+    "    stringStatistics {\n"
+    "      minimum: \"bar\"\n"
+    "      maximum: \"zebra\"\n"
+    "      sum: 124990\n"
+    "    }\n"
+    "  }\n"
+    "  statistics {\n"
+    "    numberOfValues: 25000\n"
+    "    doubleStatistics {\n"
+    "      minimum: 0.8\n"
+    "      maximum: 80\n"
+    "      sum: 200051.40000000002\n"
+    "    }\n"
+    "  }\n"
+    "  statistics {\n"
+    "    numberOfValues: 25000\n"
+    "    decimalStatistics {\n"
+    "      minimum: \"0\"\n"
+    "      maximum: \"5.5\"\n"
+    "      sum: \"16.6\"\n"
+    "    }\n"
+    "  }\n"
+    "  statistics {\n"
+    "    numberOfValues: 25000\n"
+    "  }\n"
+    "  rowIndexStride: 10000\n"
+    "}\n"
+    "fileLength: 246402\n"
+    "postscriptLength: 19\n";
+  std::string output;
+  std::string error;
+
+  EXPECT_EQ(0, runProgram({pgm, "-r", file}, output, error));
+  EXPECT_EQ(expected, output);
+  EXPECT_EQ("", error);
+
+  EXPECT_EQ(0, runProgram({pgm, "--raw", file}, output, error));
+  EXPECT_EQ(expected, output);
+  EXPECT_EQ("", error);
+}
+
+TEST (TestFileMetadata, testJson) {
+  const std::string pgm = findProgram("tools/src/orc-metadata");
+  const std::string file = findExample("orc_split_elim.orc");
+  const std::string expected =
+    "{ \"name\": \"" + file + "\",\n"
+    "  \"type\": \"struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(0,0),ts:timestamp>\",\n"
+    "  \"rows\": 25000,\n"
+    "  \"stripe count\": 5,\n"
+    "  \"format\": \"0.12\", \"writer version\": \"original\",\n"
+    "  \"compression\": \"none\",\n"
+    "  \"file length\": 246402,\n"
+    "  \"content\": 245568, \"stripe stats\": 526, \"footer\": 288, \"postscript\": 19,\n"
+    "  \"row index stride\": 10000,\n"
+    "  \"user metadata\": {\n"
+    "  },\n"
+    "  \"stripes\": [\n"
+    "    { \"stripe\": 0, \"rows\": 5000,\n"
+    "      \"offset\": 3, \"length\": 45568,\n"
+    "      \"index\": 137, \"data\": 45282, \"footer\": 149\n"
+    "    },\n"
+    "    { \"stripe\": 1, \"rows\": 5000,\n"
+    "      \"offset\": 45571, \"length\": 45568,\n"
+    "      \"index\": 137, \"data\": 45282, \"footer\": 149\n"
+    "    },\n"
+    "    { \"stripe\": 2, \"rows\": 5000,\n"
+    "      \"offset\": 91139, \"length\": 45568,\n"
+    "      \"index\": 137, \"data\": 45282, \"footer\": 149\n"
+    "    },\n"
+    "    { \"stripe\": 3, \"rows\": 5000,\n"
+    "      \"offset\": 136707, \"length\": 45570,\n"
+    "      \"index\": 138, \"data\": 45283, \"footer\": 149\n"
+    "    },\n"
+    "    { \"stripe\": 4, \"rows\": 5000,\n"
+    "      \"offset\": 200000, \"length\": 45568,\n"
+    "      \"index\": 137, \"data\": 45282, \"footer\": 149\n"
+    "    }\n"
+    "  ]\n"
+    "}\n";
+
+  std::string output;
+  std::string error;
+
+  EXPECT_EQ(0, runProgram({pgm, file}, output, error));
+  EXPECT_EQ(expected, output);
+  EXPECT_EQ("", error);
+}
+


Mime
View raw message