orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject orc git commit: ORC-345. Create and use Decimal64StatisticsImpl for the write path.
Date Thu, 03 May 2018 22:30:37 GMT
Repository: orc
Updated Branches:
  refs/heads/master 4cc0968f5 -> c4c7b28f6


ORC-345. Create and use Decimal64StatisticsImpl for the write path.

Fixes #252

Signed-off-by: Owen O'Malley <omalley@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/c4c7b28f
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/c4c7b28f
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/c4c7b28f

Branch: refs/heads/master
Commit: c4c7b28f6863a9e2b5d1442c1a9e6b4f19c217b6
Parents: 4cc0968
Author: Owen O'Malley <omalley@apache.org>
Authored: Wed Apr 18 14:30:35 2018 -0700
Committer: Owen O'Malley <omalley@apache.org>
Committed: Thu May 3 15:30:14 2018 -0700

----------------------------------------------------------------------
 java/bench/pom.xml                              |   8 +-
 .../java/org/apache/orc/bench/DecimalBench.java | 272 +++++++++++++++++++
 .../src/java/org/apache/orc/bench/Driver.java   |   4 +
 .../org/apache/orc/bench/NullFileSystem.java    | 121 +++++++++
 .../orc/bench/convert/GenerateVariants.java     |   4 +-
 .../java/org/apache/orc/StripeStatistics.java   |   2 +-
 .../java/org/apache/orc/TypeDescription.java    |   6 +-
 .../apache/orc/impl/ColumnStatisticsImpl.java   | 236 +++++++++++++++-
 .../java/org/apache/orc/impl/ReaderImpl.java    |   5 +-
 .../org/apache/orc/impl/RecordReaderImpl.java   |   2 +-
 .../java/org/apache/orc/impl/WriterImpl.java    |   2 +-
 .../orc/impl/writer/Decimal64TreeWriter.java    |  12 +-
 .../orc/impl/writer/DecimalTreeWriter.java      |  13 +-
 .../apache/orc/impl/writer/TreeWriterBase.java  |   5 +-
 .../apache/orc/impl/writer/WriterImplV2.java    |   2 +-
 .../org/apache/orc/TestColumnStatistics.java    |   2 +-
 .../test/org/apache/orc/TestVectorOrcFile.java  |   3 -
 .../orc/impl/TestColumnStatisticsImpl.java      |  63 +++++
 .../apache/orc/impl/TestRecordReaderImpl.java   |  51 ++--
 .../src/java/org/apache/orc/tools/FileDump.java |  12 +-
 .../java/org/apache/orc/tools/JsonFileDump.java |   8 +-
 21 files changed, 771 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/bench/pom.xml
----------------------------------------------------------------------
diff --git a/java/bench/pom.xml b/java/bench/pom.xml
index 148693a..739bcf1 100644
--- a/java/bench/pom.xml
+++ b/java/bench/pom.xml
@@ -39,6 +39,7 @@
     <avro.version>1.8.2</avro.version>
     <hadoop.version>2.7.3</hadoop.version>
     <hive.version>2.3.3</hive.version>
+    <jmh.version>1.20</jmh.version>
     <orc.version>1.5.0-SNAPSHOT</orc.version>
     <parquet.version>1.9.0</parquet.version>
     <storage-api.version>2.5.0</storage-api.version>
@@ -139,7 +140,12 @@
     <dependency>
       <groupId>org.openjdk.jmh</groupId>
       <artifactId>jmh-core</artifactId>
-      <version>1.18</version>
+      <version>${jmh.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.openjdk.jmh</groupId>
+      <artifactId>jmh-generator-annprocess</artifactId>
+      <version>${jmh.version}</version>
     </dependency>
   </dependencies>
 

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/bench/src/java/org/apache/orc/bench/DecimalBench.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/DecimalBench.java b/java/bench/src/java/org/apache/orc/bench/DecimalBench.java
new file mode 100644
index 0000000..71a1c33
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/DecimalBench.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench;
+
+import com.google.gson.JsonStreamParser;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.mapred.FsInput;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.TrackingLocalFileSystem;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcFile;
+import org.apache.orc.Reader;
+import org.apache.orc.RecordReader;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.apache.orc.bench.convert.BatchReader;
+import org.apache.orc.bench.convert.GenerateVariants;
+import org.apache.orc.bench.convert.csv.CsvReader;
+import org.apache.parquet.hadoop.ParquetInputFormat;
+import org.openjdk.jmh.annotations.AuxCounters;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations=2, time=30, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations=10, time=30, timeUnit = TimeUnit.SECONDS)
+@State(Scope.Thread)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Fork(2)
+public class DecimalBench {
+
+  private static final String ROOT_ENVIRONMENT_NAME = "bench.root.dir";
+  private static final Path root;
+  static {
+    String value = System.getProperty(ROOT_ENVIRONMENT_NAME);
+    root = value == null ? null : new Path(value);
+  }
+
+  /**
+   * Abstract out whether we are writing short or long decimals
+   */
+  interface Loader {
+    /**
+     * Load the data from the values array into the ColumnVector.
+     * @param vector the output
+     * @param values the input
+     * @param offset the first input value
+     * @param length the number of values to copy
+     */
+    void loadData(ColumnVector vector, long[] values, int offset, int length);
+  }
+
+  static class Decimal64Loader implements Loader {
+    final int scale;
+    final int precision;
+
+    Decimal64Loader(int precision, int scale) {
+      this.precision = precision;
+      this.scale = scale;
+    }
+
+    @Override
+    public void loadData(ColumnVector vector, long[] values, int offset, int length) {
+      Decimal64ColumnVector v = (Decimal64ColumnVector) vector;
+      v.ensureSize(length, false);
+      v.noNulls = true;
+      for(int p=0; p < length; ++p) {
+        v.vector[p] = values[p + offset];
+      }
+      v.precision = (short) precision;
+      v.scale = (short) scale;
+    }
+  }
+
+  static class DecimalLoader implements Loader {
+    final int scale;
+    final int precision;
+
+    DecimalLoader(int precision, int scale) {
+      this.precision = precision;
+      this.scale = scale;
+    }
+
+    @Override
+    public void loadData(ColumnVector vector, long[] values, int offset, int length) {
+      DecimalColumnVector v = (DecimalColumnVector) vector;
+      v.noNulls = true;
+      for(int p=0; p < length; ++p) {
+        v.vector[p].setFromLongAndScale(values[offset + p], scale);
+      }
+      v.precision = (short) precision;
+      v.scale = (short) scale;
+    }
+  }
+
+  @State(Scope.Thread)
+  public static class OutputState {
+
+    // try both short and long decimals
+    @Param({"8", "19"})
+    public int precision;
+
+    long[] total_amount = new long[1024 * 1024];
+    Configuration conf = new Configuration();
+    FileSystem fs = new NullFileSystem();
+    TypeDescription schema;
+    VectorizedRowBatch batch;
+    Loader loader;
+
+    @Setup
+    public void setup() throws IOException {
+      schema = TypeDescription.createDecimal()
+          .withScale(2)
+          .withPrecision(precision);
+      loader = precision <= 18 ?
+            new Decimal64Loader(precision, 2) :
+            new DecimalLoader(precision, 2);
+      readCsvData(total_amount, root, "total_amount", conf);
+      batch = schema.createRowBatchV2();
+    }
+  }
+
+  @Benchmark
+  public void write(OutputState state) throws Exception {
+    Writer writer = OrcFile.createWriter(new Path("null"),
+        OrcFile.writerOptions(state.conf)
+            .fileSystem(state.fs)
+            .setSchema(state.schema)
+            .compress(CompressionKind.NONE));
+    int r = 0;
+    int batchSize = state.batch.getMaxSize();
+    while (r < state.total_amount.length) {
+      state.batch.size = batchSize;
+      state.loader.loadData(state.batch.cols[0], state.total_amount, r, batchSize);
+      writer.addRowBatch(state.batch);
+      r += batchSize;
+    }
+    writer.close();
+  }
+
+  static void readCsvData(long[] data,
+                          Path root,
+                          String column,
+                          Configuration conf) throws IOException {
+    TypeDescription schema = Utilities.loadSchema("taxi.schema");
+    int row = 0;
+    int batchPosn = 0;
+    BatchReader reader =
+        new GenerateVariants.RecursiveReader(new Path(root, "sources/taxi"), "csv",
+        schema, conf, org.apache.orc.bench.CompressionKind.ZLIB);
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 0;
+    TypeDescription columnSchema = schema.findSubtype(column);
+    DecimalColumnVector cv = (DecimalColumnVector) batch.cols[columnSchema.getId() - 1];
+    int scale = columnSchema.getScale();
+    while (row < data.length) {
+      if (batchPosn >= batch.size) {
+        if (!reader.nextBatch(batch)) {
+          throw new IllegalArgumentException("Not enough data");
+        }
+        batchPosn = 0;
+      }
+      data[row++] = cv.vector[batchPosn++].serialize64(scale);
+    }
+  }
+
+  @State(Scope.Thread)
+  public static class InputState {
+
+    // try both DecimalColumnVector and Decimal64ColumnVector
+    @Param({"ORIGINAL", "USE_DECIMAL64"})
+    public TypeDescription.RowBatchVersion version;
+
+    Configuration conf = new Configuration();
+    FileSystem fs;
+    TypeDescription schema;
+    VectorizedRowBatch batch;
+    Path path;
+    boolean[] include;
+    Reader reader;
+    OrcFile.ReaderOptions options;
+
+    @Setup
+    public void setup() throws IOException {
+      fs = FileSystem.getLocal(conf).getRaw();
+      path = new Path(root, "generated/taxi/orc.none");
+      schema = Utilities.loadSchema("taxi.schema");
+      batch = schema.createRowBatch(version, 1024);
+      // only include the columns with decimal values
+      include = new boolean[schema.getMaximumId() + 1];
+      for(TypeDescription child: schema.getChildren()) {
+        if (child.getCategory() == TypeDescription.Category.DECIMAL) {
+          include[child.getId()] = true;
+        }
+      }
+      reader = OrcFile.createReader(path,
+          OrcFile.readerOptions(conf).filesystem(fs));
+      // just read the decimal columns from the first stripe
+      reader.options().include(include).range(0, 1000);
+    }
+  }
+
+  @Benchmark
+  public void read(Blackhole blackhole, InputState state) throws Exception {
+    RecordReader rows = state.reader.rows();
+    while (rows.nextBatch(state.batch)) {
+      blackhole.consume(state.batch);
+    }
+    rows.close();
+  }
+
+  public static void main(String[] args) throws Exception {
+    new Runner(new OptionsBuilder()
+        .include(DecimalBench.class.getSimpleName())
+        .jvmArgs("-server", "-Xms256m", "-Xmx2g",
+            "-D" + ROOT_ENVIRONMENT_NAME + "=" + args[0]).build()
+    ).run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/bench/src/java/org/apache/orc/bench/Driver.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/Driver.java b/java/bench/src/java/org/apache/orc/bench/Driver.java
index c8f1592..6a86f90 100644
--- a/java/bench/src/java/org/apache/orc/bench/Driver.java
+++ b/java/bench/src/java/org/apache/orc/bench/Driver.java
@@ -47,6 +47,7 @@ public class Driver {
       System.err.println("  scan      - Scan data variants");
       System.err.println("  read-all  - Full table scan benchmark");
       System.err.println("  read-some - Column projection benchmark");
+      System.err.println("  decimal   - Decimal benchmark");
       System.exit(1);
     }
     return result;
@@ -70,6 +71,9 @@ public class Driver {
       case "read-some":
         ColumnProjectionBenchmark.main(args);
         break;
+      case "decimal":
+        DecimalBench.main(args);
+        break;
       default:
         System.err.println("Unknown command " + command);
         System.exit(1);

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/bench/src/java/org/apache/orc/bench/NullFileSystem.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/NullFileSystem.java b/java/bench/src/java/org/apache/orc/bench/NullFileSystem.java
new file mode 100644
index 0000000..23d19cc
--- /dev/null
+++ b/java/bench/src/java/org/apache/orc/bench/NullFileSystem.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.orc.bench;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.Progressable;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+public class NullFileSystem extends FileSystem {
+  @Override
+  public URI getUri() {
+    try {
+      return new URI("null:///");
+    } catch (URISyntaxException e) {
+      throw new IllegalArgumentException("Bad URL", e);
+    }
+  }
+
+  @Override
+  public FSDataInputStream open(Path path, int i) throws IOException {
+    return new FSDataInputStream(new InputStream() {
+      @Override
+      public int read() throws IOException {
+        return -1;
+      }
+    });
+  }
+
+  static class NullOutput extends OutputStream {
+
+    @Override
+    public void write(int b) {
+      // pass
+    }
+
+    public void write(byte[] buffer, int offset, int length) {
+      // pass
+    }
+  }
+  private static final OutputStream NULL_OUTPUT = new NullOutput();
+
+  @Override
+  public FSDataOutputStream create(Path path,
+                                   FsPermission fsPermission,
+                                   boolean b,
+                                   int i,
+                                   short i1,
+                                   long l,
+                                   Progressable progressable) throws IOException {
+    return new FSDataOutputStream(NULL_OUTPUT);
+  }
+
+  @Override
+  public FSDataOutputStream append(Path path,
+                                   int i,
+                                   Progressable progressable) throws IOException {
+    return new FSDataOutputStream(NULL_OUTPUT);
+  }
+
+  @Override
+  public boolean rename(Path path, Path path1) {
+    return false;
+  }
+
+  @Override
+  public boolean delete(Path path, boolean b) {
+    return false;
+  }
+
+  @Override
+  public FileStatus[] listStatus(Path path)  {
+    return null;
+  }
+
+  @Override
+  public void setWorkingDirectory(Path path) {
+    // pass
+  }
+
+  @Override
+  public Path getWorkingDirectory() {
+    return null;
+  }
+
+  @Override
+  public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException {
+    return false;
+  }
+
+  @Override
+  public FileStatus getFileStatus(Path path) throws IOException {
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
----------------------------------------------------------------------
diff --git a/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java b/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
index 7f57468..57cf4c9 100644
--- a/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
+++ b/java/bench/src/java/org/apache/orc/bench/convert/GenerateVariants.java
@@ -95,7 +95,7 @@ public class GenerateVariants {
     }
   }
 
-  static class RecursiveReader implements BatchReader {
+  public static class RecursiveReader implements BatchReader {
     private final RemoteIterator<LocatedFileStatus> filenames;
     private final String format;
     private final TypeDescription schema;
@@ -103,7 +103,7 @@ public class GenerateVariants {
     private final CompressionKind compress;
     private BatchReader current = null;
 
-    RecursiveReader(Path root,
+    public RecursiveReader(Path root,
                     String format,
                     TypeDescription schema,
                     Configuration conf,

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/StripeStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/StripeStatistics.java b/java/core/src/java/org/apache/orc/StripeStatistics.java
index 8fc91cb..d1738ff 100644
--- a/java/core/src/java/org/apache/orc/StripeStatistics.java
+++ b/java/core/src/java/org/apache/orc/StripeStatistics.java
@@ -37,7 +37,7 @@ public class StripeStatistics {
   public ColumnStatistics[] getColumnStatistics() {
     ColumnStatistics[] result = new ColumnStatistics[cs.size()];
     for (int i = 0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(cs.get(i));
+      result[i] = ColumnStatisticsImpl.deserialize(null, cs.get(i));
     }
     return result;
   }

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/TypeDescription.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index 264adff..50cd022 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -47,6 +47,8 @@ public class TypeDescription
   private static final int DEFAULT_PRECISION = 38;
   private static final int DEFAULT_SCALE = 10;
   public static final int MAX_DECIMAL64_PRECISION = 18;
+  public static final long MAX_DECIMAL64 = 999_999_999_999_999_999L;
+  public static final long MIN_DECIMAL64 = -MAX_DECIMAL64;
   private static final int DEFAULT_LENGTH = 256;
   static final Pattern UNQUOTED_NAMES = Pattern.compile("^[a-zA-Z0-9_]+$");
 
@@ -681,12 +683,12 @@ public class TypeDescription
   /**
    * Specify the version of the VectorizedRowBatch that the user desires.
    */
-  enum RowBatchVersion {
+  public enum RowBatchVersion {
     ORIGINAL,
     USE_DECIMAL64;
   }
 
-  VectorizedRowBatch createRowBatch(RowBatchVersion version, int size) {
+  public VectorizedRowBatch createRowBatch(RowBatchVersion version, int size) {
     VectorizedRowBatch result;
     if (category == Category.STRUCT) {
       result = new VectorizedRowBatch(children.size(), size);

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 0cd69f4..98a4e17 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -835,6 +835,13 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     }
 
     @Override
+    public void updateDecimal64(long value, int scale) {
+      HiveDecimalWritable dValue = new HiveDecimalWritable();
+      dValue.setFromLongAndScale(value, scale);
+      updateDecimal(dValue);
+    }
+
+    @Override
     public void merge(ColumnStatisticsImpl other) {
       if (other instanceof DecimalStatisticsImpl) {
         DecimalStatisticsImpl dec = (DecimalStatisticsImpl) other;
@@ -948,6 +955,215 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     }
   }
 
+  private static final class Decimal64StatisticsImpl extends ColumnStatisticsImpl
+      implements DecimalColumnStatistics {
+
+    private final int scale;
+    private long minimum;
+    private long maximum;
+    private boolean hasSum = true;
+    private long sum = 0;
+    private final HiveDecimalWritable scratch = new HiveDecimalWritable();
+
+    Decimal64StatisticsImpl(int scale) {
+      this.scale = scale;
+    }
+
+    Decimal64StatisticsImpl(int scale, OrcProto.ColumnStatistics stats) {
+      super(stats);
+      this.scale = scale;
+      OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
+      if (dec.hasMaximum()) {
+        maximum = new HiveDecimalWritable(dec.getMaximum()).serialize64(scale);
+      }
+      if (dec.hasMinimum()) {
+        minimum = new HiveDecimalWritable(dec.getMinimum()).serialize64(scale);
+      }
+      if (dec.hasSum()) {
+        hasSum = true;
+        HiveDecimalWritable sumTmp = new HiveDecimalWritable(dec.getSum());
+        if (sumTmp.getHiveDecimal().integerDigitCount() + scale <=
+            TypeDescription.MAX_DECIMAL64_PRECISION) {
+          hasSum = true;
+          sum = sumTmp.serialize64(scale);
+          return;
+        }
+      }
+      hasSum = false;
+    }
+
+    @Override
+    public void reset() {
+      super.reset();
+      minimum = 0;
+      maximum = 0;
+      hasSum = true;
+      sum = 0;
+    }
+
+    @Override
+    public void updateDecimal(HiveDecimalWritable value) {
+      updateDecimal64(value.serialize64(scale), scale);
+    }
+
+    @Override
+    public void updateDecimal64(long value, int valueScale) {
+      // normalize the scale to our desired level
+      while (valueScale != scale) {
+        if (valueScale > scale) {
+          value /= 10;
+          valueScale -= 1;
+        } else {
+          value *= 10;
+          valueScale += 1;
+        }
+      }
+      if (value < TypeDescription.MIN_DECIMAL64 ||
+          value > TypeDescription.MAX_DECIMAL64) {
+        throw new IllegalArgumentException("Out of bounds decimal64 " + value);
+      }
+      if (getNumberOfValues() == 0) {
+        minimum = value;
+        maximum = value;
+      } else if (minimum > value) {
+        minimum = value;
+      } else if (maximum < value) {
+        maximum = value;
+      }
+      if (hasSum) {
+        sum += value;
+        hasSum = sum <= TypeDescription.MAX_DECIMAL64 &&
+              sum >= TypeDescription.MIN_DECIMAL64;
+      }
+    }
+
+    @Override
+    public void merge(ColumnStatisticsImpl other) {
+      if (other instanceof Decimal64StatisticsImpl) {
+        Decimal64StatisticsImpl dec = (Decimal64StatisticsImpl) other;
+        if (getNumberOfValues() == 0) {
+          minimum = dec.minimum;
+          maximum = dec.maximum;
+          sum = dec.sum;
+        } else {
+          if (minimum > dec.minimum) {
+            minimum = dec.minimum;
+          }
+          if (maximum < dec.maximum) {
+            maximum = dec.maximum;
+          }
+          if (hasSum && dec.hasSum) {
+            sum += dec.sum;
+            hasSum = sum <= TypeDescription.MAX_DECIMAL64 &&
+                  sum >= TypeDescription.MIN_DECIMAL64;
+          } else {
+            hasSum = false;
+          }
+        }
+      } else {
+        if (getNumberOfValues() != 0) {
+          throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
+        }
+      }
+      super.merge(other);
+    }
+
+    @Override
+    public OrcProto.ColumnStatistics.Builder serialize() {
+      OrcProto.ColumnStatistics.Builder result = super.serialize();
+      OrcProto.DecimalStatistics.Builder dec =
+          OrcProto.DecimalStatistics.newBuilder();
+      if (getNumberOfValues() != 0) {
+        scratch.setFromLongAndScale(minimum, scale);
+        dec.setMinimum(scratch.toString());
+        scratch.setFromLongAndScale(maximum, scale);
+        dec.setMaximum(scratch.toString());
+      }
+      // Check hasSum for overflow.
+      if (hasSum) {
+        scratch.setFromLongAndScale(sum, scale);
+        dec.setSum(scratch.toString());
+      }
+      result.setDecimalStatistics(dec);
+      return result;
+    }
+
+    @Override
+    public HiveDecimal getMinimum() {
+      if (getNumberOfValues() > 0) {
+        scratch.setFromLongAndScale(minimum, scale);
+        return scratch.getHiveDecimal();
+      }
+      return null;
+    }
+
+    @Override
+    public HiveDecimal getMaximum() {
+      if (getNumberOfValues() > 0) {
+        scratch.setFromLongAndScale(maximum, scale);
+        return scratch.getHiveDecimal();
+      }
+      return null;
+    }
+
+    @Override
+    public HiveDecimal getSum() {
+      if (hasSum) {
+        scratch.setFromLongAndScale(sum, scale);
+        return scratch.getHiveDecimal();
+      }
+      return null;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder buf = new StringBuilder(super.toString());
+      if (getNumberOfValues() != 0) {
+        buf.append(" min: ");
+        buf.append(minimum);
+        buf.append(" max: ");
+        buf.append(maximum);
+        if (hasSum) {
+          buf.append(" sum: ");
+          buf.append(sum);
+        }
+      }
+      return buf.toString();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof Decimal64StatisticsImpl)) {
+        return false;
+      }
+      if (!super.equals(o)) {
+        return false;
+      }
+
+      Decimal64StatisticsImpl that = (Decimal64StatisticsImpl) o;
+
+      if (minimum != that.minimum ||
+          maximum != that.maximum ||
+          hasSum != that.hasSum) {
+        return false;
+      }
+      return !hasSum || (sum == that.sum);
+    }
+
+    @Override
+    public int hashCode() {
+      int result = super.hashCode();
+      boolean hasValues = getNumberOfValues() > 0;
+      result = 31 * result + (hasValues ? (int) minimum : 0);
+      result = 31 * result + (hasValues ? (int) maximum : 0);
+      result = 31 * result + (hasSum ? (int) sum : 0);
+      return result;
+    }
+  }
+
   private static final class DateStatisticsImpl extends ColumnStatisticsImpl
       implements DateColumnStatistics {
     private Integer minimum = null;
@@ -1329,6 +1545,10 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     throw new UnsupportedOperationException("Can't update decimal");
   }
 
+  public void updateDecimal64(long value, int scale) {
+    throw new UnsupportedOperationException("Can't update decimal");
+  }
+
   public void updateDate(DateWritable value) {
     throw new UnsupportedOperationException("Can't update date");
   }
@@ -1415,7 +1635,11 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
       case VARCHAR:
         return new StringStatisticsImpl();
       case DECIMAL:
-        return new DecimalStatisticsImpl();
+        if (schema.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
+          return new Decimal64StatisticsImpl(schema.getScale());
+        } else {
+          return new DecimalStatisticsImpl();
+        }
       case DATE:
         return new DateStatisticsImpl();
       case TIMESTAMP:
@@ -1427,7 +1651,8 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     }
   }
 
-  public static ColumnStatisticsImpl deserialize(OrcProto.ColumnStatistics stats) {
+  public static ColumnStatisticsImpl deserialize(TypeDescription schema,
+                                                 OrcProto.ColumnStatistics stats) {
     if (stats.hasBucketStatistics()) {
       return new BooleanStatisticsImpl(stats);
     } else if (stats.hasIntStatistics()) {
@@ -1437,7 +1662,12 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
     } else if (stats.hasStringStatistics()) {
       return new StringStatisticsImpl(stats);
     } else if (stats.hasDecimalStatistics()) {
-      return new DecimalStatisticsImpl(stats);
+      if (schema != null &&
+          schema.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
+        return new Decimal64StatisticsImpl(schema.getScale(), stats);
+      } else {
+        return new DecimalStatisticsImpl(stats);
+      }
     } else if (stats.hasDateStatistics()) {
       return new DateStatisticsImpl(stats);
     } else if (stats.hasTimestampStatistics()) {

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index e8d6be9..feb8160 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -221,14 +221,15 @@ public class ReaderImpl implements Reader {
 
   @Override
   public ColumnStatistics[] getStatistics() {
-    return deserializeStats(fileStats);
+    return deserializeStats(schema, fileStats);
   }
 
   public static ColumnStatistics[] deserializeStats(
+      TypeDescription schema,
       List<OrcProto.ColumnStatistics> fileStats) {
     ColumnStatistics[] result = new ColumnStatistics[fileStats.size()];
     for(int i=0; i < result.length; ++i) {
-      result[i] = ColumnStatisticsImpl.deserialize(fileStats.get(i));
+      result[i] = ColumnStatisticsImpl.deserialize(schema, fileStats.get(i));
     }
     return result;
   }

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 53cc761..abb487e 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -401,7 +401,7 @@ public class RecordReaderImpl implements RecordReader {
                                            OrcProto.BloomFilter bloomFilter,
                                            OrcFile.WriterVersion writerVersion,
                                            TypeDescription.Category type) {
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(statsProto);
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, statsProto);
     Object minValue = getMin(cs);
     Object maxValue = getMax(cs);
     // files written before ORC-135 stores timestamp wrt to local timezone causing issues with PPD.

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index c3656d4..7a4a0b9 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -651,7 +651,7 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
 
     // add the column statistics
     writeFileStatistics(builder, treeWriter);
-    return ReaderImpl.deserializeStats(builder.getStatisticsList());
+    return ReaderImpl.deserializeStats(schema, builder.getStatisticsList());
   }
 
   public CompressionCodec getCompressionCodec() {

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/writer/Decimal64TreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/Decimal64TreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/Decimal64TreeWriter.java
index b79e3b2..020d8ff 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/Decimal64TreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/Decimal64TreeWriter.java
@@ -58,7 +58,7 @@ public class Decimal64TreeWriter extends TreeWriterBase {
       if (vector.noNulls || !vector.isNull[0]) {
         HiveDecimalWritable value = vector.vector[0];
         long lg = value.serialize64(scale);
-        indexStatistics.updateDecimal(value);
+        indexStatistics.updateDecimal64(lg, scale);
         if (createBloomFilter) {
           bloomFilterUtf8.addLong(lg);
         }
@@ -72,7 +72,7 @@ public class Decimal64TreeWriter extends TreeWriterBase {
           HiveDecimalWritable value = vector.vector[i + offset];
           long lg = value.serialize64(scale);
           valueWriter.write(lg);
-          indexStatistics.updateDecimal(value);
+          indexStatistics.updateDecimal64(lg, scale);
           if (createBloomFilter) {
             bloomFilterUtf8.addLong(lg);
           }
@@ -86,10 +86,8 @@ public class Decimal64TreeWriter extends TreeWriterBase {
     assert(scale == vector.scale);
     if (vector.isRepeating) {
       if (vector.noNulls || !vector.isNull[0]) {
-        HiveDecimalWritable value = vector.getScratchWritable();
         long lg = vector.vector[0];
-        value.setFromLongAndScale(lg, vector.scale);
-        indexStatistics.updateDecimal(value);
+        indexStatistics.updateDecimal64(lg, scale);
         if (createBloomFilter) {
           bloomFilterUtf8.addLong(lg);
         }
@@ -98,13 +96,11 @@ public class Decimal64TreeWriter extends TreeWriterBase {
         }
       }
     } else {
-      HiveDecimalWritable value = vector.getScratchWritable();
       for (int i = 0; i < length; ++i) {
         if (vector.noNulls || !vector.isNull[i + offset]) {
           long lg = vector.vector[i + offset];
           valueWriter.write(lg);
-          value.setFromLongAndScale(lg, vector.scale);
-          indexStatistics.updateDecimal(value);
+          indexStatistics.updateDecimal64(lg, scale);
           if (createBloomFilter) {
             bloomFilterUtf8.addLong(lg);
           }

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java b/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
index 9b2f2f0..822042e 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/DecimalTreeWriter.java
@@ -113,10 +113,10 @@ public class DecimalTreeWriter extends TreeWriterBase {
                           int length) throws IOException {
     if (vector.isRepeating) {
       if (vector.noNulls || !vector.isNull[0]) {
-        HiveDecimalWritable value = vector.getScratchWritable();
-        value.setFromLongAndScale(vector.vector[0], vector.scale);
-        indexStatistics.updateDecimal(value);
+        indexStatistics.updateDecimal64(vector.vector[0], vector.scale);
         if (createBloomFilter) {
+          HiveDecimalWritable value = vector.getScratchWritable();
+          value.setFromLongAndScale(vector.vector[0], vector.scale);
           String str = value.toString(scratchBuffer);
           if (bloomFilter != null) {
             bloomFilter.addString(str);
@@ -132,11 +132,12 @@ public class DecimalTreeWriter extends TreeWriterBase {
       HiveDecimalWritable value = vector.getScratchWritable();
       for (int i = 0; i < length; ++i) {
         if (vector.noNulls || !vector.isNull[i + offset]) {
-          utils.writeVslong(valueStream, vector.vector[i + offset]);
+          long num = vector.vector[i + offset];
+          utils.writeVslong(valueStream, num);
           scaleStream.write(vector.scale);
-          value.setFromLongAndScale(vector.vector[i + offset], vector.scale);
-          indexStatistics.updateDecimal(value);
+          indexStatistics.updateDecimal64(num, vector.scale);
           if (createBloomFilter) {
+            value.setFromLongAndScale(num, vector.scale);
             String str = value.toString(scratchBuffer);
             if (bloomFilter != null) {
               bloomFilter.addString(str);

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
index 74ef3cc..d6145cd 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/TreeWriterBase.java
@@ -64,6 +64,7 @@ public abstract class TreeWriterBase implements TreeWriter {
   private boolean foundNulls;
   private OutStream isPresentOutStream;
   private final WriterContext streamFactory;
+  private final TypeDescription schema;
 
   /**
    * Create a tree writer.
@@ -76,6 +77,7 @@ public abstract class TreeWriterBase implements TreeWriter {
                  TypeDescription schema,
                  WriterContext streamFactory,
                  boolean nullable) throws IOException {
+    this.schema = schema;
     this.streamFactory = streamFactory;
     this.isCompressed = streamFactory.isCompressed();
     this.id = columnId;
@@ -340,7 +342,8 @@ public abstract class TreeWriterBase implements TreeWriter {
 
   @Override
   public void updateFileStatistics(OrcProto.StripeStatistics stats) {
-    fileStatistics.merge(ColumnStatisticsImpl.deserialize(stats.getColStats(id)));
+    fileStatistics.merge(ColumnStatisticsImpl.deserialize(schema,
+        stats.getColStats(id)));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/java/org/apache/orc/impl/writer/WriterImplV2.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/writer/WriterImplV2.java b/java/core/src/java/org/apache/orc/impl/writer/WriterImplV2.java
index ab4fc58..668ecc2 100644
--- a/java/core/src/java/org/apache/orc/impl/writer/WriterImplV2.java
+++ b/java/core/src/java/org/apache/orc/impl/writer/WriterImplV2.java
@@ -593,7 +593,7 @@ public class WriterImplV2 implements WriterInternal, MemoryManager.Callback {
 
     // add the column statistics
     writeFileStatistics(builder, treeWriter);
-    return ReaderImpl.deserializeStats(builder.getStatisticsList());
+    return ReaderImpl.deserializeStats(schema, builder.getStatisticsList());
   }
 
   public CompressionCodec getCompressionCodec() {

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/test/org/apache/orc/TestColumnStatistics.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index bbc85ea..2045004 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -209,7 +209,7 @@ public class TestColumnStatistics {
     // serialize and read back in with phoenix timezone
     OrcProto.ColumnStatistics serial = stats2.serialize().build();
     TimeZone.setDefault(TimeZone.getTimeZone("America/Phoenix"));
-    ColumnStatisticsImpl stats3 = ColumnStatisticsImpl.deserialize(serial);
+    ColumnStatisticsImpl stats3 = ColumnStatisticsImpl.deserialize(schema, serial);
     assertEquals("2000-10-29 01:30:00.0",
         ((TimestampColumnStatistics) stats3).getMinimum().toString());
     assertEquals("2000-10-29 03:30:00.0",

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 424eb96..658c1ce 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3615,9 +3615,6 @@ public class TestVectorOrcFile {
     LongColumnVector ints2 = (LongColumnVector) struct1.fields[0];
     BytesColumnVector strs = (BytesColumnVector) struct1.fields[1];
 
-    System.out.println("------------------------------------------------------------------------------------------------------------------------");
-    System.out.println(rows.getRowNumber());
-
     Assert.assertEquals(1000L, rows.getRowNumber());
     Assert.assertEquals(true, rows.nextBatch(batch));
     assertEquals(1000, batch.size);

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java b/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
index e53cdda..64d4626 100644
--- a/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestColumnStatisticsImpl.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.orc.ColumnStatistics;
+import org.apache.orc.DecimalColumnStatistics;
 import org.apache.orc.OrcFile;
 import org.apache.orc.OrcProto;
 import org.apache.orc.Reader;
@@ -85,4 +86,66 @@ public class TestColumnStatisticsImpl {
     assertEquals("2037-01-01 00:00:00.0", stats.getMaximum().toString());
     TimeZone.setDefault(original);
   }
+
+  @Test
+  public void testDecimal64Overflow() throws IOException {
+    TypeDescription schema = TypeDescription.fromString("decimal(18,6)");
+    OrcProto.ColumnStatistics.Builder pb =
+        OrcProto.ColumnStatistics.newBuilder();
+    OrcProto.DecimalStatistics.Builder decimalBuilder =
+        OrcProto.DecimalStatistics.newBuilder();
+    decimalBuilder.setMaximum("1000.0");
+    decimalBuilder.setMinimum("1.010");
+    decimalBuilder.setSum("123456789.123456");
+    pb.setDecimalStatistics(decimalBuilder);
+    pb.setHasNull(false);
+    pb.setNumberOfValues(3);
+
+    // the base case doesn't overflow
+    DecimalColumnStatistics stats1 = (DecimalColumnStatistics)
+        ColumnStatisticsImpl.deserialize(schema, pb.build());
+    ColumnStatisticsImpl updateStats1 = (ColumnStatisticsImpl) stats1;
+    assertEquals("1.01", stats1.getMinimum().toString());
+    assertEquals("1000", stats1.getMaximum().toString());
+    assertEquals("123456789.123456", stats1.getSum().toString());
+    assertEquals(3, stats1.getNumberOfValues());
+
+    // Now set the sum to something that overflows Decimal64.
+    decimalBuilder.setSum("1234567890123.45");
+    pb.setDecimalStatistics(decimalBuilder);
+    DecimalColumnStatistics stats2 = (DecimalColumnStatistics)
+        ColumnStatisticsImpl.deserialize(schema, pb.build());
+    assertEquals(null, stats2.getSum());
+
+    // merge them together
+    updateStats1.merge((ColumnStatisticsImpl) stats2);
+    assertEquals(null, stats1.getSum());
+
+    updateStats1.reset();
+    assertEquals("0", stats1.getSum().toString());
+    updateStats1.increment();
+    updateStats1.updateDecimal64(10000, 6);
+    assertEquals("0.01", stats1.getSum().toString());
+    updateStats1.updateDecimal64(1, 4);
+    assertEquals("0.0101", stats1.getSum().toString());
+    updateStats1.updateDecimal64(TypeDescription.MAX_DECIMAL64, 6);
+    assertEquals(null, stats1.getSum());
+    updateStats1.reset();
+    updateStats1.updateDecimal64(TypeDescription.MAX_DECIMAL64, 6);
+    assertEquals("999999999999.999999", stats1.getSum().toString());
+    updateStats1.updateDecimal64(1, 6);
+    assertEquals(null, stats1.getSum());
+
+    updateStats1.reset();
+    ColumnStatisticsImpl updateStats2 = (ColumnStatisticsImpl) stats2;
+    updateStats2.reset();
+    updateStats1.increment();
+    updateStats2.increment();
+    updateStats1.updateDecimal64(TypeDescription.MAX_DECIMAL64, 6);
+    updateStats2.updateDecimal64(TypeDescription.MAX_DECIMAL64, 6);
+    assertEquals("999999999999.999999", stats1.getSum().toString());
+    assertEquals("999999999999.999999", stats2.getSum().toString());
+    updateStats1.merge(updateStats2);
+    assertEquals(null, stats1.getSum());
+  }
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index bc85724..ae8bab5 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -275,23 +275,26 @@ public class TestRecordReaderImpl {
   @Test
   public void testGetMin() throws Exception {
     assertEquals(10L, RecordReaderImpl.getMin(
-      ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
+      ColumnStatisticsImpl.deserialize(null, createIntStats(10L, 100L))));
     assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
+      null,
       OrcProto.ColumnStatistics.newBuilder()
         .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
           .setMinimum(10.0d).setMaximum(100.0d).build()).build())));
     assertEquals(null, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
+      null,
       OrcProto.ColumnStatistics.newBuilder()
         .setStringStatistics(OrcProto.StringStatistics.newBuilder().build())
         .build())));
     assertEquals("a", RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(
+      null,
       OrcProto.ColumnStatistics.newBuilder()
         .setStringStatistics(OrcProto.StringStatistics.newBuilder()
           .setMinimum("a").setMaximum("b").build()).build())));
     assertEquals("hello", RecordReaderImpl.getMin(ColumnStatisticsImpl
-      .deserialize(createStringStats("hello", "world"))));
+      .deserialize(null, createStringStats("hello", "world"))));
     assertEquals(HiveDecimal.create("111.1"), RecordReaderImpl.getMin(ColumnStatisticsImpl
-      .deserialize(createDecimalStats("111.1", "112.1"))));
+      .deserialize(null, createDecimalStats("111.1", "112.1"))));
   }
 
   private static OrcProto.ColumnStatistics createIntStats(Long min,
@@ -379,23 +382,27 @@ public class TestRecordReaderImpl {
 
   @Test
   public void testGetMax() throws Exception {
-    assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L))));
+    assertEquals(100L, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        null, createIntStats(10L, 100L))));
     assertEquals(100.0d, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        null,
         OrcProto.ColumnStatistics.newBuilder()
             .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder()
                 .setMinimum(10.0d).setMaximum(100.0d).build()).build())));
     assertEquals(null, RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        null,
         OrcProto.ColumnStatistics.newBuilder()
             .setStringStatistics(OrcProto.StringStatistics.newBuilder().build())
             .build())));
     assertEquals("b", RecordReaderImpl.getMax(ColumnStatisticsImpl.deserialize(
+        null,
         OrcProto.ColumnStatistics.newBuilder()
             .setStringStatistics(OrcProto.StringStatistics.newBuilder()
                 .setMinimum("a").setMaximum("b").build()).build())));
     assertEquals("world", RecordReaderImpl.getMax(ColumnStatisticsImpl
-      .deserialize(createStringStats("hello", "world"))));
+      .deserialize(null, createStringStats("hello", "world"))));
     assertEquals(HiveDecimal.create("112.1"), RecordReaderImpl.getMax(ColumnStatisticsImpl
-      .deserialize(createDecimalStats("111.1", "112.1"))));
+      .deserialize(null, createDecimalStats("111.1", "112.1"))));
   }
 
   static TruthValue evaluateBoolean(OrcProto.ColumnStatistics stats,
@@ -1471,7 +1478,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addLong(i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createIntStats(10, 100));
     assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addLong(15);
@@ -1486,7 +1493,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addLong(i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createIntStats(10, 100));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addLong(15);
@@ -1505,7 +1512,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addLong(i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createIntStats(10, 100));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createIntStats(10, 100));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addLong(19);
@@ -1523,7 +1530,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addDouble(i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDoubleStats(10.0, 100.0));
     assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addDouble(15.0);
@@ -1538,7 +1545,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addDouble(i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDoubleStats(10.0, 100.0));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addDouble(15.0);
@@ -1557,7 +1564,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addDouble(i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDoubleStats(10.0, 100.0));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDoubleStats(10.0, 100.0));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addDouble(19.0);
@@ -1575,7 +1582,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addString("str_" + i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createStringStats("str_10", "str_200"));
     assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addString("str_15");
@@ -1590,7 +1597,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addString("str_" + i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createStringStats("str_10", "str_200"));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addString("str_15");
@@ -1609,7 +1616,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addString("str_" + i);
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createStringStats("str_10", "str_200"));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createStringStats("str_10", "str_200"));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addString("str_19");
@@ -1628,7 +1635,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addLong((new DateWritable(i)).getDays());
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDateStats(10, 100));
     assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addLong((new DateWritable(15)).getDays());
@@ -1644,7 +1651,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addLong((new DateWritable(i)).getDays());
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDateStats(10, 100));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addLong((new DateWritable(15)).getDays());
@@ -1663,7 +1670,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addLong((new DateWritable(i)).getDays());
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDateStats(10, 100));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDateStats(10, 100));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addLong((new DateWritable(19)).getDays());
@@ -1683,7 +1690,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addString(HiveDecimal.create(i).toString());
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDecimalStats("10", "200"));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addString(HiveDecimal.create(15).toString());
@@ -1702,7 +1709,7 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addString(HiveDecimal.create(i).toString());
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200"));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDecimalStats("10", "200"));
     assertEquals(TruthValue.NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
     bf.addString(HiveDecimal.create(19).toString());
@@ -1725,11 +1732,11 @@ public class TestRecordReaderImpl {
     for (int i = 20; i < 1000; i++) {
       bf.addString(HiveDecimal.create(i).toString());
     }
-    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200", false));
+    ColumnStatistics cs = ColumnStatisticsImpl.deserialize(null, createDecimalStats("10", "200", false));
     // hasNull is false, so bloom filter should return NO
     assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 
-    cs = ColumnStatisticsImpl.deserialize(createDecimalStats("10", "200", true));
+    cs = ColumnStatisticsImpl.deserialize(null, createDecimalStats("10", "200", true));
     // hasNull is true, so bloom filter should return YES_NO_NULL
     assertEquals(TruthValue.YES_NO_NULL, RecordReaderImpl.evaluatePredicate(cs, pred, bf));
 

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/tools/src/java/org/apache/orc/tools/FileDump.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index dc89a31..3eae30d 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -302,7 +302,7 @@ public final class FileDump {
     if (reader == null) {
       return;
     }
-
+    TypeDescription schema = reader.getSchema();
     System.out.println("Structure for " + filename);
     System.out.println("File Version: " + reader.getFileVersion().getName() +
         " with " + reader.getWriterVersion());
@@ -384,7 +384,8 @@ public final class FileDump {
             .readRowIndex(stripeIx, null, null, null, sargColumns);
         for (int col : rowIndexCols) {
           StringBuilder buf = new StringBuilder();
-          String rowIdxString = getFormattedRowIndices(col, indices.getRowGroupIndex());
+          String rowIdxString = getFormattedRowIndices(col,
+              indices.getRowGroupIndex(), schema);
           buf.append(rowIdxString);
           String bloomFilString = getFormattedBloomFilters(col, indices,
               reader.getWriterVersion(),
@@ -662,7 +663,8 @@ public final class FileDump {
   }
 
   private static String getFormattedRowIndices(int col,
-                                               OrcProto.RowIndex[] rowGroupIndex) {
+                                               OrcProto.RowIndex[] rowGroupIndex,
+                                               TypeDescription schema) {
     StringBuilder buf = new StringBuilder();
     OrcProto.RowIndex index;
     buf.append("    Row group indices for column ").append(col).append(":");
@@ -672,6 +674,7 @@ public final class FileDump {
       return buf.toString();
     }
 
+    TypeDescription colSchema = schema.findSubtype(col);
     for (int entryIx = 0; entryIx < index.getEntryCount(); ++entryIx) {
       buf.append("\n      Entry ").append(entryIx).append(": ");
       OrcProto.RowIndexEntry entry = index.getEntry(entryIx);
@@ -683,7 +686,8 @@ public final class FileDump {
       if (colStats == null) {
         buf.append("no stats at ");
       } else {
-        ColumnStatistics cs = ColumnStatisticsImpl.deserialize(colStats);
+        ColumnStatistics cs =
+            ColumnStatisticsImpl.deserialize(colSchema, colStats);
         buf.append(cs.toString());
       }
       buf.append(" positions: ");

http://git-wip-us.apache.org/repos/asf/orc/blob/c4c7b28f/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
----------------------------------------------------------------------
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index e5f3b94..c02ff20 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -190,7 +190,8 @@ public class JsonFileDump {
             for (int col : rowIndexCols) {
               writer.object();
               writer.key("columnId").value(col);
-              writeRowGroupIndexes(writer, col, indices.getRowGroupIndex());
+              writeRowGroupIndexes(writer, col, indices.getRowGroupIndex(),
+                  reader.getSchema());
               writeBloomFilterIndexes(writer, col, indices,
                   reader.getWriterVersion(),
                   reader.getSchema().findSubtype(col).getCategory(),
@@ -398,7 +399,7 @@ public class JsonFileDump {
   }
 
   private static void writeRowGroupIndexes(JSONWriter writer, int col,
-      OrcProto.RowIndex[] rowGroupIndex)
+      OrcProto.RowIndex[] rowGroupIndex, TypeDescription schema)
       throws JSONException {
 
     OrcProto.RowIndex index;
@@ -416,7 +417,8 @@ public class JsonFileDump {
         continue;
       }
       OrcProto.ColumnStatistics colStats = entry.getStatistics();
-      writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(colStats));
+      writeColumnStatistics(writer, ColumnStatisticsImpl.deserialize(
+          schema.findSubtype(col), colStats));
       writer.key("positions").array();
       for (int posIx = 0; posIx < entry.getPositionsCount(); ++posIx) {
         writer.value(entry.getPositions(posIx));


Mime
View raw message