orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject orc git commit: ORC-148. Prevent massive logging of exceptions when applying SARGs.
Date Tue, 25 Apr 2017 23:14:39 GMT
Repository: orc
Updated Branches:
  refs/heads/branch-1.3 9c6734189 -> 443c4c87c


ORC-148. Prevent massive logging of exceptions when applying SARGs.

Fixes #99

Signed-off-by: Owen O'Malley <omalley@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/443c4c87
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/443c4c87
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/443c4c87

Branch: refs/heads/branch-1.3
Commit: 443c4c87c200a7219cd83508151416fb53f0ef06
Parents: 9c67341
Author: Owen O'Malley <omalley@apache.org>
Authored: Tue Mar 7 11:17:55 2017 -0800
Committer: Owen O'Malley <omalley@apache.org>
Committed: Tue Apr 25 16:10:33 2017 -0700

----------------------------------------------------------------------
 .../org/apache/orc/impl/RecordReaderImpl.java   | 115 ++++++++-----
 .../apache/orc/impl/TestRecordReaderImpl.java   | 164 +++++++++++++++++--
 2 files changed, 221 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/443c4c87/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index b73ca1e..8d9d644 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -99,7 +99,7 @@ public class RecordReaderImpl implements RecordReader {
   private final DataReader dataReader;
   private final boolean ignoreNonUtf8BloomFilter;
   private final OrcFile.WriterVersion writerVersion;
-  
+
   /**
    * Given a list of column names, find the given column and return the index.
    *
@@ -409,9 +409,9 @@ public class RecordReaderImpl implements RecordReader {
     // disable PPD for timestamp for all old files
     if (type.equals(TypeDescription.Category.TIMESTAMP)) {
       if (!writerVersion.includes(OrcFile.WriterVersion.ORC_135)) {
-        LOG.warn("Not using predication pushdown on {} because it doesn't " +
-                "include ORC-135. Writer version: {}", predicate.getColumnName(),
-            writerVersion);
+        LOG.debug("Not using predication pushdown on {} because it doesn't " +
+                  "include ORC-135. Writer version: {}",
+            predicate.getColumnName(), writerVersion);
         return TruthValue.YES_NO_NULL;
       }
       if (predicate.getType() != PredicateLeaf.Type.TIMESTAMP &&
@@ -455,36 +455,17 @@ public class RecordReaderImpl implements RecordReader {
 
     TruthValue result;
     Object baseObj = predicate.getLiteral();
-    try {
-      // Predicate object and stats objects are converted to the type of the predicate object.
-      Object minValue = getBaseObjectForComparison(predicate.getType(), min);
-      Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
-      Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);
-
-      result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
-      if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) {
-        result = evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull);
-      }
-      // in case failed conversion, return the default YES_NO_NULL truth value
-    } catch (Exception e) {
-      if (LOG.isWarnEnabled()) {
-        final String statsType = min.getClass().getSimpleName();
-        final String predicateType = baseObj == null ? "null" : baseObj.getClass().getSimpleName();
-        final String reason = e.getClass().getSimpleName() + " when evaluating predicate."
+
-            " Skipping ORC PPD." +
-            " Exception: " + e.getMessage() +
-            " StatsType: " + statsType +
-            " PredicateType: " + predicateType;
-        LOG.warn(reason);
-        LOG.debug(reason, e);
-      }
-      if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS) || !hasNull)
{
-        result = TruthValue.YES_NO;
-      } else {
-        result = TruthValue.YES_NO_NULL;
-      }
+    // Predicate object and stats objects are converted to the type of the predicate object.
+    Object minValue = getBaseObjectForComparison(predicate.getType(), min);
+    Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
+    Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);
+
+    result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
+    if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) {
+      return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull);
+    } else {
+      return result;
     }
-    return result;
   }
 
   private static boolean shouldEvaluateBloomFilter(PredicateLeaf predicate,
@@ -664,6 +645,16 @@ public class RecordReaderImpl implements RecordReader {
     return result;
   }
 
+  /**
+   * An exception for when we can't cast things appropriately
+   */
+  static class SargCastException extends IllegalArgumentException {
+
+    public SargCastException(String string) {
+      super(string);
+    }
+  }
+
   private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) {
     if (obj == null) {
       return null;
@@ -762,7 +753,7 @@ public class RecordReaderImpl implements RecordReader {
         break;
     }
 
-    throw new IllegalArgumentException(String.format(
+    throw new SargCastException(String.format(
         "ORC SARGS could not convert from %s to %s", obj.getClass()
             .getSimpleName(), type));
   }
@@ -779,6 +770,7 @@ public class RecordReaderImpl implements RecordReader {
     // same as the above array, but indices are set to true
     private final boolean[] sargColumns;
     private SchemaEvolution evolution;
+    private final long[] exceptionCount;
 
     public SargApplier(SearchArgument sarg,
                        long rowIndexStride,
@@ -801,6 +793,7 @@ public class RecordReaderImpl implements RecordReader {
         }
       }
       this.evolution = evolution;
+      exceptionCount = new long[sargLeaves.size()];
     }
 
     /**
@@ -820,11 +813,18 @@ public class RecordReaderImpl implements RecordReader {
       int groupsInStripe = (int) ((rowsInStripe + rowIndexStride - 1) / rowIndexStride);
       boolean[] result = new boolean[groupsInStripe]; // TODO: avoid alloc?
       TruthValue[] leafValues = new TruthValue[sargLeaves.size()];
-      boolean hasSelected = false, hasSkipped = false;
+      boolean hasSelected = false;
+      boolean hasSkipped = false;
+      TruthValue[] exceptionAnswer = new TruthValue[leafValues.length];
       for (int rowGroup = 0; rowGroup < result.length; ++rowGroup) {
         for (int pred = 0; pred < leafValues.length; ++pred) {
           int columnIx = filterColumns[pred];
-          if (columnIx != -1) {
+          if (columnIx == -1) {
+            // the column is a virtual column
+            leafValues[pred] = TruthValue.YES_NO_NULL;
+          } else if (exceptionAnswer[pred] != null) {
+            leafValues[pred] = exceptionAnswer[pred];
+          } else {
             if (indexes[columnIx] == null) {
               throw new AssertionError("Index is not populated for " + columnIx);
             }
@@ -840,9 +840,35 @@ public class RecordReaderImpl implements RecordReader {
               bf = bloomFilterIndices[columnIx].getBloomFilter(rowGroup);
             }
             if (evolution != null && evolution.isPPDSafeConversion(columnIx)) {
-              leafValues[pred] = evaluatePredicateProto(stats,
-                  sargLeaves.get(pred), bfk, encodings.get(columnIx), bf, writerVersion,
-                  evolution.getFileSchema().findSubtype(columnIx).getCategory());
+              PredicateLeaf predicate = sargLeaves.get(pred);
+              try {
+                leafValues[pred] = evaluatePredicateProto(stats,
+                    predicate, bfk, encodings.get(columnIx), bf,
+                    writerVersion, evolution.getFileSchema().
+                        findSubtype(columnIx).getCategory());
+              } catch (Exception e) {
+                exceptionCount[pred] += 1;
+                if (e instanceof SargCastException) {
+                  LOG.info("Skipping ORC PPD - " + e.getMessage() + " on "
+                      + predicate);
+                } else {
+                  if (LOG.isWarnEnabled()) {
+                    final String reason = e.getClass().getSimpleName() + " when evaluating
predicate." +
+                        " Skipping ORC PPD." +
+                        " Stats: " + stats +
+                        " Predicate: " + predicate;
+                    LOG.warn(reason, e);
+                  }
+                }
+                boolean hasNoNull = stats.hasHasNull() && !stats.getHasNull();
+                if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+                    || hasNoNull) {
+                  exceptionAnswer[pred] = TruthValue.YES_NO;
+                } else {
+                  exceptionAnswer[pred] = TruthValue.YES_NO_NULL;
+                }
+                leafValues[pred] = exceptionAnswer[pred];
+              }
             } else {
               leafValues[pred] = TruthValue.YES_NO_NULL;
             }
@@ -850,9 +876,6 @@ public class RecordReaderImpl implements RecordReader {
               LOG.trace("Stats = " + stats);
               LOG.trace("Setting " + sargLeaves.get(pred) + " to " + leafValues[pred]);
             }
-          } else {
-            // the column is a virtual column
-            leafValues[pred] = TruthValue.YES_NO_NULL;
           }
         }
         result[rowGroup] = sarg.evaluate(leafValues).isNeeded();
@@ -867,6 +890,14 @@ public class RecordReaderImpl implements RecordReader {
 
       return hasSkipped ? ((hasSelected || !returnNone) ? result : READ_NO_RGS) : READ_ALL_RGS;
     }
+
+    /**
+     * Get the count of exceptions for testing.
+     * @return
+     */
+    long[] getExceptionCount() {
+      return exceptionCount;
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/orc/blob/443c4c87/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 354cb89..fc00699 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -52,6 +52,8 @@ import org.apache.hadoop.fs.PositionedReadable;
 import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.hive.common.io.DiskRangeList;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
 import org.apache.orc.util.BloomFilter;
 import org.apache.orc.DataReader;
@@ -509,8 +511,12 @@ public class TestRecordReaderImpl {
     // Integer stats will not be converted date because of days/seconds/millis ambiguity
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createIntStats(10, 100), pred));
+    try {
+      evaluateInteger(createIntStats(10, 100), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from Long to DATE", ia.getMessage());
+    }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
@@ -519,8 +525,12 @@ public class TestRecordReaderImpl {
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createIntStats(10, 100), pred));
+    try {
+      evaluateInteger(createIntStats(10, 100), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from Long to TIMESTAMP", ia.getMessage());
+    }
   }
 
   @Test
@@ -544,8 +554,12 @@ public class TestRecordReaderImpl {
     // Double is not converted to date type because of days/seconds/millis ambiguity
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateDouble(createDoubleStats(10.0, 100.0), pred));
+    try {
+      evaluateDouble(createDoubleStats(10.0, 100.0), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from Double to DATE", ia.getMessage());
+    }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
@@ -593,8 +607,12 @@ public class TestRecordReaderImpl {
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createStringStats("10", "1000"), pred));
+    try {
+      evaluateInteger(createStringStats("10", "1000"), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from String to TIMESTAMP", ia.getMessage());
+    }
   }
 
   @Test
@@ -602,14 +620,22 @@ public class TestRecordReaderImpl {
     PredicateLeaf pred = createPredicateLeaf(
         PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.LONG, "x", 15L, null);
     // Date to Integer conversion is not possible.
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createDateStats(10, 100), pred));
+    try {
+      evaluateInteger(createDateStats(10, 100), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from Date to LONG", ia.getMessage());
+    }
 
     // Date to Float conversion is also not possible.
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.FLOAT, "x", 15.0, null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createDateStats(10, 100), pred));
+    try {
+      evaluateInteger(createDateStats(10, 100), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from Date to FLOAT", ia.getMessage());
+    }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.STRING, "x", "15", null);
@@ -654,8 +680,12 @@ public class TestRecordReaderImpl {
     // Date to Decimal conversion is also not possible.
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createDateStats(10, 100), pred));
+    try {
+      evaluateInteger(createDateStats(10, 100), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from Date to DECIMAL", ia.getMessage());
+    }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null);
@@ -689,8 +719,12 @@ public class TestRecordReaderImpl {
     // Decimal to Date not possible.
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null);
-    assertEquals(TruthValue.YES_NO,
-        evaluateInteger(createDecimalStats("10.0", "100.0"), pred));
+    try {
+      evaluateInteger(createDecimalStats("10.0", "100.0"), pred);
+      fail("evaluate should throw");
+    } catch (RecordReaderImpl.SargCastException ia) {
+      assertEquals("ORC SARGS could not convert from HiveDecimal to DATE", ia.getMessage());
+    }
 
     pred = createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
         PredicateLeaf.Type.DECIMAL, "x", new HiveDecimalWritable("15"), null);
@@ -1908,4 +1942,102 @@ public class TestRecordReaderImpl {
     assertEquals("range start: 0 end: 5000", ranges.toString());
     assertEquals(null, ranges.next);
   }
+
+  static OrcProto.RowIndexEntry createIndexEntry(Long min, Long max) {
+    return OrcProto.RowIndexEntry.newBuilder()
+             .setStatistics(createIntStats(min, max)).build();
+  }
+
+  @Test
+  public void testPickRowGroups() throws Exception {
+    Configuration conf = new Configuration();
+    TypeDescription schema = TypeDescription.fromString("struct<x:int,y:int>");
+    SchemaEvolution evolution = new SchemaEvolution(schema, schema,
+        new Reader.Options(conf));
+    SearchArgument sarg =
+        SearchArgumentFactory.newBuilder()
+            .startAnd()
+            .equals("x", PredicateLeaf.Type.LONG, 100L)
+            .equals("y", PredicateLeaf.Type.LONG, 10L)
+            .end().build();
+    RecordReaderImpl.SargApplier applier =
+        new RecordReaderImpl.SargApplier(sarg, 1000, evolution,
+            OrcFile.WriterVersion.ORC_135);
+    OrcProto.StripeInformation stripe =
+        OrcProto.StripeInformation.newBuilder().setNumberOfRows(4000).build();
+    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[3];
+    indexes[1] = OrcProto.RowIndex.newBuilder()
+        .addEntry(createIndexEntry(0L, 10L))
+        .addEntry(createIndexEntry(100L, 200L))
+        .addEntry(createIndexEntry(300L, 500L))
+        .addEntry(createIndexEntry(100L, 100L))
+        .build();
+    indexes[2] = OrcProto.RowIndex.newBuilder()
+        .addEntry(createIndexEntry(0L, 9L))
+        .addEntry(createIndexEntry(11L, 20L))
+        .addEntry(createIndexEntry(10L, 10L))
+        .addEntry(createIndexEntry(0L, 100L))
+        .build();
+    List<OrcProto.ColumnEncoding> encodings = new ArrayList<>();
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build());
+    boolean[] rows = applier.pickRowGroups(new ReaderImpl.StripeInformationImpl(stripe),
+        indexes, null, encodings, null, false);
+    assertEquals(4, rows.length);
+    assertEquals(false, rows[0]);
+    assertEquals(false, rows[1]);
+    assertEquals(false, rows[2]);
+    assertEquals(true, rows[3]);
+    assertEquals(0, applier.getExceptionCount()[0]);
+    assertEquals(0, applier.getExceptionCount()[1]);
+  }
+
+  @Test
+  public void testPickRowGroupsError() throws Exception {
+    Configuration conf = new Configuration();
+    TypeDescription schema = TypeDescription.fromString("struct<x:int,y:int>");
+    SchemaEvolution evolution = new SchemaEvolution(schema, schema,
+        new Reader.Options(conf));
+    SearchArgument sarg =
+        SearchArgumentFactory.newBuilder()
+            .startAnd()
+              .equals("x", PredicateLeaf.Type.DATE, Date.valueOf("2017-01-02"))
+              .equals("y", PredicateLeaf.Type.LONG, 10L)
+            .end().build();
+    RecordReaderImpl.SargApplier applier =
+        new RecordReaderImpl.SargApplier(sarg, 1000, evolution,
+            OrcFile.WriterVersion.ORC_135);
+    OrcProto.StripeInformation stripe =
+        OrcProto.StripeInformation.newBuilder().setNumberOfRows(3000).build();
+    OrcProto.RowIndex[] indexes = new OrcProto.RowIndex[3];
+    indexes[1] = OrcProto.RowIndex.newBuilder()
+        .addEntry(createIndexEntry(0L, 10L))
+        .addEntry(createIndexEntry(10L, 20L))
+        .addEntry(createIndexEntry(20L, 30L))
+        .build();
+    indexes[2] = OrcProto.RowIndex.newBuilder()
+        .addEntry(createIndexEntry(0L, 9L))
+        .addEntry(createIndexEntry(10L, 20L))
+        .addEntry(createIndexEntry(0L, 30L))
+        .build();
+    List<OrcProto.ColumnEncoding> encodings = new ArrayList<>();
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build());
+    encodings.add(OrcProto.ColumnEncoding.newBuilder()
+        .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2).build());
+    boolean[] rows = applier.pickRowGroups(new ReaderImpl.StripeInformationImpl(stripe),
+        indexes, null, encodings, null, false);
+    assertEquals(3, rows.length);
+    assertEquals(false, rows[0]);
+    assertEquals(true, rows[1]);
+    assertEquals(true, rows[2]);
+    assertEquals(1, applier.getExceptionCount()[0]);
+    assertEquals(0, applier.getExceptionCount()[1]);
+  }
 }


Mime
View raw message