pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From knogu...@apache.org
Subject svn commit: r1832947 - in /pig/branches/branch-0.17: CHANGES.txt src/org/apache/pig/builtin/PigStorage.java test/org/apache/pig/test/TestPigStorage.java
Date Tue, 05 Jun 2018 14:30:55 GMT
Author: knoguchi
Date: Tue Jun  5 14:30:55 2018
New Revision: 1832947

URL: http://svn.apache.org/viewvc?rev=1832947&view=rev
Log:
PIG-5341 PigStorage with -tagFile/-tagPath produces incorrect results with column pruning
(knoguchi)

Modified:
    pig/branches/branch-0.17/CHANGES.txt
    pig/branches/branch-0.17/src/org/apache/pig/builtin/PigStorage.java
    pig/branches/branch-0.17/test/org/apache/pig/test/TestPigStorage.java

Modified: pig/branches/branch-0.17/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/CHANGES.txt?rev=1832947&r1=1832946&r2=1832947&view=diff
==============================================================================
--- pig/branches/branch-0.17/CHANGES.txt (original)
+++ pig/branches/branch-0.17/CHANGES.txt Tue Jun  5 14:30:55 2018
@@ -28,6 +28,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-5341 PigStorage with -tagFile/-tagPath produces incorrect results with column pruning (knoguchi)
+
 PIG-5299: PartitionFilterOptimizer failing at compile time (knoguchi)
 
 PIG-5254: Hit Ctrl-D to quit grunt shell fail (wjqian via daijy)

Modified: pig/branches/branch-0.17/src/org/apache/pig/builtin/PigStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/src/org/apache/pig/builtin/PigStorage.java?rev=1832947&r1=1832946&r2=1832947&view=diff
==============================================================================
--- pig/branches/branch-0.17/src/org/apache/pig/builtin/PigStorage.java (original)
+++ pig/branches/branch-0.17/src/org/apache/pig/builtin/PigStorage.java Tue Jun  5 14:30:55 2018
@@ -252,10 +252,10 @@ LoadPushDown, LoadMetadata, StoreMetadat
             }
             mRequiredColumnsInitialized = true;
         }
-        //Prepend input source path if source tagging is enabled
-        if(tagFile) {
+        // Prepend input source path if source tagging is enabled
+        if (tagFile && (mRequiredColumns == null || mRequiredColumns[0])) {
             mProtoTuple.add(new DataByteArray(sourcePath.getName()));
-        } else if (tagPath) {
+        } else if (tagPath && (mRequiredColumns == null || mRequiredColumns[0])) {
             mProtoTuple.add(new DataByteArray(sourcePath.toString()));
         }
 
@@ -268,7 +268,9 @@ LoadPushDown, LoadMetadata, StoreMetadat
             byte[] buf = value.getBytes();
             int len = value.getLength();
             int start = 0;
-            int fieldID = 0;
+            // If tagging is enabled, mRequiredColumns is created based on the
+            // schema that includes tagfile/path as first index(0)
+            int fieldID = tagFile || tagPath ? 1 : 0;
             for (int i = 0; i < len; i++) {
                 if (buf[i] == fieldDel) {
                     if (mRequiredColumns==null || (mRequiredColumns.length>fieldID && mRequiredColumns[fieldID]))

Modified: pig/branches/branch-0.17/test/org/apache/pig/test/TestPigStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.17/test/org/apache/pig/test/TestPigStorage.java?rev=1832947&r1=1832946&r2=1832947&view=diff
==============================================================================
--- pig/branches/branch-0.17/test/org/apache/pig/test/TestPigStorage.java (original)
+++ pig/branches/branch-0.17/test/org/apache/pig/test/TestPigStorage.java Tue Jun  5 14:30:55 2018
@@ -460,14 +460,54 @@ public class TestPigStorage  {
         pig.registerQuery("Events = LOAD '" + datadir + "originput2' USING PigStorage('\\t', '-schema');");
         pig.registerQuery("EventsName = foreach Events generate name;");
         Iterator<Tuple> sessions = pig.openIterator("EventsName");
-        sessions.next().toString().equals("(1)");
-        sessions.next().toString().equals("(2)");
-        sessions.next().toString().equals("(4)");
-        sessions.next().toString().equals("(2)");
-        sessions.next().toString().equals("(4)");
-        sessions.next().toString().equals("(1)");
-        sessions.next().toString().equals("()");
-        Assert.assertFalse(sessions.hasNext());
+        List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
+                new String[] {
+                "('peter')", "('samir')", "('michael')", "('peter')", "('peter')", "('samir')", "('john')"
+                });
+        Util.checkQueryOutputs(sessions, expectedResults);
+    }
+
+    @Test
+    public void testColumnPruneWithSchemaAndTagPath() throws IOException {
+        Util.createLocalInputFile(datadir + "originput2",
+                new String[] {"peter\t1", "samir\t2", "michael\t4",
+                "peter\t2", "peter\t4", "samir\t1", "john\t"
+        });
+        Util.createLocalInputFile(datadir + ".pig_schema",
+                new String[] {
+                "{\"fields\":[{\"name\":\"name\",\"type\":55,\"schema\":null," +
+                "\"description\":\"autogenerated from Pig Field Schema\"}," +
+                "{\"name\":\"val\",\"type\":10,\"schema\":null,\"description\":"+
+                "\"autogenerated from Pig Field Schema\"}],\"version\":0," +
+                "\"sortKeys\":[],\"sortKeyOrders\":[]}"
+        });
+        pig.registerQuery("Events = LOAD '" + datadir + "originput2' USING PigStorage('\\t', '-schema -tagPath');");
+        pig.registerQuery("EventsName = foreach Events generate val;");
+        Iterator<Tuple> sessions = pig.openIterator("EventsName");
+        List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
+                new String[] {
+                "(1)", "(2)", "(4)", "(2)", "(4)", "(1)", "(null)"
+                });
+        Util.checkQueryOutputs(sessions, expectedResults);
+    }
+
+    @Test
+    public void testColumnPruneWithTagFile() throws IOException {
+        // Wanted two tests.  One with tagfile being pruned and another not being
+        // pruned.  Here, testing the latter, and testing the pruned version
+        // with '-schema -tagPath' testing above (testColumnPruneWithSchemaAndTagPath)
+        Util.createLocalInputFile(datadir + "originput2",
+                new String[] {"peter\t1", "samir\t2", "michael\t4",
+                "peter\t2", "peter\t4", "samir\t1", "john\t"
+        });
+        pig.registerQuery("Events = LOAD '" + datadir + "originput2' USING PigStorage('\\t', '-tagFile') as (filename:chararray, name:chararray,val:int);");
+        pig.registerQuery("EventsName = foreach Events generate filename, val;");
+        Iterator<Tuple> sessions = pig.openIterator("EventsName");
+        List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
+                new String[] {
+                "('originput2',1)", "('originput2',2)", "('originput2',4)", "('originput2',2)", "('originput2',4)", "('originput2',1)", "('originput2',null)"
+                });
+        Util.checkQueryOutputs(sessions, expectedResults);
     }
 
     @Test



Mime
View raw message