datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mha...@apache.org
Subject [3/3] incubator-datafu git commit: Minor improvements to BagJoin constructors, add an inner join test for BagJoin
Date Fri, 21 Nov 2014 04:42:56 GMT
Minor improvements to BagJoin constructors, add an inner join test for BagJoin


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/fe9b86ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/fe9b86ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/fe9b86ea

Branch: refs/heads/master
Commit: fe9b86eaf0a5f8c23c21de10068885dda4fc538e
Parents: 89fe0be
Author: Matthew Hayes <matthew.terence.hayes@gmail.com>
Authored: Thu Nov 20 20:41:11 2014 -0800
Committer: Matthew Hayes <matthew.terence.hayes@gmail.com>
Committed: Thu Nov 20 20:41:11 2014 -0800

----------------------------------------------------------------------
 .../src/main/java/datafu/pig/bags/BagJoin.java  | 11 +++---
 .../java/datafu/test/pig/bags/BagTests.java     | 36 ++++++++++++++++++++
 2 files changed, 42 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/fe9b86ea/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java b/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
index 6d08b0f..2214dd4 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
@@ -36,15 +36,15 @@ import java.util.*;
  * <p>
  * The format for invocation is BagJoin(bag, 'key',....).
  * This UDF expects that all bags are non-null and that there is a corresponding key for each bag.  
- * The <em>key</em> that is expected is the alias of the key inside of the preceding bag.  By default, an inner
- * join is performed.  You can also perform 'left' and 'full' outer joins by specifying 'left' or 'full' in the
+ * The <em>key</em> that is expected is the alias of the key inside of the preceding bag.  By default, an 'inner'
+ * join is performed.  You can also perform 'left' or 'full' outer joins by specifying 'left' or 'full' in the
  * definition.
  * </p> 
  * 
  * <p>
  * Example:
  * <code>
- * define BagJoin datafu.pig.bags.BagJoin();
+ * define BagJoin datafu.pig.bags.BagJoin(); -- inner join
  * 
  * -- describe data: 
  * -- data: {bag1: {(key1: chararray,value1: chararray)},bag2: {(key2: chararray,value2: int)}} 
@@ -61,7 +61,6 @@ import java.util.*;
  */
 public class BagJoin extends AliasableEvalFunc<DataBag>
 {
-
   private static final String BAG_NAMES_PROPERTY = "BagFullOuterJoin_BAG_NAMES";
   private static final String BAG_NAME_TO_JOIN_PREFIX_PROPERTY = "BagFullOuterJoin_BAG_NAME_TO_JOIN_PREFIX";
   private static final String BAG_NAME_TO_SIZE_PROPERTY = "BagFullOuterJoin_BAG_NAME_TO_SIZE_PROPERTY";
@@ -76,7 +75,7 @@ public class BagJoin extends AliasableEvalFunc<DataBag>
   public enum JoinType { INNER,LEFT,FULL }
 
   public BagJoin() {
-    this.joinType = JoinType.INNER;
+    this("inner");
   }
 
   public BagJoin(String joinType) {
@@ -84,6 +83,8 @@ public class BagJoin extends AliasableEvalFunc<DataBag>
           this.joinType = JoinType.LEFT;
       } else if ("full".equals(joinType.toLowerCase())) {
           this.joinType = JoinType.FULL;
+      } else if ("inner".equals(joinType.toLowerCase())) {
+          this.joinType = JoinType.INNER;
     } else {
           throw new IllegalArgumentException("Invalid constructor argument.  Valid values are 'left' or 'full', found: " + joinType);
     }

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/fe9b86ea/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
index 57917e3..0eb07c7 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
@@ -1194,7 +1194,43 @@ public class BagTests extends PigTests
                 "(1,{(K1,A1,K1,A2,K1,A3),(K2,B1,K2,B2,,),(K2,B1,K2,B22,,),(K3,C1,,,K3,C3),(,,,,K4,D3)},{(K1,A1,K1,A3,K1,A2),(K2,B1,,,K2,B2),(K2,B1,,,K2,B22),(K3,C1,K3,C3,,),(,,K4,D3,,)})");
     }
     
+    /**
+
+
+    define BagInnerJoin datafu.pig.bags.BagJoin();
 
+    data = LOAD 'input' AS (outer_key:chararray, bag1:bag{T:tuple(k:chararray,v:chararray)}, bag2:bag{T:tuple(k:chararray,v:chararray)}, bag3:bag{T:tuple(k3:chararray,v3:chararray)});
+    describe data;
+
+    data2 = FOREACH data GENERATE
+    outer_key,
+    BagInnerJoin(bag1, 'k', bag2, 'k', bag3, 'k3') as joined1,
+    BagInnerJoin(bag1, 'k', bag3, 'k3', bag2, 'k') as joined2; --this will break without UDF signature and pig < 0.11
+    describe data2;
+
+    STORE data2 INTO 'output';
+
+    */
+   @Multiline
+   private String bagJoinInnerTest;
+
+   @Test
+   public void bagJoinInnerTest() throws Exception {
+       PigTest test = createPigTestFromString(bagJoinInnerTest);
+
+       writeLinesToFile("input",
+               "1\t{(K1,A1),(K2,B1),(K3,C1)}\t{(K1,A2),(K2,B2),(K2,B22)}\t{(K1,A3),(K3,C3),(K4,D3)}");
+
+       try {
+           test.runScript();
+       } catch (Exception e) {
+           e.printStackTrace();
+           throw e;
+       }
+
+       assertOutput(test, "data2",
+               "(1,{(K1,A1,K1,A2,K1,A3)},{(K1,A1,K1,A3,K1,A2)})");
+   }
 
   /**
 


Mime
View raw message