Minor improvements to BagJoin constructors, add an inner join test for BagJoin
Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/fe9b86ea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/fe9b86ea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/fe9b86ea
Branch: refs/heads/master
Commit: fe9b86eaf0a5f8c23c21de10068885dda4fc538e
Parents: 89fe0be
Author: Matthew Hayes <matthew.terence.hayes@gmail.com>
Authored: Thu Nov 20 20:41:11 2014 -0800
Committer: Matthew Hayes <matthew.terence.hayes@gmail.com>
Committed: Thu Nov 20 20:41:11 2014 -0800
----------------------------------------------------------------------
.../src/main/java/datafu/pig/bags/BagJoin.java | 11 +++---
.../java/datafu/test/pig/bags/BagTests.java | 36 ++++++++++++++++++++
2 files changed, 42 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/fe9b86ea/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java b/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
index 6d08b0f..2214dd4 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/BagJoin.java
@@ -36,15 +36,15 @@ import java.util.*;
* <p>
* The format for invocation is BagJoin(bag, 'key',....).
* This UDF expects that all bags are non-null and that there is a corresponding key for each bag.
- * The <em>key</em> that is expected is the alias of the key inside of the preceding bag. By default, an inner
- * join is performed. You can also perform 'left' and 'full' outer joins by specifying 'left' or 'full' in the
+ * The <em>key</em> that is expected is the alias of the key inside of the preceding bag. By default, an 'inner'
+ * join is performed. You can also perform 'left' or 'full' outer joins by specifying 'left' or 'full' in the
* definition.
* </p>
*
* <p>
* Example:
* <code>
- * define BagJoin datafu.pig.bags.BagJoin();
+ * define BagJoin datafu.pig.bags.BagJoin(); -- inner join
*
* -- describe data:
* -- data: {bag1: {(key1: chararray,value1: chararray)},bag2: {(key2: chararray,value2: int)}}
@@ -61,7 +61,6 @@ import java.util.*;
*/
public class BagJoin extends AliasableEvalFunc<DataBag>
{
-
private static final String BAG_NAMES_PROPERTY = "BagFullOuterJoin_BAG_NAMES";
private static final String BAG_NAME_TO_JOIN_PREFIX_PROPERTY = "BagFullOuterJoin_BAG_NAME_TO_JOIN_PREFIX";
private static final String BAG_NAME_TO_SIZE_PROPERTY = "BagFullOuterJoin_BAG_NAME_TO_SIZE_PROPERTY";
@@ -76,7 +75,7 @@ public class BagJoin extends AliasableEvalFunc<DataBag>
public enum JoinType { INNER,LEFT,FULL }
public BagJoin() {
- this.joinType = JoinType.INNER;
+ this("inner");
}
public BagJoin(String joinType) {
@@ -84,6 +83,8 @@ public class BagJoin extends AliasableEvalFunc<DataBag>
this.joinType = JoinType.LEFT;
} else if ("full".equals(joinType.toLowerCase())) {
this.joinType = JoinType.FULL;
+ } else if ("inner".equals(joinType.toLowerCase())) {
+ this.joinType = JoinType.INNER;
} else {
throw new IllegalArgumentException("Invalid constructor argument. Valid values are 'left' or 'full', found: " + joinType);
}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/fe9b86ea/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
index 57917e3..0eb07c7 100644
--- a/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
+++ b/datafu-pig/src/test/java/datafu/test/pig/bags/BagTests.java
@@ -1194,7 +1194,43 @@ public class BagTests extends PigTests
"(1,{(K1,A1,K1,A2,K1,A3),(K2,B1,K2,B2,,),(K2,B1,K2,B22,,),(K3,C1,,,K3,C3),(,,,,K4,D3)},{(K1,A1,K1,A3,K1,A2),(K2,B1,,,K2,B2),(K2,B1,,,K2,B22),(K3,C1,K3,C3,,),(,,K4,D3,,)})");
}
+ /**
+
+
+ define BagInnerJoin datafu.pig.bags.BagJoin();
+ data = LOAD 'input' AS (outer_key:chararray, bag1:bag{T:tuple(k:chararray,v:chararray)}, bag2:bag{T:tuple(k:chararray,v:chararray)}, bag3:bag{T:tuple(k3:chararray,v3:chararray)});
+ describe data;
+
+ data2 = FOREACH data GENERATE
+ outer_key,
+ BagInnerJoin(bag1, 'k', bag2, 'k', bag3, 'k3') as joined1,
+ BagInnerJoin(bag1, 'k', bag3, 'k3', bag2, 'k') as joined2; --this will break without UDF signature and pig < 0.11
+ describe data2;
+
+ STORE data2 INTO 'output';
+
+ */
+ @Multiline
+ private String bagJoinInnerTest;
+
+ @Test
+ public void bagJoinInnerTest() throws Exception {
+ PigTest test = createPigTestFromString(bagJoinInnerTest);
+
+ writeLinesToFile("input",
+ "1\t{(K1,A1),(K2,B1),(K3,C1)}\t{(K1,A2),(K2,B2),(K2,B22)}\t{(K1,A3),(K3,C3),(K4,D3)}");
+
+ try {
+ test.runScript();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+
+ assertOutput(test, "data2",
+ "(1,{(K1,A1,K1,A2,K1,A3)},{(K1,A1,K1,A3,K1,A2)})");
+ }
/**
|