datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mha...@apache.org
Subject [1/3] git commit: Create SelectFieldByName UDF
Date Mon, 03 Nov 2014 22:13:49 GMT
Repository: incubator-datafu
Updated Branches:
  refs/heads/master 639268a68 -> 83e1f5411


Create SelectFieldByName UDF

Given a field whose value contains a field name, and * (all fields of the tuple), returns the
value of the field referenced by that field name.

https://issues.apache.org/jira/browse/DATAFU-69

Signed-off-by: Matt Hayes <mhayes@linkedin.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/3b6554aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/3b6554aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/3b6554aa

Branch: refs/heads/master
Commit: 3b6554aa24634c7dd635900321f3f85db663db3e
Parents: b7bef9c
Author: Russell Jurney <russell.jurney@gmail.com>
Authored: Mon Nov 3 14:09:59 2014 -0800
Committer: Matt Hayes <mhayes@linkedin.com>
Committed: Mon Nov 3 14:09:59 2014 -0800

----------------------------------------------------------------------
 .../pig/util/SelectStringFieldByName.java       | 75 +++++++++++++++++++
 .../pig/util/SelectStringFieldByNameTest.java   | 77 ++++++++++++++++++++
 2 files changed, 152 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/3b6554aa/datafu-pig/src/main/java/datafu/pig/util/SelectStringFieldByName.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/util/SelectStringFieldByName.java b/datafu-pig/src/main/java/datafu/pig/util/SelectStringFieldByName.java
new file mode 100644
index 0000000..bb3e1c9
--- /dev/null
+++ b/datafu-pig/src/main/java/datafu/pig/util/SelectStringFieldByName.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package datafu.pig.util;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.*;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+
+/**
+ * Selects the value for a field within a tuple using that field's name.
+ *
+ * <p>
+ * The first argument supplies the name of the field to select. The remaining
+ * arguments are the candidate fields: the alias of each one in the input schema
+ * is compared against that name, and the value of the first match is returned.
+ * The result is null when the name is null or empty, or when no alias matches.
+ *
+ * <p>
+ * Example:
+ * <pre>
+ * {@code
+ * define SelectStringFieldByName datafu.pig.util.SelectStringFieldByName();
+ *
+ * -- input:
+ * -- ("text1", "hi", "how", "are")
+ * input = LOAD 'input' AS (fieldName:chararray, text1:chararray, text2:chararray, text3:chararray);
+ *
+ * -- output:
+ * -- ("hi")
+ * outfoo = FOREACH input GENERATE SelectStringFieldByName(fieldName, *) as value;
+ * }
+ * </pre>
+ */
+
+public class SelectStringFieldByName extends EvalFunc<String>
+{
+    /**
+     * Returns the value of the argument whose schema alias equals the name held
+     * in the first argument.
+     *
+     * @param input tuple whose first element is the field name to look up and
+     *              whose remaining elements are the candidate fields
+     * @return the value of the first candidate whose alias matches, or null if
+     *         the name is null or empty or no candidate matches
+     * @throws IllegalArgumentException if fewer than two arguments are supplied
+     * @throws ClassCastException if the matching field's value is not a String
+     * @throws IOException if reading a tuple element or a schema field fails
+     */
+    @Override
+    public String exec(Tuple input) throws IOException
+    {
+        if (input.size() < 2) {
+            throw new IllegalArgumentException("Less than two arguments!");
+        }
+
+        // Check for null BEFORE calling toString(); a null field name means
+        // there is nothing to look up rather than being an error.
+        Object fieldNameArg = input.get(0);
+        if (fieldNameArg == null) {
+            return null;
+        }
+
+        // isEmpty() compares content; "==" on strings only compares references.
+        String fieldNameToReturn = fieldNameArg.toString();
+        if (fieldNameToReturn.isEmpty()) {
+            return null;
+        }
+
+        Schema inputSchema = getInputSchema();
+
+        // Index 0 is the name argument itself, so candidates start at index 1.
+        for (int i = 1; i < input.size(); i++)
+        {
+            Schema.FieldSchema fieldSchema = inputSchema.getField(i);
+            // Call equals() on the known non-null name so that a field without
+            // an alias is skipped instead of raising a NullPointerException.
+            if (fieldNameToReturn.equals(fieldSchema.alias)) {
+                // This UDF is String-specific: the cast is intentional.
+                return (String) input.get(i);
+            }
+        }
+
+        // No candidate field carries the requested name.
+        return null;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/3b6554aa/datafu-pig/src/test/java/datafu/test/pig/util/SelectStringFieldByNameTest.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/test/java/datafu/test/pig/util/SelectStringFieldByNameTest.java b/datafu-pig/src/test/java/datafu/test/pig/util/SelectStringFieldByNameTest.java
new file mode 100644
index 0000000..716d3a2
--- /dev/null
+++ b/datafu-pig/src/test/java/datafu/test/pig/util/SelectStringFieldByNameTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package datafu.test.pig.util;
+
+import java.util.List;
+
+import junit.framework.Assert;
+
+import org.adrianwalker.multilinestring.Multiline;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.pigunit.PigTest;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.testng.annotations.Test;
+
+import datafu.test.pig.PigTests;
+
+/**
+ * Pig-script test for the SelectStringFieldByName UDF: each input row names one
+ * of its own text columns in its first column, and the test checks that the UDF
+ * returns the value of that named column.
+ */
+public class SelectStringFieldByNameTest extends PigTests
+{
+    // The Javadoc block below is the Pig script under test, not documentation;
+    // presumably the @Multiline annotation turns it into the value of the
+    // string field that follows, so its text must not be edited casually.
+    /**
+
+     define SelectStringFieldByName datafu.pig.util.SelectStringFieldByName();
+
+     data = LOAD 'input' using PigStorage(',') AS (fieldName:chararray, text1:chararray,
text2:chararray, text3:chararray);
+
+     data2 = FOREACH data GENERATE SelectStringFieldByName(fieldName,*) as result;
+
+     describe data2;
+
+     data3 = FOREACH data2 GENERATE result;
+
+     STORE data3 INTO 'output';
+     */
+    @Multiline private static String chooseFieldByValueTest;
+
+    /**
+     * Writes six comma-separated rows to 'input' and asserts that alias data3
+     * holds, for each row, the value of the column named in the row's first field.
+     *
+     * @throws Exception if any of the test helpers fails
+     */
+    @Test
+    public void chooseFieldByValueTest() throws Exception
+    {
+        PigTest test = createPigTestFromString(chooseFieldByValueTest);
+
+        // Column order is fieldName,text1,text2,text3 (see the LOAD schema in
+        // the script); the first column selects which text column to return.
+        writeLinesToFile("input",
+                "text1,hi,how,are",
+                "text2,you,sir,today",
+                "text3,bob,is,a",
+                "text1,friend,of,mine",
+                "text2,and,I,say",
+                "text3,he,is,nice.");
+
+        // NOTE(review): the explicit run is disabled; presumably assertOutput
+        // executes the script itself - confirm before removing this line.
+        //test.runScript();
+
+        // Expected: text1, text2, text3 of rows 1-3, then again for rows 4-6.
+        assertOutput(test, "data3",
+                "(hi)",
+                "(sir)",
+                "(a)",
+                "(friend)",
+                "(I)",
+                "(nice.)");
+    }
+}


Mime
View raw message