http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/stats/package-info.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/stats/package-info.java b/src/java/datafu/pig/stats/package-info.java
deleted file mode 100644
index 4c100e7..0000000
--- a/src/java/datafu/pig/stats/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * Statistics UDFs for computing median, quantiles, variance, confidence intervals, etc.
- */
-package datafu.pig.stats;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/text/opennlp/CachedFile.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/text/opennlp/CachedFile.java b/src/java/datafu/pig/text/opennlp/CachedFile.java
deleted file mode 100644
index 5832c81..0000000
--- a/src/java/datafu/pig/text/opennlp/CachedFile.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.pig.text.opennlp;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-public class CachedFile {
-
- public static String getFileName(String modelLink, String modelFile) throws IOException {
- // if the symlink exists, use it, if not, use the raw name if it exists
- // note: this is to help with testing, as it seems distributed cache doesn't work with PigUnit
- String loadFile = modelFile;
- if (!new File(loadFile).exists()) {
- if (new File(modelLink).exists()) {
- loadFile = modelLink;
- } else {
- throw new IOException(String.format("Could not load model, neither symlink %s nor file %s exist", modelFile, modelLink));
- }
- }
- return loadFile;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/text/opennlp/POSTag.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/text/opennlp/POSTag.java b/src/java/datafu/pig/text/opennlp/POSTag.java
deleted file mode 100644
index fb17c63..0000000
--- a/src/java/datafu/pig/text/opennlp/POSTag.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.pig.text.opennlp;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSTaggerME;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.*;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-/**
- * The OpenNLP POSTag UDF tags bags of sequential words with parts of speech and confidence levels using the OpenNLP
- * toolset, and specifically the POSTaggerME class.
- * <p>
- * Example:
- * <pre>
- * {@code
- * define TokenizeME datafu.pig.text.opennlp.TokenizeME('data/en-token.bin');
- * define POSTag datafu.pig.text.opennlp.POSTag('data/en-pos-maxent.bin');
- *
- * -- input:
- * -- (Appetizers during happy hour range from low to high.)
- * input = LOAD 'input' AS (text:chararray);
- * --
- * -- ({(Appetizers),(during),(happy),(hour),(range),(from),(low),(to),(high),(.)})
- * tokenized = FOREACH input GENERATE TokenizeME(text) AS tokens;
- * --
- * -- output:
- * -- Tuple schema is: (word, tag, confidence)
- * -- ({(Appetizers,NNP,0.3619277937390988),(during,IN,0.7945543860326094),(happy,JJ,0.9888504792754391),
- * -- (hour,NN,0.9427455123502427),(range,NN,0.7335527963654751),(from,IN,0.9911576465589752),(low,JJ,0.9652034031895174),
- * -- (to,IN,0.7005347487371849),(high,JJ,0.8227771746247106),(.,.,0.9900983495480891)})
- * output = FOREACH tokenized GENERATE POSTag(tokens) AS tagged;
- * }
- * </pre>
- */
-public class POSTag extends EvalFunc<DataBag>
-{
- private POSTaggerME tagger = null;
- private static final String MODEL_FILE = "pos";
- private TupleFactory tf = TupleFactory.getInstance();
- private BagFactory bf = BagFactory.getInstance();
- private String modelPath;
-
- public POSTag(String modelPath) {
- this.modelPath = modelPath;
- }
-
- @Override
- public List<String> getCacheFiles() {
- List<String> list = new ArrayList<String>(1);
- list.add(this.modelPath + "#" + MODEL_FILE);
- return list;
- }
-
- // Enable multiple languages by specifying the model path. See http://text.sourceforge.net/models-1.5/
- public DataBag exec(Tuple input) throws IOException
- {
- DataBag inputBag = null;
-
- if(input.size() != 1) {
- throw new IOException();
- }
-
- inputBag = (DataBag)input.get(0);
- DataBag outBag = bf.newDefaultBag();
- if(this.tagger == null) {
- String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
- InputStream modelIn = new FileInputStream(loadFile);
- InputStream buffer = new BufferedInputStream(modelIn);
- POSModel model = new POSModel(buffer);
- this.tagger = new POSTaggerME(model);
- }
-
- // Form an inputString array thing for tagger to act on
- int bagLength = (int)inputBag.size();
- String[] words = new String[bagLength];
-
- Iterator<Tuple> itr = inputBag.iterator();
- int i = 0;
- while(itr.hasNext()) {
- words[i] = (String)itr.next().get(0);
- i++;
- }
-
- // Compute tags and their probabilities
- String tags[] = this.tagger.tag(words);
- double probs[] = this.tagger.probs();
-
- // Build output bag of 3-tuples
- for(int j = 0; j < tags.length; j++) {
- Tuple newTuple = tf.newTuple(3);
- newTuple.set(0, words[j]);
- newTuple.set(1, tags[j]);
- newTuple.set(2, probs[j]);
- outBag.add(newTuple);
- }
-
- return outBag;
- }
-
- @Override
- public Schema outputSchema(Schema input)
- {
- try
- {
- Schema.FieldSchema inputFieldSchema = input.getField(0);
-
- if (inputFieldSchema.type != DataType.BAG)
- {
- throw new RuntimeException("Expected a BAG as input");
- }
-
- Schema inputBagSchema = inputFieldSchema.schema;
-
- if(inputBagSchema == null) {
- return null;
- }
-
- if (inputBagSchema.getField(0).type != DataType.TUPLE)
- {
- throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
- DataType.findTypeName(inputBagSchema.getField(0).type)));
- }
-
- Schema inputTupleSchema = inputBagSchema.getField(0).schema;
-
- if (inputTupleSchema.size() != 1)
- {
- throw new RuntimeException("Expected one field for the token data");
- }
-
- if (inputTupleSchema.getField(0).type != DataType.CHARARRAY)
- {
- throw new RuntimeException(String.format("Expected source to be a CHARARRAY, but instead found %s",
- DataType.findTypeName(inputTupleSchema.getField(0).type)));
- }
-
- Schema tupleSchema = new Schema();
- tupleSchema.add(new Schema.FieldSchema("token",DataType.CHARARRAY));
- tupleSchema.add(new Schema.FieldSchema("tag",DataType.CHARARRAY));
- tupleSchema.add(new Schema.FieldSchema("probability",DataType.DOUBLE));
-
- return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
- .getName()
- .toLowerCase(), input),
- tupleSchema,
- DataType.BAG));
- }
- catch (FrontendException e)
- {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/text/opennlp/SentenceDetect.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/text/opennlp/SentenceDetect.java b/src/java/datafu/pig/text/opennlp/SentenceDetect.java
deleted file mode 100644
index 50537fd..0000000
--- a/src/java/datafu/pig/text/opennlp/SentenceDetect.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.pig.text.opennlp;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.*;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-/**
- * The OpenNLP SentenceDectectors segment an input paragraph into sentences.
- * <p>
- * Example:
- * <pre>
- * {@code
- * define SentenceDetect datafu.pig.text.SentenceDetect('data/en-sent.bin');
- *
- * -- input:
- * -- ("I believe the Masons have infiltrated the Apache PMC. I believe laser beams control cat brains.")
- * infoo = LOAD 'input' AS (text:chararray);
-
- * -- output:
- * -- ({(I believe the Masons have infiltrated the Apache PMC.)(I believe laser beams control cat brains.)})
- * outfoo = FOREACH input GENERATE SentenceDetect(text) as sentences;
- * }
- * </pre>
- */
-public class SentenceDetect extends EvalFunc<DataBag>
-{
- private SentenceDetectorME sdetector = null;
- private static final String MODEL_FILE = "sentences";
- private TupleFactory tf = TupleFactory.getInstance();
- private BagFactory bf = BagFactory.getInstance();
- private String modelPath = null;
-
- public SentenceDetect(String modelPath) {
- this.modelPath = modelPath;
- }
-
- @Override
- public List<String> getCacheFiles() {
- List<String> list = new ArrayList<String>(1);
- list.add(this.modelPath + "#" + MODEL_FILE);
- return list;
- }
-
- // Enable multiple languages by specifying the model path. See http://text.sourceforge.net/models-1.5/
- public DataBag exec(Tuple input) throws IOException
- {
- if(input.size() != 1) {
- throw new IOException();
- }
-
- String inputString = input.get(0).toString();
- if(inputString == null || inputString == "") {
- return null;
- }
- DataBag outBag = bf.newDefaultBag();
- if(sdetector == null) {
- String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);
- InputStream is = new FileInputStream(modelPath);
- InputStream buffer = new BufferedInputStream(is);
- SentenceModel model = new SentenceModel(buffer);
- this.sdetector = new SentenceDetectorME(model);
- }
- String sentences[] = this.sdetector.sentDetect(inputString);
- for(String sentence : sentences) {
- Tuple outTuple = tf.newTuple(sentence);
- outBag.add(outTuple);
- }
- return outBag;
- }
-
- @Override
- public Schema outputSchema(Schema input)
- {
- try
- {
- Schema.FieldSchema inputFieldSchema = input.getField(0);
-
- if (inputFieldSchema.type != DataType.CHARARRAY)
- {
- throw new RuntimeException("Expected a CHARARRAY as input, but got a " + inputFieldSchema.toString());
- }
-
- Schema tupleSchema = new Schema();
- tupleSchema.add(new Schema.FieldSchema("sentence",DataType.CHARARRAY));
-
- return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
- .getName()
- .toLowerCase(), input),
- tupleSchema,
- DataType.BAG));
- }
- catch (FrontendException e)
- {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/text/opennlp/TokenizeME.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/text/opennlp/TokenizeME.java b/src/java/datafu/pig/text/opennlp/TokenizeME.java
deleted file mode 100644
index f1f4257..0000000
--- a/src/java/datafu/pig/text/opennlp/TokenizeME.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.pig.text.opennlp;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.List;
-
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.*;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-/**
- * The OpenNLP Tokenizers segment an input character sequence into tokens using the OpenNLP TokenizeME class, which is
- * a probabilistic, 'maximum entropy' classifier.
- * <p>
- * Example:
- * <pre>
- * {@code
- * define TokenizeME datafu.pig.text.opennlp.TokenizeME('data/en-token.bin');
- *
- * -- input:
- * -- ("I believe the Masons have infiltrated the Apache PMC.")
- * infoo = LOAD 'input' AS (text:chararray);
-
- * -- output:
- * -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeME(text) as tokens;
- * }
- * </pre>
- */
-
-
-
-public class TokenizeME extends EvalFunc<DataBag>
-{
- private TokenizerME tokenizer = null;
- private static final String MODEL_FILE = "tokens";
- private TupleFactory tf = TupleFactory.getInstance();
- private BagFactory bf = BagFactory.getInstance();
- private String modelPath;
-
- public TokenizeME(String modelPath) {
- this.modelPath = modelPath;
- }
-
- @Override
- public List<String> getCacheFiles() {
- List<String> list = new ArrayList<String>(1);
- list.add(this.modelPath + "#" + MODEL_FILE);
- return list;
- }
-
- // Enable multiple languages by specifying the model path. See http://text.sourceforge.net/models-1.5/
- public DataBag exec(Tuple input) throws IOException
- {
- if(input.size() != 1) {
- throw new IOException();
- }
-
- String inputString = input.get(0).toString();
- if(inputString == null || inputString == "") {
- return null;
- }
- DataBag outBag = bf.newDefaultBag();
- if(this.tokenizer == null) {
- String loadFile = CachedFile.getFileName(MODEL_FILE, this.modelPath);;
- InputStream file = new FileInputStream(loadFile);
- InputStream buffer = new BufferedInputStream(file);
- TokenizerModel model = new TokenizerModel(buffer);
- this.tokenizer = new TokenizerME(model);
- }
- String tokens[] = this.tokenizer.tokenize(inputString);
- for(String token : tokens) {
- Tuple outTuple = tf.newTuple(token);
- outBag.add(outTuple);
- }
- return outBag;
- }
-
- @Override
- public Schema outputSchema(Schema input)
- {
- try
- {
- Schema.FieldSchema inputFieldSchema = input.getField(0);
-
- if (inputFieldSchema.type != DataType.CHARARRAY)
- {
- throw new RuntimeException("Expected a CHARARRAY as input, but got a " + inputFieldSchema.toString());
- }
-
- Schema tupleSchema = new Schema();
- tupleSchema.add(new Schema.FieldSchema("token",DataType.CHARARRAY));
-
- return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
- .getName()
- .toLowerCase(), input),
- tupleSchema,
- DataType.BAG));
- }
- catch (FrontendException e)
- {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/text/opennlp/TokenizeSimple.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/text/opennlp/TokenizeSimple.java b/src/java/datafu/pig/text/opennlp/TokenizeSimple.java
deleted file mode 100644
index cea48b4..0000000
--- a/src/java/datafu/pig/text/opennlp/TokenizeSimple.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.pig.text.opennlp;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import opennlp.tools.tokenize.SimpleTokenizer;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.*;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-/**
- * The OpenNLP Tokenizers segment an input character sequence into tokens. This one uses the OpenNLP class SimpleTokenizer
- * <p>
- * Example:
- * <pre>
- * {@code
- * define TokenizeSimple datafu.pig.text.opennlp.TokenizeSimple();
- *
- * -- input:
- * -- ("I believe the Masons have infiltrated the Apache PMC.")
- * infoo = LOAD 'input' AS (text:chararray);
-
- * -- output:
- * -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeSimple(text) as tokens;
- * }
- * </pre>
- */
-public class TokenizeSimple extends EvalFunc<DataBag>
-{
- private SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
- private TupleFactory tf = TupleFactory.getInstance();
- private BagFactory bf = BagFactory.getInstance();
-
- public DataBag exec(Tuple input) throws IOException
- {
- if(input.size() != 1) {
- throw new IOException();
- }
-
- String inputString = input.get(0).toString();
- if(inputString == null || inputString == "") {
- return null;
- }
-
- DataBag outBag = bf.newDefaultBag();
- String tokens[] = tokenizer.tokenize(inputString);
- for(String token : tokens) {
- Tuple outTuple = tf.newTuple(token);
- outBag.add(outTuple);
- }
- return outBag;
- }
-
- @Override
- public Schema outputSchema(Schema input)
- {
- try
- {
- Schema.FieldSchema inputFieldSchema = input.getField(0);
-
- if (inputFieldSchema.type != DataType.CHARARRAY)
- {
- throw new RuntimeException("Expected a CHARARRAY as input, but got a " + inputFieldSchema.toString());
- }
-
- Schema tupleSchema = new Schema();
- tupleSchema.add(new Schema.FieldSchema("token",DataType.CHARARRAY));
-
- return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
- .getName()
- .toLowerCase(), input),
- tupleSchema,
- DataType.BAG));
- }
- catch (FrontendException e)
- {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/text/opennlp/TokenizeWhitespace.java b/src/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
deleted file mode 100644
index 8efafb0..0000000
--- a/src/java/datafu/pig/text/opennlp/TokenizeWhitespace.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.pig.text.opennlp;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.*;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-/**
- * The OpenNLP Tokenizers segment an input character sequence into tokens. This one uses the OpenNLP class
- * WhitespaceTokenizer.
- * <p>
- * Example:
- * <pre>
- * {@code
- * define TokenizeWhitespace datafu.pig.text.opennlp.TokenizeWhitespace();
- *
- * -- input:
- * -- ("I believe the Masons have infiltrated the Apache PMC.")
- * infoo = LOAD 'input' AS (text:chararray);
-
- * -- output:
- * -- ({(I),(believe),(the),(Masons),(have),(infiltrated),(the),(Apache),(PMC),(.)})
- * outfoo = FOREACH input GENERATE TokenizeWhitespace(text) as tokens;
- * }
- * </pre>
- */
-public class TokenizeWhitespace extends EvalFunc<DataBag>
-{
- private WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
- private TupleFactory tf = TupleFactory.getInstance();
- private BagFactory bf = BagFactory.getInstance();
-
- public DataBag exec(Tuple input) throws IOException
- {
- if(input.size() != 1) {
- throw new IOException();
- }
-
- String inputString = input.get(0).toString();
- if(inputString == null || inputString == "") {
- return null;
- }
-
- DataBag outBag = bf.newDefaultBag();
- String tokens[] = tokenizer.tokenize(inputString);
- for(String token : tokens) {
- Tuple outTuple = tf.newTuple(token);
- outBag.add(outTuple);
- }
- return outBag;
- }
-
- @Override
- public Schema outputSchema(Schema input)
- {
- try
- {
- Schema.FieldSchema inputFieldSchema = input.getField(0);
-
- if (inputFieldSchema.type != DataType.CHARARRAY)
- {
- throw new RuntimeException("Expected a CHARARRAY as input, but got a " + inputFieldSchema.toString());
- }
-
- Schema tupleSchema = new Schema();
- tupleSchema.add(new Schema.FieldSchema("token",DataType.CHARARRAY));
-
- return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
- .getName()
- .toLowerCase(), input),
- tupleSchema,
- DataType.BAG));
- }
- catch (FrontendException e)
- {
- throw new RuntimeException(e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/urls/UserAgentClassify.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/urls/UserAgentClassify.java b/src/java/datafu/pig/urls/UserAgentClassify.java
deleted file mode 100644
index 4040fea..0000000
--- a/src/java/datafu/pig/urls/UserAgentClassify.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.urls;
-
-import datafu.pig.util.SimpleEvalFunc;
-
-/**
- * Given a user agent string, this UDF classifies clients to 'mobile' and 'desktop'.
- * Current as of June 2011.
- */
-public class UserAgentClassify extends SimpleEvalFunc<String>
-{
-
- public String call(String useragent)
- {
- if (useragent.length() < 4)
- return "desktop"; //
- String ua=useragent.toLowerCase();
- if(ua.matches(".*(android|avantgo|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino).*")||ua.substring(0,4).matches("1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\\-(n|u)|c55\\/|capi|ccwa|cdm\\-|cell|chtm|cldc|cmd\\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\\-s|devi|dica|dmob|do(c|p)o|ds(12|\\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\\-|_)|g1 u|g560|gene|gf\\-5|g\\-mo|go(\\.w|od)|gr(ad|un)|haie|hcit|hd\\-(m|p|t)|hei\\-|hi(pt|ta)|hp( i|ip)|hs\\-c|ht(c(\\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\\-(20|go|ma)|i230|iac( |\\-|\\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\\/)|klon|kp
t |kwc\\-|kyo(c|k)|le(no|xi)|lg( g|\\/(k|l|u)|50|54|e\\-|e\\/|\\-[a-w])|libw|lynx|m1\\-w|m3ga|m50\\/|ma(te|ui|xo)|mc(01|21|ca)|m\\-cr|me(di|rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\\-2|po(ck|rt|se)|prox|psio|pt\\-g|qa\\-a|qc(07|12|21|32|60|\\-[2-7]|i\\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\\-|oo|p\\-)|sdk\\/|se(c(\\-|0|1)|47|mc|nd|ri)|sgh\\-|shar|sie(\\-|m)|sk\\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\\-|v\\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\\-|tdg\\-|tel(i|m)|tim\\-|t\\-mo|to(pl|sh)|ts(70|m\\-|m3|m5)|tx\\-9|up(\\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|xda(\\-|2|g)|yas\\-|your|zeto|zte
\\-"))
- return "mobile";
- else
- return "desktop";
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/urls/package-info.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/urls/package-info.java b/src/java/datafu/pig/urls/package-info.java
deleted file mode 100644
index 332beda..0000000
--- a/src/java/datafu/pig/urls/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * UDFs for processing URLs.
- */
-package datafu.pig.urls;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/AliasableEvalFunc.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/AliasableEvalFunc.java b/src/java/datafu/pig/util/AliasableEvalFunc.java
deleted file mode 100644
index ee2c3f3..0000000
--- a/src/java/datafu/pig/util/AliasableEvalFunc.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-/**
- * Makes implementing and using UDFs easier by enabling named parameters.
- *
- * <p>
- * This works by capturing the schema of the input tuple on the front-end and storing it into the UDFContext
- * (through the instance properties of {@link ContextualEvalFunc}).
- * It provides an easy means of referencing the parameters on the back-end to aid in writing schema-based UDFs.
- * </p>
- *
- * <p>
- * A related class is {@link SimpleEvalFunc}. However they are actually fairly different. The primary purpose of
- * {@link SimpleEvalFunc} is to skip the boilerplate under the assumption that the arguments in and out are simple.
- * It also assumes that these arguments are in a well-defined positional ordering.
- * </p>
- *
- * <p>
- * AliasableEvalFunc allows the UDF writer to avoid dealing with all positional assumptions and instead reference fields
- * by their aliases. This practice allows for more readable code since the alias names should have more meaning
- * to the reader than the position. This approach is also less error prone since it creates a more explicit contract
- * for what input the UDF expects and prevents simple mistakes that positional-based UDFs could not easily catch,
- * such as transposing two fields of the same type. If this contract is violated, say, by attempting to reference
- * a field that is not present, a {@link FieldNotFound} exception carrying a meaningful message is thrown.
- * </p>
- *
- * <p>
- * Example: This example computes the monthly payments for mortgages depending on interest rate.
- * </p>
- * <pre>
- * {@code
- * public class MortgagePayment extends AliasableEvalFunc<DataBag> {
- *   ...
- *   public DataBag exec(Tuple input) throws IOException {
- *     DataBag output = BagFactory.getInstance().newDefaultBag();
- *
- *     Double principal = getDouble(input, "principal"); // get a value from the input tuple by alias
- *     Integer numPayments = getInteger(input, "num_payments");
- *     DataBag interestRates = getBag(input, "interest_rates");
- *
- *     for (Tuple interestTuple : interestRates) {
- *       // get a value from the inner bag tuple by alias
- *       Double interest = getDouble(interestTuple, getPrefixedAliasName("interest_rates", "interest_rate"));
- *       double monthlyPayment = computeMonthlyPayment(principal, numPayments, interest);
- *       output.add(TupleFactory.getInstance().newTuple(monthlyPayment));
- *     }
- *     return output;
- *   }
- * }
- * }
- * </pre>
- *
- * @author wvaughan
- *
- * @param <T> the type of value returned by the UDF
- */
-public abstract class AliasableEvalFunc<T> extends ContextualEvalFunc<T>
-{
-  /** Key under which the alias-to-position map is kept in the instance properties. */
-  private static final String ALIAS_MAP_PROPERTY = "aliasMap";
-
-  /** Alias map cached on first use by {@link #getPosition(String)}; null until then. */
-  private Map<String, Integer> aliasToPosition = null;
-
-  public AliasableEvalFunc() {
-
-  }
-
-  /**
-   * A wrapper method which captures the schema and then calls getOutputSchema
-   */
-  @Override
-  public Schema outputSchema(Schema input) {
-    storeFieldAliases(input);
-    return getOutputSchema(input);
-  }
-
-  /**
-   * Specify the output schema as in {@link org.apache.pig.EvalFunc#outputSchema(Schema)}.
-   *
-   * @param input the schema of the input tuple
-   * @return outputSchema
-   */
-  public abstract Schema getOutputSchema(Schema input);
-
-  @SuppressWarnings("unchecked")
-  private Map<String, Integer> getAliasMap() {
-    return (Map<String, Integer>)getInstanceProperties().get(ALIAS_MAP_PROPERTY);
-  }
-
-  private void setAliasMap(Map<String, Integer> aliases) {
-    getInstanceProperties().put(ALIAS_MAP_PROPERTY, aliases);
-  }
-
-  /**
-   * Builds the alias map for the given input schema and saves it in the instance properties.
-   */
-  private void storeFieldAliases(Schema tupleSchema)
-  {
-    Map<String, Integer> aliases = new HashMap<String, Integer>();
-    constructFieldAliases(aliases, tupleSchema, null);
-    log.debug("In instance: "+getInstanceName()+", stored alias map: " + aliases);
-
-    // pass the input schema into the exec function
-    setAliasMap(aliases);
-  }
-
-  /**
-   * Recursively records the position of every aliased field of the schema.
-   * Positions are counted from zero within each (nested) schema; nested fields are keyed by their prefixed name.
-   */
-  private void constructFieldAliases(Map<String, Integer> aliases, Schema tupleSchema, String prefix)
-  {
-    int position = 0;
-    for (Schema.FieldSchema field : tupleSchema.getFields()) {
-      String alias = getPrefixedAliasName(prefix, field.alias);
-      if (field.alias != null && !field.alias.equals("null")) {
-        aliases.put(alias, position);
-        log.debug("In instance: "+getInstanceName()+", stored alias " + alias + " as position " + position);
-      }
-      if (field.schema != null) {
-        constructFieldAliases(aliases, field.schema, alias);
-      }
-      position++;
-    }
-  }
-
-  /**
-   * Joins a prefix and an alias into the key used by the alias map.
-   *
-   * @param prefix alias of the enclosing bag or tuple; may be null
-   * @param alias alias of the field; may be null
-   * @return {@code prefix.alias}; just the alias when the prefix is null, blank or the string "null";
-   *         the prefix (or the empty string when the prefix is null) when the alias is null or the string "null"
-   */
-  public String getPrefixedAliasName(String prefix, String alias)
-  {
-    if (alias == null || alias.equals("null")) {
-      if (prefix == null) return "";
-      else return prefix; // ignore the null inner bags/tuples
-    }
-    else return ((prefix == null || prefix.equals("null") || prefix.trim().equals("")) ? "" : prefix+".") + alias; // handle top bag
-  }
-
-  /**
-   * Field aliases are generated from the input schema.<br/>
-   * Each alias maps to a bag position.<br/>
-   * Inner bags/tuples will have alias of outer.inner.foo
-   *
-   * @return A map of field alias to field position
-   * @throws RuntimeException if no alias map has been stored in the instance properties
-   */
-  public Map<String, Integer> getFieldAliases()
-  {
-    Map<String, Integer> aliases = getAliasMap();
-    if (aliases == null) {
-      log.error("Class: " + this.getClass());
-      log.error("Instance name: " + this.getInstanceName());
-      log.error("Properties: " + getContextProperties());
-      throw new RuntimeException("Could not retrieve aliases from properties using " + ALIAS_MAP_PROPERTY);
-    }
-    return aliases;
-  }
-
-  /**
-   * Looks up the position of a field by its (already prefixed) alias.
-   *
-   * @param alias the alias to look up
-   * @return the position of the field, or null when the alias is unknown
-   */
-  public Integer getPosition(String alias) {
-    if (aliasToPosition == null) {
-      aliasToPosition = getFieldAliases();
-    }
-    return aliasToPosition.get(alias);
-  }
-
-  /**
-   * Looks up the position of a nested field; equivalent to
-   * {@code getPosition(getPrefixedAliasName(prefix, alias))}.
-   */
-  public Integer getPosition(String prefix, String alias) {
-    return getPosition(getPrefixedAliasName(prefix, alias));
-  }
-
-  /**
-   * Resolves an alias to its position and fetches that field from the tuple.
-   * This is the single place where the lookup is validated on behalf of all typed getters.
-   *
-   * @param tuple the tuple to read from
-   * @param alias the alias of the field to fetch
-   * @return the raw field value, possibly null
-   * @throws FieldNotFound if the alias is unknown or its position lies beyond the end of the tuple
-   * @throws ExecException if the tuple fails to return the field
-   */
-  private Object lookupField(Tuple tuple, String alias) throws ExecException {
-    Integer i = getPosition(alias);
-    if (i == null) throw new FieldNotFound("Attempt to reference unknown alias: "+alias+"\n Instance Properties: "+getInstanceProperties());
-    if (i >= tuple.size()) throw new FieldNotFound("Attempt to reference outside of tuple for alias: "+alias+"\n Instance Properties: "+getInstanceProperties());
-    return tuple.get(i);
-  }
-
-  /** Same as {@link #getInteger(Tuple, String, Integer)} with a null default. */
-  public Integer getInteger(Tuple tuple, String alias) throws ExecException {
-    return getInteger(tuple, alias, null);
-  }
-
-  /**
-   * Reads a numeric field by alias and converts it with {@link Number#intValue()}.
-   *
-   * @return the converted value, or {@code defaultValue} when the field is null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public Integer getInteger(Tuple tuple, String alias, Integer defaultValue) throws ExecException {
-    Number number = (Number)lookupField(tuple, alias);
-    // Plain if/return rather than ?: so that a null default is never auto-unboxed.
-    if (number == null) return defaultValue;
-    return number.intValue();
-  }
-
-  /** Same as {@link #getLong(Tuple, String, Long)} with a null default. */
-  public Long getLong(Tuple tuple, String alias) throws ExecException {
-    return getLong(tuple, alias, null);
-  }
-
-  /**
-   * Reads a numeric field by alias and converts it with {@link Number#longValue()}.
-   *
-   * @return the converted value, or {@code defaultValue} when the field is null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public Long getLong(Tuple tuple, String alias, Long defaultValue) throws ExecException {
-    Number number = (Number)lookupField(tuple, alias);
-    if (number == null) return defaultValue;
-    return number.longValue();
-  }
-
-  /** Same as {@link #getFloat(Tuple, String, Float)} with a null default. */
-  public Float getFloat(Tuple tuple, String alias) throws ExecException {
-    return getFloat(tuple, alias, null);
-  }
-
-  /**
-   * Reads a numeric field by alias and converts it with {@link Number#floatValue()}.
-   *
-   * @return the converted value, or {@code defaultValue} when the field is null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public Float getFloat(Tuple tuple, String alias, Float defaultValue) throws ExecException {
-    Number number = (Number)lookupField(tuple, alias);
-    if (number == null) return defaultValue;
-    return number.floatValue();
-  }
-
-  /** Same as {@link #getDouble(Tuple, String, Double)} with a null default. */
-  public Double getDouble(Tuple tuple, String alias) throws ExecException {
-    return getDouble(tuple, alias, null);
-  }
-
-  /**
-   * Reads a numeric field by alias and converts it with {@link Number#doubleValue()}.
-   *
-   * @return the converted value, or {@code defaultValue} when the field is null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public Double getDouble(Tuple tuple, String alias, Double defaultValue) throws ExecException {
-    Number number = (Number)lookupField(tuple, alias);
-    if (number == null) return defaultValue;
-    return number.doubleValue();
-  }
-
-  /** Same as {@link #getString(Tuple, String, String)} with a null default. */
-  public String getString(Tuple tuple, String alias) throws ExecException {
-    return getString(tuple, alias, null);
-  }
-
-  /**
-   * Reads a String field by alias.
-   *
-   * @return the field value, or {@code defaultValue} when the field is null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public String getString(Tuple tuple, String alias, String defaultValue) throws ExecException {
-    String s = (String)lookupField(tuple, alias);
-    if (s == null) return defaultValue;
-    return s;
-  }
-
-  /**
-   * Reads a Boolean field by alias.
-   *
-   * @return the field value, possibly null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public Boolean getBoolean(Tuple tuple, String alias) throws ExecException {
-    return (Boolean)lookupField(tuple, alias);
-  }
-
-  /**
-   * Reads a bag field by alias.
-   *
-   * @return the field value, possibly null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public DataBag getBag(Tuple tuple, String alias) throws ExecException {
-    return (DataBag)lookupField(tuple, alias);
-  }
-
-  /**
-   * Reads a field by alias without converting it.
-   *
-   * @return the field value, possibly null
-   * @throws FieldNotFound if the alias is unknown or lies outside the tuple
-   */
-  public Object getObject(Tuple tuple, String alias) throws ExecException {
-    return lookupField(tuple, alias);
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/Assert.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/Assert.java b/src/java/datafu/pig/util/Assert.java
deleted file mode 100644
index a258915..0000000
--- a/src/java/datafu/pig/util/Assert.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-/**
- * Backward-compatibility alias for {@link AssertUDF}.
- *
- * <p>
- * The UDF formerly named Assert is now called AssertUDF; this empty subclass
- * keeps the old name available.
- * </p>
- *
- * @deprecated Use {@link AssertUDF} instead.
- */
-@Deprecated
-public class Assert extends AssertUDF {
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/AssertUDF.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/AssertUDF.java b/src/java/datafu/pig/util/AssertUDF.java
deleted file mode 100644
index 16f9247..0000000
--- a/src/java/datafu/pig/util/AssertUDF.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.io.IOException;
-
-import org.apache.pig.FilterFunc;
-import org.apache.pig.data.Tuple;
-
-/**
- * Filter function which asserts that a value is true.
- *
- * <p>
- * Unfortunately, the Pig interpreter doesn't recognize boolean expressions nested as function
- * arguments, so this uses C-style booleans. That is, the first argument should be
- * an integer. 0 is interpreted as "false", and anything else is considered "true".
- * A null first argument is not "true" and is therefore treated as a violated assertion.
- * The function will cause the Pig script to fail if a "false" value is encountered.
- * </p>
- *
- * <p>
- * There is a unary and a binary version. The unary version just takes a boolean, and throws out a generic
- * exception message when the assertion is violated. The binary version takes a message as a second argument
- * and throws that out when the assertion is violated.
- * </p>
- *
- * <p>
- * Example:
- * </p>
- * <pre>
- * {@code
- * FILTER members BY AssertUDF( (member_id >= 0 ? 1 : 0), 'Doh! Some member ID is negative.' );
- * }
- * </pre>
- */
-public class AssertUDF extends FilterFunc
-{
-  /**
-   * Checks the assertion carried by the tuple.
-   *
-   * @param tuple the condition as an Integer in position 0, optionally followed by a message in position 1
-   * @return true when the condition is a non-zero integer
-   * @throws IOException when the condition is 0 or null; the message includes the second argument if one was given
-   */
-  @Override
-  public Boolean exec(Tuple tuple)
-      throws IOException
-  {
-    // Test for null before comparing: unboxing a null Integer would fail with a bare NullPointerException.
-    Integer condition = (Integer) tuple.get(0);
-    if (condition != null && condition != 0) {
-      return true;
-    }
-
-    if (tuple.size() > 1) {
-      // String concatenation renders a null message as "null" rather than failing on toString().
-      throw new IOException("Assertion violated: " + tuple.get(1));
-    }
-    throw new IOException("Assertion violated. What assertion, I do not know, but it was officially violated.");
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/BoolToInt.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/BoolToInt.java b/src/java/datafu/pig/util/BoolToInt.java
deleted file mode 100644
index 5ec8d3a..0000000
--- a/src/java/datafu/pig/util/BoolToInt.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-/**
- * UDF that maps a Boolean onto an Integer: 1 for true, 0 for false or null.
- */
-public class BoolToInt extends SimpleEvalFunc<Integer> {
-  /**
-   * @param val the value to convert; may be null
-   * @return 1 when {@code val} is true, otherwise 0
-   */
-  public Integer call(Boolean val) {
-    if (Boolean.TRUE.equals(val)) {
-      return 1;
-    }
-    return 0;
-  }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/Coalesce.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/Coalesce.java b/src/java/datafu/pig/util/Coalesce.java
deleted file mode 100644
index f8e25f4..0000000
--- a/src/java/datafu/pig/util/Coalesce.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.io.IOException;
-
-import org.apache.pig.data.DataType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
-
-/**
- * Returns the first non-null value from a tuple, just like
- * <a href="http://msdn.microsoft.com/en-us/library/ms190349.aspx" target="_blank">COALESCE</a> in SQL.
- *
- * <p>
- * The UDF takes an optional constructor argument. With 'strict' (the default) all parameters must have
- * the same type. With 'lazy' differing types are merged where possible and the returned value is
- * converted to the merged type when that type is int, long, float or double.
- * </p>
- *
- * <p>
- * Example:
- * </p>
- * <pre>
- * {@code
- *
- * define COALESCE datafu.pig.util.Coalesce();
- *
- * -- input: 1,2,3,NULL,4,NULL,5
- * input = LOAD 'input' AS (val:int);
- *
- * -- produces: 1,2,3,99,4,99,5
- * coalesced = FOREACH input GENERATE COALESCE(val,99);
- *
- * }
- * </pre>
- *
- * @author "Matthew Hayes <mhayes@linkedin.com>"
- *
- */
-public class Coalesce extends AliasableEvalFunc<Object>
-{
-  private static final String STRICT_OPTION = "strict";
-  private static final String LAZY_OPTION = "lazy";
-
-  /** Instance-property key under which the output type computed on the front end is stored. */
-  private static final String TYPE_PROPERTY = "type";
-
-  /** True when all parameters must share one type; false when types are merged. */
-  private final boolean strict;
-
-  /**
-   * Creates a strict Coalesce.
-   */
-  public Coalesce()
-  {
-    strict = true;
-  }
-
-  /**
-   * Creates a Coalesce with the given type-handling mode.
-   *
-   * @param option either "strict" or "lazy"
-   * @throws IllegalArgumentException for any other value, including null
-   */
-  public Coalesce(String option)
-  {
-    // Constant-first comparison so that a null option is reported as an illegal argument.
-    if (STRICT_OPTION.equals(option))
-    {
-      strict = true;
-    }
-    else if (LAZY_OPTION.equals(option))
-    {
-      strict = false;
-    }
-    else
-    {
-      throw new IllegalArgumentException("Unexpected option: " + option);
-    }
-  }
-
-  /**
-   * Returns the first non-null field of the input.
-   *
-   * @param input the candidate values, in order of preference
-   * @return the first non-null value (converted to the merged output type in lazy mode when that type is
-   *         int, long, float or double), or null if the input is null, empty or holds only nulls
-   * @throws DataFuException in lazy mode when the value cannot be converted; the exception carries the
-   *         offending value and the field aliases
-   */
-  @Override
-  public Object exec(Tuple input) throws IOException
-  {
-    if (input == null || input.size() == 0)
-    {
-      return null;
-    }
-
-    Byte type = (Byte)getInstanceProperties().get(TYPE_PROPERTY);
-
-    for (Object o : input)
-    {
-      if (o != null)
-      {
-        if (strict)
-        {
-          return o;
-        }
-        else
-        {
-          try
-          {
-            switch (type)
-            {
-            case DataType.INTEGER:
-              return DataType.toInteger(o);
-            case DataType.LONG:
-              return DataType.toLong(o);
-            case DataType.DOUBLE:
-              return DataType.toDouble(o);
-            case DataType.FLOAT:
-              return DataType.toFloat(o);
-            default:
-              return o;
-            }
-          }
-          catch (Exception e)
-          {
-            DataFuException dfe = new DataFuException(e.getMessage(),e);
-            dfe.setData(o);
-            dfe.setFieldAliases(getFieldAliases());
-            throw dfe;
-          }
-        }
-      }
-    }
-
-    return null;
-  }
-
-  /**
-   * Validates the parameter types and derives the output type, which is also stored in the
-   * instance properties for use by {@link #exec(Tuple)}.
-   *
-   * @param input the schema of the parameters
-   * @return a schema with a single field named "item" of the derived type
-   * @throws RuntimeException if there are no parameters, a parameter has a schema, complex or unusable type,
-   *         or the types differ and either strict mode is active or they cannot be merged
-   */
-  @Override
-  public Schema getOutputSchema(Schema input)
-  {
-    if (input.getFields().size() == 0)
-    {
-      throw new RuntimeException("Expected at least one parameter");
-    }
-
-    Byte outputType = null;
-    int pos = 0;
-    for (FieldSchema field : input.getFields())
-    {
-      if (DataType.isSchemaType(field.type))
-      {
-        throw new RuntimeException(String.format("Not supported on schema types.  Found %s in position %d.",DataType.findTypeName(field.type),pos));
-      }
-
-      if (DataType.isComplex(field.type))
-      {
-        throw new RuntimeException(String.format("Not supported on complex types.  Found %s in position %d.",DataType.findTypeName(field.type),pos));
-      }
-
-      if (!DataType.isUsableType(field.type))
-      {
-        throw new RuntimeException(String.format("Not a usable type.  Found %s in position %d.",DataType.findTypeName(field.type),pos));
-      }
-
-      if (outputType == null)
-      {
-        outputType = field.type;
-      }
-      else if (!outputType.equals(field.type))
-      {
-        if (strict)
-        {
-          throw new RuntimeException(String.format("Expected all types to be equal, but found '%s' in position %d.  First element has type '%s'.  "
-                                                   + "If you'd like to attempt merging types, use the '%s' option, as '%s' is the default.",
-                                                   DataType.findTypeName(field.type),pos,DataType.findTypeName((byte)outputType),LAZY_OPTION,STRICT_OPTION));
-        }
-        else
-        {
-          byte merged = DataType.mergeType(outputType, field.type);
-          if (merged == DataType.ERROR)
-          {
-            throw new RuntimeException(String.format("Expected all types to be equal, but found '%s' in position %d, where output type is '%s', and types could not be merged.",
-                                                     DataType.findTypeName(field.type),pos,DataType.findTypeName((byte)outputType)));
-          }
-          outputType = merged;
-        }
-      }
-
-      pos++;
-    }
-
-    getInstanceProperties().put(TYPE_PROPERTY, outputType);
-
-    return new Schema(new Schema.FieldSchema("item",outputType));
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/ContextualEvalFunc.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/ContextualEvalFunc.java b/src/java/datafu/pig/util/ContextualEvalFunc.java
deleted file mode 100644
index c534b77..0000000
--- a/src/java/datafu/pig/util/ContextualEvalFunc.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.util.Properties;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.impl.util.UDFContext;
-
-/**
- * Base class for UDFs that need to hand per-instance properties from the
- * front end to the back end. Values stored, for example, while outputSchema()
- * runs can be read back when exec() is invoked.
- *
- * @param <T> the type of value returned by the UDF
- */
-public abstract class ContextualEvalFunc<T> extends EvalFunc<T> {
-  /** The UDF context signature; null until {@link #setUDFContextSignature(String)} is called. */
-  private String instanceName;
-
-  /**
-   * Records the signature as the name of this instance.
-   */
-  @Override
-  public void setUDFContextSignature(String signature) {
-    this.instanceName = signature;
-  }
-
-  /**
-   * Fetches the UDF properties that the UDFContext keeps for this class.
-   *
-   * @return context properties
-   */
-  protected Properties getContextProperties() {
-    return UDFContext.getUDFContext().getUDFProperties(this.getClass());
-  }
-
-  /**
-   * Fetches the properties of this particular instance, which are stored inside the
-   * context properties under the instance name and created on first access.
-   *
-   * @return instance properties
-   */
-  protected Properties getInstanceProperties() {
-    Properties shared = getContextProperties();
-    String name = getInstanceName();
-    if (shared.containsKey(name)) {
-      return (Properties) shared.get(name);
-    }
-    Properties created = new Properties();
-    shared.put(name, created);
-    return created;
-  }
-
-  /**
-   * @return the name of this instance corresponding to the UDF Context Signature
-   * @throws RuntimeException if no signature has been set yet
-   * @see #setUDFContextSignature(String)
-   */
-  protected String getInstanceName() {
-    if (instanceName != null) {
-      return instanceName;
-    }
-    throw new RuntimeException("Instance name is null.  This should not happen unless UDFContextSignature was not set.");
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/DataFuException.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/DataFuException.java b/src/java/datafu/pig/util/DataFuException.java
deleted file mode 100644
index 0066aa8..0000000
--- a/src/java/datafu/pig/util/DataFuException.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.util.Map;
-
-/**
- * Unchecked exception that can carry the field aliases of a UDF and the
- * offending data alongside the usual message and cause.
- */
-public class DataFuException extends RuntimeException {
-  private static final long serialVersionUID = 1L;
-  private Map<String, Integer> fieldAliases;
-  private Object data;
-
-  public DataFuException() {
-    super();
-  }
-
-  public DataFuException(String message) {
-    super(message);
-  }
-
-  public DataFuException(String message, Throwable cause) {
-    super(message, cause);
-  }
-
-  public DataFuException(Throwable cause) {
-    super(cause);
-  }
-
-  /**
-   * Returns the field aliases of the UDF this exception relates to.
-   *
-   * @return field aliases, or null if none were set
-   */
-  public Map<String, Integer> getFieldAliases() {
-    return fieldAliases;
-  }
-
-  /**
-   * Returns the data this exception relates to.
-   *
-   * @return data, or null if none was set
-   */
-  public Object getData() {
-    return data;
-  }
-
-  /**
-   * Attaches the field aliases of the UDF this exception relates to.
-   *
-   * @param fieldAliases map of field alias to position
-   */
-  public void setFieldAliases(Map<String, Integer> fieldAliases) {
-    this.fieldAliases = fieldAliases;
-  }
-
-  /**
-   * Attaches the data this exception relates to.
-   *
-   * @param data the offending value
-   */
-  public void setData(Object data) {
-    this.data = data;
-  }
-
-  /**
-   * Renders the class name and message, followed by one line per alias
-   * (if aliases are set) and the data (if set).
-   */
-  @Override
-  public String toString() {
-    StringBuilder text = new StringBuilder(getClass().getName());
-
-    String detail = getLocalizedMessage();
-    if (detail != null) {
-      text.append(": ").append(detail);
-    }
-
-    Map<String, Integer> aliases = getFieldAliases();
-    if (aliases != null) {
-      text.append("\nAliases:");
-      for (String name : aliases.keySet()) {
-        text.append("\n");
-        // Nameless aliases are shown as a placeholder.
-        if (name == null || name.length() == 0) {
-          text.append("???");
-        } else {
-          text.append(name);
-        }
-      }
-    }
-
-    if (getData() != null) {
-      text.append("\nData:").append("\n").append(data.toString());
-    }
-
-    return text.toString();
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/FieldNotFound.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/FieldNotFound.java b/src/java/datafu/pig/util/FieldNotFound.java
deleted file mode 100644
index d624007..0000000
--- a/src/java/datafu/pig/util/FieldNotFound.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import org.apache.pig.backend.executionengine.ExecException;
-
-/**
- * Signals that a field was requested by a name that cannot be resolved;
- * thrown by {@link AliasableEvalFunc}.
- *
- * @author wvaughan
- *
- */
-public class FieldNotFound extends ExecException {
-  private static final long serialVersionUID = 1L;
-
-  /** Creates the exception without a message. */
-  public FieldNotFound() {
-    super();
-  }
-
-  /**
-   * Creates the exception with a message.
-   *
-   * @param message description of the failed lookup
-   */
-  public FieldNotFound(String message) {
-    super(message);
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/In.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/In.java b/src/java/datafu/pig/util/In.java
deleted file mode 100644
index 0667914..0000000
--- a/src/java/datafu/pig/util/In.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-/**
- * Backward-compatibility alias for {@link InUDF}.
- *
- * <p>
- * The UDF formerly named In is now called InUDF; this empty subclass
- * keeps the old name available.
- * </p>
- *
- * @deprecated Use {@link InUDF} instead.
- */
-@Deprecated
-public class In extends InUDF {
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/InUDF.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/InUDF.java b/src/java/datafu/pig/util/InUDF.java
deleted file mode 100644
index 5057285..0000000
--- a/src/java/datafu/pig/util/InUDF.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.io.IOException;
-
-import org.apache.pig.FilterFunc;
-import org.apache.pig.data.Tuple;
-
-/**
- * Similar to the SQL IN function, this function provides a convenient way to filter
- * using a logical disjunction over many values.
- * Returns true when the first value of the tuple is contained within the remainder of the tuple.
- *
- * <p>
- * Example:
- * <pre>
- * {@code
- * define In datafu.pig.util.InUDF();
- * -- cars: (alice, red), (bob, blue), (charlie, green), (dave, red);
- * cars = LOAD cars AS (owner:chararray, color:chararray);
- *
- * -- cars: (alice, red), (bob, blue), (dave, red);
- * red_blue_cars = FILTER cars BY In(color, 'red', 'blue');
- *
- * }</pre>
- * </p>
- *
- * @author wvaughan
- *
- */
-public class InUDF extends FilterFunc
-{
-  /**
-   * Reports whether field 0 of the tuple equals at least one of the fields after it.
-   *
-   * @param input tuple holding the value to look for, followed by the candidate values
-   * @return true as soon as a candidate {@code equals} field 0; false when no
-   *         candidate matches or when field 0 is null
-   * @throws IOException propagated from the tuple accessors, if they raise one
-   */
-  @Override
-  public Boolean exec(Tuple input) throws IOException
-  {
-    Object needle = input.get(0);
-    if (needle == null) {
-      // a null value never matches anything, so the candidates are not inspected
-      return false;
-    }
-
-    for (int pos = 1; pos < input.size(); pos++) {
-      if (needle.equals(input.get(pos))) {
-        return true;
-      }
-    }
-    return false;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/IntToBool.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/IntToBool.java b/src/java/datafu/pig/util/IntToBool.java
deleted file mode 100644
index d00e297..0000000
--- a/src/java/datafu/pig/util/IntToBool.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-/**
- * UDF which converts an Integer to a Boolean.
- */
-public class IntToBool extends SimpleEvalFunc<Boolean>
-{
-  /**
-   * Maps an integer onto a boolean.
-   *
-   * @param val the integer to convert; may be null
-   * @return false when {@code val} is null or zero, true for every other value
-   */
-  public Boolean call(Integer val)
-  {
-    return val != null && val != 0;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/SimpleEvalFunc.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/SimpleEvalFunc.java b/src/java/datafu/pig/util/SimpleEvalFunc.java
deleted file mode 100644
index 2d262b4..0000000
--- a/src/java/datafu/pig/util/SimpleEvalFunc.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Type;
-
-import org.apache.pig.EvalFunc;
-import org.apache.pig.data.DataType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-
-/**
- Uses reflection to make writing simple wrapper Pig UDFs easier.
-
- For example, writing a simple string trimming UDF might look like
- this:
- <pre>
- {@code
- public class TRIM extends EvalFunc<String>
- {
- public String exec(Tuple input) throws IOException
- {
- if (input.size() != 1)
- throw new IllegalArgumentException("requires a parameter");
-
- try {
- Object o = input.get(0);
- if (!(o instanceof String))
- throw new IllegalArgumentException("expected a string");
-
- String str = (String)o;
- return (str == null) ? null : str.trim();
- }
- catch (Exception e) {
- throw WrappedIOException.wrap("error...", e);
- }
- }
- }
- }
- </pre>
- There is a lot of boilerplate to check the number of arguments and
- the parameter types in the tuple.
-
- Instead, with this class, you can derive from SimpleEvalFunc and
- create a <code>call()</code> method (not exec!), just specifying the
- arguments as a regular function. The class handles all the argument
- checking and exception wrapping for you. So your code would be:
- <pre>
- {@code
- public class TRIM2 extends SimpleEvalFunc<String>
- {
- public String call(String s)
- {
- return (s != null) ? s.trim() : null;
- }
- }
- }
- </pre>
-
- An example of this UDF in action with Pig:
- <pre>
- {@code
- grunt> a = load 'test' as (x:chararray, y:chararray); dump a;
- (1 , 2)
-
- grunt> b = foreach a generate TRIM2(x); dump b;
- (1)
-
- grunt> c = foreach a generate TRIM2((int)x); dump c;
- datafu.pig.util.TRIM2(java.lang.String): argument type
- mismatch [#1]; expected java.lang.String, got java.lang.Integer
-
- grunt> d = foreach a generate TRIM2(x, y); dump d;
- datafu.pig.util.TRIM2(java.lang.String): got 2 arguments, expected 1.
- }
- </pre>
-
-*/
-
-public abstract class SimpleEvalFunc<T> extends EvalFunc<T>
-{
- // TODO Add support for other UDF types (e.g., FilterFunc)
- // TODO Algebraic EvalFuncs
-
- Method m = null;
-
- public SimpleEvalFunc()
- {
- for (Method method : this.getClass().getMethods()) {
- if (method.getName() == "call")
- m = method;
- }
- if (m == null)
- throw new IllegalArgumentException(String.format("%s: couldn't find call() method in UDF.", getClass().getName()));
- }
-
- // Pig can't get the return type via reflection (as getReturnType normally tries to do), so give it a hand
- @Override
- public Type getReturnType()
- {
- return m.getReturnType();
- }
-
- private String _method_signature()
- {
- StringBuilder sb = new StringBuilder(getClass().getName());
- Class<?> pvec[] = m.getParameterTypes();
-
- sb.append("(");
- for (int i=0; i < pvec.length; i++) {
- if (i > 0)
- sb.append(", ");
- sb.append(String.format("%s", pvec[i].getName()));
- }
- sb.append(")");
-
- return sb.toString();
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public T exec(Tuple input) throws IOException
- {
- @SuppressWarnings("rawtypes")
- Class pvec[] = m.getParameterTypes();
-
- if (input == null || input.size() == 0)
- return null;
-
- // check right number of arguments
- if (input.size() != pvec.length)
- throw new IOException(String.format("%s: got %d arguments, expected %d.", _method_signature(), input.size(), pvec.length));
-
- // pull and check argument types
- Object[] args = new Object[input.size()];
- for (int i=0; i < pvec.length; i++) {
- Object o = input.get(i);
- try {
- o = pvec[i].cast(o);
- }
- catch (ClassCastException e) {
- throw new IOException(String.format("%s: argument type mismatch [#%d]; expected %s, got %s", _method_signature(), i+1,
- pvec[i].getName(), o.getClass().getName()));
- }
- args[i] = o;
- }
-
- try {
- return (T) m.invoke(this, args);
- }
- catch (Exception e) {
- throw new IOException(String.format("%s: caught exception processing input.", _method_signature()), e);
- }
- }
-
- /**
- * Override outputSchema so we can verify the input schema at pig compile time, instead of runtime
- * @param inputSchema input schema
- * @return call to super.outputSchema in case schema was defined elsewhere
- */
- @Override
- public Schema outputSchema(Schema inputSchema)
- {
- if (inputSchema == null) {
- throw new IllegalArgumentException(String.format("%s: null schema passed to %s", _method_signature(), getClass().getName()));
- }
-
- // check correct number of arguments
- @SuppressWarnings("rawtypes")
- Class parameterTypes[] = m.getParameterTypes();
- if (inputSchema.size() != parameterTypes.length) {
- throw new IllegalArgumentException(String.format("%s: got %d arguments, expected %d.",
- _method_signature(),
- inputSchema.size(),
- parameterTypes.length));
- }
-
- // check type for each argument
- for (int i=0; i < parameterTypes.length; i++) {
- try {
- byte inputType = inputSchema.getField(i).type;
- byte parameterType = DataType.findType(parameterTypes[i]);
- if (inputType != parameterType) {
- throw new IllegalArgumentException(String.format("%s: argument type mismatch [#%d]; expected %s, got %s",
- _method_signature(),
- i+1,
- DataType.findTypeName(parameterType),
- DataType.findTypeName(inputType)));
- }
- }
- catch (FrontendException fe) {
- throw new IllegalArgumentException(String.format("%s: Problem with input schema: ", _method_signature(), inputSchema), fe);
- }
- }
-
- // delegate to super to determine the actual outputSchema (if specified)
- return super.outputSchema(inputSchema);
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/TransposeTupleToBag.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/TransposeTupleToBag.java b/src/java/datafu/pig/util/TransposeTupleToBag.java
deleted file mode 100644
index f8a39df..0000000
--- a/src/java/datafu/pig/util/TransposeTupleToBag.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.pig.util;
-
-import java.io.IOException;
-import java.util.HashMap;
-
-import org.apache.pig.data.BagFactory;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.DataType;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
-
-/**
- * Performs a transpose on a tuple, resulting in a bag of key, value fields where
- * the key is the column name and the value is the value of that column in the tuple.
- *
- * <p>
- * Example:
- * <pre>
- * {@code
- *
- * define TransposeTupleToBag datafu.pig.util.TransposeTupleToBag();
-
- * -- input: 1,10,11,12
- * input = LOAD 'input' AS (id:int,val1:int,val2:int,val3:int);
- *
- * -- produces: 1,{("val1",10),("val2",11),("val3",12)}
- * output = FOREACH input GENERATE id, TransposeTupleToBag(val1 .. val3);
- *
- * }
- * </pre>
- * </p>
- *
- * @author "William Vaughan <wvaughan@linkedin.com>"
- *
- */
-public class TransposeTupleToBag extends AliasableEvalFunc<DataBag>
-{
-  /** Instance-property key under which the common type of the input fields is stored. */
-  private static final String TRANSPOSE_TYPE = "TRANSPOSE_TYPE";
-
-  /**
-   * Builds the schema of the output: a single bag field whose tuples are
-   * (key:chararray, value:&lt;common input type&gt;).
-   *
-   * @param input schema of the fields being transposed; all fields must share one type
-   * @return schema containing the bag field
-   * @throws RuntimeException if input has no fields, if two fields have different types,
-   *         or if building the schema raises a FrontendException
-   */
-  @Override
-  public Schema getOutputSchema(Schema input)
-  {
-    try
-    {
-      // require that every field in the input has the same type
-      Byte type = null;
-      for (FieldSchema fieldSchema : input.getFields()) {
-        if (type == null) {
-          type = fieldSchema.type;
-        } else if (type != fieldSchema.type) {
-          throw new RuntimeException(
-            String.format("Expected all input types to match. Got both %s and %s.",
-                          DataType.findTypeName(type.byteValue()), DataType.findTypeName(fieldSchema.type)));
-        }
-      }
-      // a schema without fields leaves type null, which would otherwise surface
-      // as a bare NullPointerException when the null type is used below
-      if (type == null) {
-        throw new RuntimeException("Expected at least one input field.");
-      }
-      getInstanceProperties().put(TRANSPOSE_TYPE, type);
-
-      Schema outputTupleSchema = new Schema();
-      outputTupleSchema.add(new Schema.FieldSchema("key", DataType.CHARARRAY));
-      outputTupleSchema.add(new Schema.FieldSchema("value", type));
-      return new Schema(new Schema.FieldSchema(
-          getSchemaName(this.getClass().getName().toLowerCase(), input),
-          outputTupleSchema,
-          DataType.BAG));
-    }
-    catch (FrontendException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  /**
-   * Emits one two-field tuple per position of {@code input}: the alias registered
-   * for that position (null when no alias maps to it) followed by the field value.
-   *
-   * @param input tuple to transpose
-   * @return bag holding the (alias, value) tuples in field order
-   * @throws IOException propagated from the tuple accessors, if they raise one
-   */
-  @Override
-  public DataBag exec(Tuple input) throws IOException
-  {
-    // initialize a reverse mapping
-    HashMap<Integer, String> positionToAlias = new HashMap<Integer, String>();
-    for (String alias : getFieldAliases().keySet()) {
-      positionToAlias.put(getFieldAliases().get(alias), alias);
-    }
-    DataBag output = BagFactory.getInstance().newDefaultBag();
-    for (int i=0; i<input.size(); i++) {
-      Tuple tuple = TupleFactory.getInstance().newTuple();
-      tuple.append(positionToAlias.get(i));
-      tuple.append(input.get(i));
-      output.add(tuple);
-    }
-    return output;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/src/java/datafu/pig/util/package-info.java
----------------------------------------------------------------------
diff --git a/src/java/datafu/pig/util/package-info.java b/src/java/datafu/pig/util/package-info.java
deleted file mode 100644
index cce46a0..0000000
--- a/src/java/datafu/pig/util/package-info.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/**
- * Other useful utilities.
- */
-package datafu.pig.util;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test.sh
----------------------------------------------------------------------
diff --git a/test.sh b/test.sh
index f017e58..f4b479b 100755
--- a/test.sh
+++ b/test.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
echo $$ > test.pid
-ant test
+gradle test
rm test.pid
|