datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wvaug...@apache.org
Subject [04/19] DATAFU-27 Migrate build system to Gradle
Date Tue, 04 Mar 2014 07:09:22 GMT
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/PigTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/PigTests.java b/test/pig/datafu/test/pig/PigTests.java
deleted file mode 100644
index 9b4eddd..0000000
--- a/test/pig/datafu/test/pig/PigTests.java
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig;
-
-import static org.testng.Assert.*;
-
-import java.io.BufferedInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileWriter;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.metrics.jvm.JvmMetrics;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.apache.pig.tools.parameters.ParseException;
-
-public abstract class PigTests
-{    
-  @org.testng.annotations.BeforeClass
-  public void beforeClass()
-  {
-    // TODO make it configurable whether this happens, for travis-ci we can't spam the logs so much,
-    // however otherwise it is useful to see the errors
-    Logger.getRootLogger().removeAllAppenders();
-    Logger.getLogger(JvmMetrics.class).setLevel(Level.OFF);
-  }
-  
-  @org.testng.annotations.BeforeMethod
-  public void beforeMethod(Method method)
-  {
-    System.out.println("\n*** Running " + method.getName() + " ***");
-  }
-  
-  protected String[] getDefaultArgs()
-  {
-    String[] args = {
-        "JAR_PATH=" + getJarPath()
-      };
-    return args;
-  }
-  
-  protected List<String> getDefaultArgsAsList()
-  {
-    String[] args = getDefaultArgs();
-    List<String> argsList = new ArrayList<String>(args.length);
-    for (String arg : args)
-    {
-      argsList.add(arg);
-    }
-    return argsList;
-  }
-  
-  protected PigTest createPigTestFromString(String str, String... args) throws IOException
-  {
-    return createPigTest(str.split("\n"),args);
-  }
-  
-  protected PigTest createPigTest(String[] lines, String... args) throws IOException
-  {
-    // append args to list of default args
-    List<String> theArgs = getDefaultArgsAsList();
-    for (String arg : args)
-    {
-      theArgs.add(arg);
-    }
-    
-    for (String arg : theArgs)
-    {
-      String[] parts = arg.split("=",2);
-      if (parts.length == 2)
-      {
-        for (int i=0; i<lines.length; i++)
-        {
-          lines[i] = lines[i].replaceAll(Pattern.quote("$" + parts[0]), parts[1]);
-        }
-      }
-    }
-    
-    return new PigTest(lines);
-  }
-  
-  protected PigTest createPigTest(String scriptPath, String... args) throws IOException
-  {
-    return createPigTest(getLinesFromFile(scriptPath), args);
-  }
-  
-  protected String getJarPath()
-  {    
-    String jarDir = null;
-    
-    if (System.getProperty("datafu.jar.dir") != null)
-    {
-      jarDir = System.getProperty("datafu.jar.dir");
-    }
-    else
-    {
-      jarDir = new File(System.getProperty("user.dir"), "dist").getAbsolutePath();
-    }  
-    
-    File userDir = new File(jarDir);
-    
-    String[] files = userDir.list(new FilenameFilter() {
-
-      @Override
-      public boolean accept(File dir, String name)
-      {
-        return name.endsWith(".jar") && !name.contains("sources") && !name.contains("javadoc");
-      }
-      
-    });
-    
-    if (files == null || files.length == 0)
-    {
-      throw new RuntimeException("Could not find JAR file");
-    }
-    else if (files.length > 1)
-    {
-      StringBuilder sb = new StringBuilder();
-      for (String file : files)
-      {
-        sb.append(file);
-        sb.append(",");
-      }
-      throw new RuntimeException("Found more JAR files than expected: " + sb.substring(0, sb.length()-1));
-    }
-    
-    return  userDir.getAbsolutePath() + "/" + files[0];
-  }
-  
-  protected List<Tuple> getLinesForAlias(PigTest test, String alias) throws IOException, ParseException
-  {
-    return getLinesForAlias(test,alias,true);
-  }
-  
-  protected List<Tuple> getLinesForAlias(PigTest test, String alias, boolean logValues) throws IOException, ParseException
-  {
-    Iterator<Tuple> tuplesIterator = test.getAlias(alias);
-    List<Tuple> tuples = new ArrayList<Tuple>();
-    if (logValues)
-    {
-      System.out.println(String.format("Values for %s: ", alias));
-    }
-    while (tuplesIterator.hasNext())
-    {
-      Tuple tuple = tuplesIterator.next();
-      if (logValues)
-      {
-        System.out.println(tuple.toString());
-      }
-      tuples.add(tuple);
-    }
-    return tuples;
-  }
-    
-  protected void writeLinesToFile(String fileName, String... lines) throws IOException
-  {
-    File inputFile = deleteIfExists(getFile(fileName));
-    writeLinesToFile(inputFile, lines);
-  }
-  
-  protected void writeLinesToFile(File file, String[] lines) throws IOException
-  {
-    FileWriter writer = new FileWriter(file);
-    for (String line : lines)
-    {
-      writer.write(line + "\n");
-    }
-    writer.close();
-  }
-
-  protected void assertOutput(PigTest test, String alias, String... expected) throws IOException, ParseException
-  {
-    List<Tuple> tuples = getLinesForAlias(test, alias);
-    assertEquals(expected.length, tuples.size());
-    int i=0;
-    for (String e : expected)
-    {
-      assertEquals(tuples.get(i++).toString(), e);
-    }
-  }
-  
-  protected File deleteIfExists(File file)
-  {
-    if (file.exists())
-    {
-      file.delete();
-    }
-    return file;
-  }
-  
-  protected File getFile(String fileName)
-  {
-    return new File(System.getProperty("user.dir"), fileName).getAbsoluteFile();
-  }
-  
-  /**
-   * Gets the lines from a given file.
-   * 
-   * @param relativeFilePath The path relative to the datafu-tests project.
-   * @return The lines from the file
-   * @throws IOException
-   */
-  protected String[] getLinesFromFile(String relativeFilePath) throws IOException
-  {
-    // assume that the working directory is the datafu-tests project
-    File file = new File(System.getProperty("user.dir"), relativeFilePath).getAbsoluteFile();
-    BufferedInputStream content = new BufferedInputStream(new FileInputStream(file));
-    Object[] lines = IOUtils.readLines(content).toArray();
-    String[] result = new String[lines.length];
-    for (int i=0; i<lines.length; i++)
-    {
-      result[i] = (String)lines[i];
-    }
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/bags/BagTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/bags/BagTests.java b/test/pig/datafu/test/pig/bags/BagTests.java
deleted file mode 100644
index 80bb0cc..0000000
--- a/test/pig/datafu/test/pig/bags/BagTests.java
+++ /dev/null
@@ -1,1254 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.bags;
-
-import static org.testng.Assert.assertEquals;
-
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
-
-import junit.framework.Assert;
-
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.data.BagFactory;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-import datafu.pig.bags.CountEach;
-import datafu.pig.bags.DistinctBy;
-import datafu.pig.bags.Enumerate;
-import datafu.test.pig.PigTests;
-
-
-public class BagTests extends PigTests
-{
-  /**
-  register $JAR_PATH
-
-  define NullToEmptyBag datafu.pig.bags.NullToEmptyBag();
-  
-  data = LOAD 'input' AS (B: bag {T: tuple(v:INT)});
-  
-  dump data;
-  
-  data2 = FOREACH data GENERATE NullToEmptyBag(B) as P;
-  
-  dump data2;
-  
-  STORE data2 INTO 'output';
-   */
-  @Multiline
-  private String nullToEmptyBag;
-  
-  @Test
-  public void nullToEmptyBagTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(nullToEmptyBag);
-            
-    writeLinesToFile("input", 
-                     "({(1),(2),(3),(4),(5)})",
-                     "()",
-                     "{(4),(5)})");
-            
-    test.runScript();
-        
-    assertOutput(test, "data2",
-                 "({(1),(2),(3),(4),(5)})",
-                 "({})",
-                 "({(4),(5)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define EmptyBagToNull datafu.pig.bags.EmptyBagToNull();
-  
-  data = LOAD 'input' AS (B: bag {T: tuple(v:INT)});
-  
-  dump data;
-  
-  data2 = FOREACH data GENERATE EmptyBagToNull(B) as P;
-  
-  dump data2;
-  
-  STORE data2 INTO 'output';
-   */
-  @Multiline
-  private String emptyBagToNullTest;
-  
-  @Test
-  public void emptyBagToNullTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(emptyBagToNullTest);
-            
-    writeLinesToFile("input", 
-                     "({(1),(2),(3),(4),(5)})",
-                     "()",
-                     "({})",
-                     "{(4),(5)})");
-            
-    test.runScript();
-        
-    assertOutput(test, "data2",
-                 "({(1),(2),(3),(4),(5)})",
-                 "()",
-                 "()",
-                 "({(4),(5)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define EmptyBagToNullFields datafu.pig.bags.EmptyBagToNullFields();
-  
-  data = LOAD 'input' AS (B: bag {T: tuple(v1:INT,v2:INT)});
-  
-  dump data;
-  
-  data2 = FOREACH data GENERATE EmptyBagToNullFields(B) as P;
-  
-  dump data2;
-  
-  STORE data2 INTO 'output';
-   */
-  @Multiline
-  private String emptyBagToNullFieldsTest;
-  
-  @Test
-  public void emptyBagToNullFieldsTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(emptyBagToNullFieldsTest);
-            
-    writeLinesToFile("input", 
-                     "({(1,1),(2,2),(3,3),(4,4),(5,5)})",
-                     "({})",
-                     "{(4,4),(5,5)})");
-            
-    test.runScript();
-        
-    assertOutput(test, "data2",
-                 "({(1,1),(2,2),(3,3),(4,4),(5,5)})",
-                 "({(,)})",
-                 "({(4,4),(5,5)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define AppendToBag datafu.pig.bags.AppendToBag();
-  
-  data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}, T: tuple(v:INT));
-  
-  data2 = FOREACH data GENERATE key, AppendToBag(B,T) as B;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String appendToBagTest;
-  
-  @Test
-  public void appendToBagTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(appendToBagTest);
-    
-    writeLinesToFile("input", 
-                     "1\t{(1),(2),(3)}\t(4)",
-                     "2\t{(10),(20),(30),(40),(50)}\t(60)");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-                 "(1,{(1),(2),(3),(4)})",
-                 "(2,{(10),(20),(30),(40),(50),(60)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define FirstTupleFromBag datafu.pig.bags.FirstTupleFromBag();
-  
-  data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)});
-  
-  data2 = FOREACH data GENERATE key, FirstTupleFromBag(B, null) as B;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String firstTupleFromBagTest;
-
-   @Test
-  public void firstTupleFromBagTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(firstTupleFromBagTest);
-
-    writeLinesToFile("input", "1\t{(4),(9),(16)}");
-
-    test.runScript();
-
-    assertOutput(test, "data2", "(1,(4))");
-  }
-
-  /**
-  register $JAR_PATH
-
-  define PrependToBag datafu.pig.bags.PrependToBag();
-  
-  data = LOAD 'input' AS (key:INT, B: bag{T: tuple(v:INT)}, T: tuple(v:INT));
-  
-  data2 = FOREACH data GENERATE key, PrependToBag(B,T) as B;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String prependToBagTest;
-  
-  @Test
-  public void prependToBagTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(prependToBagTest);
-    
-    writeLinesToFile("input", 
-                     "1\t{(1),(2),(3)}\t(4)",
-                     "2\t{(10),(20),(30),(40),(50)}\t(60)");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-                 "(1,{(4),(1),(2),(3)})",
-                 "(2,{(60),(10),(20),(30),(40),(50)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define BagConcat datafu.pig.bags.BagConcat();
-  
-  data = LOAD 'input' AS (A: bag{T: tuple(v:INT)}, B: bag{T: tuple(v:INT)}, C: bag{T: tuple(v:INT)});
-  
-  describe data;
-  
-  data2 = FOREACH data GENERATE BagConcat(A,B,C);
-  
-  describe data2;
-  
-  STORE data2 INTO 'output';
-   */
-  @Multiline
-  private String bagConcatTest;
-  
-  @Test
-  public void bagConcatTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagConcatTest);
-
-    writeLinesToFile("input", 
-                     "({(1),(2),(3)}\t{(3),(5),(6)}\t{(10),(13)})",
-                     "({(2),(3),(4)}\t{(5),(5)}\t{(20)})");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-                 "({(1),(2),(3),(3),(5),(6),(10),(13)})",
-                 "({(2),(3),(4),(5),(5),(20)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define UnorderedPairs datafu.pig.bags.UnorderedPairs();
-  
-  data = LOAD 'input' AS (B: bag {T: tuple(v:INT)});
-  
-  data2 = FOREACH data GENERATE UnorderedPairs(B) as P;
-  
-  data3 = FOREACH data2 GENERATE FLATTEN(P);
-  
-  data4 = FOREACH data3 GENERATE FLATTEN(elem1), FLATTEN(elem2);
-  
-  data5 = ORDER data4 BY $0, $1;
-  
-  STORE data5 INTO 'output';
-
-
-   */
-  @Multiline
-  private String unorderedPairsTest;
-  
-  @Test
-  public void unorderedPairsTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(unorderedPairsTest);
-    
-    String[] input = {
-      "{(1),(2),(3),(4),(5)}"
-    };
-    
-    String[] output = {
-        "(1,2)",
-        "(1,3)",
-        "(1,4)",
-        "(1,5)",
-        "(2,3)",
-        "(2,4)",
-        "(2,5)",
-        "(3,4)",
-        "(3,5)",
-        "(4,5)"
-      };
-    
-    test.assertOutput("data",input,"data4",output);
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define UnorderedPairs datafu.pig.bags.UnorderedPairs();
-  
-  data = LOAD 'input' AS (A:int, B: bag {T: tuple(v:INT)});
-  
-  data2 = FOREACH data GENERATE A, UnorderedPairs(B) as P;
-  
-  data3 = FOREACH data2 GENERATE A, FLATTEN(P);
-  
-  STORE data3 INTO 'output';
-
-   */
-  @Multiline
-  private String unorderedPairsTest2;
-  
-  @Test
-  public void unorderedPairsTest2() throws Exception
-  {
-    PigTest test = createPigTestFromString(unorderedPairsTest2);
-        
-    this.writeLinesToFile("input", "1\t{(1),(2),(3),(4),(5)}");
-    
-    test.runScript();
-    this.getLinesForAlias(test, "data3");
-    
-    this.assertOutput(test, "data3",
-                      "(1,(1),(2))",
-                      "(1,(1),(3))",
-                      "(1,(1),(4))",
-                      "(1,(1),(5))",
-                      "(1,(2),(3))",
-                      "(1,(2),(4))",
-                      "(1,(2),(5))",
-                      "(1,(3),(4))",
-                      "(1,(3),(5))",
-                      "(1,(4),(5))");    
-  }
- 
-  /**
-  register $JAR_PATH
-
-  define BagSplit datafu.pig.bags.BagSplit();
-  
-  data = LOAD 'input' AS (B:bag{T:tuple(val1:INT,val2:INT)});
-  
-  data2 = FOREACH data GENERATE BagSplit($MAX,B);
-  --describe data2;
-  
-  data3 = FOREACH data2 GENERATE FLATTEN($0);
-  
-  --describe data3
-  
-  STORE data3 INTO 'output';
-
-   */
-  @Multiline
-  private String bagSplitTest;
-  
-  @Test
-  public void bagSplitTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagSplitTest,
-                                 "MAX=5");
-    
-    writeLinesToFile("input", 
-                     "{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");
-    
-    test.runScript();
-    
-    assertOutput(test, "data3",
-                 "({(1,11),(2,22),(3,33),(4,44),(5,55)})",
-                 "({(6,66),(7,77),(8,88),(9,99),(10,1010)})",
-                 "({(11,1111),(12,1212)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define BagSplit datafu.pig.bags.BagSplit('true');
-  
-  data = LOAD 'input' AS (B:bag{T:tuple(val1:INT,val2:INT)});
-  
-  data2 = FOREACH data GENERATE BagSplit($MAX,B);
-  
-  data3 = FOREACH data2 GENERATE FLATTEN($0);
-  
-  STORE data3 INTO 'output';
-   */
-  @Multiline
-  private String bagSplitWithBagNumTest;
-  
-  @Test
-  public void bagSplitWithBagNumTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagSplitWithBagNumTest,
-                                 "MAX=10");
-    
-    writeLinesToFile("input", 
-                     "{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}");
-    
-    test.runScript();
-    
-    assertOutput(test, "data3",
-                 "({(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010)},0)",
-                 "({(11,1111),(12,1212)},1)");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define Enumerate datafu.pig.bags.ReverseEnumerate('1');
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})});
-  
-  data2 = FOREACH data GENERATE Enumerate(data);
-  --describe data2;
-  
-  data3 = FOREACH data2 GENERATE FLATTEN($0);
-  --describe data3;
-  
-  data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i;
-  --describe data4;
-  
-  STORE data4 INTO 'output';
-
-   */
-  @Multiline
-  private String enumerateWithReverseTest;
-  
-  @Test
-  public void enumerateWithReverseTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(enumerateWithReverseTest);
-       
-    writeLinesToFile("input", 
-                     "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},5)",
-                 "(20,{(4),(5),(6)},4)",
-                 "(30,{(7),(8)},3)",
-                 "(40,{(9),(10),(11)},2)",
-                 "(50,{(12),(13),(14),(15)},1)");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define Enumerate datafu.pig.bags.Enumerate('1');
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})});
-  
-  data2 = FOREACH data GENERATE Enumerate(data);
-  --describe data2;
-  
-  data3 = FOREACH data2 GENERATE FLATTEN($0);
-  --describe data3;
-  
-  data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i;
-  --describe data4;
-  
-  STORE data4 INTO 'output';
-
-   */
-  @Multiline
-  private String enumerateWithStartTest;
-  
-  @Test
-  public void enumerateWithStartTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(enumerateWithStartTest);
-       
-    writeLinesToFile("input", 
-                     "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},1)",
-                 "(20,{(4),(5),(6)},2)",
-                 "(30,{(7),(8)},3)",
-                 "(40,{(9),(10),(11)},4)",
-                 "(50,{(12),(13),(14),(15)},5)");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define Enumerate datafu.pig.bags.Enumerate();
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(v1:INT,B: bag{T: tuple(v2:INT)})});
-  
-  data2 = FOREACH data GENERATE Enumerate(data);
-  --describe data2;
-  
-  data3 = FOREACH data2 GENERATE FLATTEN($0);
-  --describe data3;
-  
-  data4 = FOREACH data3 GENERATE $0 as v1, $1 as B, $2 as i;
-  --describe data4;
-  
-  STORE data4 INTO 'output';
-
-   */
-  @Multiline
-  private String enumerateTest;
-  
-  @Test
-  public void enumerateTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(enumerateTest);
-       
-    writeLinesToFile("input",
-                     "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},0)",
-                 "(20,{(4),(5),(6)},1)",
-                 "(30,{(7),(8)},2)",
-                 "(40,{(9),(10),(11)},3)",
-                 "(50,{(12),(13),(14),(15)},4)");
-  }
-  
-  @Test
-  public void enumerateTest2() throws Exception
-  {
-    PigTest test = createPigTestFromString(enumerateTest);
-      
-    writeLinesToFile("input",
-                     "({(10,{(1),(2),(3)}),(20,{(4),(5),(6)}),(30,{(7),(8)}),(40,{(9),(10),(11)}),(50,{(12),(13),(14),(15)})})",
-                     "({(11,{(11),(12),(13),(14)}),(21,{(15),(16),(17),(18)}),(31,{(19),(20)}),(41,{(21),(22),(23),(24)}),(51,{(25),(26),(27)})})");
-   
-    test.runScript();
-   
-    assertOutput(test, "data4",
-                 "(10,{(1),(2),(3)},0)",
-                 "(20,{(4),(5),(6)},1)",
-                 "(30,{(7),(8)},2)",
-                 "(40,{(9),(10),(11)},3)",
-                 "(50,{(12),(13),(14),(15)},4)",
-                 "(11,{(11),(12),(13),(14)},0)",
-                 "(21,{(15),(16),(17),(18)},1)",
-                 "(31,{(19),(20)},2)",
-                 "(41,{(21),(22),(23),(24)},3)",
-                 "(51,{(25),(26),(27)},4)");
-  }  
-  
-  /* 
-   * Testing "Accumulator" part of Enumeration by manually invoking accumulate(), getValue() and cleanup()
-   */
-  @Test
-  public void enumerateAccumulatorTest() throws Exception
-  {
-    Enumerate enumerate = new Enumerate(); 
-    
-    Tuple tuple1 = TupleFactory.getInstance().newTuple(1);
-    tuple1.set(0, 10);
-    
-    Tuple tuple2 = TupleFactory.getInstance().newTuple(1);
-    tuple2.set(0, 20);
-    
-    Tuple tuple3 = TupleFactory.getInstance().newTuple(1);
-    tuple3.set(0, 30);
-    
-    Tuple tuple4 = TupleFactory.getInstance().newTuple(1);
-    tuple4.set(0, 40);
-    
-    Tuple tuple5 = TupleFactory.getInstance().newTuple(1);
-    tuple5.set(0, 50);
-    
-    DataBag bag1 = BagFactory.getInstance().newDefaultBag();
-    bag1.add(tuple1);
-    bag1.add(tuple2);
-    bag1.add(tuple3);
-    
-    DataBag bag2 = BagFactory.getInstance().newDefaultBag();
-    bag2.add(tuple4);
-    bag2.add(tuple5);
-    
-    Tuple inputTuple1 = TupleFactory.getInstance().newTuple(1);
-    inputTuple1.set(0,bag1);
-    
-    Tuple inputTuple2 = TupleFactory.getInstance().newTuple(1);
-    inputTuple2.set(0,bag2);
-    
-    enumerate.accumulate(inputTuple1);
-    enumerate.accumulate(inputTuple2);
-    assertEquals(enumerate.getValue().toString(), "{(10,0),(20,1),(30,2),(40,3),(50,4)}");
-
-    // Testing that cleanup code is correct by calling cleanup() and passing inputs back to Enumerate instance
-    enumerate.cleanup();
-    enumerate.accumulate(inputTuple1);
-    enumerate.accumulate(inputTuple2);
-    assertEquals(enumerate.getValue().toString(), "{(10,0),(20,1),(30,2),(40,3),(50,4)}");     
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define BagSplit datafu.pig.bags.BagSplit();
-  define Enumerate datafu.pig.bags.Enumerate('1');
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(name:CHARARRAY, score:double)});
-  
-  data2 = FOREACH data GENERATE BagSplit(3,data) as the_bags;
-  
-  --describe data2
-  
-  data3 = FOREACH data2 GENERATE Enumerate(the_bags) as enumerated_bags;
-  
-  --describe data3
-  
-  data4 = FOREACH data3 GENERATE FLATTEN(enumerated_bags) as (data,i);
-  
-  --describe data4
-  
-  data5 = FOREACH data4 GENERATE data as the_data, i as the_key;
-  
-  --describe data5
-  
-  data_out = FOREACH data5 GENERATE FLATTEN(the_data), the_key;
-  
-  --describe data_out
-   */
-  @Multiline
-  private String comprehensiveBagSplitAndEnumerate;
-  
-  @Test
-  public void comprehensiveBagSplitAndEnumerate() throws Exception
-  {
-    PigTest test = createPigTestFromString(comprehensiveBagSplitAndEnumerate);
-    
-    writeLinesToFile("input",
-                     "({(A,1.0),(B,2.0),(C,3.0),(D,4.0),(E,5.0)})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data_out",
-                 // bag #1
-                 "(A,1.0,1)",
-                 "(B,2.0,1)",
-                 "(C,3.0,1)",
-                 // bag #2
-                 "(D,4.0,2)",
-                 "(E,5.0,2)");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define DistinctBy datafu.pig.bags.DistinctBy('0');
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:INT, c:INT)});
-  
-  data2 = FOREACH data GENERATE DistinctBy(data);
-  
-  --describe data2;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String distinctByTest;
-
-  @Test
-  public void distinctByTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(distinctByTest);
-    
-    writeLinesToFile("input",
-                     "({(Z,1,0),(A,1,0),(A,1,0),(B,2,0),(B,22,1),(C,3,0),(D,4,0),(E,5,0)})",
-                     "({(A,10,2),(M,50,3),(A,34,49), (A,24,42), (Z,49,22),(B,1,1)},(B,2,2))");
-    
-    test.runScript();
-    
-    assertOutput(test, "data2",
-                 "({(Z,1,0),(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})",
-                 "({(A,10,2),(M,50,3),(Z,49,22),(B,1,1)})");
-  }
- 
-  /**
-  register $JAR_PATH
-
-  define DistinctBy datafu.pig.bags.DistinctBy('1', '2');
-
-  data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:map[INT], c:bag{t: tuple(c0:CHARARRAY, c1:INT)})});
-
-  data2 = FOREACH data GENERATE DistinctBy(data);
-
-  --describe data2;
-
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String distinctByMultiComplexFieldTest;
-
-  @Test
-  public void distinctByMultiComplexFieldTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(distinctByMultiComplexFieldTest);
-
-    writeLinesToFile("input",
-                     "({(a-b,[a#0,b#1],{(a-b,0),(a-b,1)}),(a-c,[b#1,a#0],{(a-b,0),(a-b,1)}),(a-d,[a#1,b#0],{(a-b,1),(a-b,2)})})");
-    
-    test.runScript();
- 
-    assertOutput(test, "data2",
-                 "({(a-b,[b#1,a#0],{(a-b,0),(a-b,1)}),(a-d,[b#0,a#1],{(a-b,1),(a-b,2)})})");
-  }
-
-  /**
-  register $JAR_PATH
-
-  define DistinctBy datafu.pig.bags.DistinctBy('1');
-
-  data = LOAD 'input' AS (data: bag {T: tuple(a:CHARARRAY, b:CHARARRAY)});
-
-  data2 = FOREACH data GENERATE DistinctBy(data);
-
-  --describe data2;
-
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String distinctByDelimTest;
-
-  @Test
-  public void distinctByDelimTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(distinctByDelimTest);
-    
-    writeLinesToFile("input",
-                     "({(a-b,c),(a-b,d)})");
-    
-    test.runScript();
-    
-    assertOutput(test, "data2",
-                 "({(a-b,c),(a-b,d)})");
-  }
-  
-  @Test
-  public void distinctByExecTest() throws Exception
-  {
-    DistinctBy distinct = new DistinctBy("0");
-    
-    DataBag bag;
-    Tuple input;
-    Tuple data;
-   
-    bag = BagFactory.getInstance().newDefaultBag();
-    input = TupleFactory.getInstance().newTuple(bag);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 10);
-    data.set(1, 20);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 11);
-    data.set(1, 50);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 10);
-    data.set(1, 22);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 12);
-    data.set(1, 40);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 11);
-    data.set(1, 50);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 11);
-    data.set(1, 51);
-        
-    DataBag result = distinct.exec(input);
-    
-    Assert.assertEquals(3, result.size());
-    
-    Iterator<Tuple> iter = result.iterator();
-    Assert.assertEquals("(10,20)", iter.next().toString());
-    Assert.assertEquals("(11,50)", iter.next().toString());
-    Assert.assertEquals("(12,40)", iter.next().toString());
-    
-    // do it again to test cleanup
-    bag = BagFactory.getInstance().newDefaultBag();
-    input = TupleFactory.getInstance().newTuple(bag);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 12);
-    data.set(1, 42);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 11);
-    data.set(1, 51);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag.add(data);
-    data.set(0, 11);
-    data.set(1, 50);
-    
-    result = distinct.exec(input);
-    
-    Assert.assertEquals(2, result.size());
-    
-    iter = result.iterator();
-    Assert.assertEquals("(12,42)", iter.next().toString());
-    Assert.assertEquals("(11,51)", iter.next().toString());
-  }
-  
-  @Test
-  public void distinctByAccumulateTest() throws Exception
-  {
-    DistinctBy distinct = new DistinctBy("0");
-    
-    DataBag bag;
-    Tuple input;
-    Tuple data;
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 10);
-    data.set(1, 20);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 11);
-    data.set(1, 50);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 10);
-    data.set(1, 22);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 12);
-    data.set(1, 40);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 11);
-    data.set(1, 50);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 11);
-    data.set(1, 51);
-    distinct.accumulate(input);
-    
-    DataBag result = distinct.getValue();
-    
-    Assert.assertEquals(3, result.size());
-    
-    Iterator<Tuple> iter = result.iterator();
-    Assert.assertEquals("(10,20)", iter.next().toString());
-    Assert.assertEquals("(11,50)", iter.next().toString());
-    Assert.assertEquals("(12,40)", iter.next().toString());
-    
-    // do it again to test cleanup
-    distinct.cleanup();
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 12);
-    data.set(1, 42);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 11);
-    data.set(1, 51);
-    distinct.accumulate(input);
-    
-    data = TupleFactory.getInstance().newTuple(2);
-    bag = BagFactory.getInstance().newDefaultBag();
-    bag.add(data);
-    input = TupleFactory.getInstance().newTuple(bag);
-    data.set(0, 11);
-    data.set(1, 50);
-    distinct.accumulate(input);
-    
-    result = distinct.getValue();
-    
-    Assert.assertEquals(2, result.size());
-    
-    iter = result.iterator();
-    Assert.assertEquals("(12,42)", iter.next().toString());
-    Assert.assertEquals("(11,51)", iter.next().toString());
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define CountEach datafu.pig.bags.CountEach();
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(v1:chararray)});
-  
-  data2 = FOREACH data GENERATE CountEach(data) as counted;
-  --describe data2;
-  
-  data3 = FOREACH data2 {
-    ordered = ORDER counted BY count DESC;
-    GENERATE ordered;
-  }
-  --describe data3
-  
-  STORE data3 INTO 'output';
-
-   */
-  @Multiline
-  private String countEachTest;
- 
-  @Test 
-  public void countEachTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(countEachTest);
-
-    writeLinesToFile("input", 
-                     "({(A),(B),(A),(C),(A),(B)})");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data3",
-        "({((A),3),((B),2),((C),1)})");
-  }
-  
-  @Test 
-  public void countEachExecAndAccumulateTest() throws Exception
-  {    
-    for (int c=0; c<2; c++)
-    {
-      CountEach countEach = new CountEach("flatten");
-      
-      DataBag bag = BagFactory.getInstance().newDefaultBag();
-      { 
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, "A");
-        bag.add(t);
-      }
-      { 
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, "B");
-        bag.add(t);
-      }
-      { 
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, "B");
-        bag.add(t);
-      }
-      { 
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, "C");
-        bag.add(t);
-      }
-      { 
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, "A");
-        bag.add(t);
-      }
-      { 
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, "D");
-        bag.add(t);
-      }
-      
-      DataBag output = null;
-      
-      if (c == 0)
-      {
-        Tuple input = TupleFactory.getInstance().newTuple(1);
-        input.set(0, bag);
-        
-        System.out.println("Testing exec");
-        output = countEach.exec(input);
-      }
-      else
-      {
-        System.out.println("Testing accumulate");
-        for (Tuple t : bag)
-        {
-          DataBag tb = BagFactory.getInstance().newDefaultBag();
-          tb.add(t);
-          Tuple input = TupleFactory.getInstance().newTuple(1);
-          input.set(0, tb);
-          countEach.accumulate(input);
-        }
-        
-        output = countEach.getValue();
-        
-        countEach.cleanup();        
-        Assert.assertEquals(0, countEach.getValue().size());
-      }
-      
-      System.out.println(output.toString());
-      
-      Assert.assertEquals(4, output.size());
-      Set<String> found = new HashSet<String>();
-      for (Tuple t : output)
-      {
-        String key = (String)t.get(0);    
-        found.add(key);  
-        if (key == "A")
-        {
-          Assert.assertEquals(2, t.get(1));
-        }
-        else if (key == "B")
-        {
-          Assert.assertEquals(2, t.get(1));
-        }
-        else if (key == "C")
-        {
-          Assert.assertEquals(1, t.get(1));
-        }
-        else if (key == "D")
-        {
-          Assert.assertEquals(1, t.get(1));
-        }
-        else
-        {
-          Assert.fail("Unexpected: " + key);
-        }
-      }
-      Assert.assertEquals(4, found.size());
-    }
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define CountEach datafu.pig.bags.CountEach('flatten');
-  
-  data = LOAD 'input' AS (data: bag {T: tuple(v1:chararray)});
-  
-  data2 = FOREACH data GENERATE CountEach(data) as counted;
-  --describe data2;
-  
-  data3 = FOREACH data2 {
-    ordered = ORDER counted BY count DESC;
-    GENERATE ordered;
-  }
-  --describe data3
-  
-  STORE data3 INTO 'output';
-
-   */
-  @Multiline
-  private String countEachFlattenTest;
-  
-  @Test 
-  public void countEachFlattenTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(countEachFlattenTest);
-
-    writeLinesToFile("input", 
-                     "({(A),(B),(A),(C),(A),(B)})");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data3",
-        "({(A,3),(B,2),(C,1)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define BagLeftOuterJoin datafu.pig.bags.BagLeftOuterJoin();
-  
-  data = LOAD 'input' AS (outer_key:chararray, bag1:bag{T:tuple(k:chararray,v:chararray)}, bag2:bag{T:tuple(k:chararray,v:chararray)}, bag3:bag{T:tuple(k3:chararray,v3:chararray)});
-  describe data;
-  
-  data2 = FOREACH data GENERATE 
-    outer_key, 
-    BagLeftOuterJoin(bag1, 'k', bag2, 'k', bag3, 'k3') as joined1,
-    BagLeftOuterJoin(bag1, 'k', bag3, 'k3', bag2, 'k') as joined2; --this will break without UDF signature and pig < 0.11
-  describe data2;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String bagLeftOuterJoinTest;
-  
-  @Test 
-  public void bagLeftOuterJoinTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagLeftOuterJoinTest);
-
-    writeLinesToFile("input", 
-                     "1\t{(K1,A1),(K2,B1),(K3,C1)}\t{(K1,A2),(K2,B2),(K2,B22)}\t{(K1,A3),(K3,C3),(K4,D3)}");
-                  
-    test.runScript();
-    
-    assertOutput(test, "data2",
-        "(1,{(K1,A1,K1,A2,K1,A3),(K2,B1,K2,B2,,),(K2,B1,K2,B22,,),(K3,C1,,,K3,C3)},{(K1,A1,K1,A3,K1,A2),(K2,B1,,,K2,B2),(K2,B1,,,K2,B22),(K3,C1,K3,C3,,)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define BagUnion datafu.pig.bags.BagConcat();
-  
-  data = LOAD 'input' AS (input_bag: bag {T: tuple(inner_bag: bag {T2: tuple(k: int, v: chararray)})});
-  describe data;
-  
-  data2 = FOREACH data GENERATE BagUnion(input_bag) as unioned;
-  describe data2;
-  
-  STORE data INTO 'output';
-
-   */
-  @Multiline
-  private String bagUnionTest;
-  
-  @Test
-  public void bagUnionTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagUnionTest);
-    writeLinesToFile("input", "({({(1,A),(1,B)}),({(2,A),(2,B),(2,C)}),({(3,A)})}");
-    test.runScript();
-    assertOutput(test, "data2", "({(1,A),(1,B),(2,A),(2,B),(2,C),(3,A)})");
-  }
- 
-  /**
-  register $JAR_PATH
-
-  define BagGroup datafu.pig.bags.BagGroup();
-  
-  data = LOAD 'input' AS (input_bag: bag {T: tuple(k: int, v: chararray)});
-  describe data;
-  
-  data2 = FOREACH data GENERATE BagGroup(input_bag, input_bag.k) as grouped;
-  describe data2;
-  
-  data3 = FOREACH data2 {
-    ordered = ORDER grouped BY group;
-    GENERATE
-      ordered as grouped;
-  }
-  describe data3;
-  
-  STORE data INTO 'output';
-
-   */
-  @Multiline
-  private String bagGroupSingleTest;
-  
-  @Test
-  public void bagGroupSingleTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagGroupSingleTest);
-    writeLinesToFile("input", "({(1,A),(1,B),(2,A),(2,B),(2,C),(3,A)})");
-    test.runScript();
-    getLinesForAlias(test, "data2", true);
-    assertOutput(test, "data3", "({(1,{(1,A),(1,B)}),(2,{(2,A),(2,B),(2,C)}),(3,{(3,A)})})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define BagGroup datafu.pig.bags.BagGroup();
-  
-  data = LOAD 'input' AS (input_bag: bag {T: tuple(k: int, k2: chararray, v: int)});
-  describe data;
-  
-  data2 = FOREACH data GENERATE BagGroup(input_bag, input_bag.(k, k2)) as grouped;
-  describe data2;
-  
-  data3 = FOREACH data2 {
-    ordered = ORDER grouped BY group;
-    GENERATE
-      ordered as grouped;
-  }
-  describe data3;
-  
-  STORE data INTO 'output';
-
-   */
-  @Multiline
-  private String bagGroupMultipleTest;
-  
-  @Test
-  public void bagGroupMultipleTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(bagGroupMultipleTest);
-    writeLinesToFile("input", "({(1,A,1),(1,B,1),(1,A,2),(2,A,1),(2,B,1),(2,C,1),(3,A,1)})");
-    test.runScript();
-    getLinesForAlias(test, "data2", true);
-    assertOutput(test, "data3", "({((1,A),{(1,A,1),(1,A,2)}),((1,B),{(1,B,1)}),((2,A),{(2,A,1)}),((2,B),{(2,B,1)}),((2,C),{(2,C,1)}),((3,A),{(3,A,1)})})");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/geo/GeoTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/geo/GeoTests.java b/test/pig/datafu/test/pig/geo/GeoTests.java
deleted file mode 100644
index 39c7b61..0000000
--- a/test/pig/datafu/test/pig/geo/GeoTests.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.geo;
-
-import static org.testng.Assert.*;
-
-import java.util.List;
-
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-import datafu.test.pig.PigTests;
-
-public class GeoTests extends PigTests
-{
-  /**
-  register $JAR_PATH
-
-  define HaversineDistInMiles datafu.pig.geo.HaversineDistInMiles();
-  
-  data = LOAD 'input' AS (lat1:double,lng1:double,lat2:double,lng2:double);
-  
-  data2 = FOREACH data GENERATE HaversineDistInMiles(lat1,lng1,lat2,lng2);
-  
-  STORE data2 INTO 'output';
-   */
-  @Multiline
-  private String haversineTest;
-  
-  @Test
-  public void haversineTest() throws Exception
-  {    
-    PigTest test = createPigTestFromString(haversineTest);
-    
-    // Approximate latitude and longitude for major cities from maps.google.com
-    double[] la = {34.040143,-118.243103};
-    double[] tokyo = {35.637209,139.65271};
-    double[] ny = {40.716038,-73.99498};
-    double[] paris = {48.857713,2.342491};
-    double[] sydney = {-33.872696,151.195221};
-        
-    this.writeLinesToFile("input", 
-                          coords(la,tokyo),
-                          coords(ny,tokyo),
-                          coords(ny,sydney),
-                          coords(ny,paris));
-    
-    test.runScript();
-    
-    List<Tuple> distances = this.getLinesForAlias(test, "data2");
-    
-    // ensure distance is within 20 miles of expected (distances found online)
-    assertWithin(5478.0, distances.get(0), 20.0); // la <-> tokyo
-    assertWithin(6760.0, distances.get(1), 20.0); // ny <-> tokyo
-    assertWithin(9935.0, distances.get(2), 20.0); // ny <-> sydney
-    assertWithin(3635.0, distances.get(3), 20.0); // ny <-> paris
-    
-  }
-  
-  private void assertWithin(double expected, Tuple actual, double maxDiff) throws Exception
-  {
-    Double actualVal = (Double)actual.get(0);
-    assertTrue(Math.abs(expected-actualVal) < maxDiff);
-  }
-  
-  private String coords(double[] coords1, double[] coords2)
-  {
-    assertTrue(coords1.length == 2);
-    assertTrue(coords2.length == 2);
-    return String.format("%f\t%f\t%f\t%f", coords1[0], coords1[1], coords2[0], coords2[1]);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/hash/HashTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/hash/HashTests.java b/test/pig/datafu/test/pig/hash/HashTests.java
deleted file mode 100644
index ba19344..0000000
--- a/test/pig/datafu/test/pig/hash/HashTests.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.hash;
-
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-import datafu.test.pig.PigTests;
-
-public class HashTests extends PigTests
-{
-  /**
-  register $JAR_PATH
-
-  define MD5 datafu.pig.hash.MD5();
-  
-  data_in = LOAD 'input' as (val:chararray);
-  
-  data_out = FOREACH data_in GENERATE MD5(val) as val;
-  
-  STORE data_out INTO 'output';
-   */
-  @Multiline private String md5Test;
-  
-  @Test
-  public void md5Test() throws Exception
-  {
-    PigTest test = createPigTestFromString(md5Test);
-    
-    writeLinesToFile("input", 
-                     "ladsljkasdglk",
-                     "lkadsljasgjskdjks",
-                     "aladlasdgjks");
-            
-    test.runScript();
-        
-    assertOutput(test, "data_out",
-                 "(d9a82575758bb4978949dc0659205cc6)",
-                 "(9ec37f02fae0d8d6a7f4453a62272f1f)",
-                 "(cb94139a8b9f3243e68a898ec6bd9b3d)");
-  }
-  
-  /**
-  register $JAR_PATH
-  
-  define SHA256 datafu.pig.hash.SHA('256');
-  
-  data_in = LOAD 'input' as (val:chararray);
-  
-  data_out = FOREACH data_in GENERATE SHA256(val) as val;
-  
-  STORE data_out INTO 'output';
-   */
-  @Multiline private String sha256Test;
-  
-  @Test
-  public void sha256Test() throws Exception
-  {
-	  PigTest test = createPigTestFromString(sha256Test);
-	  
-	  writeLinesToFile("input", 
-			  		   "ladsljkasdglk",
-                       "lkadsljasgjskdjks",
-                       "aladlasdgjks");
-	  
-	  test.runScript();
-	  
-	  assertOutput(test, "data_out", 
-			  "(70ebaf99c4d8ff8860869e50be2d46afbf150b883f66b50a76ee81cdc802242b)",
-			  "(f22e3c744a9ade0fa591d28c55392035248b391c9ee4c77ebfeaf6558c8c0dac)",
-			  "(49420b42e764178830783d4520aea56b759f325c1d1167f5640ded91f33f3e69)");
-  }
-  
-  /**
-  register $JAR_PATH
-  
-  define SHA512 datafu.pig.hash.SHA('512');
-  
-  data_in = LOAD 'input' as (val:chararray);
-  
-  data_out = FOREACH data_in GENERATE SHA512(val) as val;
-  
-  STORE data_out INTO 'output';
-   */
-  @Multiline private String sha512Test;
-  
-  @Test
-  public void sha512Test() throws Exception
-  {
-	  PigTest test = createPigTestFromString(sha512Test);
-	  
-	  writeLinesToFile("input", 
-			  		   "ladsljkasdglk",
-                       "lkadsljasgjskdjks",
-                       "aladlasdgjks");
-	  
-	  test.runScript();
-	  
-	  assertOutput(test, "data_out", 
-			  "(f681dbd89dfc9edf00f68107ed81b4b7c89abdf84337921785d13d9189937a43decbc264b5013d396a102b18564c39595732c43d6d4cc99473f6d6d7101ecf87)",
-			  "(85c130c8636c052e52a2ca091a92d0bb98ee361adcbeeebbd6af978a593b2486a22ac1e7352c683035cfa28de8eee3402adc6760ad54c5c7eda122c5124766bd)",
-			  "(3b82af5c08c9ab70523abf56db244eaa6740fa8c356e3a41bb5225560c0949b14b417c8d56e72cc26d5682400e0420a556e692c41ea82855013e8b7bae5fb0fb)");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define MD5 datafu.pig.hash.MD5('base64');
-  
-  data_in = LOAD 'input' as (val:chararray);
-  
-  data_out = FOREACH data_in GENERATE MD5(val) as val;
-  
-  STORE data_out INTO 'output';
-   */
-  @Multiline private String md5Base64Test;
-  
-  @Test
-  public void md5Base64Test() throws Exception
-  {
-    PigTest test = createPigTestFromString(md5Base64Test);
-    
-    writeLinesToFile("input", 
-                     "ladsljkasdglk",
-                     "lkadsljasgjskdjks",
-                     "aladlasdgjks");
-            
-    test.runScript();
-        
-    assertOutput(test, "data_out",
-                 "(2agldXWLtJeJSdwGWSBcxg==)",
-                 "(nsN/Avrg2Nan9EU6YicvHw==)",
-                 "(y5QTmoufMkPmiomOxr2bPQ==)");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/linkanalysis/PageRankImplTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/linkanalysis/PageRankImplTests.java b/test/pig/datafu/test/pig/linkanalysis/PageRankImplTests.java
deleted file mode 100644
index 01d540f..0000000
--- a/test/pig/datafu/test/pig/linkanalysis/PageRankImplTests.java
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.linkanalysis;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.testng.annotations.Test;
-
-public class PageRankImplTests 
-{
-  @Test
-  public void wikipediaGraphInMemoryTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting wikipediaGraphInMemoryTest");
-    
-    datafu.pig.linkanalysis.PageRankImpl graph = new datafu.pig.linkanalysis.PageRankImpl();
-   
-    String[] edges = getWikiExampleEdges();
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    // Without dangling node handling we will not get the true page rank since the total rank will
-    // not add to 1.0.  Without dangling node handling some of the page rank drains out of the graph.
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    String[] expectedRanks = getWikiExampleExpectedRanks();
-    
-    Map<String,Float> expectedRanksMap = parseExpectedRanks(expectedRanks);
-    
-    validateExpectedRanks(graph, nodeIdsMap, expectedRanksMap);
-  }
-  
-  @Test
-  public void wikipediaGraphDiskCacheTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting wikipediaGraphDiskCacheTest");
-    
-    datafu.pig.linkanalysis.PageRankImpl graph = new datafu.pig.linkanalysis.PageRankImpl();
-    
-    String[] edges = getWikiExampleEdges();
-    
-    graph.enableEdgeDiskCaching();
-    graph.setEdgeCachingThreshold(5);
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    assert graph.isUsingEdgeDiskCache() : "Expected disk cache to be used";
-    
-    // Without dangling node handling we will not get the true page rank since the total rank will
-    // not add to 1.0.  Without dangling node handling some of the page rank drains out of the graph.
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    String[] expectedRanks = getWikiExampleExpectedRanks();
-    
-    Map<String,Float> expectedRanksMap = parseExpectedRanks(expectedRanks);
-    
-    validateExpectedRanks(graph, nodeIdsMap, expectedRanksMap);
-  }
-  
-  @Test(groups="perf")
-  public void hubAndSpokeInMemoryTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting hubAndSpokeInMemoryTest");
-    
-    datafu.pig.linkanalysis.PageRankImpl graph = new datafu.pig.linkanalysis.PageRankImpl();
-   
-    String[] edges = getHubAndSpokeEdges();
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    // no need to validate, this is just a perf test for runtime comparison
-  }
-  
-  @Test(groups="perf")
-  public void hubAndSpokeDiskCacheTest() throws Exception {
-    System.out.println();
-    System.out.println("Starting hubAndSpokeDiskCacheTest");
-    
-    datafu.pig.linkanalysis.PageRankImpl graph = new datafu.pig.linkanalysis.PageRankImpl();
-   
-    String[] edges = getHubAndSpokeEdges();
-    
-    graph.enableEdgeDiskCaching();
-    graph.setEdgeCachingThreshold(5);
-    
-    Map<String,Integer> nodeIdsMap = loadGraphFromEdgeList(graph, edges);
-    
-    graph.enableDanglingNodeHandling();
-    
-    performIterations(graph, 150, 1e-18f);
-    
-    // no need to validate, this is just a perf test for runtime comparison
-  }
-  
-  private String[] getHubAndSpokeEdges()
-  {
-    int count = 50000;
-    String[] edges = new String[count];
-    
-    for (int i=0; i<count; i++)
-    {
-      edges[i] = String.format("S%d H", i);
-    }
-    return edges;
-  }
-  
-  public static String[] getWikiExampleEdges()
-  {
-    // graph taken from:
-    // http://en.wikipedia.org/wiki/PageRank
-    String[] edges = {
-        "B C",
-        "C B",
-        "D A",
-        "D B",
-        "E D",
-        "E B",
-        "E F",
-        "F E",
-        "F B",
-        "P1 B",
-        "P1 E",
-        "P2 B",
-        "P2 E",
-        "P3 B",
-        "P3 E",
-        "P4 E",
-        "P5 E"
-      };
-    return edges;
-  }
-  
-  public static String[] getWikiExampleExpectedRanks()
-  {
-    // these ranks come from the Wikipedia page:
-    // http://en.wikipedia.org/wiki/PageRank
-    String[] expectedRanks = {
-        "A 3.3",
-        "B 38.4",
-        "C 34.3",
-        "D 3.9",
-        "E 8.1",
-        "F 3.9",
-        "P1 1.6",
-        "P2 1.6",
-        "P3 1.6",
-        "P4 1.6",
-        "P5 1.6"      
-      };
-    return expectedRanks;
-  }
-  
-  private Map<String,Integer> loadGraphFromEdgeList(datafu.pig.linkanalysis.PageRankImpl graph, String[] edges) throws IOException
-  {
-    Map<Integer,ArrayList<Map<String,Object>>> nodeEdgesMap = new HashMap<Integer,ArrayList<Map<String,Object>>>();
-    Map<String,Integer> nodeIdsMap = new HashMap<String,Integer>();
-    
-    for (String edge : edges)
-    {
-      String[] parts = edge.split(" ");
-      assert parts.length == 2 : "Expected two parts";
-      
-      int sourceId = getOrCreateId(parts[0], nodeIdsMap);
-      int destId = getOrCreateId(parts[1], nodeIdsMap);
-      
-      Map<String,Object> edgeMap = new HashMap<String,Object>();
-      edgeMap.put("weight", 1.0);
-      edgeMap.put("dest", destId);
-      
-      ArrayList<Map<String,Object>> nodeEdges = null;
-      
-      if (nodeEdgesMap.containsKey(sourceId))
-      {
-        nodeEdges = nodeEdgesMap.get(sourceId);
-      }
-      else
-      {
-        nodeEdges = new ArrayList<Map<String,Object>>();
-        nodeEdgesMap.put(sourceId, nodeEdges);
-      }
-      
-      nodeEdges.add(edgeMap);
-    }
-    
-    for (Map.Entry<Integer, ArrayList<Map<String,Object>>> e : nodeEdgesMap.entrySet())
-    {
-      graph.addNode(e.getKey(), e.getValue());
-    }
-    
-    return nodeIdsMap;
-  }
-  
-  private void performIterations(datafu.pig.linkanalysis.PageRankImpl graph, int maxIters, float tolerance) throws IOException
-  {
-    System.out.println(String.format("Beginning iteration (maxIters = %d, tolerance=%e)", maxIters, tolerance));
-        
-    System.out.println("Initializing graph");
-    long startTime = System.nanoTime();
-    graph.init();
-    System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
-    
-    float totalDiff;
-    int iter = 0;
-    
-    System.out.println("Beginning iterations");
-    startTime = System.nanoTime();
-    do 
-    {
-      totalDiff = graph.nextIteration();
-      iter++;      
-    } while(iter < maxIters && totalDiff > tolerance);
-    System.out.println(String.format("Done, took %f ms", (System.nanoTime() - startTime)/10.0e6));
-  }
-  
-  private void validateExpectedRanks(datafu.pig.linkanalysis.PageRankImpl graph, Map<String,Integer> nodeIds, Map<String,Float> expectedRanks)
-  {
-    System.out.println("Validating page rank results");
-    
-    for (Map.Entry<String,Integer> e : nodeIds.entrySet())
-    {
-      float rank = graph.getNodeRank(e.getValue());
-      
-      float expectedRank = expectedRanks.get(e.getKey());
-      // require 0.1% accuracy
-      assert (Math.abs(expectedRank - rank*100.0f) < 0.1) : String.format("Did not get expected rank for %s", e.getKey());      
-    }
-    
-    System.out.println("All ranks match expected");
-  }
-  
-  public static Map<String,Float> parseExpectedRanks(String[] expectedRanks)
-  {
-    Map<String,Float> expectedRanksMap = new HashMap<String,Float>();
-    for (String expectedRankString : expectedRanks)
-    {
-      String[] parts = expectedRankString.split(" ");
-      assert parts.length == 2 : "Expected two parts";
-      String name = parts[0];
-      Float expectedRank = Float.parseFloat(parts[1]);
-      expectedRanksMap.put(name, expectedRank);
-    }
-    return expectedRanksMap;
-  }
-
-  private Integer getOrCreateId(String name, Map<String,Integer> nodeIds)
-  {
-    if (nodeIds.containsKey(name))
-    {
-      return nodeIds.get(name);
-    }
-    else
-    {
-      Integer id = nodeIds.size();
-      nodeIds.put(name, id);
-      return id;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/linkanalysis/PageRankTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/linkanalysis/PageRankTests.java b/test/pig/datafu/test/pig/linkanalysis/PageRankTests.java
deleted file mode 100644
index 371d2cc..0000000
--- a/test/pig/datafu/test/pig/linkanalysis/PageRankTests.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.linkanalysis;
-
-
-import static org.testng.Assert.*;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-
-import datafu.test.pig.linkanalysis.PageRankImplTests;
-import datafu.test.pig.PigTests;
-
-public class PageRankTests extends PigTests
-{
-  /**
-  register $JAR_PATH
-
-  -- Need to enable dangling node handling since the Wikipedia example has them,
-  -- otherwise the ranks won't be right. 
-  define PageRank datafu.pig.linkanalysis.PageRank('dangling_nodes','true');
-  
-  data = LOAD 'input' AS (topic:INT,source:INT,dest:INT,weight:DOUBLE);
-  
-  data_grouped = GROUP data by (topic,source);
-  
-  data_grouped = foreach data_grouped {
-    generate group.topic as topic, group.source as source, data.(dest,weight) as edges;
-  };
-  
-  data_grouped2 = GROUP data_grouped by topic;
-  data_grouped2 = foreach data_grouped2 {
-    generate group as topic, FLATTEN(PageRank(data_grouped.(source,edges))) as (source,rnk);
-  };
-  
-  data_grouped3 = FOREACH data_grouped2 GENERATE
-    topic,
-    source,
-    rnk;
-    
-  STORE data_grouped3 INTO 'output';
-
-
-   */
-  @Multiline private String pageRankTest;
-  
-  @Test
-  public void pigPageRankTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(pageRankTest);
-
-    String[] edges = PageRankImplTests.getWikiExampleEdges();
-
-    Map<String,Integer> nodeIds = new HashMap<String,Integer>();
-    Map<Integer,String> nodeIdsReversed = new HashMap<Integer,String>();
-    Map<String,Float> expectedRanks = PageRankImplTests.parseExpectedRanks(PageRankImplTests.getWikiExampleExpectedRanks());
-
-    File f = new File(System.getProperty("user.dir"), "input").getAbsoluteFile();
-    if (f.exists())
-    {
-      f.delete();
-    }
-
-    FileWriter writer = new FileWriter(f);
-    BufferedWriter bufferedWriter = new BufferedWriter(writer);
-
-    for (String edge : edges)
-    {
-      String[] edgeParts = edge.split(" ");
-      String source = edgeParts[0];
-      String dest = edgeParts[1];
-      if (!nodeIds.containsKey(source))
-      {
-        int id = nodeIds.size();
-        nodeIds.put(source,id);
-        nodeIdsReversed.put(id, source);
-      }
-      if (!nodeIds.containsKey(dest))
-      {
-        int id = nodeIds.size();
-        nodeIds.put(dest,id);
-        nodeIdsReversed.put(id, dest);
-      }
-      Integer sourceId = nodeIds.get(source);
-      Integer destId = nodeIds.get(dest);
-
-      StringBuffer sb = new StringBuffer();
-
-      sb.append("1\t"); // topic
-      sb.append(sourceId.toString() + "\t");
-      sb.append(destId.toString() + "\t");
-      sb.append("1.0\n"); // weight
-
-      bufferedWriter.write(sb.toString());
-    }
-
-    bufferedWriter.close();
-
-    test.runScript();
-    Iterator<Tuple> tuples = test.getAlias("data_grouped3");
-
-    System.out.println("Final node ranks:");
-    int nodeCount = 0;
-    while (tuples.hasNext())
-    {
-      Tuple nodeTuple = tuples.next();
-
-      Integer topic = (Integer)nodeTuple.get(0);
-      Integer nodeId = (Integer)nodeTuple.get(1);
-      Float nodeRank = (Float)nodeTuple.get(2);
-
-      assertEquals(1, topic.intValue());
-
-      System.out.println(String.format("%d => %f", nodeId, nodeRank));
-
-      Float expectedNodeRank = expectedRanks.get(nodeIdsReversed.get(nodeId));
-
-      assertTrue(Math.abs(expectedNodeRank - nodeRank * 100.0f) < 0.1,
-                 String.format("expected: %f, actual: %f", expectedNodeRank, nodeRank));
-
-      nodeCount++;
-    }
-
-    assertEquals(nodeIds.size(),nodeCount);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/random/NumberTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/random/NumberTests.java b/test/pig/datafu/test/pig/random/NumberTests.java
deleted file mode 100644
index 867c27c..0000000
--- a/test/pig/datafu/test/pig/random/NumberTests.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.random;
-
-import static org.testng.Assert.*;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-import datafu.test.pig.PigTests;
-
-public class NumberTests extends PigTests
-{
-  /**
-  register $JAR_PATH
-
-  define RandInt datafu.pig.random.RandInt();
-  
-  data = LOAD 'input' AS (key:INT);
-  data2 = FOREACH data GENERATE key, RandInt($MIN,$MAX) as val;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline private String randomIntRangeTest;
-  
-  /**
-   * Test the RandomIntRange UDF.  The main purpose is to make sure it can be used in a Pig script.
-   * Also the range of output values is tested.
-   * 
-   * @throws Exception
-   */
-  @Test
-  public void randomIntRangeTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(randomIntRangeTest,
-                                 "MIN=1", "MAX=10");
-        
-    List<String> input = new ArrayList<String>();
-    for (int i=0; i<100; i++)
-    {
-      input.add(String.format("(%d)", i));
-    }
-    
-    writeLinesToFile("input", 
-                     input.toArray(new String[0]));
-            
-    test.runScript();
-        
-    List<Tuple> tuples = getLinesForAlias(test, "data2", false);
-    for (Tuple tuple : tuples)
-    {
-      Integer randValue = (Integer)tuple.get(1);
-      assertTrue(randValue >= 1);
-      assertTrue(randValue <= 10);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/random/UUIDTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/random/UUIDTests.java b/test/pig/datafu/test/pig/random/UUIDTests.java
deleted file mode 100644
index e199760..0000000
--- a/test/pig/datafu/test/pig/random/UUIDTests.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package datafu.test.pig.random;
-
-import datafu.test.pig.PigTests;
-import junit.framework.Assert;
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-import java.util.*;
-
-import static org.testng.Assert.assertTrue;
-
-public class UUIDTests extends PigTests
-{
-    /**
-     register $JAR_PATH
-
-     define RandomUUID datafu.pig.random.RandomUUID();
-
-     data = LOAD 'input' AS (key: chararray);
-     DUMP data
-
-     data2 = FOREACH data GENERATE key, RandomUUID() as val;
-     DUMP data2
-
-     STORE data2 INTO 'output';
-     */
-    @Multiline private String randomUUIDTest;
-
-    /**
-     * Test the RandomUUID UDF.  The main purpose is to make sure it can be used in a Pig script.
-     * Also the range of length of values is tested.
-     *
-     * @throws Exception
-     */
-    @Test
-    public void randomUUIDTest() throws Exception
-    {
-        PigTest test = createPigTestFromString(randomUUIDTest);
-
-        writeLinesToFile("input",
-                "input1",
-                "input2",
-                "input3");
-
-        List<Tuple> tuples = getLinesForAlias(test, "data2", true);
-        Set<UUID> set = new HashSet<UUID>();
-        for (Tuple tuple : tuples)
-        {
-            set.add(UUID.fromString((String)tuple.get(1)));
-        }
-        Assert.assertEquals(set.size(), 3);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/778bef1e/test/pig/datafu/test/pig/sampling/SamplingTests.java
----------------------------------------------------------------------
diff --git a/test/pig/datafu/test/pig/sampling/SamplingTests.java b/test/pig/datafu/test/pig/sampling/SamplingTests.java
deleted file mode 100644
index 9209133..0000000
--- a/test/pig/datafu/test/pig/sampling/SamplingTests.java
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package datafu.test.pig.sampling;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import junit.framework.Assert;
-
-import org.adrianwalker.multilinestring.Multiline;
-import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.data.BagFactory;
-import org.apache.pig.data.DataBag;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.data.TupleFactory;
-import org.apache.pig.pigunit.PigTest;
-import org.testng.annotations.Test;
-
-import datafu.pig.sampling.ReservoirSample;
-import datafu.pig.sampling.SampleByKey;
-import datafu.pig.sampling.WeightedSample;
-import datafu.test.pig.PigTests;
-
-
-public class SamplingTests extends PigTests
-{
-  /**
-  register $JAR_PATH
-
-  define WeightedSample datafu.pig.sampling.WeightedSample('1');
-  
-  data = LOAD 'input' AS (A: bag {T: tuple(v1:chararray,v2:INT)});
-  
-  data2 = FOREACH data GENERATE WeightedSample(A,1);
-  --describe data2;
-  
-  STORE data2 INTO 'output';
-
-   */
-  @Multiline
-  private String weightedSampleTest;
-  
-  @Test
-  public void weightedSampleTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(weightedSampleTest);
-
-    writeLinesToFile("input", 
-                     "({(a, 100),(b, 1),(c, 5),(d, 2)})");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-        "({(a,100),(c,5),(b,1),(d,2)})");
-  }
-  
-  /**
-  register $JAR_PATH
-
-  define WeightedSample datafu.pig.sampling.WeightedSample('1');
-  
-  data = LOAD 'input' AS (A: bag {T: tuple(v1:chararray,v2:INT)});
-  
-  data2 = FOREACH data GENERATE WeightedSample(A,1,3);
-  --describe data2;
-  
-  STORE data2 INTO 'output';
-   */
-  @Multiline
-  private String weightedSampleLimitTest;
-  
-  @Test
-  public void weightedSampleLimitTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(weightedSampleLimitTest);
-
-    writeLinesToFile("input", 
-                     "({(a, 100),(b, 1),(c, 5),(d, 2)})");
-                  
-    test.runScript();
-            
-    assertOutput(test, "data2",
-        "({(a,100),(c,5),(b,1)})");
-  }
-  
-  @Test
-  public void weightedSampleLimitExecTest() throws IOException
-  {
-    WeightedSample sampler = new WeightedSample();
-    
-    DataBag bag = BagFactory.getInstance().newDefaultBag();
-    for (int i=0; i<100; i++)
-    {
-      Tuple t = TupleFactory.getInstance().newTuple(2);
-      t.set(0, i);
-      t.set(1, 1); // score is equal for all
-      bag.add(t);
-    }
-    
-    Tuple input = TupleFactory.getInstance().newTuple(3);
-    input.set(0, bag);
-    input.set(1, 1); // use index 1 for score
-    input.set(2, 10); // get 10 items
-    
-    DataBag result = sampler.exec(input);
-    
-    Assert.assertEquals(10, result.size());
-    
-    // all must be found, no repeats
-    Set<Integer> found = new HashSet<Integer>();
-    for (Tuple t : result)
-    {
-      Integer i = (Integer)t.get(0);
-      System.out.println(i);
-      Assert.assertTrue(i>=0 && i<100);
-      Assert.assertFalse(String.format("Found duplicate of %d",i), found.contains(i));
-      found.add(i);
-    }
-  }
-  
-  /**
-  register $JAR_PATH
-  
-  DEFINE SampleByKey datafu.pig.sampling.SampleByKey('0.5', 'salt2.5');
-  
-  data = LOAD 'input' AS (A_id:chararray, B_id:chararray, C:int);
-  sampled = FILTER data BY SampleByKey(A_id);
-  
-  STORE sampled INTO 'output';
-
-   */
-  @Multiline
-  private String sampleByKeyTest;
-  
-  @Test
-  public void sampleByKeyTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(sampleByKeyTest);
-    
-    writeLinesToFile("input",
-                     "A1\tB1\t1","A1\tB1\t4","A1\tB3\t4","A1\tB4\t4",
-                     "A2\tB1\t4","A2\tB2\t4",
-                     "A3\tB1\t3","A3\tB1\t1","A3\tB3\t77",
-                     "A4\tB1\t3","A4\tB2\t3","A4\tB3\t59","A4\tB4\t29",
-                     "A5\tB1\t4",
-                     "A6\tB2\t3","A6\tB2\t55","A6\tB3\t1",
-                     "A7\tB1\t39","A7\tB2\t27","A7\tB3\t85",
-                     "A8\tB1\t4","A8\tB2\t45",
-                     "A9\tB3\t92", "A9\tB1\t42","A9\tB2\t1","A9\tB3\t0",
-                     "A10\tB1\t7","A10\tB2\t23","A10\tB3\t1","A10\tB4\t41","A10\tB5\t52");
-    
-    test.runScript();
-    assertOutput(test, "sampled", 
-                 "(A4,B1,3)","(A4,B2,3)","(A4,B3,59)","(A4,B4,29)",
-                 "(A5,B1,4)",
-                 "(A6,B2,3)","(A6,B2,55)","(A6,B3,1)",
-                 "(A7,B1,39)","(A7,B2,27)","(A7,B3,85)",
-                 "(A10,B1,7)","(A10,B2,23)","(A10,B3,1)","(A10,B4,41)","(A10,B5,52)");
-  }
-
-  /**
-  register $JAR_PATH
-  
-  DEFINE SampleByKey datafu.pig.sampling.SampleByKey('0.5', 'salt2.5');
-  
-  data = LOAD 'input' AS (A_id:chararray, B_id:chararray, C:int);
-  sampled = FILTER data BY SampleByKey(A_id, B_id);
-  
-  STORE sampled INTO 'output';
-
-   */
-  @Multiline
-  private String sampleByKeyMultipleKeyTest;
-  
-  @Test
-  public void sampleByKeyMultipleKeyTest() throws Exception
-  {
-    PigTest test = createPigTestFromString(sampleByKeyMultipleKeyTest);
-    
-    writeLinesToFile("input",
-                     "A1\tB1\t1","A1\tB1\t4",
-                     "A1\tB3\t4",
-                     "A1\tB4\t4",
-                     "A2\tB1\t4",
-                     "A2\tB2\t4",
-                     "A3\tB1\t3","A3\tB1\t1",
-                     "A3\tB3\t77",
-                     "A4\tB1\t3",
-                     "A4\tB2\t3",
-                     "A4\tB3\t59",
-                     "A4\tB4\t29",
-                     "A5\tB1\t4",
-                     "A6\tB2\t3","A6\tB2\t55",
-                     "A6\tB3\t1",
-                     "A7\tB1\t39",
-                     "A7\tB2\t27",
-                     "A7\tB3\t85",
-                     "A8\tB1\t4",
-                     "A8\tB2\t45",
-                     "A9\tB3\t92","A9\tB3\t0",
-                     "A9\tB6\t42","A9\tB5\t1",
-                     "A10\tB1\t7",
-                     "A10\tB2\t23","A10\tB2\t1","A10\tB2\t31",
-                     "A10\tB6\t41",
-                     "A10\tB7\t52");
-    test.runScript();
-    assertOutput(test, "sampled", 
-                 "(A1,B1,1)","(A1,B1,4)",
-                 "(A1,B4,4)",
-                 "(A2,B1,4)",
-                 "(A2,B2,4)",
-                 "(A3,B1,3)","(A3,B1,1)",
-                 "(A4,B4,29)",
-                 "(A5,B1,4)",
-                 "(A6,B3,1)",
-                 "(A7,B1,39)",
-                 "(A8,B1,4)",
-                 "(A9,B3,92)","(A9,B3,0)",
-                 "(A10,B2,23)","(A10,B2,1)","(A10,B2,31)"
-                 );
-                   
-  }
-  
-  @Test
-  public void sampleByKeyExecTest() throws Exception
-  {
-    SampleByKey sampler = new SampleByKey("0.10", "thesalt");
-    
-    Map<Integer,Integer> valuesPerKey = new HashMap<Integer,Integer>();
-    
-    // 10,000 keys total
-    for (int i=0; i<10000; i++)
-    {
-      // 5 values per key
-      for (int j=0; j<5; j++)
-      {
-        Tuple t = TupleFactory.getInstance().newTuple(1);
-        t.set(0, i);
-        if (sampler.exec(t))
-        {          
-          if (valuesPerKey.containsKey(i))
-          {
-            valuesPerKey.put(i, valuesPerKey.get(i)+1);
-          }
-          else
-          {
-            valuesPerKey.put(i, 1);
-          }
-        }
-      }
-    }
-    
-    // 10% sample, so should have roughly 1000 keys
-    Assert.assertTrue(Math.abs(1000-valuesPerKey.size()) < 50);
-    
-    // every value should be present for the same key
-    for (Map.Entry<Integer, Integer> pair : valuesPerKey.entrySet())
-    {
-      Assert.assertEquals(5, pair.getValue().intValue());
-    }
-  }
-  
-  /**
-  register $JAR_PATH
-
-  DEFINE ReservoirSample datafu.pig.sampling.ReservoirSample('$RESERVOIR_SIZE');
-  
-  data = LOAD 'input' AS (A_id:chararray, B_id:chararray, C:int);
-  sampled = FOREACH (GROUP data ALL) GENERATE ReservoirSample(data) as sample_data;
-  sampled = FOREACH sampled GENERATE COUNT(sample_data) AS sample_count;
-  STORE sampled INTO 'output';
-
-   */
-  @Multiline
-  private String reservoirSampleTest;
-  
-  @Test
-  public void reservoirSampleTest() throws Exception
-  {
-    
-    writeLinesToFile("input",
-                     "A1\tB1\t1",
-                     "A1\tB1\t4",
-                     "A1\tB3\t4",
-                     "A1\tB4\t4",
-                     "A2\tB1\t4",
-                     "A2\tB2\t4",
-                     "A3\tB1\t3",
-                     "A3\tB1\t1",
-                     "A3\tB3\t77",
-                     "A4\tB1\t3",
-                     "A4\tB2\t3",
-                     "A4\tB3\t59",
-                     "A4\tB4\t29",
-                     "A5\tB1\t4",
-                     "A6\tB2\t3",
-                     "A6\tB2\t55",
-                     "A6\tB3\t1",
-                     "A7\tB1\t39",
-                     "A7\tB2\t27",
-                     "A7\tB3\t85",
-                     "A8\tB1\t4",
-                     "A8\tB2\t45",
-                     "A9\tB3\t92",
-                     "A9\tB3\t0",
-                     "A9\tB6\t42",
-                     "A9\tB5\t1",
-                     "A10\tB1\t7",
-                     "A10\tB2\t23",
-                     "A10\tB2\t1",
-                     "A10\tB2\t31",
-                     "A10\tB6\t41",
-                     "A10\tB7\t52");
-   
-    for(int i=10; i<=30; i=i+10){
-      int reservoirSize = i ;
-      PigTest test = createPigTestFromString(reservoirSampleTest, "RESERVOIR_SIZE="+reservoirSize);
-      test.runScript();
-      assertOutput(test, "sampled", "("+reservoirSize+")");
-    }
-  }
-  
-  /**
-  register $JAR_PATH
-
-  DEFINE ReservoirSample datafu.pig.sampling.ReservoirSample('$RESERVOIR_SIZE');
-  DEFINE Assert datafu.pig.util.Assert();
-  
-  data = LOAD 'input' AS (A_id:int, B_id:chararray, C:int);
-  sampled = FOREACH (GROUP data BY A_id) GENERATE group as A_id, ReservoirSample(data.(B_id,C)) as sample_data;
-  sampled = FILTER sampled BY Assert((SIZE(sample_data) <= $RESERVOIR_SIZE ? 1 : 0), 'must be <= $RESERVOIR_SIZE');
-  sampled = FOREACH sampled GENERATE A_id, FLATTEN(sample_data);
-  STORE sampled INTO 'output';
-
-   */
-  @Multiline
-  private String reservoirSampleGroupTest;
-  
-  /**
-   * Verifies that ReservoirSample works when data grouped by a key.
-   * In particular it ensures that the reservoir is not reused across keys.
-   * 
-   * <p>
-   * This confirms the fix for DATAFU-11.
-   * </p>
-   * 
-   * @throws Exception
-   */
-  @Test
-  public void reservoirSampleGroupTest() throws Exception
-  {    
-    // first value is the key.  last value matches the key so we can
-    // verify the register is reset for each key.  values should not
-    // bleed across to other keys.
-    writeLinesToFile("input",
-                     "1\tB1\t1",
-                     "1\tB1\t1",
-                     "1\tB3\t1",
-                     "1\tB4\t1",
-                     "2\tB1\t2",
-                     "2\tB2\t2",
-                     "3\tB1\t3",
-                     "3\tB1\t3",
-                     "3\tB3\t3",
-                     "4\tB1\t4",
-                     "4\tB2\t4",
-                     "4\tB3\t4",
-                     "4\tB4\t4",
-                     "5\tB1\t5",
-                     "6\tB2\t6",
-                     "6\tB2\t6",
-                     "6\tB3\t6",
-                     "7\tB1\t7",
-                     "7\tB2\t7",
-                     "7\tB3\t7",
-                     "8\tB1\t8",
-                     "8\tB2\t8",
-                     "9\tB3\t9",
-                     "9\tB3\t9",
-                     "9\tB6\t9",
-                     "9\tB5\t9",
-                     "10\tB1\t10",
-                     "10\tB2\t10",
-                     "10\tB2\t10",
-                     "10\tB2\t10",
-                     "10\tB6\t10",
-                     "10\tB7\t10");
-   
-    for(int i=1; i<=3; i++) {
-      int reservoirSize = i ;
-      PigTest test = createPigTestFromString(reservoirSampleGroupTest, "RESERVOIR_SIZE="+reservoirSize);
-      test.runScript();
-      
-      List<Tuple> tuples = getLinesForAlias(test, "sampled");
-      
-      for (Tuple tuple : tuples)
-      {
-        Assert.assertEquals(((Number)tuple.get(0)).intValue(), ((Number)tuple.get(2)).intValue());
-      }
-    }
-  }
-  
-  @Test
-  public void reservoirSampleExecTest() throws IOException
-  {
-    ReservoirSample sampler = new ReservoirSample("10");
-    
-    DataBag bag = BagFactory.getInstance().newDefaultBag();
-    for (int i=0; i<100; i++)
-    {
-      Tuple t = TupleFactory.getInstance().newTuple(1);
-      t.set(0, i);
-      bag.add(t);
-    }
-    
-    Tuple input = TupleFactory.getInstance().newTuple(bag);
-    
-    DataBag result = sampler.exec(input);
-    
-    Assert.assertEquals(10, result.size());
-    
-    // all must be found, no repeats
-    Set<Integer> found = new HashSet<Integer>();
-    for (Tuple t : result)
-    {
-      Integer i = (Integer)t.get(0);
-      System.out.println(i);
-      Assert.assertTrue(i>=0 && i<100);
-      Assert.assertFalse(String.format("Found duplicate of %d",i), found.contains(i));
-      found.add(i);
-    }
-  }
-  
-  @Test
-  public void reservoirSampleAccumulateTest() throws IOException
-  {
-    ReservoirSample sampler = new ReservoirSample("10");
-    
-    for (int i=0; i<100; i++)
-    {
-      Tuple t = TupleFactory.getInstance().newTuple(1);
-      t.set(0, i);
-      DataBag bag = BagFactory.getInstance().newDefaultBag();
-      bag.add(t);
-      Tuple input = TupleFactory.getInstance().newTuple(bag);
-      sampler.accumulate(input);
-    }
-        
-    DataBag result = sampler.getValue();
-    
-    Assert.assertEquals(10, result.size());
-    
-    // all must be found, no repeats
-    Set<Integer> found = new HashSet<Integer>();
-    for (Tuple t : result)
-    {
-      Integer i = (Integer)t.get(0);
-      System.out.println(i);
-      Assert.assertTrue(i>=0 && i<100);
-      Assert.assertFalse(String.format("Found duplicate of %d",i), found.contains(i));
-      found.add(i);
-    }
-  }
-  
-  @Test
-  public void reservoirSampleAlgebraicTest() throws IOException
-  {
-    ReservoirSample.Initial initialSampler = new ReservoirSample.Initial("10");
-    ReservoirSample.Intermediate intermediateSampler = new ReservoirSample.Intermediate("10");
-    ReservoirSample.Final finalSampler = new ReservoirSample.Final("10");
-    
-    DataBag bag = BagFactory.getInstance().newDefaultBag();
-    for (int i=0; i<100; i++)
-    {
-      Tuple t = TupleFactory.getInstance().newTuple(1);
-      t.set(0, i);
-      bag.add(t);
-    }
-    
-    Tuple input = TupleFactory.getInstance().newTuple(bag);
-    
-    Tuple intermediateTuple = initialSampler.exec(input);  
-    DataBag intermediateBag = BagFactory.getInstance().newDefaultBag(Arrays.asList(intermediateTuple));
-    intermediateTuple = intermediateSampler.exec(TupleFactory.getInstance().newTuple(intermediateBag));  
-    intermediateBag = BagFactory.getInstance().newDefaultBag(Arrays.asList(intermediateTuple));
-    DataBag result = finalSampler.exec(TupleFactory.getInstance().newTuple(intermediateBag));
-    
-    Assert.assertEquals(10, result.size());
-    
-    // all must be found, no repeats
-    Set<Integer> found = new HashSet<Integer>();
-    for (Tuple t : result)
-    {
-      Integer i = (Integer)t.get(0);
-      System.out.println(i);
-      Assert.assertTrue(i>=0 && i<100);
-      Assert.assertFalse(String.format("Found duplicate of %d",i), found.contains(i));
-      found.add(i);
-    }
-  }
-}


Mime
View raw message