datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mha...@apache.org
Subject [3/3] git commit: DATAFU-57 Source tarball generation and gradle bootstrapping
Date Tue, 05 Aug 2014 03:50:47 GMT
DATAFU-57 Source tarball generation and gradle bootstrapping

https://issues.apache.org/jira/browse/DATAFU-57

Note: This includes a lot of Gradle build code taken from Apache Samza.

Added the Gradle bootstrap task to be used when building from the source release.  This is from SAMZA-283.

Created a sourceRelease task to build the source tarball.  There is also a signSourceRelease task to build a signed version.
The source release does not include the gradle jar.  This is why the bootstrap task is necessary.

Created RELEASE.md for release instructions on how to create the source tarball.

Did some preparation for a binary release.  The artifacts datafu-pig and datafu-hourglass can be installed to the local maven repo using the 'install' task.

I also fixed all the javadoc issues, which caused build errors when I added the nexus plugin.  These were numerous.

Add org.gradle.jvmargs to gradle.properties


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/0f9b853b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/0f9b853b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/0f9b853b

Branch: refs/heads/master
Commit: 0f9b853be2efd7176091c433bc08180aa81bc453
Parents: db3838a
Author: Matthew Hayes <matthew.terence.hayes@gmail.com>
Authored: Sat Aug 2 10:05:34 2014 -0700
Committer: Matthew Hayes <matthew.terence.hayes@gmail.com>
Committed: Mon Aug 4 20:49:31 2014 -0700

----------------------------------------------------------------------
 README.md                                       |  57 +++++------
 RELEASE.md                                      |  56 ++++++++++
 bootstrap.gradle                                |  31 ++++++
 build-plugin/build.gradle                       |  11 +-
 build.gradle                                    |   3 +-
 datafu-hourglass/build.gradle                   | 102 +++++++------------
 .../hourglass/avro/AvroDateRangeMetadata.java   |   2 +-
 .../avro/AvroKeyWithMetadataOutputFormat.java   |   3 +
 .../avro/AvroKeyWithMetadataRecordWriter.java   |   1 +
 .../java/datafu/hourglass/fs/PathUtils.java     |  35 +++----
 .../java/datafu/hourglass/jobs/AbstractJob.java |  15 ++-
 .../jobs/AbstractNonIncrementalJob.java         |   8 +-
 ...stractPartitionCollapsingIncrementalJob.java |   2 +
 ...stractPartitionPreservingIncrementalJob.java |   8 +-
 .../datafu/hourglass/jobs/ExecutionPlanner.java |   6 +-
 .../java/datafu/hourglass/jobs/FileCleaner.java |   6 +-
 .../PartitionCollapsingExecutionPlanner.java    |   2 +-
 .../jobs/PartitionCollapsingIncrementalJob.java |   6 +-
 .../PartitionPreservingExecutionPlanner.java    |   2 +-
 .../jobs/PartitionPreservingIncrementalJob.java |   4 +-
 .../datafu/hourglass/jobs/StagedOutputJob.java  |   2 +-
 .../hourglass/mapreduce/CollapsingCombiner.java |   4 +-
 .../hourglass/mapreduce/CollapsingMapper.java   |  12 +--
 .../hourglass/mapreduce/CollapsingReducer.java  |   4 +-
 .../mapreduce/DistributedCacheHelper.java       |   4 +-
 .../hourglass/mapreduce/PartitioningMapper.java |   8 +-
 .../mapreduce/PartitioningReducer.java          |   4 +-
 .../hourglass/model/KeyValueCollector.java      |   6 +-
 .../java/datafu/hourglass/model/Mapper.java     |   4 +-
 datafu-pig/build.gradle                         |  61 +++--------
 .../src/main/java/datafu/pig/bags/BagGroup.java |   5 +-
 .../main/java/datafu/pig/bags/CountEach.java    |   4 +-
 .../main/java/datafu/pig/bags/DistinctBy.java   |   2 -
 .../main/java/datafu/pig/bags/Enumerate.java    |   8 +-
 .../main/java/datafu/pig/bags/PrependToBag.java |  12 +--
 .../java/datafu/pig/bags/ReverseEnumerate.java  |  13 ++-
 .../java/datafu/pig/bags/UnorderedPairs.java    |  10 +-
 .../datafu/pig/geo/HaversineDistInMiles.java    |   5 +-
 .../datafu/pig/hash/lsh/CosineDistanceHash.java |   6 +-
 .../java/datafu/pig/hash/lsh/L1PStableHash.java |   6 +-
 .../java/datafu/pig/hash/lsh/L2PStableHash.java |   6 +-
 .../java/datafu/pig/hash/lsh/LSHFamily.java     |   2 +-
 .../pig/hash/lsh/cosine/HyperplaneLSH.java      |  39 ++++---
 .../pig/hash/lsh/cosine/package-info.java       |   4 +-
 .../datafu/pig/hash/lsh/interfaces/LSH.java     |  29 +++---
 .../pig/hash/lsh/interfaces/LSHCreator.java     |   2 +-
 .../datafu/pig/hash/lsh/interfaces/Sampler.java |   4 +-
 .../pig/hash/lsh/interfaces/package-info.java   |   2 +-
 .../java/datafu/pig/hash/lsh/metric/Cosine.java |  14 +--
 .../java/datafu/pig/hash/lsh/metric/L1.java     |  18 ++--
 .../java/datafu/pig/hash/lsh/metric/L2.java     |  12 ++-
 .../datafu/pig/hash/lsh/metric/MetricUDF.java   |  44 ++++----
 .../pig/hash/lsh/metric/package-info.java       |   2 +-
 .../AbstractStableDistributionFunction.java     |  28 ++---
 .../datafu/pig/hash/lsh/p_stable/L1LSH.java     |  20 ++--
 .../datafu/pig/hash/lsh/p_stable/L2LSH.java     |  11 +-
 .../pig/hash/lsh/p_stable/package-info.java     |   4 +-
 .../java/datafu/pig/hash/lsh/package-info.java  |   2 +-
 .../datafu/pig/hash/lsh/util/DataTypeUtil.java  |   4 +-
 .../java/datafu/pig/linkanalysis/PageRank.java  |  50 +++++----
 .../datafu/pig/linkanalysis/PageRankImpl.java   |   7 +-
 .../main/java/datafu/pig/random/RandInt.java    |   5 +-
 .../java/datafu/pig/sampling/SampleByKey.java   |  18 ++--
 .../datafu/pig/sampling/SimpleRandomSample.java |  53 ++++++----
 .../SimpleRandomSampleWithReplacementVote.java  |  70 ++++++++-----
 .../pig/sampling/WeightedReservoirSample.java   |  14 ++-
 .../java/datafu/pig/sessions/Sessionize.java    |   8 +-
 .../java/datafu/pig/sets/SetOperationsBase.java |   3 -
 .../src/main/java/datafu/pig/stats/Median.java  |   8 +-
 .../main/java/datafu/pig/stats/Quantile.java    |   5 +-
 .../java/datafu/pig/stats/QuantileUtil.java     |   3 -
 .../java/datafu/pig/stats/StreamingMedian.java  |   8 +-
 .../datafu/pig/stats/StreamingQuantile.java     |  16 ++-
 .../src/main/java/datafu/pig/stats/VAR.java     |  12 +--
 .../java/datafu/pig/stats/WilsonBinConf.java    |  11 +-
 .../datafu/pig/stats/entropy/CondEntropy.java   |  28 ++---
 .../stats/entropy/EmpiricalCountEntropy.java    |  52 +++++-----
 .../java/datafu/pig/stats/entropy/Entropy.java  |  23 +++--
 .../java/datafu/pig/util/AliasableEvalFunc.java |  22 ++--
 .../main/java/datafu/pig/util/AssertUDF.java    |  10 +-
 .../src/main/java/datafu/pig/util/Coalesce.java |   9 +-
 .../datafu/pig/util/ContextualEvalFunc.java     |   4 +-
 .../java/datafu/pig/util/DataFuException.java   |   6 +-
 .../src/main/java/datafu/pig/util/InUDF.java    |  18 ++--
 .../datafu/pig/util/TransposeTupleToBag.java    |   7 +-
 .../datafu/test/pig/hash/lsh/LSHPigTest.java    |  18 ++++
 .../java/datafu/test/pig/hash/lsh/LSHTest.java  |  18 ++++
 .../java/datafu/test/pig/util/Base64Test.java   |  18 ++++
 gradle.properties                               |   4 +-
 gradle/buildscript.gradle                       |  26 ++++-
 gradle/dependency-versions.gradle               |  19 ++++
 gradle/release.gradle                           |  92 +++++++++++++++++
 92 files changed, 846 insertions(+), 606 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 1b3c821..8e1b67d 100644
--- a/README.md
+++ b/README.md
@@ -36,61 +36,52 @@ If you'd like to jump in and get started, check out the corresponding guides for
 
 ## Getting Help
 
-Bugs and feature requests can be filed [here](https://issues.apache.org/jira/browse/DATAFU).  For other help please see the [discussion group](http://groups.google.com/group/datafu).
+Bugs and feature requests can be filed [here](https://issues.apache.org/jira/browse/DATAFU).  For other help please see the [website](http://datafu.incubator.apache.org/).
 
 ## Developers
 
-### DataFu Pig
+### Building the Code
 
-#### Building the Code
+To build DataFu from a git checkout or binary release, run:
 
-The Apache DataFu Pig library can be built by running the command below.  More information about working with the source
-code can be found in the [DataFu Pig Contributing Guide](http://datafu.incubator.apache.org/docs/datafu/contributing.html).
+    ./gradlew clean assemble
 
-```
-./gradlew assemble
-```
+To build DataFu from a source release, it is first necessary to download the gradle wrapper script mentioned above. This bootstrapping process requires Gradle to be installed on the build machine.  Gradle is available through most package managers or directly from [its website](http://www.gradle.org/).  To bootstrap the wrapper, run:
 
-The built JAR can be found under `datafu-pig/build/libs` by the name `datafu-pig-x.y.z.jar`, where x.y.z is the version.
+    gradle -b bootstrap.gradle
 
-#### Generating Eclipse Files
+After the bootstrap script has completed, the regular gradlew instructions are available.
+
+The datafu-pig JAR can be found under `datafu-pig/build/libs` by the name `datafu-pig-x.y.z.jar`, where x.y.z is the version.  Similarly, the datafu-hourglass JAR can be found in the `datafu-hourglass/build/libs` directory.
+
+### Generating Eclipse Files
 
 This command generates the eclipse project and classpath files:
 
-```
-./gradlew eclipse
-```
+    ./gradlew eclipse
 
 To clean up the eclipse files:
 
-```
-./gradlew cleanEclipse
-```
+    ./gradlew cleanEclipse
 
-#### Running the Tests
+### Running the Tests
 
 To run all the tests:
 
-```
-./gradlew test
-```
+    ./gradlew test
 
-To run tests for a single class, use the `test.single` property.  For example, to run only the QuantileTests:
+To run only the DataFu Pig tests:
 
-```
-./gradlew :datafu-pig:test -Dtest.single=QuantileTests
-```
+    ./gradlew :datafu-pig:test
 
-The tests can also be run from within eclipse.  Note that you may run out of heap when executing tests in Eclipse. To fix this adjust your heap settings for the TestNG plugin. Go to Eclipse->Preferences. Select TestNG->Run/Debug. Add "-Xmx1G" to the JVM args.
+To run only the DataFu Hourglass tests:
 
-### DataFu Hourglass
+    ./gradlew :datafu-hourglass:test
 
-#### Building the Code
+To run tests for a single class, use the `test.single` property.  For example, to run only the QuantileTests:
+
+    ./gradlew :datafu-pig:test -Dtest.single=QuantileTests
+
+The tests can also be run from within eclipse.  Note that you may run out of heap when executing tests in Eclipse. To fix this adjust your heap settings for the TestNG plugin. Go to Eclipse->Preferences. Select TestNG->Run/Debug. Add "-Xmx1G" to the JVM args.
 
-The Apache DataFu Pig library can be built by running the commands below.  More information about working with the source
-code can be found in the [DataFu Hourglass Contributing Guide](http://datafu.incubator.apache.org/docs/hourglass/contributing.html).
 
-```
-cd contrib/hourglass
-ant jar
-```

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/RELEASE.md
----------------------------------------------------------------------
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 0000000..39fbba3
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,56 @@
+Auto-generate all missing headers in files:
+
+    ./gradlew licenseFormatMain
+    ./gradlew licenseFormatTest
+
+Check to see if any files were modified to add the missing headers.  If so be sure to commit the changes before proceeding.
+
+Build the code, run the tests, and perform any other required checks:
+
+    ./gradlew check
+
+Edit `$HOME/.gradle/gradle.properties` and add your GPG key information:
+
+    signing.keyId=01234567                          # Your GPG key ID, as 8 hex digits
+    signing.secretKeyRingFile=/path/to/secring.gpg  # Normally in $HOME/.gnupg/secring.gpg
+    signing.password=YourSuperSecretPassphrase      # Plaintext passphrase to decrypt key
+    nexusUsername=yourname                          # Your username on Apache's LDAP
+    nexusPassword=password                          # Your password on Apache's LDAP
+
+Putting your passwords there in plaintext is unfortunately unavoidable. The
+[nexus plugin](https://github.com/bmuschko/gradle-nexus-plugin) supports asking
+for them interactively, but unfortunately there's a
+[Gradle issue](http://issues.gradle.org/browse/GRADLE-2357) which prevents us
+from reading keyboard input (because we need `org.gradle.jvmargs` set).
+
+To release to a local Maven repository:
+
+    ./gradlew install
+
+You should be able to see all the installed artifacts in the local repository now:
+
+    find ~/.m2/repository/org/apache/datafu/
+
+To build a tarball suitable for an ASF source release (and its accompanying MD5 file):
+
+First, clean any non-checked-in files from git (this removes all such files without prompting):
+
+    git clean -fdx
+
+Alternatively, you can make a fresh clone of the repository to a separate directory:
+
+    git clone https://git-wip-us.apache.org/repos/asf/incubator-datafu.git datafu-release
+    cd datafu-release
+
+To build a signed tarball:
+
+    ./gradlew clean signSourceRelease
+
+Alternatively, to build the tarball without the signatures:
+
+    ./gradlew clean sourceRelease
+
+The tarball can also be signed manually:
+
+    gpg --sign --armor --detach-sig build/distribution/source/datafu-sources-*.tgz
+

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/bootstrap.gradle
----------------------------------------------------------------------
diff --git a/bootstrap.gradle b/bootstrap.gradle
new file mode 100644
index 0000000..8e32f7b
--- /dev/null
+++ b/bootstrap.gradle
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * This file allows a source release of DataFu, which is prohibited
+ * from containing jars, to be able to download the gradle wrapper
+ * and its accompanying jars.
+ */
+
+defaultTasks 'downloadWrapper'
+
+task downloadWrapper(type: Wrapper) {
+    description = "Download the gradle wrapper and requisite files. Overwrites existing wrapper files."
+    gradleVersion = project.gradleVersion
+}

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/build-plugin/build.gradle
----------------------------------------------------------------------
diff --git a/build-plugin/build.gradle b/build-plugin/build.gradle
index eab96e0..9addaba 100644
--- a/build-plugin/build.gradle
+++ b/build-plugin/build.gradle
@@ -31,4 +31,13 @@ cleanEclipse {
     delete ".factorypath"
     delete "bin"
   }
-}
\ No newline at end of file
+}
+
+// no need for source or javadoc jars to be published for build-plugin
+nexus {
+  attachSources = false
+  attachJavadoc = false
+}
+
+// don't publish the build plugin
+configurations.archives.artifacts.removeAll { return it.name == "build-plugin"; }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/build.gradle
----------------------------------------------------------------------
diff --git a/build.gradle b/build.gradle
index 9ed6eb1..c4fbd29 100644
--- a/build.gradle
+++ b/build.gradle
@@ -3,7 +3,7 @@ buildscript {
     mavenCentral()
   }
 
-  apply from: file('gradle/buildscript.gradle'), to: buildscript 
+  apply from: file('gradle/buildscript.gradle'), to: buildscript
 }
 
 allprojects {
@@ -13,6 +13,7 @@ allprojects {
 }
 
 apply from: file("gradle/dependency-versions.gradle")
+apply from: file("gradle/release.gradle")
 
 allprojects {
   apply plugin: 'eclipse'

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/build.gradle
----------------------------------------------------------------------
diff --git a/datafu-hourglass/build.gradle b/datafu-hourglass/build.gradle
index 9cc2b99..b27b483 100644
--- a/datafu-hourglass/build.gradle
+++ b/datafu-hourglass/build.gradle
@@ -1,6 +1,5 @@
 apply plugin: 'java'
 apply plugin: 'license'
-apply plugin: 'maven-publish'
 
 import groovy.xml.MarkupBuilder
 
@@ -12,8 +11,8 @@ buildscript {
   }
 }
 
-cleanEclipse { 
-  doLast { 
+cleanEclipse {
+  doLast {
     delete ".apt_generated"
     delete ".settings"
     delete ".factorypath"
@@ -21,71 +20,6 @@ cleanEclipse {
   }
 }
 
-task sourcesJar(type: Jar) {
-  description 'Creates the sources jar'
-
-  classifier = 'sources'
-  from sourceSets.main.allJava
-}
-
-task javadocJar(type: Jar, dependsOn: javadoc) {
-  description 'Creates the javadoc jar'
-
-  classifier = 'javadoc'
-  from javadoc.destinationDir
-}
-
-artifacts {
-  archives sourcesJar
-  archives javadocJar
-  archives jar
-}
-
-// Note: alternate way to publish: https://github.com/Netflix/gradle-template
-
-publishing {
-  publications {
-    mavenJava(MavenPublication) {
-      artifact sourcesJar
-      artifact javadocJar
-      artifact jar
-
-      pom.withXml {
-        asNode().appendNode("packaging","jar")
-        asNode().appendNode("name","Apache DataFu Hourglass")        
-        asNode().appendNode("description","A framework for incrementally processing data in Hadoop.")
-        asNode().appendNode("url","http://datafu.incubator.apache.org/")
-
-        def licenseNode = asNode().appendNode("licenses").appendNode("license")
-        licenseNode.appendNode("name","The Apache Software License, Version 2.0")
-        licenseNode.appendNode("url","http://www.apache.org/licenses/LICENSE-2.0.txt")
-        licenseNode.appendNode("distribution","repo")
-
-        def dependenciesNode = asNode().appendNode("dependencies")
-        def dependency = dependenciesNode.appendNode("dependency")
-        dependency.appendNode("groupId","log4j")
-        dependency.appendNode("artifactId","log4j")
-        dependency.appendNode("version","$log4jVersion")
-
-        dependency = dependenciesNode.appendNode("dependency")
-        dependency.appendNode("groupId","org.apache.avro")
-        dependency.appendNode("artifactId","avro")
-        dependency.appendNode("version","$avroVersion")
-
-        dependency = dependenciesNode.appendNode("dependency")
-        dependency.appendNode("groupId","org.apache.avro")
-        dependency.appendNode("artifactId","avro-mapred")
-        dependency.appendNode("version","$avroVersion")
-
-        dependency = dependenciesNode.appendNode("dependency")
-        dependency.appendNode("groupId","org.apache.avro")
-        dependency.appendNode("artifactId","avro-compiler")
-        dependency.appendNode("version","$avroVersion")
-      }
-    }
-  }
-}
-
 // create tasks to automatically add the license header
 license {
   header rootProject.file('HEADER')
@@ -114,6 +48,38 @@ dependencies {
   testCompile "org.testng:testng:$testngVersion"
 }
 
+// modify the pom dependencies so we don't include hadoop and the testing related artifacts
+modifyPom {
+  project {
+    dependencies {
+      dependency {
+        groupId 'log4j'
+        artifactId 'log4j'
+        version "$log4jVersion"
+      }
+      dependency {
+        groupId 'org.json'
+        artifactId 'json'
+        version "$jsonVersion"
+      }
+      dependency {
+        groupId 'org.apache.avro'
+        artifactId 'avro'
+        version "$avroVersion"
+      }
+      dependency {
+        groupId 'org.apache.avro'
+        artifactId 'avro-mapred'
+        version "$avroVersion"
+      }
+      dependency {
+        groupId 'org.apache.avro'
+        artifactId 'avro-compiler'
+        version "$avroVersion"
+      }
+    }
+  }
+}
 
 test {
   // enable TestNG support (default is JUnit)

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroDateRangeMetadata.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroDateRangeMetadata.java b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroDateRangeMetadata.java
index 0136ee9..6f38fbe 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroDateRangeMetadata.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroDateRangeMetadata.java
@@ -53,7 +53,7 @@ public class AvroDateRangeMetadata
    * @param fs file system to access path
    * @param path path to get date range for
    * @return date range
-   * @throws IOException
+   * @throws IOException IOException
    */
   public static DateRange getOutputFileDateRange(FileSystem fs, Path path) throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataOutputFormat.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataOutputFormat.java b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataOutputFormat.java
index b37c559..4fb523c 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataOutputFormat.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataOutputFormat.java
@@ -73,6 +73,9 @@ public class AvroKeyWithMetadataOutputFormat<T> extends AvroOutputFormatBase<Avr
      * @param writerSchema The writer schema for the records to write.
      * @param compressionCodec The compression type for the writer file.
      * @param outputStream The target output stream for the records.
+     * @param conf the configuration
+     * @throws IOException IOException
+     * @return record writer
      */
     protected RecordWriter<AvroKey<T>, NullWritable> create(
         Schema writerSchema, CodecFactory compressionCodec, OutputStream outputStream, Configuration conf)

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataRecordWriter.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataRecordWriter.java b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataRecordWriter.java
index 0e61d87..c09e3af 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataRecordWriter.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/avro/AvroKeyWithMetadataRecordWriter.java
@@ -50,6 +50,7 @@ public class AvroKeyWithMetadataRecordWriter<T> extends RecordWriter<AvroKey<T>,
    * @param writerSchema The writer schema for the records in the Avro container file.
    * @param compressionCodec A compression codec factory for the Avro container file.
    * @param outputStream The output stream to write the Avro container file to.
+   * @param conf the configuration
    * @throws IOException If the record writer cannot be opened.
    */
   public AvroKeyWithMetadataRecordWriter(Schema writerSchema, CodecFactory compressionCodec,

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/fs/PathUtils.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/fs/PathUtils.java b/datafu-hourglass/src/main/java/datafu/hourglass/fs/PathUtils.java
index f9580e7..951eec6 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/fs/PathUtils.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/fs/PathUtils.java
@@ -82,10 +82,10 @@ public class PathUtils
   /**
    * Delete all but the last N days of paths matching the "yyyyMMdd" format.
    * 
-   * @param fs
-   * @param path
-   * @param retentionCount
-   * @throws IOException
+   * @param fs the filesystem
+   * @param path path to delete file from
+   * @param retentionCount how many days to keep
+   * @throws IOException IOException
    */
   public static void keepLatestDatedPaths(FileSystem fs, Path path, int retentionCount) throws IOException
   {
@@ -102,10 +102,10 @@ public class PathUtils
   /**
    * Delete all but the last N days of paths matching the "yyyy/MM/dd" format.
    * 
-   * @param fs
-   * @param path
-   * @param retentionCount
-   * @throws IOException
+   * @param fs the filesystem
+   * @param path path to delete files from
+   * @param retentionCount how many days to keep
+   * @throws IOException IOException
    */
   public static void keepLatestNestedDatedPaths(FileSystem fs, Path path, int retentionCount) throws IOException
   {
@@ -128,8 +128,8 @@ public class PathUtils
    * 
    * @param fs file system
    * @param input path to search under
-   * @return paths
-   * @throws IOException
+   * @return paths paths matching pattern
+   * @throws IOException IOException
    */
   public static List<DatePath> findNestedDatedPaths(FileSystem fs, Path input) throws IOException
   {
@@ -174,8 +174,8 @@ public class PathUtils
    * 
    * @param fs file system
    * @param path path to search under
-   * @return paths
-   * @throws IOException
+   * @return paths paths matching pattern
+   * @throws IOException IOException
    */
   public static List<DatePath> findDatedPaths(FileSystem fs, Path path) throws IOException
   {
@@ -209,10 +209,10 @@ public class PathUtils
   /**
    * Gets the schema from a given Avro data file.
    * 
-   * @param fs 
-   * @param path
+   * @param fs the filesystem
+   * @param path path to get schema from
    * @return The schema read from the data file's metadata.
-   * @throws IOException
+   * @throws IOException IOException
    */
   public static Schema getSchemaFromFile(FileSystem fs, Path path) throws IOException
   {
@@ -232,9 +232,10 @@ public class PathUtils
   /**
    * Gets the schema for the first Avro file under the given path.
    * 
+   * @param fs the filesystem
    * @param path path to fetch schema for
    * @return Avro schema
-   * @throws IOException
+   * @throws IOException IOException
    */
   public static Schema getSchemaFromPath(FileSystem fs, Path path) throws IOException
   {
@@ -247,7 +248,7 @@ public class PathUtils
    * @param fs file system
    * @param path path to count bytes for
    * @return total bytes under path
-   * @throws IOException
+   * @throws IOException IOException
    */
   public static long countBytes(FileSystem fs, Path path) throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractJob.java
index 2d242eb..651c80b 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractJob.java
@@ -236,7 +236,7 @@ public abstract class AbstractJob extends Configured
   /**
    * Overridden to provide custom configuration before the job starts.
    * 
-   * @param conf
+   * @param conf configuration
    */
   public void config(Configuration conf)
   {    
@@ -318,7 +318,7 @@ public abstract class AbstractJob extends Configured
    * Only the latest will be kept.  Older paths will be removed.
    * Can also be set with <em>retention.count</em>.
    * 
-   * @param retentionCount
+   * @param retentionCount retention count
    */
   public void setRetentionCount(Integer retentionCount)
   {
@@ -391,7 +391,6 @@ public abstract class AbstractJob extends Configured
    * Gets the file system.
    * 
    * @return File system
-   * @throws IOException 
    */
   protected FileSystem getFileSystem()
   {
@@ -423,7 +422,7 @@ public abstract class AbstractJob extends Configured
    * Creates a random temporary path within the file system.
    * 
    * @return Random temporary path
-   * @throws IOException
+   * @throws IOException IOException
    */
   protected Path createRandomTempPath() throws IOException
   {
@@ -435,7 +434,7 @@ public abstract class AbstractJob extends Configured
    * 
    * @param path Path to create
    * @return The same path that was provided
-   * @throws IOException
+   * @throws IOException IOException
    */
   protected Path ensurePath(Path path) throws IOException
   {
@@ -472,9 +471,9 @@ public abstract class AbstractJob extends Configured
   /**
    * Run the job.
    * 
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
+   * @throws IOException IOException
+   * @throws InterruptedException InterruptedException
+   * @throws ClassNotFoundException ClassNotFoundException
    */
   public abstract void run() throws IOException, InterruptedException, ClassNotFoundException;
   

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractNonIncrementalJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractNonIncrementalJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractNonIncrementalJob.java
index 13db6a4..787d6d6 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractNonIncrementalJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractNonIncrementalJob.java
@@ -99,7 +99,7 @@ public abstract class AbstractNonIncrementalJob extends TimeBasedJob
    * 
    * @param name job name
    * @param props configuration properties
-   * @throws IOException
+   * @throws IOException IOException
    */
   public AbstractNonIncrementalJob(String name, Properties props) throws IOException
   {        
@@ -144,9 +144,9 @@ public abstract class AbstractNonIncrementalJob extends TimeBasedJob
   /**
    * Runs the job.
    * 
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
+   * @throws IOException IOException
+   * @throws InterruptedException InterruptedException
+   * @throws ClassNotFoundException ClassNotFoundException
    */
   @Override
   public void run() throws IOException, InterruptedException, ClassNotFoundException

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionCollapsingIncrementalJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionCollapsingIncrementalJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionCollapsingIncrementalJob.java
index 0ca4e53..e1daa3a 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionCollapsingIncrementalJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionCollapsingIncrementalJob.java
@@ -196,6 +196,7 @@ public abstract class AbstractPartitionCollapsingIncrementalJob extends Incremen
   
   /**
    * Initializes the job.
+   * @throws IOException IOException
    */
   public AbstractPartitionCollapsingIncrementalJob() throws IOException
   {    
@@ -206,6 +207,7 @@ public abstract class AbstractPartitionCollapsingIncrementalJob extends Incremen
    * 
    * @param name job name
    * @param props configuration properties
+   * @throws IOException IOException
    */
   public AbstractPartitionCollapsingIncrementalJob(String name, Properties props) throws IOException
   { 

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionPreservingIncrementalJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionPreservingIncrementalJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionPreservingIncrementalJob.java
index 39c41f8..1d695e8 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionPreservingIncrementalJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/AbstractPartitionPreservingIncrementalJob.java
@@ -174,6 +174,7 @@ public abstract class AbstractPartitionPreservingIncrementalJob extends Incremen
   
   /**
    * Initializes the job.
+   * @throws IOException IOException
    */
   public AbstractPartitionPreservingIncrementalJob() throws IOException
   {     
@@ -184,6 +185,7 @@ public abstract class AbstractPartitionPreservingIncrementalJob extends Incremen
    * 
    * @param name job name
    * @param props configuration properties
+   * @throws IOException IOException
    */
   public AbstractPartitionPreservingIncrementalJob(String name, Properties props) throws IOException
   { 
@@ -217,9 +219,9 @@ public abstract class AbstractPartitionPreservingIncrementalJob extends Incremen
   /**
    * Run the job.
    * 
-   * @throws IOException
-   * @throws InterruptedException
-   * @throws ClassNotFoundException
+   * @throws IOException IOException
+   * @throws InterruptedException InterruptedException
+   * @throws ClassNotFoundException ClassNotFoundException
    */
   @Override
   public void run() throws IOException, InterruptedException, ClassNotFoundException

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/ExecutionPlanner.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/ExecutionPlanner.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/ExecutionPlanner.java
index ce1c1d6..2a7095b 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/ExecutionPlanner.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/ExecutionPlanner.java
@@ -280,7 +280,7 @@ public abstract class ExecutionPlanner
    * 
    * @param path path to search under
    * @return map of date to path
-   * @throws IOException
+   * @throws IOException IOException
    */
   protected SortedMap<Date,DatePath> getDailyData(Path path) throws IOException
   {
@@ -297,7 +297,7 @@ public abstract class ExecutionPlanner
    * 
    * @param path path to search under
    * @return map of date to path
-   * @throws IOException
+   * @throws IOException IOException
    */
   protected SortedMap<Date,DatePath> getDatedData(Path path) throws IOException
   {
@@ -312,7 +312,7 @@ public abstract class ExecutionPlanner
   /**
    * Determine what input data is available.
    * 
-   * @throws IOException
+   * @throws IOException IOException
    */
   protected void loadInputData() throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/FileCleaner.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/FileCleaner.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/FileCleaner.java
index 7248969..267195e 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/FileCleaner.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/FileCleaner.java
@@ -51,7 +51,7 @@ public class FileCleaner
   /**
    * Add a path to be removed later.
    * 
-   * @param path
+   * @param path path to be removed later
    * @return added path
    */
   public Path add(Path path)
@@ -63,7 +63,7 @@ public class FileCleaner
   /**
    * Add a path to be removed later.
    * 
-   * @param path
+   * @param path path to be removed later
    * @return added path
    */
   public String add(String path)
@@ -75,7 +75,7 @@ public class FileCleaner
   /**
    * Removes added paths from the file system.
    * 
-   * @throws IOException
+   * @throws IOException IOException
    */
   @SuppressWarnings("unchecked")
   public void clean() throws IOException

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingExecutionPlanner.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingExecutionPlanner.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingExecutionPlanner.java
index 34368f7..a8dfa54 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingExecutionPlanner.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingExecutionPlanner.java
@@ -197,7 +197,7 @@ public class PartitionCollapsingExecutionPlanner extends ExecutionPlanner
   /**
    * Create the execution plan.
    * 
-   * @throws IOException
+   * @throws IOException IOException
    */
   public void createPlan() throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingIncrementalJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingIncrementalJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingIncrementalJob.java
index 68e776a..c653d96 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingIncrementalJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionCollapsingIncrementalJob.java
@@ -55,7 +55,7 @@ public class PartitionCollapsingIncrementalJob extends AbstractPartitionCollapsi
    * Initializes the job.  The job name is derived from the name of a provided class.
    * 
    * @param cls class to base job name on
-   * @throws IOException
+   * @throws IOException IOException
    */
   public PartitionCollapsingIncrementalJob(@SuppressWarnings("rawtypes") Class cls) throws IOException
   {
@@ -113,7 +113,7 @@ public class PartitionCollapsingIncrementalJob extends AbstractPartitionCollapsi
   /**
    * Set the mapper.
    * 
-   * @param mapper
+   * @param mapper the mapper
    */
   public void setMapper(Mapper<GenericRecord,GenericRecord,GenericRecord> mapper)
   {
@@ -179,7 +179,7 @@ public class PartitionCollapsingIncrementalJob extends AbstractPartitionCollapsi
    * This is only needed when reusing previous output where the intermediate and output schemas are different.
    * New partial output is produced by the reducer from new input that is after the previous output.
    * 
-   * @param merger
+   * @param merger the merger
    */
   public void setMerger(Merger<GenericRecord> merger)
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingExecutionPlanner.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingExecutionPlanner.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingExecutionPlanner.java
index 6ac55a8..2ccbaf5 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingExecutionPlanner.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingExecutionPlanner.java
@@ -91,7 +91,7 @@ public class PartitionPreservingExecutionPlanner extends ExecutionPlanner
   /**
    * Create the execution plan.
    * 
-   * @throws IOException
+   * @throws IOException IOException
    */
   public void createPlan() throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingIncrementalJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingIncrementalJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingIncrementalJob.java
index 99aba47..69d4dcd 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingIncrementalJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/PartitionPreservingIncrementalJob.java
@@ -52,7 +52,7 @@ public class PartitionPreservingIncrementalJob extends AbstractPartitionPreservi
    * Initializes the job.  The job name is derived from the name of a provided class.
    * 
    * @param cls class to base job name on
-   * @throws IOException
+   * @throws IOException IOException
    */
   public PartitionPreservingIncrementalJob(@SuppressWarnings("rawtypes") Class cls) throws IOException
   {
@@ -98,7 +98,7 @@ public class PartitionPreservingIncrementalJob extends AbstractPartitionPreservi
   /**
    * Set the mapper.
    * 
-   * @param mapper
+   * @param mapper the mapper
    */
   public void setMapper(Mapper<GenericRecord,GenericRecord,GenericRecord> mapper)
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/StagedOutputJob.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/StagedOutputJob.java b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/StagedOutputJob.java
index 5178133..c6cb815 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/jobs/StagedOutputJob.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/jobs/StagedOutputJob.java
@@ -128,7 +128,7 @@ public class StagedOutputJob extends Job implements Callable<Boolean>
    * @param conf configuration
    * @param stagingPrefix where to stage output temporarily
    * @param log logger
-   * @throws IOException
+   * @throws IOException IOException
    */
   public StagedOutputJob(Configuration conf, String stagingPrefix, Logger log) throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingCombiner.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingCombiner.java b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingCombiner.java
index 60ce4fe..e51c53b 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingCombiner.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingCombiner.java
@@ -145,7 +145,7 @@ public class CollapsingCombiner extends ObjectReducer implements DateRangeConfig
   /**
    * Sets the schemas.
    * 
-   * @param schemas
+   * @param schemas schemas used by the combiner
    */
   public void setSchemas(PartitionCollapsingSchemas schemas)
   {
@@ -155,7 +155,7 @@ public class CollapsingCombiner extends ObjectReducer implements DateRangeConfig
   /**
    * Gets the schemas.
    * 
-   * @return schemas
+   * @return schemas schemas used by the combiner
    */
   public PartitionCollapsingSchemas getSchemas()
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingMapper.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingMapper.java b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingMapper.java
index 7c6c5a2..5dec8fc 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingMapper.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingMapper.java
@@ -120,7 +120,7 @@ public class CollapsingMapper extends ObjectMapper implements Serializable
   /**
    * Gets the mapper.
    * 
-   * @return mapper
+   * @return mapper the mapper
    */
   public Mapper<GenericRecord,GenericRecord,GenericRecord> getMapper()
   {
@@ -130,7 +130,7 @@ public class CollapsingMapper extends ObjectMapper implements Serializable
   /**
    * Sets the mapper.
    * 
-   * @param mapper
+   * @param mapper the mapper
    */
   public void setMapper(Mapper<GenericRecord,GenericRecord,GenericRecord> mapper)
   {
@@ -140,7 +140,7 @@ public class CollapsingMapper extends ObjectMapper implements Serializable
   /**
    * Sets the Avro schemas.
    * 
-   * @param schemas
+   * @param schemas the schemas
    */
   public void setSchemas(PartitionCollapsingSchemas schemas)
   {
@@ -150,7 +150,7 @@ public class CollapsingMapper extends ObjectMapper implements Serializable
   /**
    * Gets the Avro schemas.
    * 
-   * @return schemas
+   * @return schemas the schemas
    */
   public PartitionCollapsingSchemas getSchemas()
   {
@@ -178,7 +178,7 @@ public class CollapsingMapper extends ObjectMapper implements Serializable
    * Gets a collector that maps key-value pairs, where each value
    * is tagged with the partition from which it was derived. 
    * 
-   * @return collector
+   * @return collector the collector
    */
   private TimeMapCollector getTimeMapCollector()
   {
@@ -193,7 +193,7 @@ public class CollapsingMapper extends ObjectMapper implements Serializable
   /**
    * Gets a collector that maps key-value pairs as-is.
    * 
-   * @return collector
+   * @return collector the collector
    */
   private IdentityMapCollector getIdentityMapCollector()
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingReducer.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingReducer.java b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingReducer.java
index 86aa66c..bcff7ae 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingReducer.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/CollapsingReducer.java
@@ -214,7 +214,7 @@ public class CollapsingReducer extends ObjectReducer implements DateRangeConfigu
   /**
    * Sets the Avro schemas.
    * 
-   * @param schemas
+   * @param schemas the schemas
    */
   public void setSchemas(PartitionCollapsingSchemas schemas)
   {
@@ -224,7 +224,7 @@ public class CollapsingReducer extends ObjectReducer implements DateRangeConfigu
   /**
    * Gets the Avro schemas.
    * 
-   * @return
+   * @return the schemas
    */
   private PartitionCollapsingSchemas getSchemas()
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/DistributedCacheHelper.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/DistributedCacheHelper.java b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/DistributedCacheHelper.java
index 78c8911..3e34c3c 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/DistributedCacheHelper.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/DistributedCacheHelper.java
@@ -45,7 +45,7 @@ public class DistributedCacheHelper
    * @param conf Hadoop configuration
    * @param path Path to deserialize from
    * @return Deserialized object
-   * @throws IOException
+   * @throws IOException IOException
    */
   public static Object readObject(Configuration conf, org.apache.hadoop.fs.Path path) throws IOException
   {
@@ -87,7 +87,7 @@ public class DistributedCacheHelper
    * @param conf Hadoop configuration
    * @param obj Object to serialize
    * @param path Path to serialize object to
-   * @throws IOException
+   * @throws IOException IOException
    */
   public static void writeObject(Configuration conf, Object obj, org.apache.hadoop.fs.Path path) throws IOException
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningMapper.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningMapper.java b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningMapper.java
index d17385d..f1df195 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningMapper.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningMapper.java
@@ -96,7 +96,7 @@ public class PartitioningMapper extends ObjectMapper implements Serializable
   /**
    * Gets the mapper.
    * 
-   * @return mapper
+   * @return mapper the mapper
    */
   public Mapper<GenericRecord,GenericRecord,GenericRecord> getMapper()
   {
@@ -106,7 +106,7 @@ public class PartitioningMapper extends ObjectMapper implements Serializable
   /**
    * Sets the mapper.
    * 
-   * @param mapper
+   * @param mapper the mapper
    */
   public void setMapper(Mapper<GenericRecord,GenericRecord,GenericRecord> mapper)
   {
@@ -116,7 +116,7 @@ public class PartitioningMapper extends ObjectMapper implements Serializable
   /**
    * Sets the Avro schemas.
    * 
-   * @param schemas
+   * @param schemas the schemas
    */
   public void setSchemas(PartitionPreservingSchemas schemas)
   {
@@ -126,7 +126,7 @@ public class PartitioningMapper extends ObjectMapper implements Serializable
   /**
    * Gets the Avro schemas.
    * 
-   * @return schemas
+   * @return schemas the schemas
    */
   public PartitionPreservingSchemas getSchemas()
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningReducer.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningReducer.java b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningReducer.java
index 1ce4995..a78610d 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningReducer.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/mapreduce/PartitioningReducer.java
@@ -145,7 +145,7 @@ public class PartitioningReducer extends ObjectReducer implements Serializable
   /**
    * Sets the Avro schemas.
    * 
-   * @param schemas
+   * @param schemas the schemas
    */
   public void setSchemas(PartitionPreservingSchemas schemas)
   {
@@ -155,7 +155,7 @@ public class PartitioningReducer extends ObjectReducer implements Serializable
   /**
    * Gets the Avro schemas
    * 
-   * @return schemas
+   * @return schemas the schemas
    */
   public PartitionPreservingSchemas getSchemas()
   {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/model/KeyValueCollector.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/model/KeyValueCollector.java b/datafu-hourglass/src/main/java/datafu/hourglass/model/KeyValueCollector.java
index 4d37a85..6351e83 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/model/KeyValueCollector.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/model/KeyValueCollector.java
@@ -23,7 +23,7 @@ import java.io.IOException;
 
 /**
  * Provided to an instance of {@link Mapper} to collect key-value pairs.
- * 
+ *
  * @author "Matthew Hayes"
  *
  * @param <K> Key type
@@ -36,8 +36,8 @@ public interface KeyValueCollector<K,V>
    * 
    * @param key Key to be collected
    * @param value Value to be collected
-   * @throws IOException
-   * @throws InterruptedException
+   * @throws IOException IOException
+   * @throws InterruptedException InterruptedException
    */
   void collect(K key,V value)  throws IOException, InterruptedException;
 }

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-hourglass/src/main/java/datafu/hourglass/model/Mapper.java
----------------------------------------------------------------------
diff --git a/datafu-hourglass/src/main/java/datafu/hourglass/model/Mapper.java b/datafu-hourglass/src/main/java/datafu/hourglass/model/Mapper.java
index 902478a..447e495 100644
--- a/datafu-hourglass/src/main/java/datafu/hourglass/model/Mapper.java
+++ b/datafu-hourglass/src/main/java/datafu/hourglass/model/Mapper.java
@@ -38,8 +38,8 @@ public interface Mapper<In,OutKey,OutVal> extends Serializable
    * 
    * @param input Input value
    * @param collector Collects output key-value pairs
-   * @throws IOException
-   * @throws InterruptedException
+   * @throws IOException IOException
+   * @throws InterruptedException InterruptedException
    */
   void map(In input, KeyValueCollector<OutKey,OutVal> collector)  throws IOException, InterruptedException;
 }

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/build.gradle
----------------------------------------------------------------------
diff --git a/datafu-pig/build.gradle b/datafu-pig/build.gradle
index e21a5b1..2e00d40 100644
--- a/datafu-pig/build.gradle
+++ b/datafu-pig/build.gradle
@@ -1,7 +1,6 @@
 apply plugin: 'java'
 apply plugin: 'license'
 apply plugin: 'gradle-autojar'
-apply plugin: 'maven-publish'
 apply plugin: 'download-task'
 
 import groovy.xml.MarkupBuilder
@@ -79,21 +78,7 @@ jar
   classifier = "core"
 }
 
-task sourcesJar(type: Jar) {
-  description 'Creates the sources jar'
-
-  classifier = 'sources'
-  from sourceSets.main.allJava
-}
-
-task javadocJar(type: Jar, dependsOn: javadoc) {
-  description 'Creates the javadoc jar'
-
-  classifier = 'javadoc'
-  from javadoc.destinationDir
-}
-
-ext 
+ext
 {
   autojarBuildDir = tasks.jar.destinationDir
 }
@@ -126,7 +111,7 @@ task jarWithDependenciesNamespaced(dependsOn: jarWithDependencies) {
 }
 
 task finalJar(type: Jar, dependsOn: jarWithDependenciesNamespaced) {
-  description 'Creates the final jar' 
+  description 'Creates the final jar'
 
   from(zipTree(jarWithDependenciesNamespaced.outputFile))
 }
@@ -135,41 +120,9 @@ task finalJar(type: Jar, dependsOn: jarWithDependenciesNamespaced) {
 configurations.archives.artifacts.removeAll { return it.classifier == "core"; }
 
 artifacts {
-  archives sourcesJar
-  archives javadocJar
   archives finalJar
 }
 
-// Note: alternate way to publish: https://github.com/Netflix/gradle-template
-
-publishing {
-  publications {
-    mavenJava(MavenPublication) {
-      artifact sourcesJar
-      artifact javadocJar
-      artifact finalJar
-
-      pom.withXml {
-        asNode().appendNode("packaging","jar")
-        asNode().appendNode("name","Apache DataFu")        
-        asNode().appendNode("description","A collection of user-defined functions for working with large-scale data in Hadoop and Pig.")
-        asNode().appendNode("url","http://datafu.incubator.apache.org/")
-
-        def licenseNode = asNode().appendNode("licenses").appendNode("license")
-        licenseNode.appendNode("name","The Apache Software License, Version 2.0")
-        licenseNode.appendNode("url","http://www.apache.org/licenses/LICENSE-2.0.txt")
-        licenseNode.appendNode("distribution","repo")
-
-        def dependenciesNode = asNode().appendNode("dependencies")
-        def dependency = dependenciesNode.appendNode("dependency")
-        dependency.appendNode("groupId","joda-time")
-        dependency.appendNode("artifactId","joda-time")
-        dependency.appendNode("version","$jodaTimeVersion")
-      }
-    }
-  }
-}
-
 // create tasks to automatically add the license header
 license {
   header rootProject.file('HEADER')
@@ -208,6 +161,16 @@ dependencies {
   testCompile project(":build-plugin")
 }
 
+modifyPom {
+  project {
+    dependencies {
+      // No dependencies because everything we need is autojarred.
+      // The only exception is joda-time, but this is already a pig dependency
+      // so it should already be available.
+    }
+  }
+}
+
 compileTestJava.doFirst {
   options.compilerArgs = ['-processor', 'org.adrianwalker.multilinestring.MultilineProcessor']
 }

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/BagGroup.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/BagGroup.java b/datafu-pig/src/main/java/datafu/pig/bags/BagGroup.java
index 409709c..386def3 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/BagGroup.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/BagGroup.java
@@ -46,6 +46,8 @@ import datafu.pig.util.AliasableEvalFunc;
  * The following example groups input_bag by k.  The output is a bag having tuples
  * consisting of the group key k and a bag with the corresponding (k,v) tuples from input_bag
  * for that key.
+ * </p>
+ *
  * <pre>
  * {@code
  * define BagGroup datafu.pig.bags.BagGroup();
@@ -59,9 +61,7 @@ import datafu.pig.util.AliasableEvalFunc;
  * -- ({(1,{(1,A),(1,B)}),(2,{(2,A),(2,B),(2,C)}),(3,{(3,A)})})
  * }
  * </pre>
- * </p>
  *
- * <p>
  * If the key k is not needed within the input_bag for the output, it can be projected
  * out like so:
  * <pre>
@@ -77,7 +77,6 @@ import datafu.pig.util.AliasableEvalFunc;
  * -- ({(1,{(A),(B)}),(2,{(A),(B),(C)}),(3,{(A)})})
  * }
  * </pre>
- * </p>
  *
  * @author wvaughan
  *

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/CountEach.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/CountEach.java b/datafu-pig/src/main/java/datafu/pig/bags/CountEach.java
index cfb0152..2e37c00 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/CountEach.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/CountEach.java
@@ -34,8 +34,7 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 
 /**
  * Generates a count of the number of times each distinct tuple appears in a bag.
- * 
- * <p>
+ *
  * Example:
  * <pre>
  * {@code
@@ -55,7 +54,6 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
  * output_flatten = FOREACH input GENERATE CountEachFlatten(B);
  * } 
  * </pre>
- * </p>
  */
 public class CountEach extends AccumulatorEvalFunc<DataBag>
 {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/DistinctBy.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/DistinctBy.java b/datafu-pig/src/main/java/datafu/pig/bags/DistinctBy.java
index a79e4de..24198ef 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/DistinctBy.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/DistinctBy.java
@@ -56,8 +56,6 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
  * -- ({(a,1),(b,2),(c,3),(d,4)})
  * } 
  * </pre>
- * 
- * @param map Any number of strings specifying field positions
  */
 public class DistinctBy extends AccumulatorEvalFunc<DataBag>
 {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/Enumerate.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/Enumerate.java b/datafu-pig/src/main/java/datafu/pig/bags/Enumerate.java
index 8a0d072..d12cde4 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/Enumerate.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/Enumerate.java
@@ -32,17 +32,17 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 
 /**
  * Enumerate a bag, appending to each tuple its index within the bag.
- * 
- * <p>
+ *
  * For example:
  * <pre>
- *   {(A),(B),(C),(D)} => {(A,0),(B,1),(C,2),(D,3)}
+ *   {(A),(B),(C),(D)} =&gt; {(A,0),(B,1),(C,2),(D,3)}
  * </pre>
+ *
+ * <p>
  * The first constructor parameter (optional) dictates the starting index of the counting.
  * This UDF implements the accumulator interface, reducing DataBag materialization costs.
  * </p>
  *
- * <p>
  * Example:
  * <pre>
  * {@code

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/PrependToBag.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/PrependToBag.java b/datafu-pig/src/main/java/datafu/pig/bags/PrependToBag.java
index 9292871..3fa120a 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/PrependToBag.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/PrependToBag.java
@@ -31,28 +31,26 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 import datafu.pig.util.SimpleEvalFunc;
 
 /**
- * Prepends a tuple to a bag. 
- * 
+ * Prepends a tuple to a bag.
+ *
  * <p>N.B. this copies the entire input bag, so don't use it for large bags.</p>
- * 
- * <p>
+ *
  * Example:
  * <pre>
  * {@code
  * define PrependToBag datafu.pig.bags.PrependToBag();
- * 
+ *
  * -- input:
  * -- ({(1),(2),(3)},(4))
  * -- ({(10),(20),(30),(40),(50)},(60))
  * input = LOAD 'input' AS (B: bag{T: tuple(v:INT)}, T: tuple(v:INT));
-
+ *
  * -- output:
  * -- ({(4),(1),(2),(3)})
  * -- ({(60),(10),(20),(30),(40),(50)})
  * output = FOREACH input GENERATE PrependToBag(B,T) as B;
  * }
  * </pre>
- * </p>
  */
 public class PrependToBag extends SimpleEvalFunc<DataBag>
 {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/ReverseEnumerate.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/ReverseEnumerate.java b/datafu-pig/src/main/java/datafu/pig/bags/ReverseEnumerate.java
index c86ffcf..a31a6ab 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/ReverseEnumerate.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/ReverseEnumerate.java
@@ -32,20 +32,20 @@ import org.apache.pig.impl.logicalLayer.schema.Schema;
 import datafu.pig.util.SimpleEvalFunc;
 
 /**
- * Enumerate a bag, appending to each tuple its index within the bag, with indices being produced in 
- * descending order. 
- * 
- * <p>
+ * Enumerate a bag, appending to each tuple its index within the bag, with indices being produced in
+ * descending order.
+ *
  * For example:
  * <pre>
- *   {(A),(B),(C),(D)} => {(A,3),(B,2),(C,1),(D,0)}
+ *   {(A),(B),(C),(D)} =&gt; {(A,3),(B,2),(C,1),(D,0)}
  * </pre>
+ *
+ * <p>
  * The first constructor parameter (optional) dictates the starting index of the counting. As the
  * UDF requires the size of the bag for reverse counting, this UDF does <b>not</b> implement the
  * accumulator interface and suffers from the slight performance penalty of DataBag materialization.
  * </p>
  *
- * <p>
  * Example:
  * <pre>
  * {@code
@@ -60,7 +60,6 @@ import datafu.pig.util.SimpleEvalFunc;
  * output = FOREACH input GENERATE ReverseEnumerate(B);
  * }
  * </pre>
- * </p>
  */
 public class ReverseEnumerate extends SimpleEvalFunc<DataBag>
 {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/bags/UnorderedPairs.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/bags/UnorderedPairs.java b/datafu-pig/src/main/java/datafu/pig/bags/UnorderedPairs.java
index a1d149e..3a989ba 100644
--- a/datafu-pig/src/main/java/datafu/pig/bags/UnorderedPairs.java
+++ b/datafu-pig/src/main/java/datafu/pig/bags/UnorderedPairs.java
@@ -33,23 +33,21 @@ import org.apache.pig.tools.pigstats.PigStatusReporter;
 
 /**
  * Generates pairs of all items in a bag.
- * 
- * <p>
+ *
  * Example:
  * <pre>
  * {@code
  * define UnorderedPairs datafu.pig.bags.UnorderedPairs();
- * 
+ *
  * -- input:
  * -- ({(1),(2),(3),(4)})
  * input = LOAD 'input' AS (B: bag {T: tuple(v:INT)});
- * 
+ *
  * -- output:
  * -- ({((1),(2)),((1),(3)),((1),(4)),((2),(3)),((2),(4)),((3),(4))})
  * output = FOREACH input GENERATE UnorderedPairs(B);
- * } 
+ * }
  * </pre>
- * </p>
  */
 public class UnorderedPairs extends EvalFunc<DataBag>
 {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/geo/HaversineDistInMiles.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/geo/HaversineDistInMiles.java b/datafu-pig/src/main/java/datafu/pig/geo/HaversineDistInMiles.java
index d1e3988..5ad9d51 100644
--- a/datafu-pig/src/main/java/datafu/pig/geo/HaversineDistInMiles.java
+++ b/datafu-pig/src/main/java/datafu/pig/geo/HaversineDistInMiles.java
@@ -26,16 +26,15 @@ import datafu.pig.util.SimpleEvalFunc;
 
 /**
  * Computes the distance (in miles) between two latitude-longitude pairs 
- * using the {@link <a href="http://en.wikipedia.org/wiki/Haversine_formula" target="_blank">Haversine formula</a>}.
+ * using the <a href="http://en.wikipedia.org/wiki/Haversine_formula" target="_blank">Haversine formula</a>.
  *
- * <p>
  * Example:
  * <pre>
  * {@code
  * -- input is a TSV of two latitude and longitude pairs
  * input = LOAD 'input' AS (lat1 : double, long1 : double, lat2 : double, long2 : double);
  * output = FOREACH input GENERATE datafu.pig.geo.HaversineDistInMiles(lat1, long1, lat2, long2) as distance;
- * }</pre></p>
+ * }</pre>
  */
 public class HaversineDistInMiles extends SimpleEvalFunc<Double>
 {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/CosineDistanceHash.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/CosineDistanceHash.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/CosineDistanceHash.java
index 1664362..c1c5e02 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/CosineDistanceHash.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/CosineDistanceHash.java
@@ -27,7 +27,7 @@ import datafu.pig.hash.lsh.interfaces.LSH;
 import datafu.pig.hash.lsh.interfaces.LSHCreator;
 
 /**
- * From wikipedia's article on {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>}:
+ * From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
  * <pre>
  * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
  * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
@@ -35,7 +35,7 @@ import datafu.pig.hash.lsh.interfaces.LSHCreator;
  * </pre>
  * 
  * In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
- * close together (with high probability) according to {@link <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>}
+ * close together (with high probability) according to <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>
  * into the same buckets.  Each LSH maps a vector onto one side or the other of a random hyperplane, thereby producing a single
  * bit as the hash value.  Multiple, independent, hashes can be run on the same input and aggregated together to form a more
  * broad domain than a single bit.
@@ -123,7 +123,7 @@ public class CosineDistanceHash extends LSHFunc
      * };
      * 
      * -- Filter out the hashes which resulted in no matches
-     * NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
+     * NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
      * 
      * -- group by the query
      * NEIGHBORS_GRP = group NOT_NULL by query_pt;

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/L1PStableHash.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/L1PStableHash.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/L1PStableHash.java
index 2dd42cd..b69c2cf 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/L1PStableHash.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/L1PStableHash.java
@@ -27,7 +27,7 @@ import datafu.pig.hash.lsh.interfaces.LSHCreator;
 import datafu.pig.hash.lsh.p_stable.L1LSH;
 
 /**
- * From wikipedia's article on {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>}:
+ * From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
  * <pre>
  * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
  * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
@@ -35,7 +35,7 @@ import datafu.pig.hash.lsh.p_stable.L1LSH;
  * </pre>
  * 
  * In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
- * close together (with high probability) according to the {@link <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L1</a>}
+ * close together (with high probability) according to the <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L1</a>
  * distance metric into the same buckets.  This implementation uses a 1-stable distribution (a Cauchy distribution) in order
  * to accomplish this.
  * 
@@ -136,7 +136,7 @@ public class L1PStableHash extends LSHFunc
      * };
      * 
      * -- Filter out the hashes which resulted in no matches
-     * NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
+     * NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
      * 
      * -- group by the query
      * NEIGHBORS_GRP = group NOT_NULL by query_pt;

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/L2PStableHash.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/L2PStableHash.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/L2PStableHash.java
index 588f199..9ae261b 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/L2PStableHash.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/L2PStableHash.java
@@ -27,7 +27,7 @@ import datafu.pig.hash.lsh.interfaces.LSHCreator;
 import datafu.pig.hash.lsh.p_stable.L2LSH;
 
 /**
- * From wikipedia's article on {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>}:
+ * From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
  * <pre>
  * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
  * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
@@ -35,7 +35,7 @@ import datafu.pig.hash.lsh.p_stable.L2LSH;
  * </pre>
  * 
  * In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
- * close together (with high probability) according to the {@link <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L2</a>}
+ * close together (with high probability) according to the <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L2</a>
  * distance metric into the same buckets.  This implementation uses a 2-stable distribution (a Gaussian distribution) in order
  * to accomplish this.
  * 
@@ -136,7 +136,7 @@ public class L2PStableHash extends LSHFunc{
      * };
      * 
      * -- Filter out the hashes which resulted in no matches
-     * NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) > 0;
+     * NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
      * 
      * -- group by the query
      * NEIGHBORS_GRP = group NOT_NULL by query_pt;

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/LSHFamily.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/LSHFamily.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/LSHFamily.java
index 394109c..a12dc45 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/LSHFamily.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/LSHFamily.java
@@ -48,7 +48,7 @@ public class LSHFamily {
   /**
    * Compute the family of k-hashes for a vector.
    * 
-   * @param vector
+   * @param vector the vector
    * @return An iterable of hashes
    */
   public Iterable<Long> apply(final RealVector vector) {

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/HyperplaneLSH.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/HyperplaneLSH.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/HyperplaneLSH.java
index deff98e..0f37845 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/HyperplaneLSH.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/HyperplaneLSH.java
@@ -27,52 +27,59 @@ import org.apache.commons.math.random.UnitSphereRandomVectorGenerator;
 import datafu.pig.hash.lsh.interfaces.LSH;
 
 /**
- * From wikipedia's article on {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>}:
+ * From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ *
  * <pre>
- * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
- * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data.
+ * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability
  * (the number of buckets being much smaller than the universe of possible input items).
  * </pre>
- * 
+ *
+ * <p>
  * In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
- * close together (with high probability) according to {@link <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>}
+ * close together (with high probability) according to <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>
  * into the same buckets.  Each LSH maps a vector onto one side or the other of a random hyperplane, thereby producing a single
  * bit as the hash value.  Multiple, independent, hashes can be run on the same input and aggregated together to form a more
  * broad domain than a single bit.
- * 
+ * </p>
+ *
+ * <p>
  * For more information, see Charikar, Moses S.. (2002). "Similarity Estimation Techniques from Rounding Algorithms". Proceedings of the 34th Annual ACM Symposium on Theory of Computing 2002.
- * 
- * 
+ * </p>
  */
 public class HyperplaneLSH extends LSH
 {
-   
- 
     private RealVector r;
-    
+
     /**
      * Locality sensitive hash that maps vectors onto 0,1 in such a way that colliding
-     * vectors are "near" one another according to cosine similarity with high probability.  
-     * 
+     * vectors are "near" one another according to cosine similarity with high probability.
+     *
      * <p>
      * Generally, multiple LSH are combined via repetition to increase the range of the hash function to the full set of longs.
      * This repetition is accomplished by wrapping instances of the LSH in a LSHFamily, which does the combination.
-     * 
+     * </p>
+     *
+     * <p>
      * The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
      * In a k-near neighbors style of searching, this corresponds to the number of neighbors you want to find
      * (i.e. the number of vectors within a distance according to cosine similarity).
+     * </p>
+     *
+     * @param dim The dimension of the vectors which are to be hashed
+     * @param rg random number generator
      */
     public HyperplaneLSH(int dim, RandomGenerator rg)
     {
         super(dim, rg);
-        
+
         UnitSphereRandomVectorGenerator generator = new UnitSphereRandomVectorGenerator(dim, rg);
         //compute our vector representing a hyperplane of dimension dim by taking a random vector
         //located on the unit sphere
         double[] normalVector = generator.nextVector();
         r = new ArrayRealVector(normalVector);
     }
-  
+
 
     /**
     * Compute which side of the hyperplane the parameter is on.  

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/package-info.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/package-info.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/package-info.java
index 2e44920..bd9a085 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/package-info.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/cosine/package-info.java
@@ -18,7 +18,7 @@
  */
 
 /**
- * Implementation of {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>} 
- * for {@link <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>}
+ * Implementation of <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>
+ * for <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>.
  */
 package datafu.pig.hash.lsh.cosine;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSH.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSH.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSH.java
index 69d4043..d7ed82e 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSH.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSH.java
@@ -23,23 +23,24 @@ import org.apache.commons.math.linear.RealVector;
 import org.apache.commons.math.random.RandomGenerator;
 
 /**
- * An abstract class representing a locality sensitive hash. From wikipedia's article on {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>}:
- * <pre>
- * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
- * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ * An abstract class representing a locality sensitive hash. From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ *
+ * <p>
+ * Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data.
+ * The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability
  * (the number of buckets being much smaller than the universe of possible input items).
- * </pre>
+ * </p>
  * @author cstella
  *
  */
-public abstract class LSH 
+public abstract class LSH
 {
   protected RandomGenerator rg;
   protected int dim;
-  
+
   /**
    * Construct a locality sensitive hash.  Note, one may pass a pre-seeded generator.
-   * 
+   *
    * @param dim The dimension of the vectors which are to be hashed
    * @param rg The random generator to use internally.
    */
@@ -48,24 +49,24 @@ public abstract class LSH
     this.dim = dim;
     this.rg = rg;
   }
-  
+
   /**
-   * 
+   *
    * @return The random generator from this LSH
    */
   public RandomGenerator getRandomGenerator() { return rg;}
   /**
-   * 
+   *
   * @return The dimension of the vectors which this LSH supports
    */
   public int getDim() { return dim; }
-  
+
   /**
    * Hash a vector.
-   * 
+   *
    * @param vector A vector to be hashed
    * @return A hash which collides with vectors close according to some metric (implementation dependent).
    */
   public abstract long apply(RealVector vector);
-  
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSHCreator.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSHCreator.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSHCreator.java
index e3dc5b4..679f4ef 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSHCreator.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/LSHCreator.java
@@ -88,7 +88,7 @@ public abstract class LSHCreator
    * 
    * @param rg The random generator to use when constructing the family
    * @return The family of locality sensitive hashes
-   * @throws MathException
+   * @throws MathException if a math error occurs while constructing the hash family
    */
   public  LSHFamily constructFamily(RandomGenerator rg) throws MathException
     { 

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/Sampler.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/Sampler.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/Sampler.java
index 0c57a0d..1ccb623 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/Sampler.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/Sampler.java
@@ -30,10 +30,10 @@ import org.apache.commons.math.random.RandomDataImpl;
 public interface Sampler {
   /**
    * Generate a sample
-   * 
+   *
    * @param randomData The distribution used
    * @return A sample
-   * @throws MathException
+   * @throws MathException if a math error occurs while drawing the sample
    */
   public double sample(RandomDataImpl randomData) throws MathException ;
 }

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/package-info.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/package-info.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/package-info.java
index 5357473..13c84e3 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/package-info.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/interfaces/package-info.java
@@ -18,6 +18,6 @@
  */
 
 /**
- * Interfaces used in the implementation of {@link <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>}.
+ * Interfaces used in the implementation of <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>.
  */
 package datafu.pig.hash.lsh.interfaces;

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/Cosine.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/Cosine.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/Cosine.java
index 22222a1..565b9f4 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/Cosine.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/Cosine.java
@@ -23,10 +23,10 @@ import org.apache.commons.math.linear.RealVector;
 
 /**
  * A UDF used to find a vector v in a bag such that for query point q, metric m and threshold t
- * m(v,q) < t.  In other words, find the first vector in the bag within a threshold distance away.
+ * m(v,q) &lt; t.  In other words, find the first vector in the bag within a threshold distance away.
  * 
  *  It returns one of the tuples of the bag of vectors.  The metric used is 
- * {@link <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>}, 
+ * <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>, 
  * which technically does not form a metric, but I'm stretching the definition here.
  * 
  * @see datafu.pig.hash.lsh.CosineDistanceHash CosineDistanceHash for an example
@@ -38,7 +38,7 @@ public class Cosine extends MetricUDF {
   /**
    * Create a new Cosine Metric UDF with a given dimension.
    * 
-   * @param sDim
+   * @param sDim dimension
    */
   public Cosine(String sDim) {
     super(sDim); 
@@ -46,8 +46,8 @@ public class Cosine extends MetricUDF {
   
   /**
    * Cosine similarity.
-   * @param v1
-   * @param v2
+   * @param v1 first vector
+   * @param v2 second vector
    * @return The cosine of the angle between the vectors
    */
   public static double distance(RealVector v1, RealVector v2) {
@@ -56,8 +56,8 @@ public class Cosine extends MetricUDF {
 
   /**
    * Cosine similarity.
-   * @param v1
-   * @param v2
+   * @param v1 first vector
+   * @param v2 second vector
    * @return Roughly the cosine of the angle between the vectors
    */
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/0f9b853b/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/L1.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/L1.java b/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/L1.java
index 311c2ed..8c93150 100644
--- a/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/L1.java
+++ b/datafu-pig/src/main/java/datafu/pig/hash/lsh/metric/L1.java
@@ -23,12 +23,14 @@ import org.apache.commons.math.linear.RealVector;
 
 /**
  * A UDF used to find a vector v in a bag such that for query point q, metric m and threshold t
- * m(v,q) < t.  In other words, find the first vector in the bag within a threshold distance away.
- * 
- *  It returns one of the tuples of the bag of vectors using {@link <a href="http://en.wikipedia.org/wiki/Taxicab_geometry" target="_blank">L1 distance</a>}, 
+ * m(v,q) &lt; t.  In other words, find the first vector in the bag within a threshold distance away.
+ *
+ * <p>
+ * It returns one of the tuples of the bag of vectors using <a href="http://en.wikipedia.org/wiki/Taxicab_geometry" target="_blank">L1 distance</a>, 
  * distance between two vectors.  This is otherwise known as
  * the manhattan distance, taxicab distance or city block distance.
- * 
+ * </p>
+ *
  * @see datafu.pig.hash.lsh.L1PStableHash L1PStableHash for an example
  * @author cstella
  *
@@ -37,14 +39,14 @@ public class L1 extends MetricUDF {
 
   /**
    * Create a new L1 Metric UDF with a given dimension.
-   * 
-   * @param sDim
+   *
+   * @param sDim dimension
    */
   public L1(String sDim) {
     super(sDim);
-   
+
   }
-  
+
   public static double distance(RealVector v1, RealVector v2) {
     return v1.getL1Distance(v2);
   }


Mime
View raw message