datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wvaug...@apache.org
Subject [51/51] [partial] git commit: DATAFU-20 Initial commit of website content
Date Mon, 27 Jan 2014 23:51:16 GMT
DATAFU-20 Initial commit of website content

Signed-off-by: William Vaughan <wvaughan@linkedin.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/424e3b48
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/424e3b48
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/424e3b48

Branch: refs/heads/master
Commit: 424e3b4857c0f2db13fcfd65812b08962b8e004a
Parents: 38a670e
Author: Matt Hayes <mhayes@linkedin.com>
Authored: Thu Jan 23 14:02:44 2014 -0800
Committer: William Vaughan <wvaughan@linkedin.com>
Committed: Mon Jan 27 15:10:59 2014 -0800

----------------------------------------------------------------------
 site/.gitignore                                 |   17 +
 site/Gemfile                                    |   24 +
 site/Gemfile.lock                               |  130 +
 site/README.md                                  |   31 +
 site/config.rb                                  |   87 +
 site/lib/pig.rb                                 |   73 +
 .../2012-01-10-introducing-datafu.html.markdown |  115 +
 ...-01-24-datafu-the-wd-40-of-big-data.markdown |  105 +
 site/source/blog/2013-09-04-datafu-1-0.markdown |  597 ++
 ...cremental-data-processing-in-hadoop.markdown |  444 ++
 site/source/blog/index.html.erb                 |   19 +
 .../docs/datafu/1.0.0/allclasses-frame.html     |  135 +
 .../docs/datafu/1.0.0/allclasses-noframe.html   |  135 +
 .../docs/datafu/1.0.0/constant-values.html      |  174 +
 .../1.0.0/datafu/pig/bags/AppendToBag.html      |  341 +
 .../datafu/1.0.0/datafu/pig/bags/BagConcat.html |  356 +
 .../datafu/1.0.0/datafu/pig/bags/BagGroup.html  |  355 +
 .../1.0.0/datafu/pig/bags/BagLeftOuterJoin.html |  361 +
 .../datafu/1.0.0/datafu/pig/bags/BagSplit.html  |  344 +
 .../datafu/1.0.0/datafu/pig/bags/CountEach.html |  404 +
 .../1.0.0/datafu/pig/bags/DistinctBy.html       |  390 +
 .../1.0.0/datafu/pig/bags/EmptyBagToNull.html   |  313 +
 .../datafu/pig/bags/EmptyBagToNullFields.html   |  328 +
 .../datafu/1.0.0/datafu/pig/bags/Enumerate.html |  407 +
 .../datafu/pig/bags/FirstTupleFromBag.html      |  340 +
 .../1.0.0/datafu/pig/bags/NullToEmptyBag.html   |  313 +
 .../1.0.0/datafu/pig/bags/PrependToBag.html     |  345 +
 .../1.0.0/datafu/pig/bags/ReverseEnumerate.html |  364 +
 .../1.0.0/datafu/pig/bags/UnorderedPairs.html   |  328 +
 .../datafu/pig/bags/class-use/AppendToBag.html  |  144 +
 .../datafu/pig/bags/class-use/BagConcat.html    |  144 +
 .../datafu/pig/bags/class-use/BagGroup.html     |  144 +
 .../pig/bags/class-use/BagLeftOuterJoin.html    |  144 +
 .../datafu/pig/bags/class-use/BagSplit.html     |  144 +
 .../datafu/pig/bags/class-use/CountEach.html    |  144 +
 .../datafu/pig/bags/class-use/DistinctBy.html   |  144 +
 .../pig/bags/class-use/EmptyBagToNull.html      |  144 +
 .../bags/class-use/EmptyBagToNullFields.html    |  144 +
 .../datafu/pig/bags/class-use/Enumerate.html    |  144 +
 .../pig/bags/class-use/FirstTupleFromBag.html   |  144 +
 .../pig/bags/class-use/NullToEmptyBag.html      |  144 +
 .../datafu/pig/bags/class-use/PrependToBag.html |  144 +
 .../pig/bags/class-use/ReverseEnumerate.html    |  144 +
 .../pig/bags/class-use/UnorderedPairs.html      |  144 +
 .../1.0.0/datafu/pig/bags/package-frame.html    |   60 +
 .../1.0.0/datafu/pig/bags/package-summary.html  |  233 +
 .../1.0.0/datafu/pig/bags/package-tree.html     |  164 +
 .../1.0.0/datafu/pig/bags/package-use.html      |  144 +
 .../datafu/pig/geo/HaversineDistInMiles.html    |  362 +
 .../pig/geo/class-use/HaversineDistInMiles.html |  144 +
 .../1.0.0/datafu/pig/geo/package-frame.html     |   32 +
 .../1.0.0/datafu/pig/geo/package-summary.html   |  173 +
 .../1.0.0/datafu/pig/geo/package-tree.html      |  157 +
 .../1.0.0/datafu/pig/geo/package-use.html       |  144 +
 .../docs/datafu/1.0.0/datafu/pig/hash/MD5.html  |  309 +
 .../1.0.0/datafu/pig/hash/class-use/MD5.html    |  144 +
 .../1.0.0/datafu/pig/hash/package-frame.html    |   32 +
 .../1.0.0/datafu/pig/hash/package-summary.html  |  172 +
 .../1.0.0/datafu/pig/hash/package-tree.html     |  157 +
 .../1.0.0/datafu/pig/hash/package-use.html      |  144 +
 .../1.0.0/datafu/pig/linkanalysis/PageRank.html |  478 ++
 .../datafu/pig/linkanalysis/PageRankImpl.html   |  848 +++
 .../pig/linkanalysis/class-use/PageRank.html    |  144 +
 .../linkanalysis/class-use/PageRankImpl.html    |  144 +
 .../datafu/pig/linkanalysis/package-frame.html  |   34 +
 .../pig/linkanalysis/package-summary.html       |  176 +
 .../datafu/pig/linkanalysis/package-tree.html   |  158 +
 .../datafu/pig/linkanalysis/package-use.html    |  144 +
 .../datafu/1.0.0/datafu/pig/random/RandInt.html |  326 +
 .../datafu/pig/random/class-use/RandInt.html    |  144 +
 .../1.0.0/datafu/pig/random/package-frame.html  |   32 +
 .../datafu/pig/random/package-summary.html      |  172 +
 .../1.0.0/datafu/pig/random/package-tree.html   |  157 +
 .../1.0.0/datafu/pig/random/package-use.html    |  144 +
 .../pig/sampling/ReservoirSample.Final.html     |  303 +
 .../pig/sampling/ReservoirSample.Initial.html   |  303 +
 .../sampling/ReservoirSample.Intermediate.html  |  303 +
 .../datafu/pig/sampling/ReservoirSample.html    |  491 ++
 .../1.0.0/datafu/pig/sampling/SampleByKey.html  |  376 +
 .../datafu/pig/sampling/WeightedSample.html     |  377 +
 .../class-use/ReservoirSample.Final.html        |  144 +
 .../class-use/ReservoirSample.Initial.html      |  144 +
 .../class-use/ReservoirSample.Intermediate.html |  144 +
 .../pig/sampling/class-use/ReservoirSample.html |  144 +
 .../pig/sampling/class-use/SampleByKey.html     |  144 +
 .../pig/sampling/class-use/WeightedSample.html  |  144 +
 .../datafu/pig/sampling/package-frame.html      |   42 +
 .../datafu/pig/sampling/package-summary.html    |  193 +
 .../1.0.0/datafu/pig/sampling/package-tree.html |  161 +
 .../1.0.0/datafu/pig/sampling/package-use.html  |  144 +
 .../1.0.0/datafu/pig/sessions/SessionCount.html |  370 +
 .../1.0.0/datafu/pig/sessions/Sessionize.html   |  401 +
 .../pig/sessions/class-use/SessionCount.html    |  144 +
 .../pig/sessions/class-use/Sessionize.html      |  144 +
 .../datafu/pig/sessions/package-frame.html      |   34 +
 .../datafu/pig/sessions/package-summary.html    |  177 +
 .../1.0.0/datafu/pig/sessions/package-tree.html |  158 +
 .../1.0.0/datafu/pig/sessions/package-use.html  |  144 +
 .../1.0.0/datafu/pig/sets/SetIntersect.html     |  337 +
 .../datafu/1.0.0/datafu/pig/sets/SetUnion.html  |  314 +
 .../datafu/pig/sets/class-use/SetIntersect.html |  144 +
 .../datafu/pig/sets/class-use/SetUnion.html     |  144 +
 .../1.0.0/datafu/pig/sets/package-frame.html    |   34 +
 .../1.0.0/datafu/pig/sets/package-summary.html  |  176 +
 .../1.0.0/datafu/pig/sets/package-tree.html     |  157 +
 .../1.0.0/datafu/pig/sets/package-use.html      |  144 +
 .../1.0.0/datafu/pig/stats/MarkovPairs.html     |  333 +
 .../datafu/1.0.0/datafu/pig/stats/Median.html   |  284 +
 .../datafu/1.0.0/datafu/pig/stats/Quantile.html |  383 +
 .../1.0.0/datafu/pig/stats/QuantileUtil.html    |  279 +
 .../1.0.0/datafu/pig/stats/StreamingMedian.html |  288 +
 .../datafu/pig/stats/StreamingQuantile.html     |  453 ++
 .../1.0.0/datafu/pig/stats/VAR.Final.html       |  289 +
 .../1.0.0/datafu/pig/stats/VAR.Initial.html     |  289 +
 .../datafu/pig/stats/VAR.Intermediate.html      |  289 +
 .../docs/datafu/1.0.0/datafu/pig/stats/VAR.html |  630 ++
 .../1.0.0/datafu/pig/stats/WilsonBinConf.html   |  382 +
 .../datafu/pig/stats/class-use/MarkovPairs.html |  144 +
 .../datafu/pig/stats/class-use/Median.html      |  144 +
 .../datafu/pig/stats/class-use/Quantile.html    |  181 +
 .../pig/stats/class-use/QuantileUtil.html       |  144 +
 .../pig/stats/class-use/StreamingMedian.html    |  144 +
 .../pig/stats/class-use/StreamingQuantile.html  |  181 +
 .../datafu/pig/stats/class-use/VAR.Final.html   |  144 +
 .../datafu/pig/stats/class-use/VAR.Initial.html |  144 +
 .../pig/stats/class-use/VAR.Intermediate.html   |  144 +
 .../1.0.0/datafu/pig/stats/class-use/VAR.html   |  144 +
 .../pig/stats/class-use/WilsonBinConf.html      |  144 +
 .../1.0.0/datafu/pig/stats/package-frame.html   |   52 +
 .../1.0.0/datafu/pig/stats/package-summary.html |  218 +
 .../1.0.0/datafu/pig/stats/package-tree.html    |  165 +
 .../1.0.0/datafu/pig/stats/package-use.html     |  178 +
 .../datafu/pig/urls/UserAgentClassify.html      |  295 +
 .../pig/urls/class-use/UserAgentClassify.html   |  144 +
 .../1.0.0/datafu/pig/urls/package-frame.html    |   32 +
 .../1.0.0/datafu/pig/urls/package-summary.html  |  172 +
 .../1.0.0/datafu/pig/urls/package-tree.html     |  157 +
 .../1.0.0/datafu/pig/urls/package-use.html      |  144 +
 .../datafu/pig/util/AliasableEvalFunc.html      |  783 ++
 .../datafu/1.0.0/datafu/pig/util/Assert.html    |  321 +
 .../datafu/1.0.0/datafu/pig/util/BoolToInt.html |  294 +
 .../datafu/1.0.0/datafu/pig/util/Coalesce.html  |  369 +
 .../datafu/pig/util/ContextualEvalFunc.html     |  359 +
 .../1.0.0/datafu/pig/util/DataFuException.html  |  402 +
 .../1.0.0/datafu/pig/util/FieldNotFound.html    |  291 +
 .../docs/datafu/1.0.0/datafu/pig/util/In.html   |  319 +
 .../datafu/1.0.0/datafu/pig/util/IntToBool.html |  294 +
 .../1.0.0/datafu/pig/util/SimpleEvalFunc.html   |  400 +
 .../datafu/pig/util/TransposeTupleToBag.html    |  356 +
 .../pig/util/class-use/AliasableEvalFunc.html   |  226 +
 .../1.0.0/datafu/pig/util/class-use/Assert.html |  144 +
 .../datafu/pig/util/class-use/BoolToInt.html    |  144 +
 .../datafu/pig/util/class-use/Coalesce.html     |  144 +
 .../pig/util/class-use/ContextualEvalFunc.html  |  243 +
 .../pig/util/class-use/DataFuException.html     |  144 +
 .../pig/util/class-use/FieldNotFound.html       |  144 +
 .../1.0.0/datafu/pig/util/class-use/In.html     |  144 +
 .../datafu/pig/util/class-use/IntToBool.html    |  144 +
 .../pig/util/class-use/SimpleEvalFunc.html      |  406 +
 .../pig/util/class-use/TransposeTupleToBag.html |  144 +
 .../1.0.0/datafu/pig/util/package-frame.html    |   61 +
 .../1.0.0/datafu/pig/util/package-summary.html  |  225 +
 .../1.0.0/datafu/pig/util/package-tree.html     |  178 +
 .../1.0.0/datafu/pig/util/package-use.html      |  310 +
 .../docs/datafu/1.0.0/deprecated-list.html      |  146 +
 site/source/docs/datafu/1.0.0/help-doc.html     |  223 +
 site/source/docs/datafu/1.0.0/index-all.html    |  979 +++
 site/source/docs/datafu/1.0.0/index.html        |   39 +
 .../docs/datafu/1.0.0/overview-frame.html       |   62 +
 .../docs/datafu/1.0.0/overview-summary.html     |  196 +
 .../source/docs/datafu/1.0.0/overview-tree.html |  188 +
 site/source/docs/datafu/1.0.0/package-list      |   11 +
 .../docs/datafu/1.0.0/resources/inherit.gif     |  Bin 0 -> 57 bytes
 .../docs/datafu/1.0.0/serialized-form.html      |  205 +
 site/source/docs/datafu/1.0.0/stylesheet.css    |   29 +
 .../docs/datafu/1.1.0/allclasses-frame.html     |  149 +
 .../docs/datafu/1.1.0/allclasses-noframe.html   |  149 +
 .../docs/datafu/1.1.0/constant-values.html      |  174 +
 .../1.1.0/datafu/pig/bags/AppendToBag.html      |  341 +
 .../datafu/1.1.0/datafu/pig/bags/BagConcat.html |  356 +
 .../datafu/1.1.0/datafu/pig/bags/BagGroup.html  |  355 +
 .../1.1.0/datafu/pig/bags/BagLeftOuterJoin.html |  361 +
 .../datafu/1.1.0/datafu/pig/bags/BagSplit.html  |  344 +
 .../datafu/1.1.0/datafu/pig/bags/CountEach.html |  404 +
 .../1.1.0/datafu/pig/bags/DistinctBy.html       |  390 +
 .../1.1.0/datafu/pig/bags/EmptyBagToNull.html   |  313 +
 .../datafu/pig/bags/EmptyBagToNullFields.html   |  328 +
 .../datafu/1.1.0/datafu/pig/bags/Enumerate.html |  407 +
 .../datafu/pig/bags/FirstTupleFromBag.html      |  340 +
 .../1.1.0/datafu/pig/bags/NullToEmptyBag.html   |  313 +
 .../1.1.0/datafu/pig/bags/PrependToBag.html     |  345 +
 .../1.1.0/datafu/pig/bags/ReverseEnumerate.html |  364 +
 .../1.1.0/datafu/pig/bags/UnorderedPairs.html   |  328 +
 .../datafu/pig/bags/class-use/AppendToBag.html  |  144 +
 .../datafu/pig/bags/class-use/BagConcat.html    |  144 +
 .../datafu/pig/bags/class-use/BagGroup.html     |  144 +
 .../pig/bags/class-use/BagLeftOuterJoin.html    |  144 +
 .../datafu/pig/bags/class-use/BagSplit.html     |  144 +
 .../datafu/pig/bags/class-use/CountEach.html    |  144 +
 .../datafu/pig/bags/class-use/DistinctBy.html   |  144 +
 .../pig/bags/class-use/EmptyBagToNull.html      |  144 +
 .../bags/class-use/EmptyBagToNullFields.html    |  144 +
 .../datafu/pig/bags/class-use/Enumerate.html    |  144 +
 .../pig/bags/class-use/FirstTupleFromBag.html   |  144 +
 .../pig/bags/class-use/NullToEmptyBag.html      |  144 +
 .../datafu/pig/bags/class-use/PrependToBag.html |  144 +
 .../pig/bags/class-use/ReverseEnumerate.html    |  144 +
 .../pig/bags/class-use/UnorderedPairs.html      |  144 +
 .../1.1.0/datafu/pig/bags/package-frame.html    |   60 +
 .../1.1.0/datafu/pig/bags/package-summary.html  |  233 +
 .../1.1.0/datafu/pig/bags/package-tree.html     |  164 +
 .../1.1.0/datafu/pig/bags/package-use.html      |  144 +
 .../datafu/pig/geo/HaversineDistInMiles.html    |  362 +
 .../pig/geo/class-use/HaversineDistInMiles.html |  144 +
 .../1.1.0/datafu/pig/geo/package-frame.html     |   32 +
 .../1.1.0/datafu/pig/geo/package-summary.html   |  173 +
 .../1.1.0/datafu/pig/geo/package-tree.html      |  157 +
 .../1.1.0/datafu/pig/geo/package-use.html       |  144 +
 .../docs/datafu/1.1.0/datafu/pig/hash/MD5.html  |  309 +
 .../docs/datafu/1.1.0/datafu/pig/hash/SHA.html  |  304 +
 .../1.1.0/datafu/pig/hash/class-use/MD5.html    |  144 +
 .../1.1.0/datafu/pig/hash/class-use/SHA.html    |  144 +
 .../1.1.0/datafu/pig/hash/package-frame.html    |   34 +
 .../1.1.0/datafu/pig/hash/package-summary.html  |  176 +
 .../1.1.0/datafu/pig/hash/package-tree.html     |  157 +
 .../1.1.0/datafu/pig/hash/package-use.html      |  144 +
 .../1.1.0/datafu/pig/linkanalysis/PageRank.html |  478 ++
 .../datafu/pig/linkanalysis/PageRankImpl.html   |  848 +++
 .../pig/linkanalysis/class-use/PageRank.html    |  144 +
 .../linkanalysis/class-use/PageRankImpl.html    |  144 +
 .../datafu/pig/linkanalysis/package-frame.html  |   34 +
 .../pig/linkanalysis/package-summary.html       |  176 +
 .../datafu/pig/linkanalysis/package-tree.html   |  158 +
 .../datafu/pig/linkanalysis/package-use.html    |  144 +
 .../datafu/1.1.0/datafu/pig/random/RandInt.html |  326 +
 .../datafu/pig/random/class-use/RandInt.html    |  144 +
 .../1.1.0/datafu/pig/random/package-frame.html  |   32 +
 .../datafu/pig/random/package-summary.html      |  172 +
 .../1.1.0/datafu/pig/random/package-tree.html   |  157 +
 .../1.1.0/datafu/pig/random/package-use.html    |  144 +
 .../pig/sampling/ReservoirSample.Final.html     |  303 +
 .../pig/sampling/ReservoirSample.Initial.html   |  303 +
 .../sampling/ReservoirSample.Intermediate.html  |  303 +
 .../datafu/pig/sampling/ReservoirSample.html    |  503 ++
 .../1.1.0/datafu/pig/sampling/SampleByKey.html  |  376 +
 .../pig/sampling/SimpleRandomSample.Final.html  |  303 +
 .../sampling/SimpleRandomSample.Initial.html    |  303 +
 .../SimpleRandomSample.Intermediate.html        |  303 +
 .../datafu/pig/sampling/SimpleRandomSample.html |  495 ++
 .../datafu/pig/sampling/WeightedSample.html     |  381 +
 .../class-use/ReservoirSample.Final.html        |  144 +
 .../class-use/ReservoirSample.Initial.html      |  144 +
 .../class-use/ReservoirSample.Intermediate.html |  144 +
 .../pig/sampling/class-use/ReservoirSample.html |  144 +
 .../pig/sampling/class-use/SampleByKey.html     |  144 +
 .../class-use/SimpleRandomSample.Final.html     |  144 +
 .../class-use/SimpleRandomSample.Initial.html   |  144 +
 .../SimpleRandomSample.Intermediate.html        |  144 +
 .../sampling/class-use/SimpleRandomSample.html  |  144 +
 .../pig/sampling/class-use/WeightedSample.html  |  144 +
 .../datafu/pig/sampling/package-frame.html      |   50 +
 .../datafu/pig/sampling/package-summary.html    |  209 +
 .../1.1.0/datafu/pig/sampling/package-tree.html |  164 +
 .../1.1.0/datafu/pig/sampling/package-use.html  |  144 +
 .../1.1.0/datafu/pig/sessions/SessionCount.html |  370 +
 .../1.1.0/datafu/pig/sessions/Sessionize.html   |  401 +
 .../pig/sessions/class-use/SessionCount.html    |  144 +
 .../pig/sessions/class-use/Sessionize.html      |  144 +
 .../datafu/pig/sessions/package-frame.html      |   34 +
 .../datafu/pig/sessions/package-summary.html    |  177 +
 .../1.1.0/datafu/pig/sessions/package-tree.html |  158 +
 .../1.1.0/datafu/pig/sessions/package-use.html  |  144 +
 .../1.1.0/datafu/pig/sets/SetIntersect.html     |  337 +
 .../datafu/1.1.0/datafu/pig/sets/SetUnion.html  |  314 +
 .../datafu/pig/sets/class-use/SetIntersect.html |  144 +
 .../datafu/pig/sets/class-use/SetUnion.html     |  144 +
 .../1.1.0/datafu/pig/sets/package-frame.html    |   34 +
 .../1.1.0/datafu/pig/sets/package-summary.html  |  176 +
 .../1.1.0/datafu/pig/sets/package-tree.html     |  157 +
 .../1.1.0/datafu/pig/sets/package-use.html      |  144 +
 .../1.1.0/datafu/pig/stats/MarkovPairs.html     |  333 +
 .../datafu/1.1.0/datafu/pig/stats/Median.html   |  284 +
 .../datafu/1.1.0/datafu/pig/stats/Quantile.html |  383 +
 .../1.1.0/datafu/pig/stats/QuantileUtil.html    |  279 +
 .../1.1.0/datafu/pig/stats/StreamingMedian.html |  288 +
 .../datafu/pig/stats/StreamingQuantile.html     |  453 ++
 .../1.1.0/datafu/pig/stats/VAR.Final.html       |  289 +
 .../1.1.0/datafu/pig/stats/VAR.Initial.html     |  289 +
 .../datafu/pig/stats/VAR.Intermediate.html      |  289 +
 .../docs/datafu/1.1.0/datafu/pig/stats/VAR.html |  630 ++
 .../1.1.0/datafu/pig/stats/WilsonBinConf.html   |  382 +
 .../datafu/pig/stats/class-use/MarkovPairs.html |  144 +
 .../datafu/pig/stats/class-use/Median.html      |  144 +
 .../datafu/pig/stats/class-use/Quantile.html    |  181 +
 .../pig/stats/class-use/QuantileUtil.html       |  144 +
 .../pig/stats/class-use/StreamingMedian.html    |  144 +
 .../pig/stats/class-use/StreamingQuantile.html  |  181 +
 .../datafu/pig/stats/class-use/VAR.Final.html   |  144 +
 .../datafu/pig/stats/class-use/VAR.Initial.html |  144 +
 .../pig/stats/class-use/VAR.Intermediate.html   |  144 +
 .../1.1.0/datafu/pig/stats/class-use/VAR.html   |  144 +
 .../pig/stats/class-use/WilsonBinConf.html      |  144 +
 .../1.1.0/datafu/pig/stats/package-frame.html   |   52 +
 .../1.1.0/datafu/pig/stats/package-summary.html |  218 +
 .../1.1.0/datafu/pig/stats/package-tree.html    |  165 +
 .../1.1.0/datafu/pig/stats/package-use.html     |  178 +
 .../datafu/pig/urls/UserAgentClassify.html      |  295 +
 .../pig/urls/class-use/UserAgentClassify.html   |  144 +
 .../1.1.0/datafu/pig/urls/package-frame.html    |   32 +
 .../1.1.0/datafu/pig/urls/package-summary.html  |  172 +
 .../1.1.0/datafu/pig/urls/package-tree.html     |  157 +
 .../1.1.0/datafu/pig/urls/package-use.html      |  144 +
 .../datafu/pig/util/AliasableEvalFunc.html      |  783 ++
 .../datafu/1.1.0/datafu/pig/util/Assert.html    |  281 +
 .../datafu/1.1.0/datafu/pig/util/AssertUDF.html |  324 +
 .../datafu/1.1.0/datafu/pig/util/BoolToInt.html |  294 +
 .../datafu/1.1.0/datafu/pig/util/Coalesce.html  |  369 +
 .../datafu/pig/util/ContextualEvalFunc.html     |  359 +
 .../1.1.0/datafu/pig/util/DataFuException.html  |  402 +
 .../1.1.0/datafu/pig/util/FieldNotFound.html    |  291 +
 .../docs/datafu/1.1.0/datafu/pig/util/In.html   |  281 +
 .../datafu/1.1.0/datafu/pig/util/InUDF.html     |  322 +
 .../datafu/1.1.0/datafu/pig/util/IntToBool.html |  294 +
 .../1.1.0/datafu/pig/util/SimpleEvalFunc.html   |  400 +
 .../datafu/pig/util/TransposeTupleToBag.html    |  356 +
 .../pig/util/class-use/AliasableEvalFunc.html   |  226 +
 .../1.1.0/datafu/pig/util/class-use/Assert.html |  144 +
 .../datafu/pig/util/class-use/AssertUDF.html    |  180 +
 .../datafu/pig/util/class-use/BoolToInt.html    |  144 +
 .../datafu/pig/util/class-use/Coalesce.html     |  144 +
 .../pig/util/class-use/ContextualEvalFunc.html  |  243 +
 .../pig/util/class-use/DataFuException.html     |  144 +
 .../pig/util/class-use/FieldNotFound.html       |  144 +
 .../1.1.0/datafu/pig/util/class-use/In.html     |  144 +
 .../1.1.0/datafu/pig/util/class-use/InUDF.html  |  180 +
 .../datafu/pig/util/class-use/IntToBool.html    |  144 +
 .../pig/util/class-use/SimpleEvalFunc.html      |  414 +
 .../pig/util/class-use/TransposeTupleToBag.html |  144 +
 .../1.1.0/datafu/pig/util/package-frame.html    |   65 +
 .../1.1.0/datafu/pig/util/package-summary.html  |  233 +
 .../1.1.0/datafu/pig/util/package-tree.html     |  182 +
 .../1.1.0/datafu/pig/util/package-use.html      |  323 +
 .../docs/datafu/1.1.0/deprecated-list.html      |  166 +
 site/source/docs/datafu/1.1.0/help-doc.html     |  223 +
 site/source/docs/datafu/1.1.0/index-all.html    | 1045 +++
 site/source/docs/datafu/1.1.0/index.html        |   39 +
 .../docs/datafu/1.1.0/overview-frame.html       |   62 +
 .../docs/datafu/1.1.0/overview-summary.html     |  196 +
 .../source/docs/datafu/1.1.0/overview-tree.html |  195 +
 site/source/docs/datafu/1.1.0/package-list      |   11 +
 .../docs/datafu/1.1.0/resources/inherit.gif     |  Bin 0 -> 57 bytes
 .../docs/datafu/1.1.0/serialized-form.html      |  205 +
 site/source/docs/datafu/1.1.0/stylesheet.css    |   29 +
 .../docs/datafu/1.2.0/allclasses-frame.html     |  163 +
 .../docs/datafu/1.2.0/allclasses-noframe.html   |  163 +
 .../docs/datafu/1.2.0/constant-values.html      |  234 +
 .../1.2.0/datafu/pig/bags/AppendToBag.html      |  341 +
 .../datafu/1.2.0/datafu/pig/bags/BagConcat.html |  356 +
 .../datafu/1.2.0/datafu/pig/bags/BagGroup.html  |  355 +
 .../1.2.0/datafu/pig/bags/BagLeftOuterJoin.html |  361 +
 .../datafu/1.2.0/datafu/pig/bags/BagSplit.html  |  344 +
 .../datafu/1.2.0/datafu/pig/bags/CountEach.html |  404 +
 .../1.2.0/datafu/pig/bags/DistinctBy.html       |  390 +
 .../1.2.0/datafu/pig/bags/EmptyBagToNull.html   |  313 +
 .../datafu/pig/bags/EmptyBagToNullFields.html   |  328 +
 .../datafu/1.2.0/datafu/pig/bags/Enumerate.html |  407 +
 .../datafu/pig/bags/FirstTupleFromBag.html      |  340 +
 .../1.2.0/datafu/pig/bags/NullToEmptyBag.html   |  313 +
 .../1.2.0/datafu/pig/bags/PrependToBag.html     |  345 +
 .../1.2.0/datafu/pig/bags/ReverseEnumerate.html |  364 +
 .../1.2.0/datafu/pig/bags/UnorderedPairs.html   |  328 +
 .../datafu/pig/bags/class-use/AppendToBag.html  |  144 +
 .../datafu/pig/bags/class-use/BagConcat.html    |  144 +
 .../datafu/pig/bags/class-use/BagGroup.html     |  144 +
 .../pig/bags/class-use/BagLeftOuterJoin.html    |  144 +
 .../datafu/pig/bags/class-use/BagSplit.html     |  144 +
 .../datafu/pig/bags/class-use/CountEach.html    |  144 +
 .../datafu/pig/bags/class-use/DistinctBy.html   |  144 +
 .../pig/bags/class-use/EmptyBagToNull.html      |  144 +
 .../bags/class-use/EmptyBagToNullFields.html    |  144 +
 .../datafu/pig/bags/class-use/Enumerate.html    |  144 +
 .../pig/bags/class-use/FirstTupleFromBag.html   |  144 +
 .../pig/bags/class-use/NullToEmptyBag.html      |  144 +
 .../datafu/pig/bags/class-use/PrependToBag.html |  144 +
 .../pig/bags/class-use/ReverseEnumerate.html    |  144 +
 .../pig/bags/class-use/UnorderedPairs.html      |  144 +
 .../1.2.0/datafu/pig/bags/package-frame.html    |   60 +
 .../1.2.0/datafu/pig/bags/package-summary.html  |  233 +
 .../1.2.0/datafu/pig/bags/package-tree.html     |  164 +
 .../1.2.0/datafu/pig/bags/package-use.html      |  144 +
 .../datafu/pig/geo/HaversineDistInMiles.html    |  362 +
 .../pig/geo/class-use/HaversineDistInMiles.html |  144 +
 .../1.2.0/datafu/pig/geo/package-frame.html     |   32 +
 .../1.2.0/datafu/pig/geo/package-summary.html   |  173 +
 .../1.2.0/datafu/pig/geo/package-tree.html      |  157 +
 .../1.2.0/datafu/pig/geo/package-use.html       |  144 +
 .../docs/datafu/1.2.0/datafu/pig/hash/MD5.html  |  309 +
 .../docs/datafu/1.2.0/datafu/pig/hash/SHA.html  |  304 +
 .../1.2.0/datafu/pig/hash/class-use/MD5.html    |  144 +
 .../1.2.0/datafu/pig/hash/class-use/SHA.html    |  144 +
 .../1.2.0/datafu/pig/hash/package-frame.html    |   34 +
 .../1.2.0/datafu/pig/hash/package-summary.html  |  176 +
 .../1.2.0/datafu/pig/hash/package-tree.html     |  157 +
 .../1.2.0/datafu/pig/hash/package-use.html      |  144 +
 .../1.2.0/datafu/pig/linkanalysis/PageRank.html |  478 ++
 .../datafu/pig/linkanalysis/PageRankImpl.html   |  848 +++
 .../pig/linkanalysis/class-use/PageRank.html    |  144 +
 .../linkanalysis/class-use/PageRankImpl.html    |  144 +
 .../datafu/pig/linkanalysis/package-frame.html  |   34 +
 .../pig/linkanalysis/package-summary.html       |  176 +
 .../datafu/pig/linkanalysis/package-tree.html   |  158 +
 .../datafu/pig/linkanalysis/package-use.html    |  144 +
 .../datafu/1.2.0/datafu/pig/random/RandInt.html |  326 +
 .../datafu/pig/random/class-use/RandInt.html    |  144 +
 .../1.2.0/datafu/pig/random/package-frame.html  |   32 +
 .../datafu/pig/random/package-summary.html      |  172 +
 .../1.2.0/datafu/pig/random/package-tree.html   |  157 +
 .../1.2.0/datafu/pig/random/package-use.html    |  144 +
 .../pig/sampling/ReservoirSample.Final.html     |  303 +
 .../pig/sampling/ReservoirSample.Initial.html   |  303 +
 .../sampling/ReservoirSample.Intermediate.html  |  303 +
 .../datafu/pig/sampling/ReservoirSample.html    |  503 ++
 .../1.2.0/datafu/pig/sampling/SampleByKey.html  |  376 +
 .../pig/sampling/SimpleRandomSample.Final.html  |  303 +
 .../sampling/SimpleRandomSample.Initial.html    |  303 +
 .../SimpleRandomSample.Intermediate.html        |  303 +
 .../datafu/pig/sampling/SimpleRandomSample.html |  450 ++
 ...eRandomSampleWithReplacementElect.Final.html |  289 +
 ...andomSampleWithReplacementElect.Initial.html |  289 +
 ...SampleWithReplacementElect.Intermediate.html |  289 +
 .../SimpleRandomSampleWithReplacementElect.html |  479 ++
 .../SimpleRandomSampleWithReplacementVote.html  |  508 ++
 .../datafu/pig/sampling/WeightedSample.html     |  381 +
 .../class-use/ReservoirSample.Final.html        |  144 +
 .../class-use/ReservoirSample.Initial.html      |  144 +
 .../class-use/ReservoirSample.Intermediate.html |  144 +
 .../pig/sampling/class-use/ReservoirSample.html |  144 +
 .../pig/sampling/class-use/SampleByKey.html     |  144 +
 .../class-use/SimpleRandomSample.Final.html     |  144 +
 .../class-use/SimpleRandomSample.Initial.html   |  144 +
 .../SimpleRandomSample.Intermediate.html        |  144 +
 .../sampling/class-use/SimpleRandomSample.html  |  144 +
 ...eRandomSampleWithReplacementElect.Final.html |  144 +
 ...andomSampleWithReplacementElect.Initial.html |  144 +
 ...SampleWithReplacementElect.Intermediate.html |  144 +
 .../SimpleRandomSampleWithReplacementElect.html |  144 +
 .../SimpleRandomSampleWithReplacementVote.html  |  144 +
 .../pig/sampling/class-use/WeightedSample.html  |  144 +
 .../datafu/pig/sampling/package-frame.html      |   60 +
 .../datafu/pig/sampling/package-summary.html    |  230 +
 .../1.2.0/datafu/pig/sampling/package-tree.html |  164 +
 .../1.2.0/datafu/pig/sampling/package-use.html  |  144 +
 .../1.2.0/datafu/pig/sessions/SessionCount.html |  370 +
 .../1.2.0/datafu/pig/sessions/Sessionize.html   |  401 +
 .../pig/sessions/class-use/SessionCount.html    |  144 +
 .../pig/sessions/class-use/Sessionize.html      |  144 +
 .../datafu/pig/sessions/package-frame.html      |   34 +
 .../datafu/pig/sessions/package-summary.html    |  177 +
 .../1.2.0/datafu/pig/sessions/package-tree.html |  158 +
 .../1.2.0/datafu/pig/sessions/package-use.html  |  144 +
 .../1.2.0/datafu/pig/sets/SetDifference.html    |  348 +
 .../1.2.0/datafu/pig/sets/SetIntersect.html     |  337 +
 .../datafu/1.2.0/datafu/pig/sets/SetUnion.html  |  314 +
 .../pig/sets/class-use/SetDifference.html       |  144 +
 .../datafu/pig/sets/class-use/SetIntersect.html |  144 +
 .../datafu/pig/sets/class-use/SetUnion.html     |  144 +
 .../1.2.0/datafu/pig/sets/package-frame.html    |   36 +
 .../1.2.0/datafu/pig/sets/package-summary.html  |  180 +
 .../1.2.0/datafu/pig/sets/package-tree.html     |  157 +
 .../1.2.0/datafu/pig/sets/package-use.html      |  144 +
 .../datafu/pig/stats/HyperLogLogPlusPlus.html   |  403 +
 .../1.2.0/datafu/pig/stats/MarkovPairs.html     |  333 +
 .../datafu/1.2.0/datafu/pig/stats/Median.html   |  284 +
 .../datafu/1.2.0/datafu/pig/stats/Quantile.html |  383 +
 .../1.2.0/datafu/pig/stats/QuantileUtil.html    |  279 +
 .../1.2.0/datafu/pig/stats/StreamingMedian.html |  288 +
 .../datafu/pig/stats/StreamingQuantile.html     |  453 ++
 .../1.2.0/datafu/pig/stats/VAR.Final.html       |  289 +
 .../1.2.0/datafu/pig/stats/VAR.Initial.html     |  289 +
 .../datafu/pig/stats/VAR.Intermediate.html      |  289 +
 .../docs/datafu/1.2.0/datafu/pig/stats/VAR.html |  630 ++
 .../1.2.0/datafu/pig/stats/WilsonBinConf.html   |  382 +
 .../stats/class-use/HyperLogLogPlusPlus.html    |  144 +
 .../datafu/pig/stats/class-use/MarkovPairs.html |  144 +
 .../datafu/pig/stats/class-use/Median.html      |  144 +
 .../datafu/pig/stats/class-use/Quantile.html    |  181 +
 .../pig/stats/class-use/QuantileUtil.html       |  144 +
 .../pig/stats/class-use/StreamingMedian.html    |  144 +
 .../pig/stats/class-use/StreamingQuantile.html  |  181 +
 .../datafu/pig/stats/class-use/VAR.Final.html   |  144 +
 .../datafu/pig/stats/class-use/VAR.Initial.html |  144 +
 .../pig/stats/class-use/VAR.Intermediate.html   |  144 +
 .../1.2.0/datafu/pig/stats/class-use/VAR.html   |  144 +
 .../pig/stats/class-use/WilsonBinConf.html      |  144 +
 .../1.2.0/datafu/pig/stats/package-frame.html   |   54 +
 .../1.2.0/datafu/pig/stats/package-summary.html |  222 +
 .../1.2.0/datafu/pig/stats/package-tree.html    |  165 +
 .../1.2.0/datafu/pig/stats/package-use.html     |  178 +
 .../datafu/pig/urls/UserAgentClassify.html      |  295 +
 .../pig/urls/class-use/UserAgentClassify.html   |  144 +
 .../1.2.0/datafu/pig/urls/package-frame.html    |   32 +
 .../1.2.0/datafu/pig/urls/package-summary.html  |  172 +
 .../1.2.0/datafu/pig/urls/package-tree.html     |  157 +
 .../1.2.0/datafu/pig/urls/package-use.html      |  144 +
 .../datafu/pig/util/AliasableEvalFunc.html      |  783 ++
 .../datafu/1.2.0/datafu/pig/util/Assert.html    |  281 +
 .../datafu/1.2.0/datafu/pig/util/AssertUDF.html |  324 +
 .../datafu/1.2.0/datafu/pig/util/BoolToInt.html |  294 +
 .../datafu/1.2.0/datafu/pig/util/Coalesce.html  |  369 +
 .../datafu/pig/util/ContextualEvalFunc.html     |  359 +
 .../1.2.0/datafu/pig/util/DataFuException.html  |  402 +
 .../1.2.0/datafu/pig/util/FieldNotFound.html    |  291 +
 .../docs/datafu/1.2.0/datafu/pig/util/In.html   |  281 +
 .../datafu/1.2.0/datafu/pig/util/InUDF.html     |  322 +
 .../datafu/1.2.0/datafu/pig/util/IntToBool.html |  294 +
 .../1.2.0/datafu/pig/util/SimpleEvalFunc.html   |  400 +
 .../datafu/pig/util/TransposeTupleToBag.html    |  356 +
 .../pig/util/class-use/AliasableEvalFunc.html   |  226 +
 .../1.2.0/datafu/pig/util/class-use/Assert.html |  144 +
 .../datafu/pig/util/class-use/AssertUDF.html    |  180 +
 .../datafu/pig/util/class-use/BoolToInt.html    |  144 +
 .../datafu/pig/util/class-use/Coalesce.html     |  144 +
 .../pig/util/class-use/ContextualEvalFunc.html  |  243 +
 .../pig/util/class-use/DataFuException.html     |  144 +
 .../pig/util/class-use/FieldNotFound.html       |  144 +
 .../1.2.0/datafu/pig/util/class-use/In.html     |  144 +
 .../1.2.0/datafu/pig/util/class-use/InUDF.html  |  180 +
 .../datafu/pig/util/class-use/IntToBool.html    |  144 +
 .../pig/util/class-use/SimpleEvalFunc.html      |  414 +
 .../pig/util/class-use/TransposeTupleToBag.html |  144 +
 .../1.2.0/datafu/pig/util/package-frame.html    |   65 +
 .../1.2.0/datafu/pig/util/package-summary.html  |  233 +
 .../1.2.0/datafu/pig/util/package-tree.html     |  182 +
 .../1.2.0/datafu/pig/util/package-use.html      |  323 +
 .../docs/datafu/1.2.0/deprecated-list.html      |  166 +
 site/source/docs/datafu/1.2.0/help-doc.html     |  223 +
 site/source/docs/datafu/1.2.0/index-all.html    | 1134 +++
 site/source/docs/datafu/1.2.0/index.html        |   39 +
 .../docs/datafu/1.2.0/overview-frame.html       |   62 +
 .../docs/datafu/1.2.0/overview-summary.html     |  196 +
 .../source/docs/datafu/1.2.0/overview-tree.html |  195 +
 site/source/docs/datafu/1.2.0/package-list      |   11 +
 .../docs/datafu/1.2.0/resources/inherit.gif     |  Bin 0 -> 57 bytes
 .../docs/datafu/1.2.0/serialized-form.html      |  205 +
 site/source/docs/datafu/1.2.0/stylesheet.css    |   29 +
 .../docs/datafu/contributing.html.markdown      |   51 +
 .../datafu/getting-started.html.markdown.erb    |  134 +
 site/source/docs/datafu/guide.html.markdown.erb |   40 +
 .../guide/bag-operations.html.markdown.erb      |  174 +
 .../datafu/guide/estimation.html.markdown.erb   |   48 +
 .../docs/datafu/guide/hashing.html.markdown.erb |   58 +
 .../guide/link-analysis.html.markdown.erb       |   34 +
 .../more-tips-and-tricks.html.markdown.erb      |  128 +
 .../datafu/guide/sampling.html.markdown.erb     |  240 +
 .../datafu/guide/sessions.html.markdown.erb     |  141 +
 .../guide/set-operations.html.markdown.erb      |   72 +
 .../datafu/guide/statistics.html.markdown.erb   |   65 +
 .../docs/datafu/javadoc.html.markdown.erb       |   14 +
 .../docs/hourglass/0.1.3/allclasses-frame.html  |  151 +
 .../hourglass/0.1.3/allclasses-noframe.html     |  151 +
 .../docs/hourglass/0.1.3/constant-values.html   |  222 +
 .../hourglass/avro/AvroDateRangeMetadata.html   |  350 +
 .../AvroKeyValueWithMetadataOutputFormat.html   |  333 +
 .../AvroKeyValueWithMetadataRecordWriter.html   |  375 +
 ...etadataOutputFormat.RecordWriterFactory.html |  274 +
 .../avro/AvroKeyWithMetadataOutputFormat.html   |  359 +
 .../avro/AvroKeyWithMetadataRecordWriter.html   |  347 +
 .../avro/AvroMultipleInputsKeyInputFormat.html  |  304 +
 .../hourglass/avro/AvroMultipleInputsUtil.html  |  292 +
 ...InputFormat.CombinedAvroKeyRecordReader.html |  293 +
 .../avro/CombinedAvroKeyInputFormat.html        |  336 +
 .../avro/class-use/AvroDateRangeMetadata.html   |  144 +
 .../AvroKeyValueWithMetadataOutputFormat.html   |  144 +
 .../AvroKeyValueWithMetadataRecordWriter.html   |  144 +
 ...etadataOutputFormat.RecordWriterFactory.html |  178 +
 .../AvroKeyWithMetadataOutputFormat.html        |  144 +
 .../AvroKeyWithMetadataRecordWriter.html        |  144 +
 .../AvroMultipleInputsKeyInputFormat.html       |  144 +
 .../avro/class-use/AvroMultipleInputsUtil.html  |  144 +
 ...InputFormat.CombinedAvroKeyRecordReader.html |  144 +
 .../class-use/CombinedAvroKeyInputFormat.html   |  144 +
 .../datafu/hourglass/avro/package-frame.html    |   50 +
 .../datafu/hourglass/avro/package-summary.html  |  210 +
 .../datafu/hourglass/avro/package-tree.html     |  174 +
 .../datafu/hourglass/avro/package-use.html      |  170 +
 .../0.1.3/datafu/hourglass/fs/DatePath.html     |  426 ++
 .../0.1.3/datafu/hourglass/fs/DateRange.html    |  281 +
 .../0.1.3/datafu/hourglass/fs/PathUtils.html    |  591 ++
 .../datafu/hourglass/fs/class-use/DatePath.html |  389 +
 .../hourglass/fs/class-use/DateRange.html       |  301 +
 .../hourglass/fs/class-use/PathUtils.html       |  144 +
 .../datafu/hourglass/fs/package-frame.html      |   36 +
 .../datafu/hourglass/fs/package-summary.html    |  182 +
 .../0.1.3/datafu/hourglass/fs/package-tree.html |  154 +
 .../0.1.3/datafu/hourglass/fs/package-use.html  |  233 +
 .../datafu/hourglass/jobs/AbstractJob.html      |  904 +++
 .../AbstractNonIncrementalJob.BaseCombiner.html |  264 +
 .../AbstractNonIncrementalJob.BaseMapper.html   |  264 +
 .../AbstractNonIncrementalJob.BaseReducer.html  |  264 +
 .../jobs/AbstractNonIncrementalJob.Report.html  |  355 +
 .../jobs/AbstractNonIncrementalJob.html         |  585 ++
 ...artitionCollapsingIncrementalJob.Report.html |  400 +
 ...stractPartitionCollapsingIncrementalJob.html |  801 ++
 ...artitionPreservingIncrementalJob.Report.html |  355 +
 ...stractPartitionPreservingIncrementalJob.html |  678 ++
 .../hourglass/jobs/DateRangeConfigurable.html   |  219 +
 .../datafu/hourglass/jobs/DateRangePlanner.html |  271 +
 .../datafu/hourglass/jobs/ExecutionPlanner.html |  799 ++
 .../datafu/hourglass/jobs/FileCleaner.html      |  309 +
 .../datafu/hourglass/jobs/IncrementalJob.html   |  591 ++
 .../jobs/MaxInputDataExceededException.html     |  253 +
 .../PartitionCollapsingExecutionPlanner.html    |  561 ++
 .../jobs/PartitionCollapsingIncrementalJob.html |  784 ++
 .../PartitionPreservingExecutionPlanner.html    |  437 ++
 .../jobs/PartitionPreservingIncrementalJob.html |  660 ++
 .../datafu/hourglass/jobs/ReduceEstimator.html  |  329 +
 .../0.1.3/datafu/hourglass/jobs/Setup.html      |  217 +
 .../datafu/hourglass/jobs/StagedOutputJob.html  |  533 ++
 .../datafu/hourglass/jobs/TimeBasedJob.html     |  527 ++
 .../datafu/hourglass/jobs/TimePartitioner.html  |  383 +
 .../hourglass/jobs/class-use/AbstractJob.html   |  231 +
 .../AbstractNonIncrementalJob.BaseCombiner.html |  180 +
 .../AbstractNonIncrementalJob.BaseMapper.html   |  180 +
 .../AbstractNonIncrementalJob.BaseReducer.html  |  180 +
 .../AbstractNonIncrementalJob.Report.html       |  180 +
 .../class-use/AbstractNonIncrementalJob.html    |  144 +
 ...artitionCollapsingIncrementalJob.Report.html |  180 +
 ...stractPartitionCollapsingIncrementalJob.html |  180 +
 ...artitionPreservingIncrementalJob.Report.html |  180 +
 ...stractPartitionPreservingIncrementalJob.html |  180 +
 .../jobs/class-use/DateRangeConfigurable.html   |  188 +
 .../jobs/class-use/DateRangePlanner.html        |  144 +
 .../jobs/class-use/ExecutionPlanner.html        |  188 +
 .../hourglass/jobs/class-use/FileCleaner.html   |  144 +
 .../jobs/class-use/IncrementalJob.html          |  206 +
 .../MaxInputDataExceededException.html          |  144 +
 .../PartitionCollapsingExecutionPlanner.html    |  144 +
 .../PartitionCollapsingIncrementalJob.html      |  144 +
 .../PartitionPreservingExecutionPlanner.html    |  144 +
 .../PartitionPreservingIncrementalJob.html      |  144 +
 .../jobs/class-use/ReduceEstimator.html         |  144 +
 .../datafu/hourglass/jobs/class-use/Setup.html  |  188 +
 .../jobs/class-use/StagedOutputJob.html         |  185 +
 .../hourglass/jobs/class-use/TimeBasedJob.html  |  223 +
 .../jobs/class-use/TimePartitioner.html         |  144 +
 .../datafu/hourglass/jobs/package-frame.html    |   89 +
 .../datafu/hourglass/jobs/package-summary.html  |  329 +
 .../datafu/hourglass/jobs/package-tree.html     |  188 +
 .../datafu/hourglass/jobs/package-use.html      |  271 +
 .../mapreduce/AvroKeyValueIdentityMapper.html   |  304 +
 .../hourglass/mapreduce/CollapsingCombiner.html |  463 ++
 .../hourglass/mapreduce/CollapsingMapper.html   |  458 ++
 .../hourglass/mapreduce/CollapsingReducer.html  |  553 ++
 .../hourglass/mapreduce/DelegatingCombiner.html |  355 +
 .../hourglass/mapreduce/DelegatingMapper.html   |  355 +
 .../hourglass/mapreduce/DelegatingReducer.html  |  355 +
 .../mapreduce/DistributedCacheHelper.html       |  296 +
 .../hourglass/mapreduce/ObjectMapper.html       |  280 +
 .../hourglass/mapreduce/ObjectProcessor.html    |  306 +
 .../hourglass/mapreduce/ObjectReducer.html      |  283 +
 .../datafu/hourglass/mapreduce/Parameters.html  |  305 +
 .../mapreduce/PartitioningCombiner.html         |  340 +
 .../hourglass/mapreduce/PartitioningMapper.html |  412 +
 .../mapreduce/PartitioningReducer.html          |  446 ++
 .../class-use/AvroKeyValueIdentityMapper.html   |  144 +
 .../mapreduce/class-use/CollapsingCombiner.html |  144 +
 .../mapreduce/class-use/CollapsingMapper.html   |  144 +
 .../mapreduce/class-use/CollapsingReducer.html  |  144 +
 .../mapreduce/class-use/DelegatingCombiner.html |  144 +
 .../mapreduce/class-use/DelegatingMapper.html   |  144 +
 .../mapreduce/class-use/DelegatingReducer.html  |  144 +
 .../class-use/DistributedCacheHelper.html       |  144 +
 .../mapreduce/class-use/ObjectMapper.html       |  217 +
 .../mapreduce/class-use/ObjectProcessor.html    |  237 +
 .../mapreduce/class-use/ObjectReducer.html      |  241 +
 .../mapreduce/class-use/Parameters.html         |  144 +
 .../class-use/PartitioningCombiner.html         |  144 +
 .../mapreduce/class-use/PartitioningMapper.html |  144 +
 .../class-use/PartitioningReducer.html          |  144 +
 .../hourglass/mapreduce/package-frame.html      |   60 +
 .../hourglass/mapreduce/package-summary.html    |  232 +
 .../hourglass/mapreduce/package-tree.html       |  169 +
 .../datafu/hourglass/mapreduce/package-use.html |  209 +
 .../datafu/hourglass/model/Accumulator.html     |  277 +
 .../hourglass/model/KeyValueCollector.html      |  225 +
 .../0.1.3/datafu/hourglass/model/Mapper.html    |  231 +
 .../0.1.3/datafu/hourglass/model/Merger.html    |  231 +
 .../hourglass/model/class-use/Accumulator.html  |  377 +
 .../model/class-use/KeyValueCollector.html      |  181 +
 .../hourglass/model/class-use/Mapper.html       |  289 +
 .../hourglass/model/class-use/Merger.html       |  265 +
 .../datafu/hourglass/model/package-frame.html   |   38 +
 .../datafu/hourglass/model/package-summary.html |  184 +
 .../datafu/hourglass/model/package-tree.html    |  153 +
 .../datafu/hourglass/model/package-use.html     |  232 +
 .../schemas/PartitionCollapsingSchemas.html     |  448 ++
 .../schemas/PartitionPreservingSchemas.html     |  426 ++
 .../hourglass/schemas/TaskSchemas.Builder.html  |  312 +
 .../datafu/hourglass/schemas/TaskSchemas.html   |  288 +
 .../class-use/PartitionCollapsingSchemas.html   |  220 +
 .../class-use/PartitionPreservingSchemas.html   |  212 +
 .../schemas/class-use/TaskSchemas.Builder.html  |  196 +
 .../schemas/class-use/TaskSchemas.html          |  235 +
 .../datafu/hourglass/schemas/package-frame.html |   38 +
 .../hourglass/schemas/package-summary.html      |  186 +
 .../datafu/hourglass/schemas/package-tree.html  |  155 +
 .../datafu/hourglass/schemas/package-use.html   |  220 +
 .../docs/hourglass/0.1.3/deprecated-list.html   |  146 +
 site/source/docs/hourglass/0.1.3/help-doc.html  |  223 +
 site/source/docs/hourglass/0.1.3/index-all.html | 1439 ++++
 site/source/docs/hourglass/0.1.3/index.html     |   39 +
 .../docs/hourglass/0.1.3/overview-frame.html    |   52 +
 .../docs/hourglass/0.1.3/overview-summary.html  |  188 +
 .../docs/hourglass/0.1.3/overview-tree.html     |  225 +
 site/source/docs/hourglass/0.1.3/package-list   |    6 +
 .../docs/hourglass/0.1.3/resources/inherit.gif  |  Bin 0 -> 57 bytes
 .../docs/hourglass/0.1.3/serialized-form.html   |  592 ++
 site/source/docs/hourglass/0.1.3/stylesheet.css |   29 +
 .../docs/hourglass/concepts.html.markdown       |   52 +
 .../docs/hourglass/contributing.html.markdown   |   19 +
 .../hourglass/getting-started.html.markdown.erb |  211 +
 .../docs/hourglass/javadoc.html.markdown.erb    |    9 +
 .../fonts/glyphicons-halflings-regular.eot      |  Bin 0 -> 20290 bytes
 .../fonts/glyphicons-halflings-regular.svg      |  229 +
 .../fonts/glyphicons-halflings-regular.ttf      |  Bin 0 -> 41236 bytes
 .../fonts/glyphicons-halflings-regular.woff     |  Bin 0 -> 23292 bytes
 .../images/Hourglass-Concepts-Collapsing.png    |  Bin 0 -> 7310 bytes
 .../Hourglass-Concepts-CollapsingReuse.png      |  Bin 0 -> 8282 bytes
 .../images/Hourglass-Concepts-Preserving.png    |  Bin 0 -> 7421 bytes
 site/source/images/Hourglass-Example1-Step1.png |  Bin 0 -> 7360 bytes
 site/source/images/Hourglass-Example1-Step2.png |  Bin 0 -> 8467 bytes
 .../Hourglass-Example2-DistinctMembers.png      |  Bin 0 -> 17701 bytes
 .../images/Hourglass-MapCombineReduce.png       |  Bin 0 -> 6849 bytes
 site/source/images/boxplot.png                  |  Bin 0 -> 22933 bytes
 site/source/images/egg-logo.png                 |  Bin 0 -> 8626 bytes
 .../images/glyphicons-halflings-regular.eot     |  Bin 0 -> 20290 bytes
 .../images/glyphicons-halflings-regular.svg     |  229 +
 .../images/glyphicons-halflings-regular.ttf     |  Bin 0 -> 41236 bytes
 .../images/glyphicons-halflings-regular.woff    |  Bin 0 -> 23292 bytes
 site/source/index.markdown.erb                  |   43 +
 site/source/javascripts/all.js                  |    2 +
 site/source/javascripts/bootstrap.js            | 2006 +++++
 site/source/javascripts/bootstrap.min.js        |    7 +
 site/source/javascripts/jquery-2.0.3.min.js     |    6 +
 site/source/layouts/_docs_nav.erb               |   15 +
 site/source/layouts/_footer.erb                 |    9 +
 site/source/layouts/_header.erb                 |   10 +
 site/source/layouts/blog.erb                    |   16 +
 site/source/layouts/docs.erb                    |   17 +
 site/source/layouts/layout.erb                  |   44 +
 site/source/sitemap.xml.builder                 |   15 +
 site/source/stylesheets/all.less                |   52 +
 site/source/stylesheets/bootstrap-theme.css     |  397 +
 site/source/stylesheets/bootstrap-theme.min.css |    7 +
 site/source/stylesheets/bootstrap.css           | 7118 ++++++++++++++++++
 site/source/stylesheets/bootstrap.min.css       |    7 +
 site/source/stylesheets/highlight.css.erb       |    1 +
 site/source/stylesheets/normalize.css           |  375 +
 758 files changed, 176299 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/.gitignore
----------------------------------------------------------------------
diff --git a/site/.gitignore b/site/.gitignore
new file mode 100644
index 0000000..c779352
--- /dev/null
+++ b/site/.gitignore
@@ -0,0 +1,17 @@
+# See http://help.github.com/ignore-files/ for more about ignoring files.
+#
+# If you find yourself ignoring temporary files generated by your text editor
+# or operating system, you probably want to add a global ignore instead:
+# git config --global core.excludesfile ~/.gitignore_global
+
+# Ignore bundler config
+/.bundle
+
+# Ignore the build directory
+/build
+
+# Ignore Sass' cache
+/.sass-cache
+
+# Ignore .DS_Store files
+.DS_Store

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/Gemfile
----------------------------------------------------------------------
diff --git a/site/Gemfile b/site/Gemfile
new file mode 100644
index 0000000..0daab67
--- /dev/null
+++ b/site/Gemfile
@@ -0,0 +1,24 @@
+# If you have OpenSSL installed, we recommend updating
+# the following line to use "https"
+source 'http://rubygems.org'
+
+gem "middleman", "~>3.2.0"
+
+# Live-reloading plugin
+gem "middleman-livereload", "~> 3.1.0"
+
+gem "middleman-blog"
+
+gem "middleman-syntax"
+
+gem "nokogiri"
+
+gem "builder"
+
+gem "redcarpet"
+
+gem 'therubyracer'
+gem "less"
+
+# For faster file watcher updates on Windows:
+gem "wdm", "~> 0.1.0", :platforms => [:mswin, :mingw]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/Gemfile.lock
----------------------------------------------------------------------
diff --git a/site/Gemfile.lock b/site/Gemfile.lock
new file mode 100644
index 0000000..4aabe41
--- /dev/null
+++ b/site/Gemfile.lock
@@ -0,0 +1,130 @@
+GEM
+  remote: http://rubygems.org/
+  specs:
+    activesupport (3.2.16)
+      i18n (~> 0.6, >= 0.6.4)
+      multi_json (~> 1.0)
+    addressable (2.3.5)
+    atomic (1.1.14)
+    builder (3.1.4)
+    chunky_png (1.2.9)
+    coffee-script (2.2.0)
+      coffee-script-source
+      execjs
+    coffee-script-source (1.6.3)
+    commonjs (0.2.6)
+    compass (0.12.2)
+      chunky_png (~> 1.2)
+      fssm (>= 0.2.7)
+      sass (~> 3.1)
+    em-websocket (0.5.0)
+      eventmachine (>= 0.12.9)
+      http_parser.rb (~> 0.5.3)
+    eventmachine (1.0.3)
+    execjs (1.4.0)
+      multi_json (~> 1.0)
+    ffi (1.9.3)
+    fssm (0.2.10)
+    haml (4.0.4)
+      tilt
+    hike (1.2.3)
+    http_parser.rb (0.5.3)
+    i18n (0.6.9)
+    kramdown (1.3.0)
+    less (2.2.2)
+      commonjs (~> 0.2.6)
+    libv8 (3.11.8.13)
+    listen (1.3.1)
+      rb-fsevent (>= 0.9.3)
+      rb-inotify (>= 0.9)
+      rb-kqueue (>= 0.2)
+    middleman (3.2.0)
+      coffee-script (~> 2.2.0)
+      compass (>= 0.12.2)
+      execjs (~> 1.4.0)
+      haml (>= 3.1.6)
+      kramdown (~> 1.2)
+      middleman-core (= 3.2.0)
+      middleman-sprockets (>= 3.1.2)
+      sass (>= 3.1.20)
+      uglifier (~> 2.1.0)
+    middleman-blog (3.5.0)
+      addressable (~> 2.3.5)
+      middleman-core (~> 3.2)
+      tzinfo (>= 0.3.0)
+    middleman-core (3.2.0)
+      activesupport (~> 3.2.6)
+      bundler (~> 1.1)
+      i18n (~> 0.6.1)
+      listen (~> 1.1)
+      rack (>= 1.4.5)
+      rack-test (~> 0.6.1)
+      thor (>= 0.15.2, < 2.0)
+      tilt (~> 1.3.6)
+    middleman-livereload (3.1.0)
+      em-websocket (>= 0.2.0)
+      middleman-core (>= 3.0.2)
+      multi_json (~> 1.0)
+      rack-livereload
+    middleman-sprockets (3.2.0)
+      middleman-core (~> 3.2)
+      sprockets (~> 2.1)
+      sprockets-helpers (~> 1.0.0)
+      sprockets-sass (~> 1.0.0)
+    middleman-syntax (1.2.1)
+      middleman-core (~> 3.0)
+      rouge (~> 0.3.0)
+    multi_json (1.8.2)
+    nokogiri (1.5.6)
+    rack (1.5.2)
+    rack-livereload (0.3.15)
+      rack
+    rack-test (0.6.2)
+      rack (>= 1.0)
+    rb-fsevent (0.9.3)
+    rb-inotify (0.9.2)
+      ffi (>= 0.5.0)
+    rb-kqueue (0.2.0)
+      ffi (>= 0.5.0)
+    redcarpet (2.2.2)
+    ref (1.0.2)
+    rouge (0.3.10)
+      thor
+    sass (3.2.12)
+    sprockets (2.10.1)
+      hike (~> 1.2)
+      multi_json (~> 1.0)
+      rack (~> 1.0)
+      tilt (~> 1.1, != 1.3.0)
+    sprockets-helpers (1.0.1)
+      sprockets (~> 2.0)
+    sprockets-sass (1.0.2)
+      sprockets (~> 2.0)
+      tilt (~> 1.1)
+    therubyracer (0.11.3)
+      libv8 (~> 3.11.8.12)
+      ref
+    thor (0.18.1)
+    thread_safe (0.1.3)
+      atomic
+    tilt (1.3.7)
+    tzinfo (1.1.0)
+      thread_safe (~> 0.1)
+    uglifier (2.1.2)
+      execjs (>= 0.3.0)
+      multi_json (~> 1.0, >= 1.0.2)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  builder
+  less
+  middleman (~> 3.2.0)
+  middleman-blog
+  middleman-livereload (~> 3.1.0)
+  middleman-syntax
+  nokogiri
+  redcarpet
+  therubyracer
+  wdm (~> 0.1.0)

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/README.md
----------------------------------------------------------------------
diff --git a/site/README.md b/site/README.md
new file mode 100644
index 0000000..5faf22b
--- /dev/null
+++ b/site/README.md
@@ -0,0 +1,31 @@
+# Apache DataFu website
+
+We use [Middleman](http://middlemanapp.com/) to generate the website content.  This requires Ruby.
+
+## Setup
+
+Install bundler if you don't already have it:
+
+    gem install bundler
+
+Install gems required by website (includes middleman):
+
+    bundle install
+
+## Run the Server
+
+Middleman includes a server that can be run locally.  When making changes to the website
+it is usually good practice to run the server to see what the changes look like in a
+browser.
+
+    bundle exec middleman
+
+Now visit [http://localhost:4567/](http://localhost:4567/) to see the website in action.
+
+## Build the website
+
+The static content can be built with:
+
+    bundle exec middleman build
+
+This will produce the content in the `/build` directory.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/config.rb
----------------------------------------------------------------------
diff --git a/site/config.rb b/site/config.rb
new file mode 100644
index 0000000..f1df961
--- /dev/null
+++ b/site/config.rb
@@ -0,0 +1,87 @@
+
+Dir['./lib/*'].each { |f| require f }
+
+###
+# Compass
+###
+
+# Change Compass configuration
+# compass_config do |config|
+#   config.output_style = :compact
+# end
+
+###
+# Page options, layouts, aliases and proxies
+###
+
+# Per-page layout changes:
+#
+# With no layout
+# page "/path/to/file.html", :layout => false
+#
+# With alternative layout
+# page "/path/to/file.html", :layout => :otherlayout
+#
+# A path which all have the same layout
+# with_layout :admin do
+#   page "/admin/*"
+# end
+
+# Proxy pages (http://middlemanapp.com/dynamic-pages/)
+# proxy "/this-page-has-no-template.html", "/template-file.html", :locals => {
+#  :which_fake_page => "Rendering a fake page with a local variable" }
+
+###
+# Helpers
+###
+
+# Automatic image dimensions on image_tag helper
+# activate :automatic_image_sizes
+
+# Reload the browser automatically whenever files change
+# activate :livereload
+
+# Methods defined in the helpers block are available in templates
+# helpers do
+#   def some_helper
+#     "Helping"
+#   end
+# end
+
+set :css_dir, 'stylesheets'
+
+set :js_dir, 'javascripts'
+
+set :images_dir, 'images'
+
+activate :syntax
+
+set :markdown_engine, :redcarpet
+set :markdown, :tables => true, :autolink => true, :gh_blockcode => true, :fenced_code_blocks => true, :with_toc_data => true
+
+# Build-specific configuration
+configure :build do
+  # For example, change the Compass output style for deployment
+  # activate :minify_css
+
+  # Minify Javascript on build
+  # activate :minify_javascript
+
+  # Enable cache buster
+  # activate :asset_hash
+
+  # Use relative URLs
+  # activate :relative_assets
+
+  # Or use a different image path
+  # set :http_prefix, "/Content/images/"
+end
+
+activate :blog do |blog|
+  blog.prefix = "blog"
+  blog.layout = "blog"
+end
+
+page "/", :layout => "docs"
+page "/docs/*", :layout => "docs"
+

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/lib/pig.rb
----------------------------------------------------------------------
diff --git a/site/lib/pig.rb b/site/lib/pig.rb
new file mode 100644
index 0000000..f193822
--- /dev/null
+++ b/site/lib/pig.rb
@@ -0,0 +1,73 @@
+
+require 'rouge'
+
+class Pig < Rouge::RegexLexer
+  desc "Pig"
+  tag 'pig'
+  filenames '*.pig'
+  mimetypes 'text/x-pig'
+
+  def self.keywords
+    @keywords ||= Set.new %w(
+      ASSERT COGROUP CROSS DEFINE DISTINCT FILTER
+      FOREACH GROUP IMPORT JOIN LIMIT LOAD MAPREDUCE
+      ORDER BY SAMPLE SPLIT STORE STREAM UNION
+      GENERATE ALL DUMP AS REGISTER USING ASC DESC ANY 
+      FULL INNER OUTER EXEC DESCRIBE CASE EXPLAIN 
+      ILLUSTRATE IS INTO IF LEFT RIGHT MATCHES PARALLEL
+      ROLLUP SHIP AND OR NOT
+
+      AVG MIN MAX SIZE TOKENIZE FLATTEN RANK CUBE COUNT
+      CONCAT SUM SQRT COUNT_STAR
+      )
+  end
+
+  state :root do
+    rule /\s+/m, 'Text'
+    rule /--.*?\n/, 'Comment.Single'
+    rule %r(/\*), 'Comment.Multiline', :multiline_comments
+    rule /\d+/, 'Literal.Number.Integer'
+    rule /'/, 'Literal.String.Single', :single_string
+    rule /"/, 'Name.Variable', :double_string
+    rule /`/, 'Name.Variable', :backtick
+
+    rule /[$]?\w[\w\d]*/ do |m|
+      if self.class.keywords.include? m[0].upcase
+        token 'Keyword'
+      else
+        token 'Name'
+      end
+    end
+
+    rule %r([+*/<>=~!@#%^&|?^-]), 'Operator'
+    rule /[;:(){}\[\],.]/, 'Punctuation'
+  end
+
+  state :multiline_comments do
+    rule %r(/[*]), 'Comment.Multiline', :multiline_comments
+    rule %r([*]/), 'Comment.Multiline', :pop!
+    rule %r([^/*]+), 'Comment.Multiline'
+    rule %r([/*]), 'Comment.Multiline'
+  end
+
+  state :backtick do
+    rule /\\./, 'Literal.String.Escape'
+    rule /``/, 'Literal.String.Escape'
+    rule /`/, 'Name.Variable', :pop!
+    rule /[^\\`]+/, 'Name.Variable'
+  end
+
+  state :single_string do
+    rule /\\./, 'Literal.String.Escape'
+    rule /''/, 'Literal.String.Escape'
+    rule /'/, 'Literal.String.Single', :pop!
+    rule /[^\\']+/, 'Literal.String.Single'
+  end
+
+  state :double_string do
+    rule /\\./, 'Literal.String.Escape'
+    rule /""/, 'Literal.String.Escape'
+    rule /"/, 'Name.Variable', :pop!
+    rule /[^\\"]+/, 'Name.Variable'
+  end
+end

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/source/blog/2012-01-10-introducing-datafu.html.markdown
----------------------------------------------------------------------
diff --git a/site/source/blog/2012-01-10-introducing-datafu.html.markdown b/site/source/blog/2012-01-10-introducing-datafu.html.markdown
new file mode 100644
index 0000000..1ca9bfc
--- /dev/null
+++ b/site/source/blog/2012-01-10-introducing-datafu.html.markdown
@@ -0,0 +1,115 @@
+---
+title: Introducing DataFu, an open source collection of useful Apache Pig UDFs
+author: Matthew Hayes
+---
+
+At LinkedIn, we make extensive use of [Apache Pig](http://pig.apache.org/) for performing [data analysis on Hadoop](http://engineering.linkedin.com/hadoop/user-engagement-powered-apache-pig-and-hadoop). Pig is a simple, high-level programming language that consists of just a few dozen operators and makes it easy to write MapReduce jobs. For more advanced tasks, Pig also supports [User Defined Functions](http://pig.apache.org/docs/r0.9.1/udf.html) (UDFs), which let you integrate custom code in Java, Python, and JavaScript into your Pig scripts.
+
+Over time, as we worked on data intensive products such as [People You May Know](http://www.linkedin.com/pymk-results) and [Skills](http://www.linkedin.com/skills/), we developed a large number of UDFs at LinkedIn. Today, I'm happy to announce that we have consolidated these UDFs into a single, general-purpose library called [DataFu](https://github.com/linkedin/datafu) and we are open sourcing it under the Apache 2.0 license.
+
+DataFu includes UDFs for common statistics tasks, PageRank, set operations, bag operations, and a comprehensive suite of tests. Read on to learn more.
+
+### What's included?
+
+Here's a taste of what you can do with DataFu:
+
+* Run [PageRank](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/linkanalysis/PageRank.html) on a large number of independent graphs.
+* Perform set operations such as [intersect](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/bags/sets/SetIntersect.html) and [union](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/bags/sets/SetUnion.html).
+* Compute the [haversine](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/geo/HaversineDistInMiles.html) distance between two points on the globe.
+* Create an [assertion](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/util/ASSERT.html) on input data which will cause the script to fail if the condition is not met.
+* Perform various operations on bags such as [append a tuple](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/bags/AppendToBag.html), [prepend a tuple](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/bags/PrependToBag.html), [concatenate bags](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/bags/BagConcat.html), [generate unordered pairs](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/bags/UnorderedPairs.html), etc.
+* And [lots more](http://sna-projects.com/datafu/javadoc/0.0.4/).
+
+### Example: Computing Quantiles
+
+Let's walk through an example of how we could use DataFu. We will compute [quantiles](http://en.wikipedia.org/wiki/Quantile) for a fake data set. You can grab all the code for this example, including scripts to generate test data, from this gist.
+
+Let’s imagine that we collected 10,000 temperature readings from three sensors and have stored the data in [HDFS](http://hadoop.apache.org/hdfs/) under the name temperature.txt. The readings follow a normal distribution with mean values of 60, 50, and 40 degrees and standard deviation values of 5, 10, and 3.
+
+![box plot](/images/boxplot.png)
+
+We can use DataFu to compute quantiles using the [Quantile UDF](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/stats/Quantile.html). The constructor for the UDF takes the quantiles to be computed. In this case we provide 0.25, 0.5, and 0.75 to compute the 25th, 50th, and 75th percentiles (a.k.a. [quartiles](http://en.wikipedia.org/wiki/Quartile)). We also provide 0.0 and 1.0 to compute the min and max.
+
+Quantile UDF example script:
+
+```pig
+define Quartile datafu.pig.stats.Quantile('0.0','0.25','0.5','0.75','1.0');
+ 
+temperature = LOAD 'temperature.txt' AS (id:chararray, temp:double);
+ 
+temperature = GROUP temperature BY id;
+ 
+temperature_quartiles = FOREACH temperature {
+  sorted = ORDER temperature by temp; -- must be sorted
+  GENERATE group as id, Quartile(sorted.temp) as quartiles;
+}
+ 
+DUMP temperature_quartiles
+```
+
+Quantile UDF example output, 10,000 measurements:
+
+    (1,(41.58171454288797,56.559375253601715,59.91093458980706,63.335574106080365,79.2841731889925))
+    (2,(14.393515179526304,43.39558395897533,50.081758806889766,56.54245916209963,91.03574746442487))
+    (3,(29.865710766927595,37.86257868882021,39.97075970657039,41.989584898364704,51.31349575866486))
+
+The values in each row of the output are the min, 25th percentile, 50th percentile (median), 75th percentile, and max.
+
+### StreamingQuantile UDF
+
+The Quantile UDF determines the quantiles by reading the input values for a key in sorted order and picking out the quantiles based on the size of the input DataBag. Alternatively we can estimate quantiles using the [StreamingQuantile UDF](http://sna-projects.com/datafu/javadoc/0.0.4/index.html?datafu/pig/stats/StreamingQuantile.html), contributed to DataFu by [Josh Wills of Cloudera](http://www.linkedin.com/pub/josh-wills/0/82b/138), which does not require that the input data be sorted.
+
+StreamingQuantile UDF example script:
+
+```pig
+define Quartile datafu.pig.stats.StreamingQuantile('0.0','0.25','0.5','0.75','1.0');
+ 
+temperature = LOAD 'temperature.txt' AS (id:chararray, temp:double);
+ 
+temperature = GROUP temperature BY id;
+ 
+temperature_quartiles = FOREACH temperature {
+  -- sort not necessary
+  GENERATE group as id, Quartile(temperature.temp) as quartiles;
+}
+ 
+DUMP temperature_quartiles
+```
+
+StreamingQuantile UDF example output, 10,000 measurements:
+
+    (1,(41.58171454288797,56.24183579452584,59.61727093346221,62.919576028265375,79.2841731889925))
+    (2,(14.393515179526304,42.55929349057328,49.50432161293486,56.020101184758644,91.03574746442487))
+    (3,(29.865710766927595,37.64744333815733,39.84941055349095,41.77693877565934,51.31349575866486))
+
+Notice that the 25th, 50th, and 75th percentile values computed by StreamingQuantile are fairly close to the exact values computed by Quantile.
+
+### Accuracy vs. Runtime
+
+StreamingQuantile samples the data with in-memory buffers. It implements the [Accumulator interface](http://pig.apache.org/docs/r0.7.0/udf.html#Accumulator+Interface), which makes it much more efficient than the Quantile UDF for very large input data. Where Quantile needs access to all the input data, StreamingQuantile can be fed the data incrementally. With Quantile, the input data will be spilled to disk as the DataBag is materialized if it is too large to fit in memory. For very large input data, this can be significant.
+
+To demonstrate this, we can change our experiment so that instead of processing three sets of 10,000 measurements, we will process three sets of 1 billion. Let’s compare the output of Quantile and StreamingQuantile on this data set:
+
+Quantile UDF example output, 1 billion measurements:
+
+    (1,(30.524038,56.62764,60.000134,63.372384,90.561695))
+    (2,(-9.845137,43.25512,49.999536,56.74441,109.714687))
+    (3,(21.564769,37.976644,40.000025,42.023622,58.057268))
+
+StreamingQuantile UDF example output, 1 billion measurements:
+
+    (1,(30.524038,55.993967,59.488968,62.775554,90.561695))
+    (2,(-9.845137,41.95725,48.977708,55.554239,109.714687))
+    (3,(21.564769,37.569332,39.692373,41.666762,58.057268))
+
+The 25th, 50th, and 75th percentile values computed using StreamingQuantile are only estimates, but they are pretty close to the exact values computed with Quantile. With StreamingQuantile and Quantile there is a tradeoff between accuracy and runtime. The script using Quantile takes **5 times as long** to run as the one using StreamingQuantile when the input is the three sets of 1 billion measurements.
+
+### Testing
+
+DataFu has a suite of unit tests for each UDF. Instead of just testing the Java code for a UDF directly, which might overlook issues with the way the UDF works in an actual Pig script, we used [PigUnit](http://pig.apache.org/docs/r0.8.1/pigunit.html) to do our testing. This let us run Pig scripts locally and still integrate our tests into a framework such as [JUnit](http://www.junit.org/) or [TestNG](http://testng.org/).
+
+We have also integrated the code coverage tracking tool [Cobertura](http://cobertura.sourceforge.net/) into our Ant build file. This helps us flag areas in DataFu which lack sufficient testing.
+
+### Conclusion
+
+We hope this gives you a taste of what you can do with DataFu. We are accepting contributions, so if you are interested in helping out, please fork the [code](https://github.com/linkedin/datafu) and send us your pull requests!
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/424e3b48/site/source/blog/2013-01-24-datafu-the-wd-40-of-big-data.markdown
----------------------------------------------------------------------
diff --git a/site/source/blog/2013-01-24-datafu-the-wd-40-of-big-data.markdown b/site/source/blog/2013-01-24-datafu-the-wd-40-of-big-data.markdown
new file mode 100644
index 0000000..fef337e
--- /dev/null
+++ b/site/source/blog/2013-01-24-datafu-the-wd-40-of-big-data.markdown
@@ -0,0 +1,105 @@
+---
+title: DataFu, The WD-40 of Big Data
+author: Matthew Hayes, Sam Shah
+---
+
+If Pig is the “[duct tape for big data](http://blog.linkedin.com/2010/07/01/linkedin-apache-pig/)”, then DataFu is the WD-40. Or something.
+
+No, seriously, DataFu is a collection of Pig UDFs for data analysis on Hadoop. DataFu includes routines for common statistics tasks (e.g., median, variance), PageRank, set operations, and bag operations.
+
+It’s helpful to understand the history of the library. Over the years, we developed several routines that were used across LinkedIn and were thrown together into an internal package we affectionately called “littlepiggy.” The unfortunate part, and this is true of many such efforts, is that the UDFs were ill-documented, ill-organized, and easily broken when someone made a change. Along came [PigUnit](http://pig.apache.org/docs/r0.10.0/test.html#pigunit), which allowed UDF testing, so we spent the time to clean up these routines by adding documentation and rigorous unit tests. We thought this “datafoo” package would help the community at large, and there you have DataFu.
+
+So what can this library do for you? Let’s look at one of the classical examples that showcase the power and flexibility of Pig: sessionizing a click stream.
+
+<pre>
+<code>
+A = load 'clicks';
+B = group A by user;
+C = foreach B {
+  C1 = order A by timestamp;
+  generate user, <em>Sessionize</em>(C1);
+}
+D = group C by session_id;
+E = foreach D generate group as session_id, (MAX(C.timestamp) - MIN(C.timestamp)) as session_length;
+F = group E all;
+G = foreach F generate
+  AVG(E.session_length) as avg_session_length,
+  SQRT(<em>VAR</em>(E.session_length)) as sd_session_length,
+  <em>MEDIAN</em>(E.session_length) as median_session_length,
+  <em>Q75</em>(E.session_length) as session_length_75pct,
+  <em>Q90</em>(E.session_length) as session_length_90pct,
+  <em>Q95</em>(E.session_length) as session_length_95pct;
+</code>
+</pre>
+
+(In fact, this is basically the example for the Accumulator interface that was added in Pig 0.6.)
+
+Here, we’re just computing some summary statistics on a sessionized click stream. Pig does the heavy lifting of transforming your query into MapReduce goodness, but DataFu fills in the gaps by providing the missing routines for every italicized function.
+
+Below you can grab sample data and code to run this sessionization example on your own.
+
+### Sessionization Example
+
+Suppose that we have a stream of page views from which we have extracted a member ID, a UNIX timestamp (in milliseconds), and a URL. It might look something like this:
+
+    memberId timestamp      url
+    1        1357718725941  /
+    1        1357718871442  /profile
+    1        1357719038706  /inbox
+    1        1357719110742  /groups
+    ...
+    2        1357752955401  /inbox
+    2        1357752982385  /profile
+    ...
+
+The full data set for this example can be found [here](https://gist.github.com/raw/4614332/8231534822295e4626af75b3341239177ec44fbe/clicks.csv).
+
+Using DataFu we can assign session IDs to each of these events and group by session ID in order to compute the length of each session. From there we can complete the exercise by simply applying the statistics UDFs provided by DataFu.
+
+```pig
+REGISTER piggybank.jar; -- provides UnixToISO
+REGISTER datafu-0.0.6.jar; -- provides the datafu.pig.* UDFs defined below
+REGISTER guava-13.0.1.jar; -- needed by StreamingQuantile
+ 
+DEFINE UnixToISO   org.apache.pig.piggybank.evaluation.datetime.convert.UnixToISO();
+DEFINE Sessionize  datafu.pig.sessions.Sessionize('10m'); -- NOTE(review): '10m' is presumably the session timeout; confirm against the DataFu docs
+DEFINE Median      datafu.pig.stats.Median();
+DEFINE Quantile    datafu.pig.stats.StreamingQuantile('0.75','0.90','0.95'); -- 75th, 90th and 95th percentiles
+DEFINE VAR         datafu.pig.stats.VAR();
+ 
+pv = LOAD 'clicks.csv' USING PigStorage(',') AS (memberId:int, time:long, url:chararray);
+ 
+pv = FOREACH pv
+     -- Sessionize expects an ISO string
+     GENERATE UnixToISO(time) as isoTime,
+              time,
+              memberId;
+ 
+pv_sessionized = FOREACH (GROUP pv BY memberId) {
+  ordered = ORDER pv BY isoTime; -- each member's page views in time order
+  GENERATE FLATTEN(Sessionize(ordered)) AS (isoTime, time, memberId, sessionId);
+};
+ 
+pv_sessionized = FOREACH pv_sessionized GENERATE sessionId, time;
+ 
+-- compute length of each session in minutes (time is in milliseconds)
+session_times = FOREACH (GROUP pv_sessionized BY sessionId)
+                GENERATE group as sessionId,
+                         (MAX(pv_sessionized.time)-MIN(pv_sessionized.time))
+                            / 1000.0 / 60.0 as session_length;
+ 
+-- compute stats on session length
+session_stats = FOREACH (GROUP session_times ALL) {
+  ordered = ORDER session_times BY session_length;
+  GENERATE
+    AVG(ordered.session_length) as avg_session,
+    SQRT(VAR(ordered.session_length)) as std_dev_session,
+    Median(ordered.session_length) as median_session,
+    Quantile(ordered.session_length) as quantiles_session;
+};
+ 
+DUMP session_stats; -- prints (avg, std dev, (median), (q75, q90, q95)); sample output:
+--(15.737532575757575,31.29552045993877,(2.848041666666667),(14.648516666666666,31.88788333333333,86.69525))
+```
+
+This is just a taste. There’s plenty more in the library for you to peruse. Take a look [here](http://data.linkedin.com/opensource/datafu). DataFu is freely available under the Apache 2 license. We welcome contributions, so please send us your pull requests!
\ No newline at end of file


Mime
View raw message