datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mha...@apache.org
Subject [46/51] [partial] incubator-datafu git commit: DATAFU-110 Update website for 1.3.0 release
Date Tue, 17 Nov 2015 23:50:32 GMT
http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/158f9991/site/source/docs/datafu/1.3.0/datafu/pig/hash/SHA.html
----------------------------------------------------------------------
diff --git a/site/source/docs/datafu/1.3.0/datafu/pig/hash/SHA.html b/site/source/docs/datafu/1.3.0/datafu/pig/hash/SHA.html
new file mode 100644
index 0000000..f690658
--- /dev/null
+++ b/site/source/docs/datafu/1.3.0/datafu/pig/hash/SHA.html
@@ -0,0 +1,335 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (1.8.0_60) on Tue Nov 17 10:08:51 PST 2015 -->
+<title>SHA (datafu-pig 1.3.0 API)</title>
+<meta name="date" content="2015-11-17">
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+<script type="text/javascript" src="../../../script.js"></script>
+</head>
+<body>
+<script type="text/javascript"><!--
+    try {
+        if (location.href.indexOf('is-external=true') == -1) {
+            parent.document.title="SHA (datafu-pig 1.3.0 API)";
+        }
+    }
+    catch(err) {
+    }
+//-->
+var methods = {"i0":10};
+var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
+var altColor = "altColor";
+var rowColor = "rowColor";
+var tableTab = "tableTab";
+var activeTableTab = "activeTableTab";
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar.top">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.top.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../datafu/pig/hash/MD5.html" title="class in datafu.pig.hash"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
+<li>Next&nbsp;Class</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?datafu/pig/hash/SHA.html" target="_top">Frames</a></li>
+<li><a href="SHA.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.org.apache.pig.EvalFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash</div>
+<h2 title="Class SHA" class="title">Class SHA</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;T&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../datafu/pig/util/SimpleEvalFunc.html" title="class in datafu.pig.util">datafu.pig.util.SimpleEvalFunc</a>&lt;java.lang.String&gt;</li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.SHA</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="typeNameLabel">SHA</span>
+extends <a href="../../../datafu/pig/util/SimpleEvalFunc.html" title="class in datafu.pig.util">SimpleEvalFunc</a>&lt;java.lang.String&gt;</pre>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested.class.summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field.summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../datafu/pig/hash/SHA.html#SHA--">SHA</a></span>()</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../datafu/pig/hash/SHA.html#SHA-java.lang.String-">SHA</a></span>(java.lang.String&nbsp;algorithm)</code>&nbsp;</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr id="i0" class="altColor">
+<td class="colFirst"><code>java.lang.String</code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../datafu/pig/hash/SHA.html#call-java.lang.String-">call</a></span>(java.lang.String&nbsp;value)</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.datafu.pig.util.SimpleEvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.util.<a href="../../../datafu/pig/util/SimpleEvalFunc.html" title="class in datafu.pig.util">SimpleEvalFunc</a></h3>
+<code><a href="../../../datafu/pig/util/SimpleEvalFunc.html#exec-org.apache.pig.data.Tuple-">exec</a>, <a href="../../../datafu/pig/util/SimpleEvalFunc.html#getReturnType--">getReturnType</a>, <a href="../../../datafu/pig/util/SimpleEvalFunc.html#outputSchema-org.apache.pig.impl.logicalLayer.schema.Schema-">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="SHA--">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SHA</h4>
+<pre>public&nbsp;SHA()</pre>
+</li>
+</ul>
+<a name="SHA-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>SHA</h4>
+<pre>public&nbsp;SHA(java.lang.String&nbsp;algorithm)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="call-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>call</h4>
+<pre>public&nbsp;java.lang.String&nbsp;call(java.lang.String&nbsp;value)</pre>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar.bottom">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.bottom.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../datafu/pig/hash/MD5.html" title="class in datafu.pig.hash"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
+<li>Next&nbsp;Class</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?datafu/pig/hash/SHA.html" target="_top">Frames</a></li>
+<li><a href="SHA.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.org.apache.pig.EvalFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/158f9991/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/CosineDistanceHash.html
----------------------------------------------------------------------
diff --git a/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/CosineDistanceHash.html b/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/CosineDistanceHash.html
new file mode 100644
index 0000000..162c126
--- /dev/null
+++ b/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/CosineDistanceHash.html
@@ -0,0 +1,488 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (1.8.0_60) on Tue Nov 17 10:08:51 PST 2015 -->
+<title>CosineDistanceHash (datafu-pig 1.3.0 API)</title>
+<meta name="date" content="2015-11-17">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+<script type="text/javascript" src="../../../../script.js"></script>
+</head>
+<body>
+<script type="text/javascript"><!--
+    try {
+        if (location.href.indexOf('is-external=true') == -1) {
+            parent.document.title="CosineDistanceHash (datafu-pig 1.3.0 API)";
+        }
+    }
+    catch(err) {
+    }
+//-->
+var methods = {"i0":10,"i1":10};
+var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
+var altColor = "altColor";
+var rowColor = "rowColor";
+var tableTab = "tableTab";
+var activeTableTab = "activeTableTab";
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar.top">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.top.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev&nbsp;Class</li>
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/CosineDistanceHash.html" target="_top">Frames</a></li>
+<li><a href="CosineDistanceHash.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class CosineDistanceHash" class="title">Class CosineDistanceHash</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;org.apache.pig.data.DataBag&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">datafu.pig.hash.lsh.LSHFunc</a></li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.CosineDistanceHash</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="typeNameLabel">CosineDistanceHash</span>
+extends <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></pre>
+<div class="block">From Wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ <pre>
+ Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
+ The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ (the number of buckets being much smaller than the universe of possible input items).
+ </pre>
+ 
+ In particular, this class implements a locality-sensitive hashing scheme that, with high probability, maps high-dimensional
+ vectors that are close together according to <a href="http://en.wikipedia.org/wiki/Cosine_similarity" target="_blank">Cosine Similarity</a>
+ into the same buckets.  Each LSH maps a vector onto one side or the other of a random hyperplane, thereby producing a single
+ bit as the hash value.  Multiple independent hashes can be run on the same input and aggregated together to form a
+ broader domain than a single bit.
+ 
+ For more information, see Charikar, Moses S. (2002). "Similarity Estimation Techniques from Rounding Algorithms". Proceedings of the 34th Annual ACM Symposium on Theory of Computing 2002.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested.class.summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field.summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#lsh">lsh</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#CosineDistanceHash-java.lang.String-java.lang.String-java.lang.String-">CosineDistanceHash</a></span>(java.lang.String&nbsp;sDim,
+                  java.lang.String&nbsp;sRepeat,
+                  java.lang.String&nbsp;sNumHashes)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#CosineDistanceHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">CosineDistanceHash</a></span>(java.lang.String&nbsp;sDim,
+                  java.lang.String&nbsp;sRepeat,
+                  java.lang.String&nbsp;sNumHashes,
+                  java.lang.String&nbsp;sSeed)</code>
+<div class="block">Locality sensitive hash that maps vectors onto 0,1 in such a way that colliding
+ vectors are "near" one another according to cosine similarity with high probability.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr id="i0" class="altColor">
+<td class="colFirst"><code>protected <a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a></code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#createLSHCreator--">createLSHCreator</a></span>()</code>&nbsp;</td>
+</tr>
+<tr id="i1" class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html#getDimension--">getDimension</a></span>()</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#exec-org.apache.pig.data.Tuple-">exec</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getSeed--">getSeed</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#outputSchema-org.apache.pig.impl.logicalLayer.schema.Schema-">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="CosineDistanceHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CosineDistanceHash</h4>
+<pre>public&nbsp;CosineDistanceHash(java.lang.String&nbsp;sDim,
+                          java.lang.String&nbsp;sRepeat,
+                          java.lang.String&nbsp;sNumHashes,
+                          java.lang.String&nbsp;sSeed)</pre>
+<div class="block">Locality sensitive hash that maps vectors onto 0,1 in such a way that colliding
+ vectors are "near" one another according to cosine similarity with high probability.  
+ 
+ <p>
+ Generally, multiple LSH functions are combined via repetition to increase the range of the hash function to the full set of longs.
+ The number of functions to repeat internally is specified by the sRepeat parameter.
+ 
+ The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
+ In a k-nearest-neighbors style of searching, this corresponds to the number of neighbors you want to find
+ (i.e. the number of vectors within a given distance of the query according to cosine similarity).
+ 
+ <p>
+ Consider the following example where we input some 3-dimensional points and a set of 3-dimensional queries
+ and find the nearest neighbors of the query points:
+ <pre>
+ -- Create a CosineDistanceHash of 
+ --   3 dimensional data
+ --   1500 internal hashes (being combined into one hash)
+ --   family of 5 hashes
+ --   with a seed of 0
+ 
+ -- This creates a bag of tuples:
+ --   lsh_id:Integer the family ID (in this case, 0-4)
+ --   hash:Long the hash 
+ 
+ define LSH datafu.pig.hash.lsh.CosineDistanceHash('3', '1500', '5', '0');
+ define METRIC datafu.pig.hash.lsh.metric.L2();
+
+ PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
+ 
+ --hash the input points
+ PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
+                    , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
+ 
+ -- the hash family ID and the hash should group the input points into partitions
+ PARTITIONS = group PTS_HASHED by (lsh_id, hash);
+ 
+ -- take in the query points and hash them
+ QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
+ QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
+                        , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
+                        ;
+ 
+ -- join the hashed query points with the (presumably larger) list of input data split by partitions
+ QUERIES_W_PARTS = join QUERIES_HASHED by (lsh_id, hash), PARTITIONS by (group.$0, group.$1);
+ 
+ -- Now, use the METRIC UDF defined above (datafu.pig.hash.lsh.metric.L2) to find the first point within
+ -- a parameterized threshold (in this case, .001).  It takes:
+ --   query_pt:Tuple the query point
+ --   threshold:Double the threshold, so that if the distance between the query point and a point
+ --                    in the partition is less than this threshold, it returns the point (and stops searching)
+ --   partition:Bag The bag of tuples in the partition.
+ 
+ -- Apply METRIC to each query point and the tuples from its joined partition (PTS_HASHED).
+ NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
+                                                 , METRIC(query_pt, .001, PTS_HASHED) as neighbor
+                                                 ;
+ describe NEAR_NEIGHBORS;
+ -- {query_pt: (dim1: double,dim2: double,dim3: double)
+ -- ,neighbor: (pt: (dim1: double,dim2: double,dim3: double)
+ --            ,lsh::lsh_id: int
+ --            ,lsh::hash: long
+ --            )
+ -- }
+ 
+ -- project out the query and the matching point
+ NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
+  generate query_pt as query_pt, neighbor.pt as matching_pts;
+ };
+ 
+ -- Filter out the hashes which resulted in no matches
+ NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
+ 
+ -- group by the query
+ NEIGHBORS_GRP = group NOT_NULL by query_pt;
+ describe NEIGHBORS_GRP;
+ 
+ -- Generate the query, the number of matches and the bag of matching points
+ NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
+    MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
+    DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
+    generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
+ };
+ describe NEIGHBOR_CNT;
+ -- NEIGHBOR_CNT: {query_pt: (dim1: double,dim2: double,dim3: double)
+ --               ,long
+ --               ,DIST_MATCHING_PTS: { (matching_pts::dim1: double,matching_pts::dim2: double,matching_pts::dim3: double)
+ --                              }
+ --               }
+ STORE NEIGHBOR_CNT INTO 'neighbors';
+ </pre></div>
+<dl>
+<dt><span class="paramLabel">Parameters:</span></dt>
+<dd><code>sDim</code> - Dimension of the vectors</dd>
+<dd><code>sRepeat</code> - Number of internal repetitions</dd>
+<dd><code>sNumHashes</code> - Size of the hash family (if you're looking for k near neighbors, this is the k)</dd>
+<dd><code>sSeed</code> - Seed to use when constructing LSH family</dd>
+</dl>
+</li>
+</ul>
+<a name="CosineDistanceHash-java.lang.String-java.lang.String-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>CosineDistanceHash</h4>
+<pre>public&nbsp;CosineDistanceHash(java.lang.String&nbsp;sDim,
+                          java.lang.String&nbsp;sRepeat,
+                          java.lang.String&nbsp;sNumHashes)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="createLSHCreator--">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>createLSHCreator</h4>
+<pre>protected&nbsp;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a>&nbsp;createLSHCreator()</pre>
+<dl>
+<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#createLSHCreator--">createLSHCreator</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+<a name="getDimension--">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getDimension</h4>
+<pre>protected&nbsp;int&nbsp;getDimension()</pre>
+<dl>
+<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getDimension--">getDimension</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar.bottom">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.bottom.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev&nbsp;Class</li>
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/CosineDistanceHash.html" target="_top">Frames</a></li>
+<li><a href="CosineDistanceHash.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/158f9991/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L1PStableHash.html
----------------------------------------------------------------------
diff --git a/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L1PStableHash.html b/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L1PStableHash.html
new file mode 100644
index 0000000..5dd98a2
--- /dev/null
+++ b/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L1PStableHash.html
@@ -0,0 +1,505 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (1.8.0_60) on Tue Nov 17 10:08:51 PST 2015 -->
+<title>L1PStableHash (datafu-pig 1.3.0 API)</title>
+<meta name="date" content="2015-11-17">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+<script type="text/javascript" src="../../../../script.js"></script>
+</head>
+<body>
+<script type="text/javascript"><!--
+    try {
+        if (location.href.indexOf('is-external=true') == -1) {
+            parent.document.title="L1PStableHash (datafu-pig 1.3.0 API)";
+        }
+    }
+    catch(err) {
+    }
+//-->
+var methods = {"i0":10,"i1":10};
+var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
+var altColor = "altColor";
+var rowColor = "rowColor";
+var tableTab = "tableTab";
+var activeTableTab = "activeTableTab";
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar.top">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.top.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L1PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L1PStableHash.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class L1PStableHash" class="title">Class L1PStableHash</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;org.apache.pig.data.DataBag&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">datafu.pig.hash.lsh.LSHFunc</a></li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.L1PStableHash</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="typeNameLabel">L1PStableHash</span>
+extends <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></pre>
+<div class="block">From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ <pre>
+ Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
+ The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ (the number of buckets being much smaller than the universe of possible input items).
+ </pre>
+ 
+ In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
+ close together (with high probability) according to the <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L1</a>
+ distance metric into the same buckets.  This implementation uses a 1-stable distribution (a Cauchy distribution) in order
+ to accomplish this.
+ 
+ For more information, see Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested.class.summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field.summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#lsh">lsh</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#seed">seed</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#L1PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">L1PStableHash</a></span>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#L1PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">L1PStableHash</a></span>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes,
+             java.lang.String&nbsp;sSeed)</code>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to the L1 distance metric with high probability.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr id="i0" class="altColor">
+<td class="colFirst"><code>protected <a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a></code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#createLSHCreator--">createLSHCreator</a></span>()</code>&nbsp;</td>
+</tr>
+<tr id="i1" class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html#getDimension--">getDimension</a></span>()</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#exec-org.apache.pig.data.Tuple-">exec</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getSeed--">getSeed</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#outputSchema-org.apache.pig.impl.logicalLayer.schema.Schema-">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="L1PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>L1PStableHash</h4>
+<pre>public&nbsp;L1PStableHash(java.lang.String&nbsp;sDim,
+                     java.lang.String&nbsp;sW,
+                     java.lang.String&nbsp;sRepeat,
+                     java.lang.String&nbsp;sNumHashes,
+                     java.lang.String&nbsp;sSeed)</pre>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to the L1 distance metric with high probability.  
+ 
+ <p>
+ Generally, multiple LSH are combined via repetition to increase the range of the hash function to the full set of longs.
+ The number of functions which you want to internally repeat is specified by the sRepeat parameter.
+ 
+ The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
+ In a k-near neighbors style of searching, this corresponds to the number of neighbors you want to find
+ (i.e. the number of vectors within a distance according to the L1 distance metric).
+ 
+ This UDF, like all p-stable LSH functions, is parameterized by a quantization parameter (w or r in the literature,
+ depending on where you look).  Consider the following excerpt from Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.
+ 
+ <pre>
+ Decreasing the width of the projection (w) decreases the probability of collision for any two points. 
+ Thus, it has the same effect as increasing k . As a result, we would like to set w as small as possible
+ and in this way decrease the number of projections we need to make. 
+ </pre>
+ 
+ In the literature, the quantization parameter (or width of the projection) is found empirically given a sample of
+ the data and the likely threshold for the metric.  Tuning this parameter is very important for the performance
+ of this algorithm.
+ 
+ <p>
+ Consider the following example where we input some 3-dimensional points and a set of 3-dimensional queries
+ and find the nearest neighbors of the query points:
+ <pre>
+ -- Create a L1PStableHash of 
+ --   3 dimensional data
+ --   projection width of 150
+ --   1 internal hash 
+ --   family of 5 hashes
+ --   with a seed of 0
+ 
+ -- This creates a bag of tuples:
+ --   lsh_id:Integer the family ID (in this case, 0-4)
+ --   hash:Long the hash 
+ 
+ define LSH datafu.pig.hash.lsh.L1PStableHash('3', '150', '1', '5', '0');
+ define METRIC datafu.pig.hash.lsh.metric.L1();
+
+ PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
+ 
+ --hash the input points
+ PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
+                    , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
+ 
+ -- the hash family ID and the hash should group the input points into partitions
+ PARTITIONS = group PTS_HASHED by (lsh_id, hash);
+ 
+ -- take in the query points and hash them
+ QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
+ QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
+                        , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
+                        ;
+ 
+ -- join the hashed query points with the (presumably larger) list of input data split by partitions
+ QUERIES_W_PARTS = join QUERIES_HASHED by (lsh_id, hash), PARTITIONS by (group.$0, group.$1);
+ 
+ -- Now, use the appropriate METRIC UDF (in this case L1 (aka city block) distance) to find the first point within
+ -- a parameterized threshold (in this case, 1000).  It takes:
+ --   query_pt:Tuple the query point
+ --   threshold:Double the threshold, so that if the distance between the query point and a point
+ --                    in the partition is less than this threshold, it returns the point (and stops searching)
+ --   partition:Bag The bag of tuples in the partition.
+ 
+  
+ NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
+                                                 , METRIC(query_pt, 1000, PTS_HASHED) as neighbor
+                                                 ;
+ describe NEAR_NEIGHBORS;
+ -- {query_pt: (dim1: double,dim2: double,dim3: double)
+ -- ,neighbor: (pt: (dim1: double,dim2: double,dim3: double)
+ --            ,lsh::lsh_id: int
+ --            ,lsh::hash: long
+ --            )
+ -- }
+ 
+ -- project out the query and the matching point
+ NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
+  generate query_pt as query_pt, neighbor.pt as matching_pts;
+ };
+ 
+ -- Filter out the hashes which resulted in no matches
+ NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
+ 
+ -- group by the query
+ NEIGHBORS_GRP = group NOT_NULL by query_pt;
+ describe NEIGHBORS_GRP;
+ 
+ -- Generate the query, the number of matches and the bag of matching points
+ NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
+    DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
+    generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
+ };
+ describe NEIGHBOR_CNT;
+ -- NEIGHBOR_CNT: {query_pt: (dim1: double,dim2: double,dim3: double)
+ --               ,long
+ --               ,DIST_MATCHING_PTS: { (matching_pts::dim1: double,matching_pts::dim2: double,matching_pts::dim3: double)
+ --                              }
+ --               }
+ STORE NEIGHBOR_CNT INTO 'neighbors';
+ </pre></div>
+<dl>
+<dt><span class="paramLabel">Parameters:</span></dt>
+<dd><code>sDim</code> - Dimension of the vectors</dd>
+<dd><code>sW</code> - A double representing the quantization parameter (also known as the projection width)</dd>
+<dd><code>sRepeat</code> - Number of internal repetitions (generally this should be 1 as the p-stable hashes have a larger range than one bit)</dd>
+<dd><code>sNumHashes</code> - Size of the hash family (if you're looking for k near neighbors, this is the k)</dd>
+<dd><code>sSeed</code> - Seed to use when constructing LSH family</dd>
+</dl>
+</li>
+</ul>
+<a name="L1PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>L1PStableHash</h4>
+<pre>public&nbsp;L1PStableHash(java.lang.String&nbsp;sDim,
+                     java.lang.String&nbsp;sW,
+                     java.lang.String&nbsp;sRepeat,
+                     java.lang.String&nbsp;sNumHashes)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="createLSHCreator--">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>createLSHCreator</h4>
+<pre>protected&nbsp;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a>&nbsp;createLSHCreator()</pre>
+<dl>
+<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#createLSHCreator--">createLSHCreator</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+<a name="getDimension--">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getDimension</h4>
+<pre>protected&nbsp;int&nbsp;getDimension()</pre>
+<dl>
+<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getDimension--">getDimension</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar.bottom">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.bottom.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/CosineDistanceHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L1PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L1PStableHash.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/158f9991/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L2PStableHash.html
----------------------------------------------------------------------
diff --git a/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L2PStableHash.html b/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L2PStableHash.html
new file mode 100644
index 0000000..ebb47d9
--- /dev/null
+++ b/site/source/docs/datafu/1.3.0/datafu/pig/hash/lsh/L2PStableHash.html
@@ -0,0 +1,507 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<!-- Generated by javadoc (1.8.0_60) on Tue Nov 17 10:08:51 PST 2015 -->
+<title>L2PStableHash (datafu-pig 1.3.0 API)</title>
+<meta name="date" content="2015-11-17">
+<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
+<script type="text/javascript" src="../../../../script.js"></script>
+</head>
+<body>
+<script type="text/javascript"><!--
+    try {
+        if (location.href.indexOf('is-external=true') == -1) {
+            parent.document.title="L2PStableHash (datafu-pig 1.3.0 API)";
+        }
+    }
+    catch(err) {
+    }
+//-->
+var methods = {"i0":10,"i1":10};
+var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
+var altColor = "altColor";
+var rowColor = "rowColor";
+var tableTab = "tableTab";
+var activeTableTab = "activeTableTab";
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar.top">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.top.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFamily.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L2PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L2PStableHash.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">datafu.pig.hash.lsh</div>
+<h2 title="Class L2PStableHash" class="title">Class L2PStableHash</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.apache.pig.EvalFunc&lt;org.apache.pig.data.DataBag&gt;</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">datafu.pig.hash.lsh.LSHFunc</a></li>
+<li>
+<ul class="inheritance">
+<li>datafu.pig.hash.lsh.L2PStableHash</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="typeNameLabel">L2PStableHash</span>
+extends <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></pre>
+<div class="block">From wikipedia's article on <a href="http://en.wikipedia.org/wiki/Locality-sensitive_hashing" target="_blank">Locality Sensitive Hashing</a>:
+ <pre>
+ Locality-sensitive hashing (LSH) is a method of performing probabilistic dimension reduction of high-dimensional data. 
+ The basic idea is to hash the input items so that similar items are mapped to the same buckets with high probability 
+ (the number of buckets being much smaller than the universe of possible input items).
+ </pre>
+ 
+ In particular, this implementation implements a locality sensitive hashing scheme which maps high-dimensional vectors which are
+ close together (with high probability) according to the <a href="http://en.wikipedia.org/wiki/Lp_space" target="_blank">L2</a>
+ distance metric into the same buckets.  This implementation uses a 2-stable distribution (a Gaussian distribution) in order
+ to accomplish this.
+ 
+ For more information, see Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested.class.summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested.classes.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>org.apache.pig.EvalFunc.SchemaType</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field.summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#lsh">lsh</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>log, pigLogger, reporter, returnType</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#L2PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">L2PStableHash</a></span>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#L2PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">L2PStableHash</a></span>(java.lang.String&nbsp;sDim,
+             java.lang.String&nbsp;sW,
+             java.lang.String&nbsp;sRepeat,
+             java.lang.String&nbsp;sNumHashes,
+             java.lang.String&nbsp;sSeed)</code>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to the L2 distance metric with high probability.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr id="i0" class="altColor">
+<td class="colFirst"><code>protected <a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a></code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#createLSHCreator--">createLSHCreator</a></span>()</code>&nbsp;</td>
+</tr>
+<tr id="i1" class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><span class="memberNameLink"><a href="../../../../datafu/pig/hash/lsh/L2PStableHash.html#getDimension--">getDimension</a></span>()</code>&nbsp;</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;datafu.pig.hash.lsh.<a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></h3>
+<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#exec-org.apache.pig.data.Tuple-">exec</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getSeed--">getSeed</a>, <a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#outputSchema-org.apache.pig.impl.logicalLayer.schema.Schema-">outputSchema</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.org.apache.pig.EvalFunc">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.apache.pig.EvalFunc</h3>
+<code>allowCompileTimeCalculation, finish, getArgToFuncMapping, getCacheFiles, getInputSchema, getLogger, getPigLogger, getReporter, getReturnType, getSchemaName, getSchemaType, getShipFiles, isAsynchronous, progress, setInputSchema, setPigLogger, setReporter, setUDFContextSignature, warn</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor.detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="L2PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>L2PStableHash</h4>
+<pre>public&nbsp;L2PStableHash(java.lang.String&nbsp;sDim,
+                     java.lang.String&nbsp;sW,
+                     java.lang.String&nbsp;sRepeat,
+                     java.lang.String&nbsp;sNumHashes,
+                     java.lang.String&nbsp;sSeed)</pre>
+<div class="block">Locality sensitive hash that maps vectors onto a long in such a way that colliding
+ vectors are "near" one another according to L2 (Euclidean) distance with high probability.  
+ 
+ <p>
+ Generally, multiple LSH are combined via repetition to increase the range of the hash function to the full set of longs.
+ The number of functions which you want to internally repeat is specified by the sRepeat parameter.
+ 
+ The size of the hash family corresponds to the number of independent hashes you want to apply to the data.
+ In a k-near neighbors style of searching, this corresponds to the number of neighbors you want to find
+ (i.e. the number of vectors within a distance according to the L2 (Euclidean) metric).
+ 
+ This UDF, like all p-stable LSH functions, is parameterized with a quantization parameter (w or r in the
+ literature, depending on where you look).  Consider the following excerpt from Datar, M.; Immorlica, N.; Indyk, P.; Mirrokni, V.S. (2004). "Locality-Sensitive Hashing Scheme Based on p-Stable Distributions". Proceedings of the Symposium on Computational Geometry.
+ 
+ <pre>
+ Decreasing the width of the projection (w) decreases the probability of collision for any two points. 
+ Thus, it has the same effect as increasing k . As a result, we would like to set w as small as possible
+ and in this way decrease the number of projections we need to make. 
+ </pre>
+ 
+ In the literature, the quantization parameter (or width of the projection) is found empirically given a sample of
+ the data and the likely threshold for the metric.  Tuning this parameter is very important for the performance
+ of this algorithm.
+ 
+ <p>
+ Consider the following example where we input some 3-dimensional points and a set of 3-dimensional queries
+ and find the nearest neighbors of the query points:
+ <pre>
+ -- Create an L2PStableHash of 
+ --   3 dimensional data
+ --   projection width of 200
+ --   1 internal hash 
+ --   family of 5 hashes
+ --   with a seed of 0
+ 
+ -- This creates a bag of tuples:
+ --   lsh_id:Integer the family ID (in this case, 0-4)
+ --   hash:Long the hash 
+ 
+ define LSH datafu.pig.hash.lsh.L2PStableHash('3', '200', '1', '5', '0');
+ define METRIC datafu.pig.hash.lsh.metric.L2();
+
+ PTS = LOAD 'input' AS (dim1:double, dim2:double, dim3:double);
+ 
+ --hash the input points
+ PTS_HASHED = foreach PTS generate TOTUPLE(dim1, dim2, dim3) as pt
+                    , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)));
+ 
+ -- the hash family ID and the hash should group the input points into partitions
+ PARTITIONS = group PTS_HASHED by (lsh_id, hash);
+ 
+ -- take in the query points and hash them
+ QUERIES = LOAD 'queries' as (dim1:double, dim2:double, dim3:double);
+ QUERIES_HASHED = foreach QUERIES generate TOTUPLE(dim1, dim2, dim3) as query_pt
+                        , FLATTEN(LSH(TOTUPLE(dim1, dim2, dim3)))
+                        ;
+ 
+ -- join the hashed query points with the (presumably larger) list of input data split by partitions
+ QUERIES_W_PARTS = join QUERIES_HASHED by (lsh_id, hash), PARTITIONS by (group.$0, group.$1);
+ 
+ -- Now, use the appropriate METRIC UDF (in this case L2 (aka Euclidean) distance) to find the first point within
+ -- a parameterized threshold (in this case, 1000).  It takes:
+ --   query_pt:Tuple the query point
+ --   threshold:Double the threshold, so that if the distance between the query point and a point
+ --                    in the partition is less than this threshold, it returns the point (and stops searching)
+ --   partition:Bag The bag of tuples in the partition.
+ 
+ 
+ NEAR_NEIGHBORS = foreach QUERIES_W_PARTS generate query_pt as query_pt
+                                                 , METRIC(query_pt, 1000, PTS_HASHED) as neighbor
+                                                 ;
+ describe NEAR_NEIGHBORS;
+ -- {query_pt: (dim1: double,dim2: double,dim3: double)
+ -- ,neighbor: (pt: (dim1: double,dim2: double,dim3: double)
+ --            ,lsh::lsh_id: int
+ --            ,lsh::hash: long
+ --            )
+ -- }
+ 
+ -- project out the query and the matching point
+ NEIGHBORS_PROJ = foreach NEAR_NEIGHBORS {
+  generate query_pt as query_pt, neighbor.pt as matching_pts;
+ };
+ 
+ -- Filter out the hashes which resulted in no matches
+ NOT_NULL = filter NEIGHBORS_PROJ by SIZE(matching_pts) &gt; 0;
+ 
+ -- group by the query
+ NEIGHBORS_GRP = group NOT_NULL by query_pt;
+ describe NEIGHBORS_GRP;
+ 
+ -- Generate the query, the number of matches and the bag of matching points
+ NEIGHBOR_CNT = foreach NEIGHBORS_GRP{
+    MATCHING_PTS = foreach NOT_NULL generate FLATTEN(matching_pts);
+    DIST_MATCHING_PTS = DISTINCT MATCHING_PTS;
+    generate group as query_pt, COUNT(NOT_NULL), DIST_MATCHING_PTS;
+ };
+ describe NEIGHBOR_CNT;
+ -- NEIGHBOR_CNT: {query_pt: (dim1: double,dim2: double,dim3: double)
+ --               ,long
+ --               ,DIST_MATCHING_PTS: { (matching_pts::dim1: double,matching_pts::dim2: double,matching_pts::dim3: double)
+ --                              }
+ --               }
+ STORE NEIGHBOR_CNT INTO 'neighbors';
+ </pre></div>
+<dl>
+<dt><span class="paramLabel">Parameters:</span></dt>
+<dd><code>sDim</code> - Dimension of the vectors</dd>
+<dd><code>sW</code> - A double representing the quantization parameter (also known as the projection width)</dd>
+<dd><code>sRepeat</code> - Number of internal repetitions (generally this should be 1 as the p-stable hashes have a larger range than one bit)</dd>
+<dd><code>sNumHashes</code> - Size of the hash family (if you're looking for k near neighbors, this is the k)</dd>
+<dd><code>sSeed</code> - Seed to use when constructing LSH family</dd>
+</dl>
+</li>
+</ul>
+<a name="L2PStableHash-java.lang.String-java.lang.String-java.lang.String-java.lang.String-">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>L2PStableHash</h4>
+<pre>public&nbsp;L2PStableHash(java.lang.String&nbsp;sDim,
+                     java.lang.String&nbsp;sW,
+                     java.lang.String&nbsp;sRepeat,
+                     java.lang.String&nbsp;sNumHashes)</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method.detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="createLSHCreator--">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>createLSHCreator</h4>
+<pre>protected&nbsp;<a href="../../../../datafu/pig/hash/lsh/interfaces/LSHCreator.html" title="class in datafu.pig.hash.lsh.interfaces">LSHCreator</a>&nbsp;createLSHCreator()</pre>
+<dl>
+<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#createLSHCreator--">createLSHCreator</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+<a name="getDimension--">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getDimension</h4>
+<pre>protected&nbsp;int&nbsp;getDimension()</pre>
+<dl>
+<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
+<dd><code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html#getDimension--">getDimension</a></code>&nbsp;in class&nbsp;<code><a href="../../../../datafu/pig/hash/lsh/LSHFunc.html" title="class in datafu.pig.hash.lsh">LSHFunc</a></code></dd>
+</dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar.bottom">
+<!--   -->
+</a>
+<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
+<a name="navbar.bottom.firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../../overview-summary.html">Overview</a></li>
+<li><a href="package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../../index-all.html">Index</a></li>
+<li><a href="../../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../../datafu/pig/hash/lsh/L1PStableHash.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
+<li><a href="../../../../datafu/pig/hash/lsh/LSHFamily.html" title="class in datafu.pig.hash.lsh"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../../index.html?datafu/pig/hash/lsh/L2PStableHash.html" target="_top">Frames</a></li>
+<li><a href="L2PStableHash.html" target="_top">No&nbsp;Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested.classes.inherited.from.class.org.apache.pig.EvalFunc">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#fields.inherited.from.class.datafu.pig.hash.lsh.LSHFunc">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method.detail">Method</a></li>
+</ul>
+</div>
+<a name="skip.navbar.bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>


Mime
View raw message