helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ka...@apache.org
Subject svn commit: r1624796 [19/26] - in /helix/site-content: ./ 0.6.1-incubating-docs/ 0.6.1-incubating-docs/recipes/ 0.6.1-incubating-docs/releasenotes/ 0.6.2-incubating-docs/ 0.6.2-incubating-docs/recipes/ 0.6.2-incubating-docs/releasenotes/ 0.6.3-docs/ 0....
Date Sun, 14 Sep 2014 01:47:40 GMT
Added: helix/site-content/0.7.1-docs/recipes/rsync_replicated_file_store.html
URL: http://svn.apache.org/viewvc/helix/site-content/0.7.1-docs/recipes/rsync_replicated_file_store.html?rev=1624796&view=auto
==============================================================================
--- helix/site-content/0.7.1-docs/recipes/rsync_replicated_file_store.html (added)
+++ helix/site-content/0.7.1-docs/recipes/rsync_replicated_file_store.html Sun Sep 14 01:47:34 2014
@@ -0,0 +1,363 @@
+
+<!DOCTYPE html>
+<!--
+ Generated by Apache Maven Doxia at 2014-09-13
+ Rendered using Maven Reflow Skin 1.0.0 (http://andriusvelykis.github.com/reflow-maven-skin)
+-->
+<html  xml:lang="en" lang="en">
+
+	<head>
+		<meta charset="UTF-8" />
+		<title>Apache Helix - Near-Realtime Rsync Replicated File System</title>
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<meta name="description" content="" />
+		<meta http-equiv="content-language" content="en" />
+
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap.min.css" rel="stylesheet" />
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap-responsive.min.css" rel="stylesheet" />
+		<link href="../css/docs.css" rel="stylesheet" />
+		<link href="../css/reflow-skin.css" rel="stylesheet" />
+		
+		
+		<link href="../css/lightbox.css" rel="stylesheet" />
+		
+		<link href="../css/site.css" rel="stylesheet" />
+		<link href="../css/print.css" rel="stylesheet" media="print" />
+		
+		<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
+		<!--[if lt IE 9]>
+			<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+		<![endif]-->
+		
+<script type="text/javascript">var _gaq = _gaq || [];
+        _gaq.push(['_setAccount', 'UA-3211522-12']);
+        _gaq.push(['_trackPageview']);
+
+        (function() {
+        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+        })();</script>
+	</head>
+
+	<body class="page-recipes-rsync_replicated_file_store project-071-docs" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target">
+
+		<div class="navbar navbar-fixed-top">
+			<div class="navbar-inner">
+				<div class="container">
+					<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+					</a>
+					<a class="brand" href="../..">Apache Helix</a>
+					<div class="nav-collapse">
+						<ul class="nav pull-right">
+							<li><a href="../index.html" title="Helix 0.7.1 (beta)">Helix 0.7.1 (beta) </a></li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Helix <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../download.html" title="Download">Download </a></li>
+									<li><a href="../Building.html" title="Building">Building </a></li>
+									<li><a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Hands-On <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../Quickstart.html" title="Quick Start">Quick Start </a></li>
+									<li><a href="../Tutorial.html" title="Tutorial">Tutorial </a></li>
+									<li><a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a></li>
+								</ul>
+							</li>
+							<li class="dropdown active">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Recipes <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a></li>
+									<li><a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a></li>
+									<li class="active"><a href="" title="Rsync replicated file store">Rsync replicated file store </a></li>
+									<li><a href="service_discovery.html" title="Service Discovery">Service Discovery </a></li>
+									<li><a href="task_dag_execution.html" title="Distributed task DAG Execution">Distributed task DAG Execution </a></li>
+									<li><a href="user_def_rebalancer.html" title="User-defined rebalancer">User-defined rebalancer </a></li>
+								</ul>
+							</li>
+						</ul>
+					</div><!--/.nav-collapse -->
+				</div>
+			</div>
+		</div>
+		
+	<div class="container">
+	
+	<!-- Masthead
+	================================================== -->
+	<header class="jumbotron subhead">
+		<div class="row" id="banner">
+			<div class="span12">
+				<div class="pull-left">
+					<a href="../../" id="bannerLeft"><img src="../../images/helix-logo.jpg" alt='"''"' /></a>
+					<p class="lead">A cluster management framework for partitioned and replicated distributed resources</p>
+				</div>
+				<div class="pull-right">
+					<a href="http://www.apache.org/" id="bannerRight"><img src="../../images/feather_small.gif" alt='"''"' /></a>
+				</div>
+			</div>
+		</div>
+		<div>
+			<ul class="breadcrumb">
+				<li><a href="../../" title="Apache Helix">Apache Helix </a></li>
+				<li class="divider">/</li>
+				<li><a href="../" title="Release 0.7.1 (beta)">Release 0.7.1 (beta) </a></li>
+				<li class="divider">/</li>
+				<li>Near-Realtime Rsync Replicated File System</li>
+				<li class="publishDate version-date pull-right">Last Published: 2014-09-13</li>
+			</ul>
+		</div>
+	</header>
+
+	<div class="main-body">
+	<div class="row">
+		<div class="span12">
+			<div class="body-content">
+<!-- -
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. --> 
+<div class="section"> 
+ <div class="page-header">
+  <h2 id="Near-Realtime_Rsync_Replicated_File_System">Near-Realtime Rsync Replicated File System</h2>
+ </div> 
+ <div class="section"> 
+  <h3 id="Quick_Demo">Quick Demo</h3> 
+  <ul> 
+   <li>This demo starts 3 instances with id’s as <tt>localhost_12001, localhost_12002, localhost_12003</tt></li> 
+   <li>Each instance stores its files under <tt>/tmp/&lt;id&gt;/filestore</tt></li> 
+   <li><tt>localhost_12001</tt> is designated as the master, and <tt>localhost_12002</tt> and <tt>localhost_12003</tt> are the slaves</li> 
+   <li>Files written to the master are replicated to the slaves automatically. In this demo, a.txt and b.txt are written to <tt>/tmp/localhost_12001/filestore</tt> and they get replicated to other folders.</li> 
+   <li>When the master is stopped, <tt>localhost_12002</tt> is promoted to master.</li> 
+   <li>The other slave <tt>localhost_12003</tt> stops replicating from <tt>localhost_12001</tt> and starts replicating from new master <tt>localhost_12002</tt></li> 
+   <li>Files written to new master <tt>localhost_12002</tt> are replicated to <tt>localhost_12003</tt></li> 
+   <li>In the end state of this quick demo, <tt>localhost_12002</tt> is the master and <tt>localhost_12003</tt> is the slave. Manually create files under <tt>/tmp/localhost_12002/filestore</tt> and see that they appear in <tt>/tmp/localhost_12003/filestore</tt></li> 
+   <li>Ignore the interrupted exceptions on the console :-)</li> 
+  </ul> 
+  <div class="source"> 
+   <pre>git clone https://git-wip-us.apache.org/repos/asf/helix.git
+cd helix
+git checkout tags/helix-0.7.1
+cd recipes/rsync-replicated-file-system/
+mvn clean install package -DskipTests
+cd target/rsync-replicated-file-system-pkg/bin
+chmod +x *
+./quickdemo
+
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Overview">Overview</h3> 
+  <p>There are many applications that require storage for storing large number of relatively small data files. Examples include media stores to store small videos, images, mail attachments etc. Each of these objects is typically kilobytes, often no larger than a few megabytes. An additional distinguishing feature of these use cases is that files are typically only added or deleted, rarely updated. When there are updates, they do not have any concurrency requirements.</p> 
+  <p>These are much simpler requirements than what general purpose distributed file systems have to satisfy; these would include concurrent access to files, random access for reads and updates, posix compliance, and others. To satisfy those requirements, general DFSs are also pretty complex and expensive to build and maintain.</p> 
+  <p>A different implementation of a distributed file system includes HDFS which is inspired by Google’s GFS. This is one of the most widely used distributed file systems that forms the main data storage platform for Hadoop. HDFS is primarily aimed at processing very large data sets and distributes files across a cluster of commodity servers by splitting up files in fixed size chunks. HDFS is not particularly well suited for storing a very large number of relatively tiny files.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="File_Store">File Store</h3> 
+  <p>It’s possible to build a vastly simpler system for the class of applications that have simpler requirements as we have pointed out.</p> 
+  <ul> 
+   <li>Large number of files but each file is relatively small</li> 
+   <li>Access is limited to create, delete and get entire files</li> 
+   <li>No updates to files that are already created (or it’s feasible to delete the old file and create a new one)</li> 
+  </ul> 
+  <p>We call this system a Partitioned File Store (PFS) to distinguish it from other distributed file systems. This system needs to provide the following features:</p> 
+  <ul> 
+   <li>CRD access to large number of small files</li> 
+   <li>Scalability: Files should be distributed across a large number of commodity servers based on the storage requirement</li> 
+   <li>Fault-tolerance: Each file should be replicated on multiple servers so that individual server failures do not reduce availability</li> 
+   <li>Elasticity: It should be possible to add capacity to the cluster easily</li> 
+  </ul> 
+  <p>Apache Helix is a generic cluster management framework that makes it very easy to provide scalability, fault-tolerance and elasticity features. rsync can be easily used as a replication channel between servers so that each file gets replicated on multiple servers.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Design">Design</h3> 
+  <div class="section"> 
+   <h4 id="High_Level">High Level</h4> 
+   <ul> 
+    <li>Partition the file system based on the file name</li> 
+    <li>At any time a single writer can write, we call this a master</li> 
+    <li>For redundancy, we need to have additional replicas called slaves. Slaves can optionally serve reads</li> 
+    <li>Slave replicates data from the master</li> 
+    <li>When a master fails, a slave gets promoted to master</li> 
+   </ul> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Transaction_Log">Transaction Log</h4> 
+   <p>Every write on the master will result in creation/deletion of one or more files. In order to maintain timeline consistency, slaves need to apply the changes in the same order. To facilitate this, the master logs each transaction in a file and each transaction is associated with a 64 bit ID in which the 32 LSBs represent a sequence number and the MSBs represent the generation number. The sequence number gets incremented on every transaction and the generation is incremented when a new master is elected.</p> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Replication">Replication</h4> 
+   <p>Replication is required for slaves to keep up with changes on the master. Every time the slave applies a change it checkpoints the last applied transaction ID. During restarts, this allows the slave to pull changes from the last checkpointed ID. Similar to master, the slave logs each transaction to the transaction logs but instead of generating new transaction ID, it uses the same ID generated by the master.</p> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Failover">Failover</h4> 
+   <p>When a master fails, a new slave will be promoted to master. If the previous master node is reachable, then the new master will flush all the changes from the previous master before taking up mastership. The new master will record the end transaction ID of the current generation and then start a new generation with sequence starting from 1. After this the master will begin accepting writes.</p> 
+   <p><img src="../images/PFS-Generic.png" alt="Partitioned File Store" /></p> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Rsync-based_Solution">Rsync-based Solution</h3> 
+  <p><img src="../images/RSYNC_BASED_PFS.png" alt="Rsync based File Store" /></p> 
+  <p>This application demonstrates a file store that uses rsync as the replication mechanism. One can envision a similar system where instead of using rsync, one can implement a custom solution to notify the slave of the changes and also provide an api to pull the change files.</p> 
+  <div class="section"> 
+   <h4 id="Concepts">Concepts</h4> 
+   <ul> 
+    <li>file_store_dir: Root directory for the actual data files</li> 
+    <li>change_log_dir: The transaction logs are generated under this folder</li> 
+    <li>check_point_dir: The slave stores the check points ( last processed transaction) here</li> 
+   </ul> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Master">Master</h4> 
+   <ul> 
+    <li>File server: This component supports file uploads and downloads and writes the files to <tt>file_store_dir</tt>. This is not included in this application. The idea is that most applications have different ways of implementing this component and have some associated business logic. It is not hard to come up with such a component if needed.</li> 
+    <li>File store watcher: This component watches the <tt>file_store_dir</tt> directory on the local file system for any changes and notifies the registered listeners of the changes</li> 
+    <li>Change log generator: This registers as a listener of the file store watcher and on each notification logs the changes into a file under <tt>change_log_dir</tt></li> 
+   </ul> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Slave">Slave</h4> 
+   <ul> 
+    <li>File server: This component on the slave will only support reads</li> 
+    <li>Cluster state observer: Slave observes the cluster state and is able to know who is the current master</li> 
+    <li>Replicator: This has two subcomponents 
+     <ul> 
+      <li>Periodic rsync of change log: This is a background process that periodically rsyncs the <tt>change_log_dir</tt> of the master to its local directory</li> 
+      <li>Change Log Watcher: This watches the <tt>change_log_dir</tt> for changes and notifies the registered listeners of the change</li> 
+      <li>On demand rsync invoker: This is registered as a listener to change log watcher and on every change invokes rsync to sync only the changed file</li> 
+     </ul></li> 
+   </ul> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Coordination">Coordination</h4> 
+   <p>The coordination between nodes is done by Helix. Helix does the partition management and assigns the partition to multiple nodes based on the replication factor. It elects one of the nodes as master and designates others as slaves. It provides notifications to each node in the form of state transitions (Offline to Slave, Slave to Master). It also provides notifications when there is a change in cluster state. This allows the slave to stop replicating from current master and start replicating from new master.</p> 
+   <p>In this application, we have only one partition but it’s very easy to extend it to support multiple partitions. By partitioning the file store, one can add new nodes and Helix will automatically re-distribute partitions among the nodes. To summarize, Helix provides partition management, fault tolerance and facilitates automated cluster expansion.</p> 
+  </div> 
+ </div> 
+</div>
+			</div>
+		</div>
+	</div>
+	</div>
+
+	</div><!-- /container -->
+	
+	<!-- Footer
+	================================================== -->
+	<footer class="well">
+		<div class="container">
+			<div class="row">
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Get Helix</li>
+						<li>
+							<a href="../download.html" title="Download">Download </a>
+						</li>
+						<li>
+							<a href="../Building.html" title="Building">Building </a>
+						</li>
+						<li>
+							<a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Hands-On</li>
+						<li>
+							<a href="../Quickstart.html" title="Quick Start">Quick Start </a>
+						</li>
+						<li>
+							<a href="../Tutorial.html" title="Tutorial">Tutorial </a>
+						</li>
+						<li>
+							<a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Recipes</li>
+						<li>
+							<a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a>
+						</li>
+						<li>
+							<a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a>
+						</li>
+						<li class="active">
+							<a href="#" title="Rsync replicated file store">Rsync replicated file store </a>
+						</li>
+						<li>
+							<a href="service_discovery.html" title="Service Discovery">Service Discovery </a>
+						</li>
+						<li>
+							<a href="task_dag_execution.html" title="Distributed task DAG Execution">Distributed task DAG Execution </a>
+						</li>
+						<li>
+							<a href="user_def_rebalancer.html" title="User-defined rebalancer">User-defined rebalancer </a>
+						</li>
+					</ul>
+				</div>
+			</div>
+		</div>
+	</footer>
+		
+	<div class="container subfooter">
+		<div class="row">
+			<div class="span12">
+				<p class="pull-right"><a href="#">Back to top</a></p>
+				<p class="copyright">Copyright &copy;2014 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p>
+				<p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p>
+		
+<div class="row span16">
+  <div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation.
+        All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+  <a href="http://helix.apache.org/0.7.1-docs/privacy-policy.html">Privacy Policy</a>
+</div>			</div>
+		</div>
+	</div>
+
+	<!-- Le javascript
+	================================================== -->
+	<!-- Placed at the end of the document so the pages load faster -->
+
+	<!-- Fallback jQuery loading from Google CDN:
+	     http://stackoverflow.com/questions/1014203/best-way-to-use-googles-hosted-jquery-but-fall-back-to-my-hosted-library-on-go -->
+	<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+	<script type="text/javascript">
+		if (typeof jQuery == 'undefined')
+		{
+			document.write(unescape("%3Cscript src='../js/jquery-1.8.3.min.js' type='text/javascript'%3E%3C/script%3E"));
+		}
+	</script>
+	
+	<script src="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/js/bootstrap.min.js"></script>
+	<script src="../js/lightbox.js"></script>
+	<script src="../js/jquery.smooth-scroll.min.js"></script>
+	<!-- back button support for smooth scroll -->
+	<script src="../js/jquery.ba-bbq.min.js"></script>
+
+	<script src="../js/reflow-skin.js"></script>
+	
+	</body>
+</html>
\ No newline at end of file

Added: helix/site-content/0.7.1-docs/recipes/service_discovery.html
URL: http://svn.apache.org/viewvc/helix/site-content/0.7.1-docs/recipes/service_discovery.html?rev=1624796&view=auto
==============================================================================
--- helix/site-content/0.7.1-docs/recipes/service_discovery.html (added)
+++ helix/site-content/0.7.1-docs/recipes/service_discovery.html Sun Sep 14 01:47:34 2014
@@ -0,0 +1,408 @@
+
+<!DOCTYPE html>
+<!--
+ Generated by Apache Maven Doxia at 2014-09-13
+ Rendered using Maven Reflow Skin 1.0.0 (http://andriusvelykis.github.com/reflow-maven-skin)
+-->
+<html  xml:lang="en" lang="en">
+
+	<head>
+		<meta charset="UTF-8" />
+		<title>Apache Helix - Service Discovery</title>
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<meta name="description" content="" />
+		<meta http-equiv="content-language" content="en" />
+
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap.min.css" rel="stylesheet" />
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap-responsive.min.css" rel="stylesheet" />
+		<link href="../css/docs.css" rel="stylesheet" />
+		<link href="../css/reflow-skin.css" rel="stylesheet" />
+		
+		
+		<link href="../css/lightbox.css" rel="stylesheet" />
+		
+		<link href="../css/site.css" rel="stylesheet" />
+		<link href="../css/print.css" rel="stylesheet" media="print" />
+		
+		<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
+		<!--[if lt IE 9]>
+			<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+		<![endif]-->
+		
+<script type="text/javascript">var _gaq = _gaq || [];
+        _gaq.push(['_setAccount', 'UA-3211522-12']);
+        _gaq.push(['_trackPageview']);
+
+        (function() {
+        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+        })();</script>
+	</head>
+
+	<body class="page-recipes-service_discovery project-071-docs" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target">
+
+		<div class="navbar navbar-fixed-top">
+			<div class="navbar-inner">
+				<div class="container">
+					<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+					</a>
+					<a class="brand" href="../..">Apache Helix</a>
+					<div class="nav-collapse">
+						<ul class="nav pull-right">
+							<li><a href="../index.html" title="Helix 0.7.1 (beta)">Helix 0.7.1 (beta) </a></li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Helix <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../download.html" title="Download">Download </a></li>
+									<li><a href="../Building.html" title="Building">Building </a></li>
+									<li><a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Hands-On <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../Quickstart.html" title="Quick Start">Quick Start </a></li>
+									<li><a href="../Tutorial.html" title="Tutorial">Tutorial </a></li>
+									<li><a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a></li>
+								</ul>
+							</li>
+							<li class="dropdown active">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Recipes <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a></li>
+									<li><a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a></li>
+									<li><a href="rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a></li>
+									<li class="active"><a href="" title="Service Discovery">Service Discovery </a></li>
+									<li><a href="task_dag_execution.html" title="Distributed task DAG Execution">Distributed task DAG Execution </a></li>
+									<li><a href="user_def_rebalancer.html" title="User-defined rebalancer">User-defined rebalancer </a></li>
+								</ul>
+							</li>
+						</ul>
+					</div><!--/.nav-collapse -->
+				</div>
+			</div>
+		</div>
+		
+	<div class="container">
+	
+	<!-- Masthead
+	================================================== -->
+	<header class="jumbotron subhead">
+		<div class="row" id="banner">
+			<div class="span12">
+				<div class="pull-left">
+					<a href="../../" id="bannerLeft"><img src="../../images/helix-logo.jpg" alt='"''"' /></a>
+					<p class="lead">A cluster management framework for partitioned and replicated distributed resources</p>
+				</div>
+				<div class="pull-right">
+					<a href="http://www.apache.org/" id="bannerRight"><img src="../../images/feather_small.gif" alt='"''"' /></a>
+				</div>
+			</div>
+		</div>
+		<div>
+			<ul class="breadcrumb">
+				<li><a href="../../" title="Apache Helix">Apache Helix </a></li>
+				<li class="divider">/</li>
+				<li><a href="../" title="Release 0.7.1 (beta)">Release 0.7.1 (beta) </a></li>
+				<li class="divider">/</li>
+				<li>Service Discovery</li>
+				<li class="publishDate version-date pull-right">Last Published: 2014-09-13</li>
+			</ul>
+		</div>
+	</header>
+
+	<div class="main-body">
+	<div class="row">
+		<div class="span12">
+			<div class="body-content">
+<!-- -
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. --> 
+<div class="section"> 
+ <div class="page-header">
+  <h2 id="Service_Discovery">Service Discovery</h2>
+ </div> 
+ <p>One of the common usages of ZooKeeper is to enable service discovery. The basic idea is that when a server starts up it advertises its configuration/metadata such as its hostname and port on ZooKeeper. This allows clients to dynamically discover the servers that are currently active. One can think of this like a service registry to which a server registers when it starts and is automatically deregistered when it shuts down or crashes. In many cases it serves as an alternative to VIPs.</p> 
+ <p>The core idea behind this is to use ZooKeeper ephemeral nodes. The ephemeral nodes are created when the server registers and all its metadata is put into a ZNode. When the server shuts down, ZooKeeper automatically removes this ZNode.</p> 
+ <p>There are two ways the clients can dynamically discover the active servers:</p> 
+ <div class="section"> 
+  <h3 id="ZooKeeper_Watch">ZooKeeper Watch</h3> 
+  <p>Clients can set a child watch under a specific path on ZooKeeper. When a new service is registered/deregistered, ZooKeeper notifies the client via a watch event and the client can read the list of services. Even though this looks trivial, there are a lot of things one needs to keep in mind like ensuring that you first set the watch back on ZooKeeper before reading data.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Poll">Poll</h3> 
+  <p>Another approach is for the client to periodically read the ZooKeeper path and get the list of services.</p> 
+  <p>Both approaches have pros and cons, for example setting a watch might trigger a herd effect if there are a large number of clients. This is problematic, especially when servers are starting up. But the advantage to setting watches is that clients are immediately notified of a change which is not true in case of polling. In some cases, having both watches and polls makes sense; watch allows one to get notifications as soon as possible while poll provides a safety net if a watch event is missed because of a code bug or ZooKeeper fails to notify.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Other_Developer_Considerations">Other Developer Considerations</h3> 
+  <ul> 
+   <li>What happens when the ZooKeeper session expires? All the watches and ephemeral nodes previously added or created by this server are lost. One needs to add the watches again, recreate the ephemeral nodes, and so on.</li> 
+   <li>Due to network issues or Java GC pauses session expiry might happen again and again; this phenomenon is known as flapping. It's important for the server to detect this and deregister itself.</li> 
+  </ul> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Other_Operational_Considerations">Other Operational Considerations</h3> 
+  <ul> 
+   <li>What if the node is behaving badly? One might kill the server, but it will lose the ability to debug. It would be nice to have the ability to mark a server as disabled and clients know that a node is disabled and will not contact that node.</li> 
+  </ul> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Configuration_Ownership">Configuration Ownership</h3> 
+  <p>This is an important aspect that is often ignored in the initial stages of your development. Typically, the service discovery pattern means that servers start up with some configuration which they simply put into ZooKeeper. While this works well in the beginning, configuration management becomes very difficult since the servers themselves are statically configured. Any change in server configuration implies restarting the server. Ideally, it will be nice to have the ability to change configuration dynamically without having to restart a server.</p> 
+  <p>Ideally you want a hybrid solution, a node starts with minimal configuration and gets the rest of configuration from ZooKeeper.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Using_Helix_for_Service_Discovery">Using Helix for Service Discovery</h3> 
+  <p>Even though Helix has a higher-level abstraction in terms of state machines, constraints and objectives, service discovery is one of the things that has been a prevalent use case from the start. The controller uses the exact mechanism we described above to discover when new servers join the cluster. We create these ZNodes under /CLUSTERNAME/LIVEINSTANCES. Since at any time there is only one controller, we use a ZK watch to track the liveness of a server.</p> 
+  <p>This recipe simply demonstrates how one can re-use that part for implementing service discovery. This demonstrates multiple modes of service discovery:</p> 
+  <ul> 
+   <li>POLL: The client reads from ZooKeeper at regular intervals (30 seconds). Use this if you have 100’s of clients</li> 
+   <li>WATCH: The client sets up a watcher and gets notified of the changes. Use this if you have 10’s of clients</li> 
+   <li>NONE: This does neither of the above, but reads directly from ZooKeeper whenever needed</li> 
+  </ul> 
+  <p>Helix provides these additional features compared to other implementations available elsewhere:</p> 
+  <ul> 
+   <li>It has the concept of disabling a node which means that a badly behaving node can be disabled using the Helix admin API</li> 
+   <li>It automatically detects if a node connects/disconnects from zookeeper repeatedly and disables the node</li> 
+   <li>Configuration management 
+    <ul> 
+     <li>Allows one to set configuration via the admin API at various granularities like cluster, instance, resource, partition</li> 
+     <li>Configurations can be dynamically changed</li> 
+     <li>The server is notified when configurations change</li> 
+    </ul></li> 
+  </ul> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Checkout_and_Build">Checkout and Build</h3> 
+  <div class="source"> 
+   <pre>git clone https://git-wip-us.apache.org/repos/asf/helix.git
+cd helix
+git checkout tags/helix-0.7.1
+mvn clean install package -DskipTests
+cd recipes/service-discovery/target/service-discovery-pkg/bin
+chmod +x *
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Start_ZooKeeper">Start ZooKeeper</h3> 
+  <div class="source"> 
+   <pre>./start-standalone-zookeeper 2199
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Run_the_Demo">Run the Demo</h3> 
+  <div class="source"> 
+   <pre>./service-discovery-demo.sh
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Output">Output</h3> 
+  <div class="source"> 
+   <pre>START:Service discovery demo mode:WATCH
+	Registering service
+		host.x.y.z_12000
+		host.x.y.z_12001
+		host.x.y.z_12002
+		host.x.y.z_12003
+		host.x.y.z_12004
+	SERVICES AVAILABLE
+		SERVICENAME 	HOST 			PORT
+		myServiceName 	host.x.y.z 		12000
+		myServiceName 	host.x.y.z 		12001
+		myServiceName 	host.x.y.z 		12002
+		myServiceName 	host.x.y.z 		12003
+		myServiceName 	host.x.y.z 		12004
+	Deregistering service:
+		host.x.y.z_12002
+	SERVICES AVAILABLE
+		SERVICENAME 	HOST 			PORT
+		myServiceName 	host.x.y.z 		12000
+		myServiceName 	host.x.y.z 		12001
+		myServiceName 	host.x.y.z 		12003
+		myServiceName 	host.x.y.z 		12004
+	Registering service:host.x.y.z_12002
+END:Service discovery demo mode:WATCH
+=============================================
+START:Service discovery demo mode:POLL
+	Registering service
+		host.x.y.z_12000
+		host.x.y.z_12001
+		host.x.y.z_12002
+		host.x.y.z_12003
+		host.x.y.z_12004
+	SERVICES AVAILABLE
+		SERVICENAME 	HOST 			PORT
+		myServiceName 	host.x.y.z 		12000
+		myServiceName 	host.x.y.z 		12001
+		myServiceName 	host.x.y.z 		12002
+		myServiceName 	host.x.y.z 		12003
+		myServiceName 	host.x.y.z 		12004
+	Deregistering service:
+		host.x.y.z_12002
+	Sleeping for poll interval:30000
+	SERVICES AVAILABLE
+		SERVICENAME 	HOST 			PORT
+		myServiceName 	host.x.y.z 		12000
+		myServiceName 	host.x.y.z 		12001
+		myServiceName 	host.x.y.z 		12003
+		myServiceName 	host.x.y.z 		12004
+	Registering service:host.x.y.z_12002
+END:Service discovery demo mode:POLL
+=============================================
+START:Service discovery demo mode:NONE
+	Registering service
+		host.x.y.z_12000
+		host.x.y.z_12001
+		host.x.y.z_12002
+		host.x.y.z_12003
+		host.x.y.z_12004
+	SERVICES AVAILABLE
+		SERVICENAME 	HOST 			PORT
+		myServiceName 	host.x.y.z 		12000
+		myServiceName 	host.x.y.z 		12001
+		myServiceName 	host.x.y.z 		12002
+		myServiceName 	host.x.y.z 		12003
+		myServiceName 	host.x.y.z 		12004
+	Deregistering service:
+		host.x.y.z_12000
+	SERVICES AVAILABLE
+		SERVICENAME 	HOST 			PORT
+		myServiceName 	host.x.y.z 		12001
+		myServiceName 	host.x.y.z 		12002
+		myServiceName 	host.x.y.z 		12003
+		myServiceName 	host.x.y.z 		12004
+	Registering service:host.x.y.z_12000
+END:Service discovery demo mode:NONE
+=============================================
+</pre> 
+  </div> 
+ </div> 
+</div>
+			</div>
+		</div>
+	</div>
+	</div>
+
+	</div><!-- /container -->
+	
+	<!-- Footer
+	================================================== -->
+	<footer class="well">
+		<div class="container">
+			<div class="row">
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Get Helix</li>
+						<li>
+							<a href="../download.html" title="Download">Download </a>
+						</li>
+						<li>
+							<a href="../Building.html" title="Building">Building </a>
+						</li>
+						<li>
+							<a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Hands-On</li>
+						<li>
+							<a href="../Quickstart.html" title="Quick Start">Quick Start </a>
+						</li>
+						<li>
+							<a href="../Tutorial.html" title="Tutorial">Tutorial </a>
+						</li>
+						<li>
+							<a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Recipes</li>
+						<li>
+							<a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a>
+						</li>
+						<li>
+							<a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a>
+						</li>
+						<li>
+							<a href="rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a>
+						</li>
+						<li class="active">
+							<a href="#" title="Service Discovery">Service Discovery </a>
+						</li>
+						<li>
+							<a href="task_dag_execution.html" title="Distributed task DAG Execution">Distributed task DAG Execution </a>
+						</li>
+						<li>
+							<a href="user_def_rebalancer.html" title="User-defined rebalancer">User-defined rebalancer </a>
+						</li>
+					</ul>
+				</div>
+			</div>
+		</div>
+	</footer>
+		
+	<div class="container subfooter">
+		<div class="row">
+			<div class="span12">
+				<p class="pull-right"><a href="#">Back to top</a></p>
+				<p class="copyright">Copyright &copy;2014 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p>
+				<p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p>
+		
+<div class="row span16">
+  <div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation.
+        All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+  <a href="http://helix.apache.org/0.7.1-docs/privacy-policy.html">Privacy Policy</a>
+</div>			</div>
+		</div>
+	</div>
+
+	<!-- Le javascript
+	================================================== -->
+	<!-- Placed at the end of the document so the pages load faster -->
+
+	<!-- Fallback jQuery loading from Google CDN:
+	     http://stackoverflow.com/questions/1014203/best-way-to-use-googles-hosted-jquery-but-fall-back-to-my-hosted-library-on-go -->
+	<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+	<script type="text/javascript">
+		if (typeof jQuery == 'undefined')
+		{
+			document.write(unescape("%3Cscript src='../js/jquery-1.8.3.min.js' type='text/javascript'%3E%3C/script%3E"));
+		}
+	</script>
+	
+	<script src="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/js/bootstrap.min.js"></script>
+	<script src="../js/lightbox.js"></script>
+	<script src="../js/jquery.smooth-scroll.min.js"></script>
+	<!-- back button support for smooth scroll -->
+	<script src="../js/jquery.ba-bbq.min.js"></script>
+
+	<script src="../js/reflow-skin.js"></script>
+	
+	</body>
+</html>
\ No newline at end of file

Added: helix/site-content/0.7.1-docs/recipes/task_dag_execution.html
URL: http://svn.apache.org/viewvc/helix/site-content/0.7.1-docs/recipes/task_dag_execution.html?rev=1624796&view=auto
==============================================================================
--- helix/site-content/0.7.1-docs/recipes/task_dag_execution.html (added)
+++ helix/site-content/0.7.1-docs/recipes/task_dag_execution.html Sun Sep 14 01:47:34 2014
@@ -0,0 +1,413 @@
+
+<!DOCTYPE html>
+<!--
+ Generated by Apache Maven Doxia at 2014-09-13
+ Rendered using Maven Reflow Skin 1.0.0 (http://andriusvelykis.github.com/reflow-maven-skin)
+-->
+<html  xml:lang="en" lang="en">
+
+	<head>
+		<meta charset="UTF-8" />
+		<title>Apache Helix - Distributed Task Execution</title>
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<meta name="description" content="" />
+		<meta http-equiv="content-language" content="en" />
+
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap.min.css" rel="stylesheet" />
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap-responsive.min.css" rel="stylesheet" />
+		<link href="../css/docs.css" rel="stylesheet" />
+		<link href="../css/reflow-skin.css" rel="stylesheet" />
+		
+		
+		<link href="../css/lightbox.css" rel="stylesheet" />
+		
+		<link href="../css/site.css" rel="stylesheet" />
+		<link href="../css/print.css" rel="stylesheet" media="print" />
+		
+		<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
+		<!--[if lt IE 9]>
+			<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+		<![endif]-->
+		
+<script type="text/javascript">var _gaq = _gaq || [];
+        _gaq.push(['_setAccount', 'UA-3211522-12']);
+        _gaq.push(['_trackPageview']);
+
+        (function() {
+        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+        })();</script>
+	</head>
+
+	<body class="page-recipes-task_dag_execution project-071-docs" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target">
+
+		<div class="navbar navbar-fixed-top">
+			<div class="navbar-inner">
+				<div class="container">
+					<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+					</a>
+					<a class="brand" href="../..">Apache Helix</a>
+					<div class="nav-collapse">
+						<ul class="nav pull-right">
+							<li><a href="../index.html" title="Helix 0.7.1 (beta)">Helix 0.7.1 (beta) </a></li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Helix <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../download.html" title="Download">Download </a></li>
+									<li><a href="../Building.html" title="Building">Building </a></li>
+									<li><a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Hands-On <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../Quickstart.html" title="Quick Start">Quick Start </a></li>
+									<li><a href="../Tutorial.html" title="Tutorial">Tutorial </a></li>
+									<li><a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a></li>
+								</ul>
+							</li>
+							<li class="dropdown active">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Recipes <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a></li>
+									<li><a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a></li>
+									<li><a href="rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a></li>
+									<li><a href="service_discovery.html" title="Service Discovery">Service Discovery </a></li>
+									<li class="active"><a href="" title="Distributed task DAG Execution">Distributed task DAG Execution </a></li>
+									<li><a href="user_def_rebalancer.html" title="User-defined rebalancer">User-defined rebalancer </a></li>
+								</ul>
+							</li>
+						</ul>
+					</div><!--/.nav-collapse -->
+				</div>
+			</div>
+		</div>
+		
+	<div class="container">
+	
+	<!-- Masthead
+	================================================== -->
+	<header class="jumbotron subhead">
+		<div class="row" id="banner">
+			<div class="span12">
+				<div class="pull-left">
+					<a href="../../" id="bannerLeft"><img src="../../images/helix-logo.jpg" alt="Apache Helix" /></a>
+					<p class="lead">A cluster management framework for partitioned and replicated distributed resources</p>
+				</div>
+				<div class="pull-right">
+					<a href="http://www.apache.org/" id="bannerRight"><img src="../../images/feather_small.gif" alt="The Apache Software Foundation" /></a>
+				</div>
+			</div>
+		</div>
+		<div>
+			<ul class="breadcrumb">
+				<li><a href="../../" title="Apache Helix">Apache Helix </a></li>
+				<li class="divider">/</li>
+				<li><a href="../" title="Release 0.7.1 (beta)">Release 0.7.1 (beta) </a></li>
+				<li class="divider">/</li>
+				<li>Distributed Task Execution</li>
+				<li class="publishDate version-date pull-right">Last Published: 2014-09-13</li>
+			</ul>
+		</div>
+	</header>
+
+	<div class="main-body">
+	<div class="row">
+		<div class="span12">
+			<div class="body-content">
+<!-- -
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. --> 
+<div class="section"> 
+ <div class="page-header">
+  <h2 id="Distributed_Task_Execution">Distributed Task Execution</h2>
+ </div> 
+ <p>This recipe is intended to demonstrate how task dependencies can be modeled using primitives provided by Helix. A given task can be run with the desired amount of parallelism and will start only when upstream dependencies are met. The demo executes the task DAG described below using 10 workers. Although the demo starts the workers as threads, there is no requirement that all the workers need to run in the same process. In reality, these workers run on many different boxes on a cluster. When a worker fails, Helix takes care of re-assigning a failed task partition to a new worker.</p> 
+ <p>Redis is used as a result store. Any other suitable implementation for TaskResultStore can be plugged in.</p> 
+ <div class="section"> 
+  <h3 id="Workflow">Workflow</h3> 
+  <div class="section"> 
+   <h4 id="Input">Input</h4> 
+   <p>10000 impression events and around 100 click events are pre-populated in task result store (redis).</p> 
+   <ul> 
+    <li> <p><b>ImpEvent</b>: format: id,isFraudulent,country,gender</p></li> 
+    <li> <p><b>ClickEvent</b>: format: id,isFraudulent,impEventId</p></li> 
+   </ul> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Stages">Stages</h4> 
+   <ul> 
+    <li> <p><b>FilterImps</b>: Filters impression where isFraudulent=true.</p></li> 
+    <li> <p><b>FilterClicks</b>: Filters clicks where isFraudulent=true</p></li> 
+    <li> <p><b>impCountsByGender</b>: Generates impression counts grouped by gender. It does this by incrementing the count for ‘impression_gender_counts:&lt;gender_value&gt;’ in the task result store (redis hash). Depends on: <b>FilterImps</b></p></li> 
+    <li> <p><b>impCountsByCountry</b>: Generates impression counts grouped by country. It does this by incrementing the count for ‘impression_country_counts:&lt;country_value&gt;’ in the task result store (redis hash). Depends on: <b>FilterImps</b></p></li> 
+    <li> <p><b>impClickJoin</b>: Joins clicks with corresponding impression event using impEventId as the join key. Join is needed to pull dimensions not present in click event. Depends on: <b>FilterImps, FilterClicks</b></p></li> 
+    <li> <p><b>clickCountsByGender</b>: Generates click counts grouped by gender. It does this by incrementing the count for click_gender_counts:&lt;gender_value&gt; in the task result store (redis hash). Depends on: <b>impClickJoin</b></p></li> 
+    <li> <p><b>clickCountsByCountry</b>: Generates click counts grouped by country. It does this by incrementing the count for click_country_counts:&lt;country_value&gt; in the task result store (redis hash). Depends on: <b>impClickJoin</b></p></li> 
+    <li> <p><b>report</b>: Reads from all aggregates generated by previous stages and prints them. Depends on: <b>impCountsByGender, impCountsByCountry, clickCountsByGender, clickCountsByCountry</b></p></li> 
+   </ul> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Creating_a_DAG">Creating a DAG</h3> 
+  <p>Each stage is represented as a Node along with the upstream dependency and desired parallelism. Each stage is modeled as a resource in Helix using OnlineOffline state model. As part of an Offline to Online transition, we watch the external view of upstream resources and wait for them to transition to the online state. See Task.java for additional info.</p> 
+  <div class="source"> 
+   <pre>Dag dag = new Dag();
+dag.addNode(new Node(&quot;filterImps&quot;, 10, &quot;&quot;));
+dag.addNode(new Node(&quot;filterClicks&quot;, 5, &quot;&quot;));
+dag.addNode(new Node(&quot;impClickJoin&quot;, 10, &quot;filterImps,filterClicks&quot;));
+dag.addNode(new Node(&quot;impCountsByGender&quot;, 10, &quot;filterImps&quot;));
+dag.addNode(new Node(&quot;impCountsByCountry&quot;, 10, &quot;filterImps&quot;));
+dag.addNode(new Node(&quot;clickCountsByGender&quot;, 5, &quot;impClickJoin&quot;));
+dag.addNode(new Node(&quot;clickCountsByCountry&quot;, 5, &quot;impClickJoin&quot;));
+dag.addNode(new Node(&quot;report&quot;,1,&quot;impCountsByGender,impCountsByCountry,clickCountsByGender,clickCountsByCountry&quot;));
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Demo">Demo</h3> 
+  <p>In order to run the demo, use the following steps</p> 
+  <p>See <a class="externalLink" href="http://redis.io/topics/quickstart">http://redis.io/topics/quickstart</a> on how to install redis server</p> 
+  <div class="source"> 
+   <pre>Start redis e.g:
+./redis-server --port 6379
+
+git clone https://git-wip-us.apache.org/repos/asf/helix.git
+cd helix
+git checkout helix-0.7.1
+cd recipes/task-execution
+mvn clean install package -DskipTests
+cd target/task-execution-pkg/bin
+chmod +x task-execution-demo.sh
+./task-execution-demo.sh 2181 localhost 6379
+
+</pre> 
+  </div> 
+  <p>Here's a visual representation of the DAG.</p> 
+  <div class="source"> 
+   <pre>                       +-----------------+       +----------------+
+                       |   filterImps    |       |  filterClicks  |
+                       | (parallelism=10)|       | (parallelism=5)|
+                       +----------+-----++       +-------+--------+
+                       |          |     |                |
+                       |          |     |                |
+                       |          |     |                |
+                       |          |     +-------&gt;--------v------------+
+      +--------------&lt;-+   +------v-------+    |  impClickJoin        |
+      |impCountsByGender   |impCountsByCountry | (parallelism=10)     |
+      |(parallelism=10)    |(parallelism=10)   ++-------------------+-+
+      +-----------+--+     +---+----------+     |                   |
+                  |            |                |                   |
+                  |            |                |                   |
+                  |            |       +--------v---------+       +-v-------------------+
+                  |            |       |clickCountsByGender       |clickCountsByCountry |
+                  |            |       |(parallelism=5)   |       |(parallelism=5)      |
+                  |            |       +----+-------------+       +---------------------+
+                  |            |            |                     |
+                  |            |            |                     |
+                  |            |            |                     |
+                  +-----&gt;+-----+&gt;-----------v----+&lt;---------------+
+                         | report                |
+                         |(parallelism=1)        |
+                         +-----------------------+
+
+</pre> 
+  </div> 
+  <p>(credit for above ascii art: <a class="externalLink" href="http://www.asciiflow.com">http://www.asciiflow.com</a>)</p> 
+  <div class="section"> 
+   <h4 id="Output">Output</h4> 
+   <div class="source"> 
+    <pre>Done populating dummy data
+Executing filter task for filterImps_3 for impressions_demo
+Executing filter task for filterImps_2 for impressions_demo
+Executing filter task for filterImps_0 for impressions_demo
+Executing filter task for filterImps_1 for impressions_demo
+Executing filter task for filterImps_4 for impressions_demo
+Executing filter task for filterClicks_3 for clicks_demo
+Executing filter task for filterClicks_1 for clicks_demo
+Executing filter task for filterImps_8 for impressions_demo
+Executing filter task for filterImps_6 for impressions_demo
+Executing filter task for filterClicks_2 for clicks_demo
+Executing filter task for filterClicks_0 for clicks_demo
+Executing filter task for filterImps_7 for impressions_demo
+Executing filter task for filterImps_5 for impressions_demo
+Executing filter task for filterClicks_4 for clicks_demo
+Executing filter task for filterImps_9 for impressions_demo
+Running AggTask for impCountsByGender_3 for filtered_impressions_demo gender
+Running AggTask for impCountsByGender_2 for filtered_impressions_demo gender
+Running AggTask for impCountsByGender_0 for filtered_impressions_demo gender
+Running AggTask for impCountsByGender_9 for filtered_impressions_demo gender
+Running AggTask for impCountsByGender_1 for filtered_impressions_demo gender
+Running AggTask for impCountsByGender_4 for filtered_impressions_demo gender
+Running AggTask for impCountsByCountry_4 for filtered_impressions_demo country
+Running AggTask for impCountsByGender_5 for filtered_impressions_demo gender
+Executing JoinTask for impClickJoin_2
+Running AggTask for impCountsByCountry_3 for filtered_impressions_demo country
+Running AggTask for impCountsByCountry_1 for filtered_impressions_demo country
+Running AggTask for impCountsByCountry_0 for filtered_impressions_demo country
+Running AggTask for impCountsByCountry_2 for filtered_impressions_demo country
+Running AggTask for impCountsByGender_6 for filtered_impressions_demo gender
+Executing JoinTask for impClickJoin_1
+Executing JoinTask for impClickJoin_0
+Executing JoinTask for impClickJoin_3
+Running AggTask for impCountsByGender_8 for filtered_impressions_demo gender
+Executing JoinTask for impClickJoin_4
+Running AggTask for impCountsByGender_7 for filtered_impressions_demo gender
+Running AggTask for impCountsByCountry_5 for filtered_impressions_demo country
+Running AggTask for impCountsByCountry_6 for filtered_impressions_demo country
+Executing JoinTask for impClickJoin_9
+Running AggTask for impCountsByCountry_8 for filtered_impressions_demo country
+Running AggTask for impCountsByCountry_7 for filtered_impressions_demo country
+Executing JoinTask for impClickJoin_5
+Executing JoinTask for impClickJoin_6
+Running AggTask for impCountsByCountry_9 for filtered_impressions_demo country
+Executing JoinTask for impClickJoin_8
+Executing JoinTask for impClickJoin_7
+Running AggTask for clickCountsByCountry_1 for joined_clicks_demo country
+Running AggTask for clickCountsByCountry_0 for joined_clicks_demo country
+Running AggTask for clickCountsByCountry_2 for joined_clicks_demo country
+Running AggTask for clickCountsByCountry_3 for joined_clicks_demo country
+Running AggTask for clickCountsByGender_1 for joined_clicks_demo gender
+Running AggTask for clickCountsByCountry_4 for joined_clicks_demo country
+Running AggTask for clickCountsByGender_3 for joined_clicks_demo gender
+Running AggTask for clickCountsByGender_2 for joined_clicks_demo gender
+Running AggTask for clickCountsByGender_4 for joined_clicks_demo gender
+Running AggTask for clickCountsByGender_0 for joined_clicks_demo gender
+Running reports task
+Impression counts per country
+{CANADA=1940, US=1958, CHINA=2014, UNKNOWN=2022, UK=1946}
+Click counts per country
+{US=24, CANADA=14, CHINA=26, UNKNOWN=14, UK=22}
+Impression counts per gender
+{F=3325, UNKNOWN=3259, M=3296}
+Click counts per gender
+{F=33, UNKNOWN=32, M=35}
+</pre> 
+   </div> 
+  </div> 
+ </div> 
+</div>
+			</div>
+		</div>
+	</div>
+	</div>
+
+	</div><!-- /container -->
+	
+	<!-- Footer
+	================================================== -->
+	<footer class="well">
+		<div class="container">
+			<div class="row">
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Get Helix</li>
+						<li>
+							<a href="../download.html" title="Download">Download </a>
+						</li>
+						<li>
+							<a href="../Building.html" title="Building">Building </a>
+						</li>
+						<li>
+							<a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Hands-On</li>
+						<li>
+							<a href="../Quickstart.html" title="Quick Start">Quick Start </a>
+						</li>
+						<li>
+							<a href="../Tutorial.html" title="Tutorial">Tutorial </a>
+						</li>
+						<li>
+							<a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Recipes</li>
+						<li>
+							<a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a>
+						</li>
+						<li>
+							<a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a>
+						</li>
+						<li>
+							<a href="rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a>
+						</li>
+						<li>
+							<a href="service_discovery.html" title="Service Discovery">Service Discovery </a>
+						</li>
+						<li class="active">
+							<a href="#" title="Distributed task DAG Execution">Distributed task DAG Execution </a>
+						</li>
+						<li>
+							<a href="user_def_rebalancer.html" title="User-defined rebalancer">User-defined rebalancer </a>
+						</li>
+					</ul>
+				</div>
+			</div>
+		</div>
+	</footer>
+		
+	<div class="container subfooter">
+		<div class="row">
+			<div class="span12">
+				<p class="pull-right"><a href="#">Back to top</a></p>
+				<p class="copyright">Copyright &copy;2014 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p>
+				<p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p>
+		
+<div class="row span16">
+  <div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation.
+        All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+  <a href="http://helix.apache.org/0.7.1-docs/privacy-policy.html">Privacy Policy</a>
+</div>			</div>
+		</div>
+	</div>
+
+	<!-- Le javascript
+	================================================== -->
+	<!-- Placed at the end of the document so the pages load faster -->
+
+	<!-- Fallback jQuery loading from Google CDN:
+	     http://stackoverflow.com/questions/1014203/best-way-to-use-googles-hosted-jquery-but-fall-back-to-my-hosted-library-on-go -->
+	<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+	<script type="text/javascript">
+		if (typeof jQuery == 'undefined')
+		{
+			document.write(unescape("%3Cscript src='../js/jquery-1.8.3.min.js' type='text/javascript'%3E%3C/script%3E"));
+		}
+	</script>
+	
+	<script src="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/js/bootstrap.min.js"></script>
+	<script src="../js/lightbox.js"></script>
+	<script src="../js/jquery.smooth-scroll.min.js"></script>
+	<!-- back button support for smooth scroll -->
+	<script src="../js/jquery.ba-bbq.min.js"></script>
+
+	<script src="../js/reflow-skin.js"></script>
+	
+	</body>
+</html>
\ No newline at end of file

Added: helix/site-content/0.7.1-docs/recipes/user_def_rebalancer.html
URL: http://svn.apache.org/viewvc/helix/site-content/0.7.1-docs/recipes/user_def_rebalancer.html?rev=1624796&view=auto
==============================================================================
--- helix/site-content/0.7.1-docs/recipes/user_def_rebalancer.html (added)
+++ helix/site-content/0.7.1-docs/recipes/user_def_rebalancer.html Sun Sep 14 01:47:34 2014
@@ -0,0 +1,508 @@
+
+<!DOCTYPE html>
+<!--
+ Generated by Apache Maven Doxia at 2014-09-13
+ Rendered using Maven Reflow Skin 1.0.0 (http://andriusvelykis.github.com/reflow-maven-skin)
+-->
+<html  xml:lang="en" lang="en">
+
+	<head>
+		<meta charset="UTF-8" />
+		<title>Apache Helix - Lock Manager with a User-Defined Rebalancer</title>
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<meta name="description" content="" />
+		<meta http-equiv="content-language" content="en" />
+
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap.min.css" rel="stylesheet" />
+		<link href="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/css/bootstrap-responsive.min.css" rel="stylesheet" />
+		<link href="../css/docs.css" rel="stylesheet" />
+		<link href="../css/reflow-skin.css" rel="stylesheet" />
+		
+		
+		<link href="../css/lightbox.css" rel="stylesheet" />
+		
+		<link href="../css/site.css" rel="stylesheet" />
+		<link href="../css/print.css" rel="stylesheet" media="print" />
+		
+		<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
+		<!--[if lt IE 9]>
+			<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+		<![endif]-->
+		
+<script type="text/javascript">var _gaq = _gaq || [];
+        _gaq.push(['_setAccount', 'UA-3211522-12']);
+        _gaq.push(['_trackPageview']);
+
+        (function() {
+        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+        })();</script>
+	</head>
+
+	<body class="page-recipes-user_def_rebalancer project-071-docs" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target">
+
+		<div class="navbar navbar-fixed-top">
+			<div class="navbar-inner">
+				<div class="container">
+					<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+					</a>
+					<a class="brand" href="../..">Apache Helix</a>
+					<div class="nav-collapse">
+						<ul class="nav pull-right">
+							<li><a href="../index.html" title="Helix 0.7.1 (beta)">Helix 0.7.1 (beta) </a></li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Helix <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../download.html" title="Download">Download </a></li>
+									<li><a href="../Building.html" title="Building">Building </a></li>
+									<li><a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Hands-On <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="../Quickstart.html" title="Quick Start">Quick Start </a></li>
+									<li><a href="../Tutorial.html" title="Tutorial">Tutorial </a></li>
+									<li><a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a></li>
+								</ul>
+							</li>
+							<li class="dropdown active">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Recipes <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a></li>
+									<li><a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a></li>
+									<li><a href="rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a></li>
+									<li><a href="service_discovery.html" title="Service Discovery">Service Discovery </a></li>
+									<li><a href="task_dag_execution.html" title="Distributed task DAG Execution">Distributed task DAG Execution </a></li>
+									<li class="active"><a href="" title="User-defined rebalancer">User-defined rebalancer </a></li>
+								</ul>
+							</li>
+						</ul>
+					</div><!--/.nav-collapse -->
+				</div>
+			</div>
+		</div>
+		
+	<div class="container">
+	
+	<!-- Masthead
+	================================================== -->
+	<header class="jumbotron subhead">
+		<div class="row" id="banner">
+			<div class="span12">
+				<div class="pull-left">
+					<a href="../../" id="bannerLeft"><img src="../../images/helix-logo.jpg" alt="Apache Helix" /></a>
+					<p class="lead">A cluster management framework for partitioned and replicated distributed resources</p>
+				</div>
+				<div class="pull-right">
+					<a href="http://www.apache.org/" id="bannerRight"><img src="../../images/feather_small.gif" alt="The Apache Software Foundation" /></a>
+				</div>
+			</div>
+		</div>
+		<div>
+			<ul class="breadcrumb">
+				<li><a href="../../" title="Apache Helix">Apache Helix </a></li>
+				<li class="divider">/</li>
+				<li><a href="../" title="Release 0.7.1 (beta)">Release 0.7.1 (beta) </a></li>
+				<li class="divider">/</li>
+				<li>Lock Manager with a User-Defined Rebalancer</li>
+				<li class="publishDate version-date pull-right">Last Published: 2014-09-13</li>
+			</ul>
+		</div>
+	</header>
+
+	<div class="main-body">
+	<div class="row">
+		<div class="span12">
+			<div class="body-content">
+<!-- -
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. --> 
+<div class="section"> 
+ <div class="page-header">
+  <h2 id="Lock_Manager_with_a_User-Defined_Rebalancer">Lock Manager with a User-Defined Rebalancer</h2>
+ </div> 
+ <p>Helix is able to compute node preferences and state assignments automatically using general-purpose algorithms. In many cases, a distributed system implementer may choose to instead define a customized approach to computing the location of replicas, the state mapping, or both in response to the addition or removal of participants. The following is an implementation of the <a href="./lock_manager.html">Distributed Lock Manager</a> that includes a user-defined rebalancer.</p> 
+ <div class="section"> 
+  <h3 id="Define_the_Cluster_and_Resource">Define the Cluster and Resource</h3> 
+  <p>The YAML file below fully defines the cluster and the locks. A lock can be in one of two states: locked and unlocked. Transitions can happen in either direction, and the locked state is preferred. A resource in this example is the entire collection of locks to distribute. A partition is mapped to a lock; in this case that means there are 12 locks. These 12 locks will be distributed across 3 nodes. The constraints indicate that only one replica of a lock can be in the locked state at any given time. These locks can each only have a single holder, defined by a replica count of 1.</p> 
+  <p>Notice the rebalancer section of the definition. The mode is set to USER_DEFINED and the class name refers to the plugged-in rebalancer implementation that inherits from <a class="externalLink" href="http://helix.apache.org/javadocs/0.7.1/reference/org/apache/helix/controller/rebalancer/HelixRebalancer.html">HelixRebalancer</a>. This implementation is called whenever the state of the cluster changes, as is the case when participants are added to or removed from the system.</p> 
+  <p>Location: <tt>helix/recipes/user-defined-rebalancer/src/main/resources/lock-manager-config.yaml</tt></p> 
+  <div class="source"> 
+   <pre>clusterName: lock-manager-custom-rebalancer # unique name for the cluster
+resources:
+  - name: lock-group # unique resource name
+    rebalancer: # we will provide our own rebalancer
+      mode: USER_DEFINED
+      class: org.apache.helix.userdefinedrebalancer.LockManagerRebalancer
+    partitions:
+      count: 12 # number of locks
+      replicas: 1 # number of simultaneous holders for each lock
+    stateModel:
+      name: lock-unlock # unique model name
+      states: [LOCKED, RELEASED, DROPPED] # the list of possible states
+      transitions: # the list of possible transitions
+        - name: Unlock
+          from: LOCKED
+          to: RELEASED
+        - name: Lock
+          from: RELEASED
+          to: LOCKED
+        - name: DropLock
+          from: LOCKED
+          to: DROPPED
+        - name: DropUnlock
+          from: RELEASED
+          to: DROPPED
+        - name: Undrop
+          from: DROPPED
+          to: RELEASED
+      initialState: RELEASED
+    constraints:
+      state:
+        counts: # maximum number of replicas of a partition that can be in each state
+          - name: LOCKED
+            count: &quot;1&quot;
+          - name: RELEASED
+            count: &quot;-1&quot;
+          - name: DROPPED
+            count: &quot;-1&quot;
+        priorityList: [LOCKED, RELEASED, DROPPED] # states in order of priority
+      transition: # transitions priority to enforce order that transitions occur
+        priorityList: [Unlock, Lock, Undrop, DropUnlock, DropLock]
+participants: # list of nodes that can acquire locks
+  - name: localhost_12001
+    host: localhost
+    port: 12001
+  - name: localhost_12002
+    host: localhost
+    port: 12002
+  - name: localhost_12003
+    host: localhost
+    port: 12003
+</pre> 
+  </div> 
+  <p>Then, Helix's YAMLClusterSetup tool can read in the configuration and bootstrap the cluster immediately:</p> 
+  <div class="source"> 
+   <pre>YAMLClusterSetup setup = new YAMLClusterSetup(zkAddress);
+InputStream input =
+    Thread.currentThread().getContextClassLoader()
+        .getResourceAsStream(&quot;lock-manager-config.yaml&quot;);
+YAMLClusterSetup.YAMLClusterConfig config = setup.setupCluster(input);
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Write_a_Rebalancer">Write a Rebalancer</h3> 
+  <p>Below is a full implementation of a rebalancer that extends <a class="externalLink" href="http://helix.apache.org/javadocs/0.7.1/reference/org/apache/helix/controller/rebalancer/HelixRebalancer.html">HelixRebalancer</a>. In this case, it simply throws out the previous resource assignment, computes the target node for as many partition replicas as can hold a lock in the LOCKED state (in this example, one), and assigns them the LOCKED state (which is at the head of the state preference list). Clearly a more robust implementation would likely examine the current ideal state to maintain current assignments, and the full state list to handle models more complicated than this one. However, for a simple lock holder implementation, this is sufficient.</p> 
+  <p>Location: <tt>helix/recipes/user-defined-rebalancer/src/main/java/org/apache/helix/userdefinedrebalancer/LockManagerRebalancer.java</tt></p> 
+  <div class="source"> 
+   <pre>@Override
+public ResourceAssignment computeResourceMapping(IdealState idealState,
+    RebalancerConfig rebalancerConfig, ResourceAssignment prevAssignment, Cluster cluster,
+    ResourceCurrentState currentState) {
+  // Initialize an empty mapping of locks to participants
+  ResourceAssignment assignment = new ResourceAssignment(idealState.getResourceId());
+
+  // Get the list of live participants in the cluster
+  List&lt;ParticipantId&gt; liveParticipants =
+      new ArrayList&lt;ParticipantId&gt;(cluster.getLiveParticipantMap().keySet());
+
+  // Get the state model (should be a simple lock/unlock model) and the highest-priority state
+  StateModelDefId stateModelDefId = idealState.getStateModelDefId();
+  StateModelDefinition stateModelDef = cluster.getStateModelMap().get(stateModelDefId);
+  if (stateModelDef.getStatesPriorityList().size() &lt; 1) {
+    LOG.error(&quot;Invalid state model definition. There should be at least one state.&quot;);
+    return assignment;
+  }
+  State lockState = stateModelDef.getTypedStatesPriorityList().get(0);
+
+  // Count the number of participants allowed to lock each lock
+  String stateCount = stateModelDef.getNumParticipantsPerState(lockState);
+  int lockHolders = 0;
+  try {
+    // a numeric value is a custom-specified number of participants allowed to lock the lock
+    lockHolders = Integer.parseInt(stateCount);
+  } catch (NumberFormatException e) {
+    LOG.error(&quot;Invalid state model definition. The lock state does not have a valid count&quot;);
+    return assignment;
+  }
+
+  // Fairly assign the lock state to the participants using a simple mod-based sequential
+  // assignment. For instance, if each lock can be held by 3 participants, lock 0 would be held
+  // by participants (0, 1, 2), lock 1 would be held by (1, 2, 3), and so on, wrapping around the
+  // number of participants as necessary.
+  // This assumes a simple lock-unlock model where the only state of interest is which nodes have
+  // acquired each lock.
+  int i = 0;
+  for (PartitionId partition : idealState.getPartitionIdSet()) {
+    Map&lt;ParticipantId, State&gt; replicaMap = new HashMap&lt;ParticipantId, State&gt;();
+    for (int j = i; j &lt; i + lockHolders; j++) {
+      int participantIndex = j % liveParticipants.size();
+      ParticipantId participant = liveParticipants.get(participantIndex);
+      // enforce that a participant can only have one instance of a given lock
+      if (!replicaMap.containsKey(participant)) {
+        replicaMap.put(participant, lockState);
+      }
+    }
+    assignment.addReplicaMap(partition, replicaMap);
+    i++;
+  }
+  return assignment;
+}
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Start_up_the_Participants">Start up the Participants</h3> 
+  <p>Here is a lock class based on the newly defined lock-unlock transition handler so that the participant can receive callbacks on state transitions.</p> 
+  <p>Location: <tt>helix/recipes/user-defined-rebalancer/src/main/java/org/apache/helix/userdefinedrebalancer/Lock.java</tt></p> 
+  <div class="source"> 
+   <pre>public class Lock extends TransitionHandler {
+  private String lockName;
+
+  public Lock(String lockName) {
+    this.lockName = lockName;
+  }
+
+  @Transition(from = &quot;RELEASED&quot;, to = &quot;LOCKED&quot;)
+  public void lock(Message m, NotificationContext context) {
+    System.out.println(context.getManager().getInstanceName() + &quot; acquired lock:&quot; + lockName);
+  }
+
+  @Transition(from = &quot;LOCKED&quot;, to = &quot;RELEASED&quot;)
+  public void release(Message m, NotificationContext context) {
+    System.out.println(context.getManager().getInstanceName() + &quot; releasing lock:&quot; + lockName);
+  }
+}
+</pre> 
+  </div> 
+  <p>Here is the factory to make the Lock class accessible.</p> 
+  <p>Location: <tt>helix/recipes/user-defined-rebalancer/src/main/java/org/apache/helix/userdefinedrebalancer/LockFactory.java</tt></p> 
+  <div class="source"> 
+   <pre>public class LockFactory extends StateTransitionHandlerFactory&lt;Lock&gt; {
+  @Override
+  public Lock createNewStateModel(String lockName) {
+    return new Lock(lockName);
+  }
+}
+</pre> 
+  </div> 
+  <p>Finally, here is the factory registration and the start of the participant:</p> 
+  <div class="source"> 
+   <pre>participantManager =
+    HelixManagerFactory.getZKHelixManager(clusterName, participantName, InstanceType.PARTICIPANT,
+        zkAddress);
+participantManager.getStateMachineEngine().registerStateModelFactory(stateModelName,
+    new LockFactory());
+participantManager.connect();
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Start_up_the_Controller">Start up the Controller</h3> 
+  <div class="source"> 
+   <pre>controllerManager =
+    HelixControllerMain.startHelixController(zkAddress, config.clusterName, &quot;controller&quot;,
+        HelixControllerMain.STANDALONE);
+</pre> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Try_It_Out">Try It Out</h3> 
+  <div class="source"> 
+   <pre>git clone https://git-wip-us.apache.org/repos/asf/helix.git
+cd helix
+git checkout tags/helix-0.7.1
+mvn clean install package -DskipTests
+cd recipes/user-defined-rebalancer/target/user-defined-rebalancer-pkg/bin
+chmod +x *
+./lock-manager-demo.sh
+</pre> 
+  </div> 
+  <div class="section"> 
+   <h4 id="Output">Output</h4> 
+   <div class="source"> 
+    <pre>./lock-manager-demo
+STARTING localhost_12002
+STARTING localhost_12001
+STARTING localhost_12003
+STARTED localhost_12001
+STARTED localhost_12003
+STARTED localhost_12002
+localhost_12003 acquired lock:lock-group_4
+localhost_12002 acquired lock:lock-group_8
+localhost_12001 acquired lock:lock-group_10
+localhost_12001 acquired lock:lock-group_3
+localhost_12001 acquired lock:lock-group_6
+localhost_12003 acquired lock:lock-group_0
+localhost_12002 acquired lock:lock-group_5
+localhost_12001 acquired lock:lock-group_9
+localhost_12002 acquired lock:lock-group_2
+localhost_12003 acquired lock:lock-group_7
+localhost_12003 acquired lock:lock-group_11
+localhost_12002 acquired lock:lock-group_1
+lockName  acquired By
+======================================
+lock-group_0  localhost_12003
+lock-group_1  localhost_12002
+lock-group_10 localhost_12001
+lock-group_11 localhost_12003
+lock-group_2  localhost_12002
+lock-group_3  localhost_12001
+lock-group_4  localhost_12003
+lock-group_5  localhost_12002
+lock-group_6  localhost_12001
+lock-group_7  localhost_12003
+lock-group_8  localhost_12002
+lock-group_9  localhost_12001
+Stopping the first participant
+localhost_12001 Interrupted
+localhost_12002 acquired lock:lock-group_3
+localhost_12003 acquired lock:lock-group_6
+localhost_12003 acquired lock:lock-group_10
+localhost_12002 acquired lock:lock-group_9
+lockName  acquired By
+======================================
+lock-group_0  localhost_12003
+lock-group_1  localhost_12002
+lock-group_10 localhost_12003
+lock-group_11 localhost_12003
+lock-group_2  localhost_12002
+lock-group_3  localhost_12002
+lock-group_4  localhost_12003
+lock-group_5  localhost_12002
+lock-group_6  localhost_12003
+lock-group_7  localhost_12003
+lock-group_8  localhost_12002
+lock-group_9  localhost_12002
+</pre> 
+   </div> 
+   <p>Notice that the lock assignment directly follows the assignment generated by the user-defined rebalancer both initially and after a participant is removed from the system.</p> 
+  </div> 
+ </div> 
+</div>
+			</div>
+		</div>
+	</div>
+	</div>
+
+	</div><!-- /container -->
+	
+	<!-- Footer
+	================================================== -->
+	<footer class="well">
+		<div class="container">
+			<div class="row">
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Get Helix</li>
+						<li>
+							<a href="../download.html" title="Download">Download </a>
+						</li>
+						<li>
+							<a href="../Building.html" title="Building">Building </a>
+						</li>
+						<li>
+							<a href="../releasenotes/release-0.7.1.html" title="Release Notes">Release Notes </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Hands-On</li>
+						<li>
+							<a href="../Quickstart.html" title="Quick Start">Quick Start </a>
+						</li>
+						<li>
+							<a href="../Tutorial.html" title="Tutorial">Tutorial </a>
+						</li>
+						<li>
+							<a href="../../javadocs/0.7.1" title="Javadocs">Javadocs </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Recipes</li>
+						<li>
+							<a href="lock_manager.html" title="Distributed lock manager">Distributed lock manager </a>
+						</li>
+						<li>
+							<a href="rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a>
+						</li>
+						<li>
+							<a href="rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a>
+						</li>
+						<li>
+							<a href="service_discovery.html" title="Service Discovery">Service Discovery </a>
+						</li>
+						<li>
+							<a href="task_dag_execution.html" title="Distributed task DAG Execution">Distributed task DAG Execution </a>
+						</li>
+						<li class="active">
+							<a href="#" title="User-defined rebalancer">User-defined rebalancer </a>
+						</li>
+					</ul>
+				</div>
+			</div>
+		</div>
+	</footer>
+		
+	<div class="container subfooter">
+		<div class="row">
+			<div class="span12">
+				<p class="pull-right"><a href="#">Back to top</a></p>
+				<p class="copyright">Copyright &copy;2014 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p>
+				<p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p>
+		
+<div class="row span16">
+  <div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation.
+        All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+  <a href="http://helix.apache.org/0.7.1-docs/privacy-policy.html">Privacy Policy</a>
+</div>			</div>
+		</div>
+	</div>
+
+	<!-- Le javascript
+	================================================== -->
+	<!-- Placed at the end of the document so the pages load faster -->
+
+	<!-- Fallback jQuery loading from Google CDN:
+	     http://stackoverflow.com/questions/1014203/best-way-to-use-googles-hosted-jquery-but-fall-back-to-my-hosted-library-on-go -->
+	<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+	<script type="text/javascript">
+		if (typeof jQuery == 'undefined')
+		{
+			document.write(unescape("%3Cscript src='../js/jquery-1.8.3.min.js' type='text/javascript'%3E%3C/script%3E"));
+		}
+	</script>
+	
+	<script src="http://netdna.bootstrapcdn.com/twitter-bootstrap/2.2.2/js/bootstrap.min.js"></script>
+	<script src="../js/lightbox.js"></script>
+	<script src="../js/jquery.smooth-scroll.min.js"></script>
+	<!-- back button support for smooth scroll -->
+	<script src="../js/jquery.ba-bbq.min.js"></script>
+
+	<script src="../js/reflow-skin.js"></script>
+	
+	</body>
+</html>
\ No newline at end of file



Mime
View raw message