helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j...@apache.org
Subject svn commit: r1847507 [10/23] - in /helix/site-content: ./ 0.6.1-incubating-docs/ 0.6.1-incubating-docs/recipes/ 0.6.1-incubating-docs/releasenotes/ 0.6.2-incubating-docs/ 0.6.2-incubating-docs/recipes/ 0.6.2-incubating-docs/releasenotes/ 0.6.3-docs/ 0....
Date Tue, 27 Nov 2018 01:02:10 GMT
Added: helix/site-content/0.8.3-docs/Features.html
URL: http://svn.apache.org/viewvc/helix/site-content/0.8.3-docs/Features.html?rev=1847507&view=auto
==============================================================================
--- helix/site-content/0.8.3-docs/Features.html (added)
+++ helix/site-content/0.8.3-docs/Features.html Tue Nov 27 01:02:03 2018
@@ -0,0 +1,475 @@
+
+<!DOCTYPE html>
+<!--
+ Generated by Apache Maven Doxia at 2018-11-26
+ Rendered using Maven Reflow Skin 1.0.0 (http://andriusvelykis.github.com/reflow-maven-skin)
+-->
+<html  xml:lang="en" lang="en">
+
+	<head>
+		<meta charset="UTF-8" />
+		<title>Apache Helix -   Features</title>
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<meta name="description" content="" />
+		<meta http-equiv="content-language" content="en" />
+
+		<link href="./css/bootstrap.min.css" rel="stylesheet" />
+		<link href="./css/bootstrap-responsive.min.css" rel="stylesheet" />
+		<link href="./css/docs.css" rel="stylesheet" />
+		<link href="./css/reflow-skin.css" rel="stylesheet" />
+		
+		
+		<link href="./css/lightbox.css" rel="stylesheet" />
+		
+		<link href="./css/site.css" rel="stylesheet" />
+		<link href="./css/print.css" rel="stylesheet" media="print" />
+		
+		<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
+		<!--[if lt IE 9]>
+			<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+		<![endif]-->
+		
+<script type="text/javascript">var _gaq = _gaq || [];
+        _gaq.push(['_setAccount', 'UA-3211522-12']);
+        _gaq.push(['_trackPageview']);
+
+        (function() {
+        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+        })();</script>
+	</head>
+
+	<body class="page-features project-083-docs" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target">
+
+		<div class="navbar navbar-fixed-top">
+			<div class="navbar-inner">
+				<div class="container">
+					<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+					</a>
+					<a class="brand" href="..">Apache Helix</a>
+					<div class="nav-collapse">
+						<ul class="nav pull-right">
+							<li><a href="index.html" title="Helix 0.8.3">Helix 0.8.3 </a></li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Helix <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="download.html" title="Download">Download </a></li>
+									<li><a href="Building.html" title="Building">Building </a></li>
+									<li><a href="releasenotes/release-0.8.3.html" title="Release Notes">Release Notes </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Hands-On <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="Quickstart.html" title="Quick Start">Quick Start </a></li>
+									<li><a href="Tutorial.html" title="Tutorial">Tutorial </a></li>
+									<li><a href="../javadocs/0.8.3" title="Javadocs">Javadocs </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Recipes <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="recipes/lock_manager.html" title="Distributed lock manager">Distributed lock manager </a></li>
+									<li><a href="recipes/rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a></li>
+									<li><a href="recipes/rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a></li>
+									<li><a href="recipes/service_discovery.html" title="Service discovery">Service discovery </a></li>
+									<li><a href="recipes/task_dag_execution.html" title="Distributed task DAG execution">Distributed task DAG execution </a></li>
+								</ul>
+							</li>
+						</ul>
+					</div><!--/.nav-collapse -->
+				</div>
+			</div>
+		</div>
+		
+	<div class="container">
+	
+	<!-- Masthead
+	================================================== -->
+	<header class="jumbotron subhead">
+		<div class="row" id="banner">
+			<div class="span12">
+				<div class="pull-left">
+					<a href="../" id="bannerLeft"><img src="../images/helix-logo.jpg" alt="Apache Helix" /></a>
+					<p class="lead">A cluster management framework for partitioned and replicated distributed resources</p>
+				</div>
+				<div class="pull-right">
+					<a href="http://www.apache.org/" id="bannerRight"><img src="../images/feather_small.gif" alt="The Apache Software Foundation" /></a>
+				</div>
+			</div>
+		</div>
+		<div>
+			<ul class="breadcrumb">
+				<li><a href="../" title="Apache Helix">Apache Helix </a></li>
+				<li class="divider">/</li>
+				<li><a href="./" title="Release 0.8.3">Release 0.8.3 </a></li>
+				<li class="divider">/</li>
+				<li>  Features</li>
+				<li class="publishDate version-date pull-right">Last Published: 2018-11-26</li>
+			</ul>
+		</div>
+	</header>
+
+	<div class="main-body">
+	<div class="row">
+		<div class="span12">
+			<div class="body-content">
+<!-- -
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. --> 
+<p></p> 
+<div class="section"> 
+ <div class="page-header">
+  <h2 id="Features">Features</h2>
+ </div> 
+ <div class="section"> 
+  <h3 id="CONFIGURING_IDEALSTATE">CONFIGURING IDEALSTATE</h3> 
+  <p>Read concepts page for definition of Idealstate.</p> 
+  <p>The placement of partitions in a DDS is very critical for reliability and scalability of the system. For example, when a node fails, it is important that the partitions hosted on that node are reallocated evenly among the remaining nodes. Consistent hashing is one such algorithm that can guarantee this. Helix by default comes with a variant of consistent hashing based on the RUSH algorithm. </p> 
+  <p>This means given a number of partitions, replicas and number of nodes Helix does the automatic assignment of partition to nodes such that</p> 
+  <ul> 
+   <li>Each node has the same number of partitions and replicas of the same partition do not stay on the same node.</li> 
+   <li>When a node fails, the partitions will be equally distributed among the remaining nodes</li> 
+   <li>When new nodes are added, the number of partitions moved will be minimized along with satisfying the above two criteria.</li> 
+  </ul> 
+  <p>Helix provides multiple ways to control the placement and state of a replica. </p> 
+  <div class="source"> 
+   <pre>            |AUTO REBALANCE|   AUTO     |   CUSTOM  |       
+            -----------------------------------------
+   LOCATION | HELIX        |  APP       |  APP      |
+            -----------------------------------------
+      STATE | HELIX        |  HELIX     |  APP      |
+            -----------------------------------------
+</pre> 
+  </div> 
+  <div class="section"> 
+   <h4 id="HELIX_EXECUTION_MODE">HELIX EXECUTION MODE</h4> 
+   <p>Idealstate is defined as the state of the DDS when all nodes are up and running and healthy. Helix uses this as the target state of the system and computes the appropriate transitions needed in the system to bring it to a stable state. </p> 
+   <p>Helix supports 3 different execution modes which allow applications to explicitly control the placement and state of the replica.</p> 
+   <div class="section"> 
+    <h5 id="AUTO_REBALANCE">AUTO_REBALANCE</h5> 
+    <p>When the idealstate mode is set to AUTO_REBALANCE, Helix controls both the location of the replica along with the state. This option is useful for applications where creation of a replica is not expensive. Example</p> 
+    <div class="source"> 
+     <pre>{
+  &quot;id&quot; : &quot;MyResource&quot;,
+  &quot;simpleFields&quot; : {
+    &quot;IDEAL_STATE_MODE&quot; : &quot;AUTO_REBALANCE&quot;,
+    &quot;NUM_PARTITIONS&quot; : &quot;3&quot;,
+    &quot;REPLICAS&quot; : &quot;2&quot;,
+    &quot;STATE_MODEL_DEF_REF&quot; : &quot;MasterSlave&quot;,
+  },
+  &quot;listFields&quot; : {
+    &quot;MyResource_0&quot; : [],
+    &quot;MyResource_1&quot; : [],
+    &quot;MyResource_2&quot; : []
+  },
+  &quot;mapFields&quot; : {
+  }
+}
+</pre> 
+    </div> 
+    <p>If there are 3 nodes in the cluster, then Helix will internally compute the ideal state as </p> 
+    <div class="source"> 
+     <pre>{
+  &quot;id&quot; : &quot;MyResource&quot;,
+  &quot;simpleFields&quot; : {
+    &quot;NUM_PARTITIONS&quot; : &quot;3&quot;,
+    &quot;REPLICAS&quot; : &quot;2&quot;,
+    &quot;STATE_MODEL_DEF_REF&quot; : &quot;MasterSlave&quot;,
+  },
+  &quot;mapFields&quot; : {
+    &quot;MyResource_0&quot; : {
+      &quot;N1&quot; : &quot;MASTER&quot;,
+      &quot;N2&quot; : &quot;SLAVE&quot;,
+    },
+    &quot;MyResource_1&quot; : {
+      &quot;N2&quot; : &quot;MASTER&quot;,
+      &quot;N3&quot; : &quot;SLAVE&quot;,
+    },
+    &quot;MyResource_2&quot; : {
+      &quot;N3&quot; : &quot;MASTER&quot;,
+      &quot;N1&quot; : &quot;SLAVE&quot;,
+    }
+  }
+}
+</pre> 
+    </div> 
+    <p>Another typical example is evenly distributing a group of tasks among the currently alive processes. For example, if there are 60 tasks and 4 nodes, Helix assigns 15 tasks to each node. When one node fails Helix redistributes its 15 tasks to the remaining 3 nodes. Similarly, if a node is added, Helix re-allocates 3 tasks from each of the 4 nodes to the 5th node. </p> 
+   </div> 
+  </div> 
+  <div class="section"> 
+   <h4 id="AUTO">AUTO</h4> 
+   <p>When the idealstate mode is set to AUTO, Helix only controls the STATE of the replicas, whereas the location of the partition is controlled by the application. Example: The idealstate below indicates that ‘MyResource_0’ must be only on node1 and node2, but gives the control of assigning the STATE to Helix.</p> 
+   <div class="source"> 
+    <pre>{
+  &quot;id&quot; : &quot;MyResource&quot;,
+  &quot;simpleFields&quot; : {
+    &quot;IDEAL_STATE_MODE&quot; : &quot;AUTO&quot;,
+    &quot;NUM_PARTITIONS&quot; : &quot;3&quot;,
+    &quot;REPLICAS&quot; : &quot;2&quot;,
+    &quot;STATE_MODEL_DEF_REF&quot; : &quot;MasterSlave&quot;,
+  },
+  &quot;listFields&quot; : {
+    &quot;MyResource_0&quot; : [node1, node2],
+    &quot;MyResource_1&quot; : [node2, node3],
+    &quot;MyResource_2&quot; : [node3, node1]
+  },
+  &quot;mapFields&quot; : {
+  }
+}
+</pre> 
+   </div> 
+   <p>In this mode when node1 fails, unlike in AUTO_REBALANCE mode the partition is not moved from node1 to other nodes in the cluster. Instead, Helix will decide to change the state of MyResource_0 in N2 based on the system constraints. For example, if a system constraint specified that there should be 1 Master and if the Master failed, then node2 will be made the new master. </p> 
+  </div> 
+  <div class="section"> 
+   <h4 id="CUSTOM">CUSTOM</h4> 
+   <p>Helix offers a third mode called CUSTOM, in which the application can completely control the placement and state of each replica. Applications will have to implement an interface that Helix will invoke when the cluster state changes. Within this callback, the application can recompute the idealstate. Helix will then issue appropriate transitions such that Idealstate and Currentstate converge.</p> 
+   <div class="source"> 
+    <pre>{
+  &quot;id&quot; : &quot;MyResource&quot;,
+  &quot;simpleFields&quot; : {
+    &quot;IDEAL_STATE_MODE&quot; : &quot;CUSTOM&quot;,
+    &quot;NUM_PARTITIONS&quot; : &quot;3&quot;,
+    &quot;REPLICAS&quot; : &quot;2&quot;,
+    &quot;STATE_MODEL_DEF_REF&quot; : &quot;MasterSlave&quot;,
+  },
+  &quot;mapFields&quot; : {
+    &quot;MyResource_0&quot; : {
+      &quot;N1&quot; : &quot;MASTER&quot;,
+      &quot;N2&quot; : &quot;SLAVE&quot;,
+    },
+    &quot;MyResource_1&quot; : {
+      &quot;N2&quot; : &quot;MASTER&quot;,
+      &quot;N3&quot; : &quot;SLAVE&quot;,
+    },
+    &quot;MyResource_2&quot; : {
+      &quot;N3&quot; : &quot;MASTER&quot;,
+      &quot;N1&quot; : &quot;SLAVE&quot;,
+    }
+  }
+}
+</pre> 
+   </div> 
+   <p>For example, the current state of the system might be ‘MyResource_0’ -&gt; {N1:MASTER,N2:SLAVE} and the application changes the ideal state to ‘MyResource_0’ -&gt; {N1:SLAVE,N2:MASTER}. Helix will not blindly issue MASTER--&gt;SLAVE to N1 and SLAVE--&gt;MASTER to N2 in parallel since it might result in a transient state where both N1 and N2 are masters. Helix will first issue MASTER--&gt;SLAVE to N1 and, after it has completed, it will issue SLAVE--&gt;MASTER to N2. </p> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="State_Machine_Configuration">State Machine Configuration</h3> 
+  <p>Helix comes with 3 default state models that are most commonly used. It is possible to have multiple state models in a cluster. Every resource that is added should have a reference to the state model. </p> 
+  <ul> 
+   <li>MASTER-SLAVE: Has 3 states OFFLINE,SLAVE,MASTER. Max masters is 1. Slaves will be based on the replication factor. Replication factor can be specified while adding the resource</li> 
+   <li>ONLINE-OFFLINE: Has 2 states OFFLINE and ONLINE. Very simple state model and most applications start off with this state model.</li> 
+   <li>LEADER-STANDBY: 1 Leader and many standbys. In general the standbys are idle.</li> 
+  </ul> 
+  <p>Apart from providing the state machine configuration, one can specify the constraints of states and transitions.</p> 
+  <p>For example one can say Master:1. Max number of replicas in Master state at any time is 1. OFFLINE-SLAVE:5 Max number of Offline-Slave transitions that can happen concurrently in the system</p> 
+  <p>STATE PRIORITY Helix uses a greedy approach to satisfy the state constraints. For example if the state machine configuration says it needs 1 master and 2 slaves but only 1 node is active, Helix must promote it to master. This behavior is achieved by providing the state priority list as MASTER,SLAVE.</p> 
+  <p>STATE TRANSITION PRIORITY Helix tries to fire as many transitions as possible in parallel to reach the stable state without violating constraints. By default Helix simply sorts the transitions alphabetically and fires as many as it can without violating the constraints. One can control this by overriding the priority order.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Config_management">Config management</h3> 
+  <p>Helix allows applications to store application specific properties. The configuration can have different scopes.</p> 
+  <ul> 
+   <li>Cluster</li> 
+   <li>Node specific</li> 
+   <li>Resource specific</li> 
+   <li>Partition specific</li> 
+  </ul> 
+  <p>Helix also provides notifications when any configs are changed. This allows applications to support dynamic configuration changes.</p> 
+  <p>See HelixManager.getConfigAccessor for more info</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Intra_cluster_messaging_api">Intra cluster messaging api</h3> 
+  <p>This is an interesting feature which is quite useful in practice. Oftentimes, nodes in a DDS require a mechanism to interact with each other. One such requirement is a process of bootstrapping a replica.</p> 
+  <p>Consider a search system use case where the index replica starts up and it does not have an index. One of the commonly used solutions is to get the index from a common location or to copy the index from another replica. Helix provides a messaging api, that can be used to talk to other nodes in the system. The value added that Helix provides here is, message recipient can be specified in terms of resource, partition, state and Helix ensures that the message is delivered to all of the required recipients. In this particular use case, the instance can specify the recipient criteria as all replicas of P1. Since Helix is aware of the global state of the system, it can send the message to appropriate nodes. Once the nodes respond Helix provides the bootstrapping replica with all the responses.</p> 
+  <p>This is a very generic api and can also be used to schedule various periodic tasks in the cluster like data backups etc. System Admins can also perform ad hoc tasks like on-demand backup or execute a system command (like rm -rf ;-)) across all nodes.</p> 
+  <div class="source"> 
+   <pre>      ClusterMessagingService messagingService = manager.getMessagingService();
+      //CONSTRUCT THE MESSAGE
+      Message requestBackupUriRequest = new Message(
+          MessageType.USER_DEFINE_MSG, UUID.randomUUID().toString());
+      requestBackupUriRequest
+          .setMsgSubType(BootstrapProcess.REQUEST_BOOTSTRAP_URL);
+      requestBackupUriRequest.setMsgState(MessageState.NEW);
+      //SET THE RECIPIENT CRITERIA, All nodes that satisfy the criteria will receive the message
+      Criteria recipientCriteria = new Criteria();
+      recipientCriteria.setInstanceName(&quot;%&quot;);
+      recipientCriteria.setRecipientInstanceType(InstanceType.PARTICIPANT);
+      recipientCriteria.setResource(&quot;MyDB&quot;);
+      recipientCriteria.setPartition(&quot;&quot;);
+      //Should be processed only by the process that is active at the time of sending the message. 
+      //This means if the recipient is restarted after message is sent, it will not be processed.
+      recipientCriteria.setSessionSpecific(true);
+      // wait for 30 seconds
+      int timeout = 30000;
+      //The handler that will be invoked when any recipient responds to the message.
+      BootstrapReplyHandler responseHandler = new BootstrapReplyHandler();
+      //This will return only after all recipients respond or after timeout.
+      int sentMessageCount = messagingService.sendAndWait(recipientCriteria,
+          requestBackupUriRequest, responseHandler, timeout);
+</pre> 
+  </div> 
+  <p>See HelixManager.getMessagingService for more info.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Application_specific_property_storage">Application specific property storage</h3> 
+  <p>There are several use cases where applications need support for distributed data structures. Helix uses Zookeeper to store the application data and hence provides notifications when the data changes. One value-add Helix provides is the ability to cache the data and also to use a write-through cache. This is more efficient than reading from ZK every time.</p> 
+  <p>See HelixManager.getHelixPropertyStore</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Throttling">Throttling</h3> 
+  <p>Since all state changes in the system are triggered through transitions, Helix can control the number of transitions that can happen in parallel. Some of the transitions may be lightweight but some might involve moving data around, which is quite expensive. Helix allows applications to set thresholds on transitions. The threshold can be set at multiple scopes.</p> 
+  <ul> 
+   <li>MessageType e.g STATE_TRANSITION</li> 
+   <li>TransitionType e.g SLAVE-MASTER</li> 
+   <li>Resource e.g database</li> 
+   <li>Node i.e per node max transitions in parallel.</li> 
+  </ul> 
+  <p>See HelixManager.getHelixAdmin.addMessageConstraint() </p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Health_monitoring_and_alerting">Health monitoring and alerting</h3> 
+  <p>This is currently in development mode, not yet productionized.</p> 
+  <p>Helix provides the ability for each node in the system to report health metrics on a periodic basis. Helix supports multiple ways to aggregate these metrics like simple SUM, AVG, EXPONENTIAL DECAY, WINDOW. Helix will only persist the aggregated value. Applications can define thresholds on the aggregate values according to the SLAs and when the SLA is violated Helix will fire an alert. Currently Helix only fires an alert but eventually we plan to use these metrics to either mark the node dead or load balance the partitions. This feature will be valuable for distributed systems that support multi-tenancy and have huge variation in work load patterns. Another place this can be used is to detect skewed partitions and rebalance the cluster.</p> 
+  <p>This feature is not yet stable and is not recommended for use in production.</p> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Controller_deployment_modes">Controller deployment modes</h3> 
+  <p>Read Architecture wiki for more details on the Role of a controller. In simple words, it basically controls the participants in the cluster by issuing transitions.</p> 
+  <p>Helix provides multiple options to deploy the controller.</p> 
+  <div class="section"> 
+   <h4 id="STANDALONE">STANDALONE</h4> 
+   <p>Controller can be started as a separate process to manage a cluster. This is the recommended approach. However, since one controller can be a single point of failure, multiple controller processes are required for reliability. Even if multiple controllers are running, only one will be actively managing the cluster at any time, and it is decided by a leader election process. If the leader fails, another leader will resume managing the cluster.</p> 
+   <p>Even though we recommend this method of deployment, it has the drawback of having to manage an additional service for each cluster. See Controller As a Service option.</p> 
+  </div> 
+  <div class="section"> 
+   <h4 id="EMBEDDED">EMBEDDED</h4> 
+   <p>If setting up a separate controller process is not viable, then it is possible to embed the controller as a library in each of the participants. </p> 
+  </div> 
+  <div class="section"> 
+   <h4 id="CONTROLLER_AS_A_SERVICE">CONTROLLER AS A SERVICE</h4> 
+   <p>One of the cool features we added in Helix is the ability to use a set of controllers to manage a large number of clusters. For example, if you have X clusters to be managed, instead of deploying X*3 controllers (3 controllers per cluster for fault tolerance), one can deploy only 3 controllers. Each controller can manage X/3 clusters. If any controller fails, the remaining two will each manage X/2 clusters. At LinkedIn, we always deploy controllers in this mode. </p> 
+  </div> 
+ </div> 
+</div>
+			</div>
+		</div>
+	</div>
+	</div>
+
+	</div><!-- /container -->
+	
+	<!-- Footer
+	================================================== -->
+	<footer class="well">
+		<div class="container">
+			<div class="row">
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Get Helix</li>
+						<li>
+							<a href="download.html" title="Download">Download </a>
+						</li>
+						<li>
+							<a href="Building.html" title="Building">Building </a>
+						</li>
+						<li>
+							<a href="releasenotes/release-0.8.3.html" title="Release Notes">Release Notes </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Hands-On</li>
+						<li>
+							<a href="Quickstart.html" title="Quick Start">Quick Start </a>
+						</li>
+						<li>
+							<a href="Tutorial.html" title="Tutorial">Tutorial </a>
+						</li>
+						<li>
+							<a href="../javadocs/0.8.3" title="Javadocs">Javadocs </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Recipes</li>
+						<li>
+							<a href="recipes/lock_manager.html" title="Distributed lock manager">Distributed lock manager </a>
+						</li>
+						<li>
+							<a href="recipes/rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a>
+						</li>
+						<li>
+							<a href="recipes/rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a>
+						</li>
+						<li>
+							<a href="recipes/service_discovery.html" title="Service discovery">Service discovery </a>
+						</li>
+						<li>
+							<a href="recipes/task_dag_execution.html" title="Distributed task DAG execution">Distributed task DAG execution </a>
+						</li>
+					</ul>
+				</div>
+			</div>
+		</div>
+	</footer>
+		
+	<div class="container subfooter">
+		<div class="row">
+			<div class="span12">
+				<p class="pull-right"><a href="#">Back to top</a></p>
+				<p class="copyright">Copyright &copy;2018 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p>
+				<p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p>
+		
+<div class="row span16">
+  <div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation.
+        All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+  <a href="http://helix.apache.org/0.8.3-docs/privacy-policy.html">Privacy Policy</a>
+</div>			</div>
+		</div>
+	</div>
+
+	<!-- Le javascript
+	================================================== -->
+	<!-- Placed at the end of the document so the pages load faster -->
+
+	<!-- Fallback jQuery loading from Google CDN:
+	     http://stackoverflow.com/questions/1014203/best-way-to-use-googles-hosted-jquery-but-fall-back-to-my-hosted-library-on-go -->
+	<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+	<script type="text/javascript">
+		if (typeof jQuery == 'undefined')
+		{
+			document.write(unescape("%3Cscript src='./js/jquery-1.8.3.min.js' type='text/javascript'%3E%3C/script%3E"));
+		}
+	</script>
+	
+	<script src="./js/bootstrap.min.js"></script>
+	<script src="./js/lightbox.js"></script>
+	<script src="./js/jquery.smooth-scroll.min.js"></script>
+	<!-- back button support for smooth scroll -->
+	<script src="./js/jquery.ba-bbq.min.js"></script>
+
+	<script src="./js/reflow-skin.js"></script>
+	
+	</body>
+</html>
\ No newline at end of file

Added: helix/site-content/0.8.3-docs/JobExample.json
URL: http://svn.apache.org/viewvc/helix/site-content/0.8.3-docs/JobExample.json?rev=1847507&view=auto
==============================================================================
--- helix/site-content/0.8.3-docs/JobExample.json (added)
+++ helix/site-content/0.8.3-docs/JobExample.json Tue Nov 27 01:02:03 2018
@@ -0,0 +1,19 @@
+{
+  "id" : "TestJob",
+  "simpleFields": {
+    "JobID":"Job2",
+    "WorkflowID":"Workflow1"
+  },
+  "mapFields":{
+     "Task1" : {
+       "TASK_ID":"Task1",
+       "TASK_COMMAND":"Backup",
+       "TASK_TARGET_PARTITION":"p1"
+     },
+     "Task2" : { 
+       "TASK_ID":"Task2",
+       "TASK_COMMAND":"ReIndex",
+       "TASK_COMMAND":"ReIndex"
+  },
+  "listFields":{}
+}

Added: helix/site-content/0.8.3-docs/Metrics.html
URL: http://svn.apache.org/viewvc/helix/site-content/0.8.3-docs/Metrics.html?rev=1847507&view=auto
==============================================================================
--- helix/site-content/0.8.3-docs/Metrics.html (added)
+++ helix/site-content/0.8.3-docs/Metrics.html Tue Nov 27 01:02:03 2018
@@ -0,0 +1,845 @@
+
+<!DOCTYPE html>
+<!--
+ Generated by Apache Maven Doxia at 2018-11-26
+ Rendered using Maven Reflow Skin 1.0.0 (http://andriusvelykis.github.com/reflow-maven-skin)
+-->
+<html  xml:lang="en" lang="en">
+
+	<head>
+		<meta charset="UTF-8" />
+		<title>Apache Helix - Helix Monitoring Metrics</title>
+		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
+		<meta name="description" content="" />
+		<meta http-equiv="content-language" content="en" />
+
+		<link href="./css/bootstrap.min.css" rel="stylesheet" />
+		<link href="./css/bootstrap-responsive.min.css" rel="stylesheet" />
+		<link href="./css/docs.css" rel="stylesheet" />
+		<link href="./css/reflow-skin.css" rel="stylesheet" />
+		
+		
+		<link href="./css/lightbox.css" rel="stylesheet" />
+		
+		<link href="./css/site.css" rel="stylesheet" />
+		<link href="./css/print.css" rel="stylesheet" media="print" />
+		
+		<!-- Le HTML5 shim, for IE6-8 support of HTML5 elements -->
+		<!--[if lt IE 9]>
+			<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
+		<![endif]-->
+		<!-- Google Analytics: queues the account ID and a page-view command on _gaq, then injects ga.js asynchronously (ssl host on https pages, www host otherwise) ahead of the first script element -->
+<script type="text/javascript">var _gaq = _gaq || [];
+        _gaq.push(['_setAccount', 'UA-3211522-12']);
+        _gaq.push(['_trackPageview']);
+
+        (function() {
+        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+        })();</script>
+	</head>
+
+	<body class="page-metrics project-083-docs" data-spy="scroll" data-offset="60" data-target="#toc-scroll-target">
+
+		<div class="navbar navbar-fixed-top">
+			<div class="navbar-inner">
+				<div class="container">
+					<a class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+						<span class="icon-bar"></span>
+					</a>
+					<a class="brand" href="..">Apache Helix</a>
+					<div class="nav-collapse">
+						<ul class="nav pull-right">
+							<li><a href="index.html" title="Helix 0.8.3">Helix 0.8.3 </a></li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Get Helix <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="download.html" title="Download">Download </a></li>
+									<li><a href="Building.html" title="Building">Building </a></li>
+									<li><a href="releasenotes/release-0.8.3.html" title="Release Notes">Release Notes </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Hands-On <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="Quickstart.html" title="Quick Start">Quick Start </a></li>
+									<li><a href="Tutorial.html" title="Tutorial">Tutorial </a></li>
+									<li><a href="../javadocs/0.8.3" title="Javadocs">Javadocs </a></li>
+								</ul>
+							</li>
+							<li class="dropdown">
+								<a href="#" class="dropdown-toggle" data-toggle="dropdown">Recipes <b class="caret"></b></a>
+								<ul class="dropdown-menu">
+									<li><a href="recipes/lock_manager.html" title="Distributed lock manager">Distributed lock manager </a></li>
+									<li><a href="recipes/rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a></li>
+									<li><a href="recipes/rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a></li>
+									<li><a href="recipes/service_discovery.html" title="Service discovery">Service discovery </a></li>
+									<li><a href="recipes/task_dag_execution.html" title="Distributed task DAG execution">Distributed task DAG execution </a></li>
+								</ul>
+							</li>
+						</ul>
+					</div><!--/.nav-collapse -->
+				</div>
+			</div>
+		</div>
+		
+	<div class="container">
+	
+	<!-- Masthead
+	================================================== -->
+	<header class="jumbotron subhead">
+		<div class="row" id="banner">
+			<div class="span12">
+				<div class="pull-left">
+					<a href="../" id="bannerLeft"><img src="../images/helix-logo.jpg" alt="Apache Helix" /></a>
+					<p class="lead">A cluster management framework for partitioned and replicated distributed resources</p>
+				</div>
+				<div class="pull-right">
+					<a href="http://www.apache.org/" id="bannerRight"><img src="../images/feather_small.gif" alt="The Apache Software Foundation" /></a>
+				</div>
+			</div>
+		</div>
+		<div>
+			<ul class="breadcrumb">
+				<li><a href="../" title="Apache Helix">Apache Helix </a></li>
+				<li class="divider">/</li>
+				<li><a href="./" title="Release 0.8.3">Release 0.8.3 </a></li>
+				<li class="divider">/</li>
+				<li>Helix Monitoring Metrics</li>
+				<li class="publishDate version-date pull-right">Last Published: 2018-11-26</li>
+			</ul>
+		</div>
+	</header>
+
+	<div class="main-body">
+	<div class="row">
+		<div class="span12">
+			<div class="body-content">
+<!-- -
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License. --> 
+<div class="section"> 
+ <div class="page-header">
+  <h2 id="Helix_Monitoring_Metrics">Helix Monitoring Metrics</h2>
+ </div> 
+ <p>Helix monitoring metrics are exposed as MBean attributes. The MBeans are registered based on the instance role.</p> 
+ <p>The easiest way to see the available metrics is to point jconsole at a running Helix instance. This allows browsing all metrics with JMX.</p> 
+ <p>Note that unless the attribute name indicates otherwise, all attributes are gauges.</p> 
+ <div class="section"> 
+  <h3 id="Metrics_on_Both_Controller_and_Participant">Metrics on Both Controller and Participant</h3> 
+  <div class="section"> 
+   <h4 id="MBean_ZkClientMonitor">MBean ZkClientMonitor</h4> 
+   <p>ObjectName: “HelixZkClient:type=[client-type],key=[specified-client-key],PATH=[zk-client-listening-path]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>ReadCounter</td> 
+      <td>Zk read counter, which could be used to identify unusually high/low ZK traffic.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>WriteCounter</td> 
+      <td>Same as above</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>ReadBytesCounter</td> 
+      <td>Same as above</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>WriteBytesCounter</td> 
+      <td>Same as above</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>StateChangeEventCounter</td> 
+      <td>Zk connection state change counter, which could be used to identify an unstable ZkClient connection.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>DataChangeEventCounter</td> 
+      <td>Zk node data change counter, which could be used to identify unusually high/low ZK event occurrence or slow event processing.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>PendingCallbackGauge</td> 
+      <td>Number of the pending Zk callbacks.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>TotalCallbackCounter</td> 
+      <td>Number of total received Zk callbacks.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>TotalCallbackHandledCounter</td> 
+      <td>Number of total handled Zk callbacks.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ReadTotalLatencyCounter</td> 
+      <td>Total read latency in ms.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>WriteTotalLatencyCounter</td> 
+      <td>Total write latency in ms.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>WriteFailureCounter</td> 
+      <td>Total write failures.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>ReadFailureCounter</td> 
+      <td>Total read failures.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ReadLatencyGauge</td> 
+      <td>Histogram (with all statistic data) of read latency.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>WriteLatencyGauge</td> 
+      <td>Histogram (with all statistic data) of write latency.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ReadBytesGauge</td> 
+      <td>Histogram (with all statistic data) of read bytes of single Zk access.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>WriteBytesGauge</td> 
+      <td>Histogram (with all statistic data) of write bytes of single Zk access.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_HelixCallbackMonitor">MBean HelixCallbackMonitor</h4> 
+   <p>ObjectName: “HelixCallback:Type=[callback-type],Key=[cluster-name].[instance-name],Change=[callback-change-type]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>Counter</td> 
+      <td>Zk Callback counter for each Helix callback type.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>UnbatchedCounter</td> 
+      <td>Unbatched Zk Callback counter for each helix callback type.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>LatencyCounter</td> 
+      <td>Callback handler latency counter in ms.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>LatencyGauge</td> 
+      <td>Histogram (with all statistic data) of Callback handler latency.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_MessageQueueMonitor">MBean MessageQueueMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],messageQueue=[instance-name]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>MessageQueueBacklog</td> 
+      <td>Get the message queue size</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Metrics_on_Controller_only">Metrics on Controller only</h3> 
+  <div class="section"> 
+   <h4 id="MBean_ClusterStatusMonitor">MBean ClusterStatusMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>DisabledInstancesGauge</td> 
+      <td>Current number of disabled instances</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>DisabledPartitionsGauge</td> 
+      <td>Current number of disabled partitions</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>DownInstanceGauge</td> 
+      <td>Current down instances number</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>InstanceMessageQueueBacklog</td> 
+      <td>The sum of all message queue sizes for instances in this cluster</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>InstancesGauge</td> 
+      <td>Current live instances number</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MaxMessageQueueSizeGauge</td> 
+      <td>The maximum message queue size across all instances including controller</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>RebalanceFailureGauge</td> 
+      <td>Non-zero if the previous rebalance failed unexpectedly. The gauge is set every time a rebalance is done.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>RebalanceFailureCounter</td> 
+      <td>The number of failures during rebalance pipeline.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>Enabled</td> 
+      <td>1 if cluster is enabled, otherwise 0</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>Maintenance</td> 
+      <td>1 if cluster is in maintenance mode, otherwise 0</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>Paused</td> 
+      <td>1 if cluster is paused, otherwise 0</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_ClusterEventMonitor">MBean ClusterEventMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],eventName=ClusterEvent,phaseName=[event-handling-phase]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>TotalDurationCounter</td> 
+      <td>Total event process duration for each stage.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MaxSingleDurationGauge</td> 
+      <td>Max event process duration for each stage within the recent hour.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>EventCounter</td> 
+      <td>The count of processed events in each stage.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>DurationGauge</td> 
+      <td>Histogram (with all statistic data) of event process duration for each stage.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_InstanceMonitor">MBean InstanceMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],instanceName=[instance-name]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>Online</td> 
+      <td>This instance is Online (1) or Offline (0)</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>Enabled</td> 
+      <td>This instance is Enabled (1) or Disabled (0)</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>TotalMessageReceived</td> 
+      <td>Number of messages sent to this instance by controller</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>DisabledPartitions</td> 
+      <td>Get the total disabled partitions number for this instance</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_ResourceMonitor">MBean ResourceMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],resourceName=[resource-name]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>PartitionGauge</td> 
+      <td>Get number of partitions of the resource in best possible ideal state for this resource</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ErrorPartitionGauge</td> 
+      <td>Get the number of current partitions in ERROR state for this resource</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>DifferenceWithIdealStateGauge</td> 
+      <td>Get the number of replicas whose current state differs from the ideal state for this resource</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MissingTopStatePartitionGauge</td> 
+      <td>Get the number of partitions that do not have a top state for this resource</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>ExternalViewPartitionGauge</td> 
+      <td>Get number of partitions in ExternalView for this resource</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>TotalMessageReceived</td> 
+      <td>Get number of messages sent to this resource by controller</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>LoadRebalanceThrottledPartitionGauge</td> 
+      <td>Get number of partitions that need load rebalance but were throttled.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>RecoveryRebalanceThrottledPartitionGauge</td> 
+      <td>Get number of partitions that need recovery rebalance but were throttled.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>PendingLoadRebalancePartitionGauge</td> 
+      <td>Get number of partitions that have pending load rebalance requests.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>PendingRecoveryRebalancePartitionGauge</td> 
+      <td>Get number of partitions that have pending recovery rebalance requests.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>MissingReplicaPartitionGauge</td> 
+      <td>Get number of partitions that have replica number smaller than expected.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MissingMinActiveReplicaPartitionGauge</td> 
+      <td>Get number of partitions that have replica number smaller than the minimum requirement.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>MaxSinglePartitionTopStateHandoffDurationGauge</td> 
+      <td>Get the max duration recorded when the top state is missing in any single partition.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>FailedTopStateHandoffCounter</td> 
+      <td>Get the total number of failed top state transitions.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>SucceededTopStateHandoffCounter</td> 
+      <td>Get the total number of top state transitions completed successfully.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>SuccessfulTopStateHandoffDurationCounter</td> 
+      <td>Get the total duration of all top state transitions.</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>PartitionTopStateHandoffDurationGauge</td> 
+      <td>Histogram (with all statistic data) of top state transition duration.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_PerInstanceResourceMonitor">MBean PerInstanceResourceMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],instanceName=[instance-name],resourceName=[resource-name]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>PartitionGauge</td> 
+      <td>Get number of partitions of the resource in best possible ideal state for this resource on specific instance</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_JobMonitor">MBean JobMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],jobType=[job-type]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>SuccessfulJobCount</td> 
+      <td>Get number of the succeeded jobs</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>FailedJobCount</td> 
+      <td>Get number of failed jobs</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>AbortedJobCount</td> 
+      <td>Get number of the aborted jobs</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ExistingJobGauge</td> 
+      <td>Get number of existing jobs registered</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>QueuedJobGauge</td> 
+      <td>Get numbers of queued jobs, which are not running jobs</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>RunningJobGauge</td> 
+      <td>Get numbers of running jobs</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>MaximumJobLatencyGauge</td> 
+      <td>Get maximum latency of jobs running time. It will be cleared every hour</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>JobLatencyCount</td> 
+      <td>Get total job latency counter.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_WorkflowMonitor">MBean WorkflowMonitor</h4> 
+   <p>ObjectName: “ClusterStatus:cluster=[cluster-name],workflowType=[workflow-type]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>SuccessfulWorkflowCount</td> 
+      <td>Get number of succeeded workflows</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>FailedWorkflowCount</td> 
+      <td>Get number of failed workflows</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>FailedWorkflowGauge</td> 
+      <td>Get number of current failed workflows</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ExistingWorkflowGauge</td> 
+      <td>Get number of current existing workflows</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>QueuedWorkflowGauge</td> 
+      <td>Get number of queued but not started workflows</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>RunningWorkflowGauge</td> 
+      <td>Get number of running workflows</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>WorkflowLatencyCount</td> 
+      <td>Get workflow latency count</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MaximumWorkflowLatencyGauge</td> 
+      <td>Get maximum workflow latency gauge. It will be reset in 1 hour.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+ </div> 
+ <div class="section"> 
+  <h3 id="Metrics_on_Participant_only">Metrics on Participant only</h3> 
+  <div class="section"> 
+   <h4 id="MBean_StateTransitionStatMonitor">MBean StateTransitionStatMonitor</h4> 
+   <p>ObjectName: “CLMParticipantReport:Cluster=[cluster-name],Resource=[resource-name],Transition=[transaction-id]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>TotalStateTransitionGauge</td> 
+      <td>Get the number of total state transitions</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>TotalFailedTransitionGauge</td> 
+      <td>Get the number of total failed state transitions</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>TotalSuccessTransitionGauge</td> 
+      <td>Get the number of total succeeded state transitions</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MeanTransitionLatency</td> 
+      <td>Get the average state transition latency (from message read to finish)</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>MaxTransitionLatency</td> 
+      <td>Get the maximum state transition latency</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MinTransitionLatency</td> 
+      <td>Get the minimum state transition latency</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>PercentileTransitionLatency</td> 
+      <td>Get the percentile of state transitions latency</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MeanTransitionExecuteLatency</td> 
+      <td>Get the average execution latency of state transition (from task started to finish)</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>MaxTransitionExecuteLatency</td> 
+      <td>Get the maximum execution latency of state transition</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>MinTransitionExecuteLatency</td> 
+      <td>Get the minimum execution latency of state transition</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>PercentileTransitionExecuteLatency</td> 
+      <td>Get the percentile of execution latency of state transitions</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_ThreadPoolExecutorMonitor">MBean ThreadPoolExecutorMonitor</h4> 
+   <p>ObjectName: “HelixThreadPoolExecutor:Type=[threadpool-type]” (threadpool-type in Message.MessageType, BatchMessageExecutor, Task)</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>ThreadPoolCoreSizeGauge</td> 
+      <td>Thread pool size is as configured. Aggregate total thread pool size for the whole cluster.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>ThreadPoolMaxSizeGauge</td> 
+      <td>Same as above</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>NumOfActiveThreadsGauge</td> 
+      <td>Number of running threads.</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>QueueSizeGauge</td> 
+      <td>Queue size. Could be used to identify whether too many HelixTasks are blocked in the participant.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_MessageLatencyMonitor">MBean MessageLatencyMonitor</h4> 
+   <p>ObjectName: “CLMParticipantReport:ParticipantName=[instance-name],MonitorType=MessageLatencyMonitor”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>TotalMessageCount</td> 
+      <td>Total message count</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>TotalMessageLatency</td> 
+      <td>Total message latency in ms</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>MessagelatencyGauge</td> 
+      <td>Histogram (with all statistic data) of message processing latency.</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+  <div class="section"> 
+   <h4 id="MBean_ParticipantMessageMonitor">MBean ParticipantMessageMonitor</h4> 
+   <p>ObjectName: “CLMParticipantReport:ParticipantName=[instance-name]”</p> 
+   <table border="0" class="bodyTable table table-striped table-hover"> 
+    <thead> 
+     <tr class="a"> 
+      <th>Attributes</th> 
+      <th>Description</th> 
+     </tr> 
+    </thead> 
+    <tbody> 
+     <tr class="b"> 
+      <td>ReceivedMessages</td> 
+      <td>Number of received messages</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>DiscardedMessages</td> 
+      <td>Number of discarded messages</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>CompletedMessages</td> 
+      <td>Number of completed messages</td> 
+     </tr> 
+     <tr class="a"> 
+      <td>FailedMessages</td> 
+      <td>Number of failed messages</td> 
+     </tr> 
+     <tr class="b"> 
+      <td>PendingMessages</td> 
+      <td>Number of pending messages to be processed</td> 
+     </tr> 
+    </tbody> 
+   </table> 
+  </div> 
+ </div> 
+</div>
+			</div>
+		</div>
+	</div>
+	</div>
+
+	</div><!-- /container -->
+	
+	<!-- Footer
+	================================================== -->
+	<footer class="well">
+		<div class="container">
+			<div class="row">
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Get Helix</li>
+						<li>
+							<a href="download.html" title="Download">Download </a>
+						</li>
+						<li>
+							<a href="Building.html" title="Building">Building </a>
+						</li>
+						<li>
+							<a href="releasenotes/release-0.8.3.html" title="Release Notes">Release Notes </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Hands-On</li>
+						<li>
+							<a href="Quickstart.html" title="Quick Start">Quick Start </a>
+						</li>
+						<li>
+							<a href="Tutorial.html" title="Tutorial">Tutorial </a>
+						</li>
+						<li>
+							<a href="../javadocs/0.8.3" title="Javadocs">Javadocs </a>
+						</li>
+					</ul>
+				</div>
+				<div class="span3 bottom-nav">
+					<ul class="nav nav-list">
+						<li class="nav-header">Recipes</li>
+						<li>
+							<a href="recipes/lock_manager.html" title="Distributed lock manager">Distributed lock manager </a>
+						</li>
+						<li>
+							<a href="recipes/rabbitmq_consumer_group.html" title="Rabbit MQ consumer group">Rabbit MQ consumer group </a>
+						</li>
+						<li>
+							<a href="recipes/rsync_replicated_file_store.html" title="Rsync replicated file store">Rsync replicated file store </a>
+						</li>
+						<li>
+							<a href="recipes/service_discovery.html" title="Service discovery">Service discovery </a>
+						</li>
+						<li>
+							<a href="recipes/task_dag_execution.html" title="Distributed task DAG execution">Distributed task DAG execution </a>
+						</li>
+					</ul>
+				</div>
+			</div>
+		</div>
+	</footer>
+		
+	<div class="container subfooter">
+		<div class="row">
+			<div class="span12">
+				<p class="pull-right"><a href="#">Back to top</a></p>
+				<p class="copyright">Copyright &copy;2018 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All Rights Reserved.</p>
+				<p><a href="http://github.com/andriusvelykis/reflow-maven-skin" title="Reflow Maven skin">Reflow Maven skin</a> by <a href="http://andrius.velykis.lt" target="_blank" title="Andrius Velykis">Andrius Velykis</a>.</p>
+		
+<div class="row span16">
+  <div>Apache Helix, Apache, the Apache feather logo, and the Apache Helix project logos are trademarks of The Apache Software Foundation.
+        All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+  <a href="http://helix.apache.org/0.8.3-docs/privacy-policy.html">Privacy Policy</a>
+</div>			</div>
+		</div>
+	</div>
+
+	<!-- Le javascript
+	================================================== -->
+	<!-- Placed at the end of the document so the pages load faster -->
+
+	<!-- Fallback jQuery loading from Google CDN:
+	     http://stackoverflow.com/questions/1014203/best-way-to-use-googles-hosted-jquery-but-fall-back-to-my-hosted-library-on-go -->
+	<script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+	<script type="text/javascript">
+		if (typeof jQuery == 'undefined') // CDN copy did not load: fall back to the bundled jQuery
+		{
+			document.write("<script src='./js/jquery-1.8.3.min.js' type='text/javascript'><\/script>"); // "<\/" keeps the HTML parser from closing this block early
+		}
+	</script>
+	
+	<script src="./js/bootstrap.min.js"></script>
+	<script src="./js/lightbox.js"></script>
+	<script src="./js/jquery.smooth-scroll.min.js"></script>
+	<!-- back button support for smooth scroll -->
+	<script src="./js/jquery.ba-bbq.min.js"></script>
+
+	<script src="./js/reflow-skin.js"></script>
+	
+	</body>
+</html>
\ No newline at end of file



Mime
View raw message