Repository: helix
Updated Branches:
refs/heads/master 43555ff0d -> 45009e2c4
Fix unstable TestControllerLeadershipChange
Project: http://git-wip-us.apache.org/repos/asf/helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/helix/commit/45009e2c
Tree: http://git-wip-us.apache.org/repos/asf/helix/tree/45009e2c
Diff: http://git-wip-us.apache.org/repos/asf/helix/diff/45009e2c
Branch: refs/heads/master
Commit: 45009e2c48289fce825075e276aebc064ef195c2
Parents: 43555ff
Author: Harry Zhang <hrzhang@linkedin.com>
Authored: Thu Nov 1 17:50:09 2018 -0700
Committer: Harry Zhang <hrzhang@linkedin.com>
Committed: Fri Nov 2 10:58:07 2018 -0700
----------------------------------------------------------------------
.../TestControllerLeadershipChange.java | 44 ++++++++++++--------
1 file changed, 26 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/helix/blob/45009e2c/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
index 6c0236f..f497894 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestControllerLeadershipChange.java
@@ -33,6 +33,8 @@ import org.apache.helix.integration.manager.MockParticipantManager;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.LiveInstance;
import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
+import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
+import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -52,6 +54,10 @@ public class TestControllerLeadershipChange extends ZkTestBase {
// Create cluster
_gSetupTool.addCluster(clusterName, true);
+ // Create cluster verifier
+ ZkHelixClusterVerifier clusterVerifier =
+ new BestPossibleExternalViewVerifier.Builder(clusterName).setZkClient(_gZkClient).build();
+
// Create participant
_gSetupTool.addInstanceToCluster(clusterName, instanceName);
MockParticipantManager participant =
@@ -72,20 +78,20 @@ public class TestControllerLeadershipChange extends ZkTestBase {
// Rebalance Resource
_gSetupTool
.rebalanceResource(clusterName, resourceName, numReplica);
+
// Wait for rebalance
- Thread.sleep(2000);
+ Assert.assertTrue(clusterVerifier.verifyByPolling());
// Trigger missing top state in manager1
participant.syncStop();
- Thread.sleep(2000);
+ Thread.sleep(1000);
// Starting manager2
HelixManager manager2 = HelixManagerFactory
.getZKHelixManager(clusterName, clusterName + "-manager2", InstanceType.CONTROLLER,
ZK_ADDR);
manager2.connect();
- Assert.assertFalse(manager2.isLeader());
// Set leader to manager2
setLeader(manager2);
@@ -93,30 +99,33 @@ public class TestControllerLeadershipChange extends ZkTestBase {
Assert.assertFalse(manager1.isLeader());
Assert.assertTrue(manager2.isLeader());
- // Make resource top state to come back
- participant = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
- participant.syncStart();
-
// Wait for rebalance
- Thread.sleep(2000);
+ Assert.assertTrue(clusterVerifier.verify());
+
+ Thread.sleep(1000);
setLeader(manager1);
Assert.assertTrue(manager1.isLeader());
Assert.assertFalse(manager2.isLeader());
+ // Make resource top state to come back by restarting participant
+ participant = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
+ participant.syncStart();
+
+
_gSetupTool.rebalanceResource(clusterName, resourceName, numReplica);
- // Wait for manager1 to update
- Thread.sleep(2000);
+ Assert.assertTrue(clusterVerifier.verifyByPolling());
- // Resource lost top state, and manager1 lost leadership for 4000ms, because manager1 will
+ // Resource lost top state, and manager1 lost leadership for 2000ms, because manager1 will
// clean monitoring cache after re-gaining leadership, so max value of hand off duration should
// not have such a large value
Assert.assertTrue((long) beanServer
.getAttribute(resourceMBeanObjectName, "PartitionTopStateHandoffDurationGauge.Max")
< 500);
+
}
- private void setLeader(HelixManager manager) {
+ private void setLeader(HelixManager manager) throws Exception {
System.out.println("Setting controller " + manager.getInstanceName() + " as leader");
HelixDataAccessor accessor = manager.getHelixDataAccessor();
final LiveInstance leader = new LiveInstance(manager.getInstanceName());
@@ -125,12 +134,11 @@ public class TestControllerLeadershipChange extends ZkTestBase {
leader.setHelixVersion(manager.getVersion());
// Delete the current controller leader node so it will trigger leader election
- accessor.getBaseDataAccessor().remove(PropertyPathBuilder.controllerLeader(manager.getClusterName()), AccessOption.EPHEMERAL);
-
- // No matter who gets leadership, force the given manager to become leader
- // Note there is theoretically a racing condition that GenericHelixController.onControllerChange()
- // will not catch this new value when it's double checking leadership, but it's stable enough
- accessor.getBaseDataAccessor().set(PropertyPathBuilder.controllerLeader(manager.getClusterName()), leader.getRecord(), AccessOption.EPHEMERAL);
+ while (!manager.isLeader()) {
+ accessor.getBaseDataAccessor()
+ .remove(PropertyPathBuilder.controllerLeader(manager.getClusterName()), AccessOption.EPHEMERAL);
+ Thread.sleep(50);
+ }
}
private ObjectName getResourceMonitorObjectName(String clusterName, String resourceName)
|