This is an automated email from the ASF dual-hosted git repository.
jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git
The following commit(s) were added to refs/heads/master by this push:
new 67f4937 Don't output the best possible state calculate result if the result is not
valid. (#1359)
67f4937 is described below
commit 67f493760aa6d598dd0242996fe9b79c37085726
Author: Jiajun Wang <jjwang@linkedin.com>
AuthorDate: Tue Sep 15 10:42:16 2020 -0700
Don't output the best possible state calculate result if the result is not valid. (#1359)
The invalid result may mislead the following stages in the pipeline and cause problems.
For example, this change fixes the issue that resource monitor rebalance state may be inaccurately
reported.
---
.../stages/BestPossibleStateCalcStage.java | 21 ++++++++++++---------
.../integration/TestAlertingRebalancerFailure.java | 20 ++++++++++++++------
2 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index bca7a37..1c0d63a 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -347,36 +347,39 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
LogUtil.logError(logger, _eventId, "Error computing assignment for resource " + resourceName
+ ". no rebalancer found. rebalancer: " + rebalancer + " mappingCalculator: "
+ mappingCalculator);
- }
-
- if (rebalancer != null && mappingCalculator != null) {
- ResourceAssignment partitionStateAssignment = null;
+ } else {
try {
HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
rebalancer.init(manager);
idealState =
rebalancer.computeNewIdealState(resourceName, idealState, currentStateOutput,
cache);
- output.setPreferenceLists(resourceName, idealState.getPreferenceLists());
+ // Check if calculation is done successfully
+ if (!checkBestPossibleStateCalculation(idealState)) {
+ LogUtil.logWarn(logger, _eventId,
+ "The calculated idealState is not valid, resource: " + resourceName);
+ return false;
+ }
// Use the internal MappingCalculator interface to compute the final assignment
// The next release will support rebalancers that compute the mapping from start
to finish
- partitionStateAssignment = mappingCalculator
+ ResourceAssignment partitionStateAssignment = mappingCalculator
.computeBestPossiblePartitionState(cache, idealState, resource, currentStateOutput);
if (partitionStateAssignment == null) {
LogUtil.logWarn(logger, _eventId,
- "PartitionStateAssignment is null, resource: " + resourceName);
+ "The calculated partitionStateAssignment is null, resource: " + resourceName);
return false;
}
+ // Set the calculated result to the output.
+ output.setPreferenceLists(resourceName, idealState.getPreferenceLists());
for (Partition partition : resource.getPartitions()) {
Map<String, String> newStateMap = partitionStateAssignment.getReplicaMap(partition);
output.setState(resourceName, partition, newStateMap);
}
- // Check if calculation is done successfully
- return checkBestPossibleStateCalculation(idealState);
+ return true;
} catch (HelixException e) {
// No eligible instance is found.
LogUtil.logError(logger, _eventId, e.getMessage());
diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
b/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
index be5ea37..4caba32 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
@@ -43,6 +43,7 @@ import org.apache.helix.model.ClusterConfig;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.IdealState.RebalanceMode;
import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.builder.FullAutoModeISBuilder;
import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
import org.apache.helix.monitoring.mbeans.ResourceMonitor;
import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
@@ -118,18 +119,25 @@ public class TestAlertingRebalancerFailure extends ZkStandAloneCMTestBase
{
@Test
public void testParticipantUnavailable() throws Exception {
- _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
- BuiltInStateModelDefinitions.MasterSlave.name(), RebalanceMode.FULL_AUTO.name());
+ IdealState idealState = new FullAutoModeISBuilder(testDb)
+ .setStateModel(BuiltInStateModelDefinitions.MasterSlave.name())
+ .setStateModelFactoryName("DEFAULT").setNumPartitions(5).setNumReplica(3)
+ .setRebalancerMode(IdealState.RebalanceMode.FULL_AUTO)
+ .setRebalancerClass("org.apache.helix.controller.rebalancer.DelayedAutoRebalancer")
+ .setRebalanceStrategy(
+ "org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy").build();
+
+ _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, idealState);
_gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, 3);
- ZkHelixClusterVerifier verifier = new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME)
- .setZkClient(_gZkClient).setResources(new HashSet<>(Collections.singleton(testDb))).build();
+ ZkHelixClusterVerifier verifier =
+ new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient)
+ .setResources(new HashSet<>(Collections.singleton(testDb))).build();
Assert.assertTrue(verifier.verifyByPolling());
// disable then enable the resource to ensure no rebalancing error is generated during
this
// process
_gSetupTool.dropResourceFromCluster(CLUSTER_NAME, testDb);
- _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
- BuiltInStateModelDefinitions.MasterSlave.name(), RebalanceMode.FULL_AUTO.name());
+ _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, idealState);
_gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, 3);
Assert.assertTrue(verifier.verifyByPolling());
|