helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jiajunw...@apache.org
Subject [helix] branch master updated: Don't output the best possible state calculate result if the result is not valid. (#1359)
Date Tue, 15 Sep 2020 17:42:22 GMT
This is an automated email from the ASF dual-hosted git repository.

jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new 67f4937  Don't output the best possible state calculate result if the result is not
valid. (#1359)
67f4937 is described below

commit 67f493760aa6d598dd0242996fe9b79c37085726
Author: Jiajun Wang <jjwang@linkedin.com>
AuthorDate: Tue Sep 15 10:42:16 2020 -0700

    Don't output the best possible state calculate result if the result is not valid. (#1359)
    
    The invalid result may mislead the following stages in the pipeline and cause problems.
For example, this change fixes the issue that resource monitor rebalance state may be inaccurately
reported.
---
 .../stages/BestPossibleStateCalcStage.java          | 21 ++++++++++++---------
 .../integration/TestAlertingRebalancerFailure.java  | 20 ++++++++++++++------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
index bca7a37..1c0d63a 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java
@@ -347,36 +347,39 @@ public class BestPossibleStateCalcStage extends AbstractBaseStage {
       LogUtil.logError(logger, _eventId, "Error computing assignment for resource " + resourceName
           + ". no rebalancer found. rebalancer: " + rebalancer + " mappingCalculator: "
           + mappingCalculator);
-    }
-
-    if (rebalancer != null && mappingCalculator != null) {
-      ResourceAssignment partitionStateAssignment = null;
+    } else {
       try {
         HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
         rebalancer.init(manager);
         idealState =
             rebalancer.computeNewIdealState(resourceName, idealState, currentStateOutput,
cache);
 
-        output.setPreferenceLists(resourceName, idealState.getPreferenceLists());
+        // Check if calculation is done successfully
+        if (!checkBestPossibleStateCalculation(idealState)) {
+          LogUtil.logWarn(logger, _eventId,
+              "The calculated idealState is not valid, resource: " + resourceName);
+          return false;
+        }
 
         // Use the internal MappingCalculator interface to compute the final assignment
         // The next release will support rebalancers that compute the mapping from start
to finish
-        partitionStateAssignment = mappingCalculator
+        ResourceAssignment partitionStateAssignment = mappingCalculator
             .computeBestPossiblePartitionState(cache, idealState, resource, currentStateOutput);
 
         if (partitionStateAssignment == null) {
           LogUtil.logWarn(logger, _eventId,
-              "PartitionStateAssignment is null, resource: " + resourceName);
+              "The calculated partitionStateAssignment is null, resource: " + resourceName);
           return false;
         }
 
+        // Set the calculated result to the output.
+        output.setPreferenceLists(resourceName, idealState.getPreferenceLists());
         for (Partition partition : resource.getPartitions()) {
           Map<String, String> newStateMap = partitionStateAssignment.getReplicaMap(partition);
           output.setState(resourceName, partition, newStateMap);
         }
 
-        // Check if calculation is done successfully
-        return checkBestPossibleStateCalculation(idealState);
+        return true;
       } catch (HelixException e) {
         // No eligible instance is found.
         LogUtil.logError(logger, _eventId, e.getMessage());
diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
b/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
index be5ea37..4caba32 100644
--- a/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
+++ b/helix-core/src/test/java/org/apache/helix/integration/TestAlertingRebalancerFailure.java
@@ -43,6 +43,7 @@ import org.apache.helix.model.ClusterConfig;
 import org.apache.helix.model.IdealState;
 import org.apache.helix.model.IdealState.RebalanceMode;
 import org.apache.helix.model.InstanceConfig;
+import org.apache.helix.model.builder.FullAutoModeISBuilder;
 import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
 import org.apache.helix.monitoring.mbeans.ResourceMonitor;
 import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier;
@@ -118,18 +119,25 @@ public class TestAlertingRebalancerFailure extends ZkStandAloneCMTestBase
{
 
   @Test
   public void testParticipantUnavailable() throws Exception {
-    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
-        BuiltInStateModelDefinitions.MasterSlave.name(), RebalanceMode.FULL_AUTO.name());
+    IdealState idealState = new FullAutoModeISBuilder(testDb)
+        .setStateModel(BuiltInStateModelDefinitions.MasterSlave.name())
+        .setStateModelFactoryName("DEFAULT").setNumPartitions(5).setNumReplica(3)
+        .setRebalancerMode(IdealState.RebalanceMode.FULL_AUTO)
+        .setRebalancerClass("org.apache.helix.controller.rebalancer.DelayedAutoRebalancer")
+        .setRebalanceStrategy(
+            "org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy").build();
+
+    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, idealState);
     _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, 3);
-    ZkHelixClusterVerifier verifier = new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME)
-        .setZkClient(_gZkClient).setResources(new HashSet<>(Collections.singleton(testDb))).build();
+    ZkHelixClusterVerifier verifier =
+        new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkClient(_gZkClient)
+            .setResources(new HashSet<>(Collections.singleton(testDb))).build();
     Assert.assertTrue(verifier.verifyByPolling());
 
     // disable then enable the resource to ensure no rebalancing error is generated during
this
     // process
     _gSetupTool.dropResourceFromCluster(CLUSTER_NAME, testDb);
-    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, 5,
-        BuiltInStateModelDefinitions.MasterSlave.name(), RebalanceMode.FULL_AUTO.name());
+    _gSetupTool.addResourceToCluster(CLUSTER_NAME, testDb, idealState);
     _gSetupTool.rebalanceStorageCluster(CLUSTER_NAME, testDb, 3);
     Assert.assertTrue(verifier.verifyByPolling());
 


Mime
View raw message