helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jiajunw...@apache.org
Subject [helix] branch master updated: Adjust the auto rebalancer state assignment logic to reduce top state transition. (#986)
Date Wed, 06 May 2020 22:48:24 GMT
This is an automated email from the ASF dual-hosted git repository.

jiajunwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/master by this push:
     new a1aba60  Adjust the auto rebalancer state assignment logic to reduce top state transition.
(#986)
a1aba60 is described below

commit a1aba60a6701692f42ed650cae88827a26e93183
Author: Jiajun Wang <1803880+jiajunwang@users.noreply.github.com>
AuthorDate: Wed May 6 15:48:15 2020 -0700

    Adjust the auto rebalancer state assignment logic to reduce top state transition. (#986)
    
    The old state assignment logic assign the states to selected nodes according to the priority
of the current replica state that is on the instance. Moreover, the sorting algorithm is designed
to prioritize both current topstate and current secondary states equally. The result is that
we will have premature mastership handoff to a current seconardy state host before the real
desired master host is ready.
    For example,
    1. The current states are: [N1:M, N2:S, N3,S]
    2. The desired states are: [N4:M, N2:S, N1:S]
    3. Due to the sorting logic based on current states, we will have a transient preference
list ordered like: [N2, N1, N4]. In which case, the controller will assign master to N2 before
N4 has a slave state replica.
    4. When N4 finishes the Offline to Slave transition, the same sorting logic will sort
the preference list to be: [N4, N2, N1]. Then we have another mastership handoff.
    To be clear, we don't want step 3. But only the state transition in step 4.
    
    In this PR, we refactor the sorting logic so that it will only move the master whenever
the candidate has a "ready" state replica, in which case, only one mastership handoff happens.
---
 .../controller/rebalancer/AbstractRebalancer.java  | 78 +++++++++++-----------
 .../rebalancer/DelayedAutoRebalancer.java          |  2 +-
 .../rebalancer/TestAbstractRebalancer.java         |  6 +-
 ...bstractRebalancer.ComputeBestPossibleState.json | 27 ++++++++
 4 files changed, 71 insertions(+), 42 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
index 9416c7e..e85a2df 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java
@@ -305,45 +305,60 @@ public abstract class AbstractRebalancer<T extends BaseControllerDataProvider>
i
     Set<String> liveAndEnabled = new HashSet<>(liveInstances);
     liveAndEnabled.removeAll(disabledInstancesForPartition);
 
-    for (String state : statesPriorityList) {
-      // Use the the specially ordered preferenceList for choosing instance for top state.
-      if (state.equals(statesPriorityList.get(0))) {
-        List<String> preferenceListForTopState = new ArrayList<>(preferenceList);
-        Collections.sort(preferenceListForTopState,
-            new TopStatePreferenceListComparator(currentStateMap, stateModelDef));
-        preferenceList = preferenceListForTopState;
-      }
+    // Sort the instances based on replicas' state priority in the current state
+    List<String> sortedPreferenceList = new ArrayList<>(preferenceList);
+    sortedPreferenceList.sort(new StatePriorityComparator(currentStateMap, stateModelDef));
 
+    // Assign the state to the instances that appear in the preference list.
+    for (String state : statesPriorityList) {
       int stateCount =
           getStateCount(state, stateModelDef, liveAndEnabled.size(), preferenceList.size());
       for (String instance : preferenceList) {
         if (stateCount <= 0) {
-          break;
+          break; // continue assigning for the next state
+        }
+        if (assigned.contains(instance) || !liveAndEnabled.contains(instance)) {
+          continue; // continue checking for the next available instance
         }
-        if (!assigned.contains(instance) && liveAndEnabled.contains(instance)) {
-          if (HelixDefinedState.ERROR.toString().equals(currentStateMap.get(instance))) {
-            bestPossibleStateMap.put(instance, HelixDefinedState.ERROR.toString());
-          } else {
-            bestPossibleStateMap.put(instance, state);
-            stateCount--;
+        String proposedInstance = instance;
+        // Additional check and alternate the assignment for reducing top state handoff.
+        if (state.equals(stateModelDef.getTopState()) && !stateModelDef.getSecondTopStates()
+            .contains(currentStateMap.getOrDefault(instance, stateModelDef.getInitialState())))
{
+          // If the desired state is the top state, but the instance cannot be transited
to the
+          // top state in one hop, try to keep the top state on current host or a host with
a closer
+          // state.
+          for (String currentStatePrioritizedInstance : sortedPreferenceList) {
+            if (!assigned.contains(currentStatePrioritizedInstance) && liveAndEnabled
+                .contains(currentStatePrioritizedInstance)) {
+              proposedInstance = currentStatePrioritizedInstance;
+              break;
+            }
           }
-          assigned.add(instance);
+          // Note that if all the current top state instances are not assignable, then we
fallback
+          // to the default logic that assigning the state according to preference list order.
         }
+        // Assign the desired state to the proposed instance
+        if (HelixDefinedState.ERROR.toString().equals(currentStateMap.get(proposedInstance)))
{
+          bestPossibleStateMap.put(proposedInstance, HelixDefinedState.ERROR.toString());
+        } else {
+          bestPossibleStateMap.put(proposedInstance, state);
+          stateCount--;
+        }
+        assigned.add(proposedInstance);
       }
     }
-
     return bestPossibleStateMap;
   }
 
   /**
-   * Sorter for nodes that sorts according to the CurrentState of the partition. There are
only two priorities:
-   * (1) Top-state and second states have priority 0. (2) Other states(or no state) have
priority 1.
+   * Sorter for nodes that sorts according to the CurrentState of the partition.
    */
-  protected static class TopStatePreferenceListComparator implements Comparator<String>
{
-    protected final Map<String, String> _currentStateMap;
-    protected final StateModelDefinition _stateModelDef;
+  protected static class StatePriorityComparator implements Comparator<String> {
+    private final Map<String, String> _currentStateMap;
+    private final StateModelDefinition _stateModelDef;
 
-    public TopStatePreferenceListComparator(Map<String, String> currentStateMap, StateModelDefinition
stateModelDef) {
+    public StatePriorityComparator(Map<String, String> currentStateMap,
+        StateModelDefinition stateModelDef) {
       _currentStateMap = currentStateMap;
       _stateModelDef = stateModelDef;
     }
@@ -352,21 +367,8 @@ public abstract class AbstractRebalancer<T extends BaseControllerDataProvider>
i
     public int compare(String ins1, String ins2) {
       String state1 = _currentStateMap.get(ins1);
       String state2 = _currentStateMap.get(ins2);
-
-      String topState = _stateModelDef.getStatesPriorityList().get(0);
-      Set<String> preferredStates = new HashSet<String>(_stateModelDef.getSecondTopStates());
-      preferredStates.add(topState);
-
-      int p1 = 1;
-      int p2 = 1;
-
-      if (state1 != null && preferredStates.contains(state1)) {
-        p1 = 0;
-      }
-      if (state2 != null && preferredStates.contains(state2)) {
-        p2 = 0;
-      }
-
+      int p1 = state1 == null ? Integer.MAX_VALUE : _stateModelDef.getStatePriorityMap().get(state1);
+      int p2 = state2 == null ? Integer.MAX_VALUE : _stateModelDef.getStatePriorityMap().get(state2);
       return p1 - p2;
     }
   }
diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
index 53c1225..f4c95a6 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java
@@ -357,7 +357,7 @@ public class DelayedAutoRebalancer extends AbstractRebalancer<ResourceController
           .subList(0, subListSize <= instanceToAdd.size() ? subListSize : instanceToAdd.size()));
     }
 
-    // Make all intial state instance not in preference list to be dropped.
+    // Make all initial state instance not in preference list to be dropped.
     Map<String, String> currentMapWithPreferenceList = new HashMap<>(currentStateMap);
     currentMapWithPreferenceList.keySet().retainAll(preferenceList);
 
diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAbstractRebalancer.java
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAbstractRebalancer.java
index a3c2d3f..0886768 100644
--- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAbstractRebalancer.java
+++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAbstractRebalancer.java
@@ -49,12 +49,12 @@ public class TestAbstractRebalancer {
           .setCurrentState("test", partition, instance, currentStateMap.get(instance));
     }
     Map<String, String> bestPossibleMap = rebalancer
-        .computeBestPossibleStateForPartition(new HashSet<String>(liveInstances),
+        .computeBestPossibleStateForPartition(new HashSet<>(liveInstances),
             BuiltInStateModelDefinitions.valueOf(stateModelName).getStateModelDefinition(),
-            preferenceList, currentStateOutput, new HashSet<String>(disabledInstancesForPartition),
+            preferenceList, currentStateOutput, new HashSet<>(disabledInstancesForPartition),
             new IdealState("test"), new ClusterConfig("TestCluster"), partition);
 
-    Assert.assertTrue(bestPossibleMap.equals(expectedBestPossibleMap));
+    Assert.assertEquals(bestPossibleMap, expectedBestPossibleMap);
   }
 
   @DataProvider(name = "TestComputeBestPossibleStateInput")
diff --git a/helix-core/src/test/resources/TestAbstractRebalancer.ComputeBestPossibleState.json
b/helix-core/src/test/resources/TestAbstractRebalancer.ComputeBestPossibleState.json
index 16d7cf0..8aac5c8 100644
--- a/helix-core/src/test/resources/TestAbstractRebalancer.ComputeBestPossibleState.json
+++ b/helix-core/src/test/resources/TestAbstractRebalancer.ComputeBestPossibleState.json
@@ -171,5 +171,32 @@
       "node_2": "SLAVE",
       "node_3": "SLAVE"
     }
+  },
+  {
+    "comment": "Rebalancer switched master to a new instance. Before the new instance ready,
keep the master at the original node to avoid extra mastership handoff.",
+    "stateModel": "MasterSlave",
+    "liveInstances": [
+      "node_1",
+      "node_2",
+      "node_3",
+      "node_4"
+    ],
+    "preferenceList": [
+      "node_4",
+      "node_2",
+      "node_1"
+    ],
+    "currentStateMap": {
+      "node_1": "MASTER",
+      "node_2": "SLAVE",
+      "node_3": "SLAVE"
+    },
+    "disabledInstancesForPartition": [],
+    "expectedBestPossibleStateMap": {
+      "node_1": "MASTER",
+      "node_2": "SLAVE",
+      "node_3": "DROPPED",
+      "node_4": "SLAVE"
+    }
   }
 ]


Mime
View raw message