helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hu...@apache.org
Subject [helix] 21/44: Add util for checking per instance level health and partition level health
Date Sat, 25 May 2019 01:19:55 GMT
This is an automated email from the ASF dual-hosted git repository.

hulee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 5459e8f848df1e30110f1c3ec32f3e30ca813274
Author: Junkai Xue <jxue@linkedin.com>
AuthorDate: Wed Apr 10 17:18:30 2019 -0700

    Add util for checking per instance level health and partition level health
    
    The customized health check includes a user-customized per-instance check, which is
    isolated from other instances.
    
    In addition to the per-instance level check, the partition level check should have complete
    scope across the instances which hold sibling partitions. This partition check is to guarantee
    that, after shutting down the instance currently being checked, healthy replicas remain to
    hold the top state.
    
    RB=1627813
    BUG=HELIX-1776
    G=helix-reviewers
    A=hulee
    
    Signed-off-by: Hunter Lee <hulee@linkedin.com>
---
 .../apache/helix/util/InstanceValidationUtil.java  | 78 ++++++++----------
 .../rest/server/service/InstanceServiceImpl.java   | 16 ++++
 .../helix/rest/server/TestPerInstanceAccessor.java | 15 ----
 .../server/util/TestInstanceValidationUtil.java    | 92 ++++++++++++++++++++++
 4 files changed, 142 insertions(+), 59 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
index dba8f94..a9692d8 100644
--- a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
+++ b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java
@@ -19,11 +19,13 @@ package org.apache.helix.util;
  * under the License.
  */
 
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import java.util.stream.Collectors;
 import org.apache.helix.AccessOption;
 import org.apache.helix.ConfigAccessor;
 import org.apache.helix.HelixDataAccessor;
@@ -204,55 +206,43 @@ public class InstanceValidationUtil {
   }
 
   /**
-   * Check the overall health status for instance including:
-   * 1. Per instance health status with application customized key-value entries
-   * 2. Sibling partitions (replicas for same partition holding on different node
-   * health status for the entire cluster.
-   * @param configAccessor
-   * @param clustername
-   * @param hostName
-   * @param customizedInputs
+   * Perform sibling node partition health check
    * @param partitionHealthMap
    * @return
    */
-  public static boolean checkCustomizedHealthStatusForInstance(ConfigAccessor configAccessor,
-      String clustername, String hostName, Map<String, String> customizedInputs,
-      Map<String, Map<String, String>> partitionHealthMap, Map<String, String>
instanceHealthMap) {
-    boolean isHealthy = true;
-    RESTConfig restConfig = configAccessor.getRESTConfig(clustername);
-    // If user customized URL is not ready, return true as the check
-    if (restConfig == null || restConfig.getCustomizedHealthURL() == null) {
-      return isHealthy;
-    }
-    // TODO : 1. Call REST with customized URL
-    // 2. Parse mapping result with string -> boolean value and return out for per instance
-    // 3. Check sibling nodes for partition health
-    isHealthy =
-        perInstanceHealthCheck(instanceHealthMap) || perPartitionHealthCheck(partitionHealthMap);
-
-    return isHealthy;
-  }
-
-  /**
-   * Fetch the health map based on health type: per instance or per partition
-   * Accessor can used for fetching data from ZK for per partition level.
-   * @param URL
-   * @param accessor
-   * @param healthStatusType
-   * @return
-   */
-  public static Map<String, Map<String, String>> getHealthMapBasedOnType(String
URL,
-      HelixDataAccessor accessor, HealthStatusType healthStatusType) {
-    return null;
-  }
+  public static List<String> perPartitionHealthCheck(List<ExternalView> externalViews,
+      Map<String, Map<String, Boolean>> partitionHealthMap, String instanceName,
+      HelixDataAccessor accessor) {
+    List<String> unhealthyPartitions = new ArrayList<>();
+
+    for (ExternalView externalView : externalViews) {
+      StateModelDefinition stateModelDefinition = accessor
+          .getProperty(accessor.keyBuilder().stateModelDef(externalView.getStateModelDefRef()));
+      for (String partition : externalView.getPartitionSet()) {
+        Map<String, String> stateMap = externalView.getStateMap(partition);
+        // Only check if instance holds top state
+        if (stateMap.containsKey(instanceName) && stateMap.get(instanceName)
+            .equals(stateModelDefinition.getTopState())) {
+          for (String siblingInstance : stateMap.keySet()) {
+            // Skip this self check
+            if (siblingInstance.equals(instanceName)) {
+              continue;
+            }
 
-  protected static boolean perInstanceHealthCheck(Map<String, String> statusMap) {
-    return true;
-  }
+            // We are checking sibling partition healthy status. So if partition health does
not
+            // exist or it is not healthy. We should mark this partition is unhealthy.
+            if (!partitionHealthMap.containsKey(siblingInstance) || !partitionHealthMap
+                .get(siblingInstance).containsKey(partition)
+                || !partitionHealthMap.get(siblingInstance).get(partition)) {
+              unhealthyPartitions.add(partition);
+              break;
+            }
+          }
+        }
+      }
+    }
 
-  protected static boolean perPartitionHealthCheck(
-      Map<String, Map<String, String>> partitionHealthMap) {
-    return true;
+    return unhealthyPartitions;
   }
 
   /**
diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java b/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java
index ff12678..1ad4069 100644
--- a/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java
+++ b/helix-rest/src/main/java/org/apache/helix/rest/server/service/InstanceServiceImpl.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import java.util.stream.Collectors;
 import org.apache.helix.ConfigAccessor;
 import org.apache.helix.HelixDataAccessor;
 import org.apache.helix.HelixException;
@@ -155,4 +156,19 @@ public class InstanceServiceImpl implements InstanceService {
         StoppableCheck.mergeStoppableChecks(helixStoppableCheck, customStoppableCheck);
     return stoppableCheck;
   }
+
+  /**
+   * Perform customized single instance health check map filtering
+   *
+   * Input map is user customized health out put. It will be HEALTH_ENTRY_KEY -> true/false
+   * @param statusMap
+   * @return
+   */
+  private Map<String, Boolean> perInstanceHealthCheck(Map<String, Boolean> statusMap)
{
+    if (statusMap != null && !statusMap.isEmpty()) {
+      statusMap = statusMap.entrySet().stream().filter(entry -> !entry.getValue())
+          .collect(Collectors.toMap(map -> map.getKey(), map -> map.getValue()));
+    }
+    return statusMap;
+  }
 }
diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java
index 199c82b..cbc80ee 100644
--- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java
+++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java
@@ -391,19 +391,4 @@ public class TestPerInstanceAccessor extends AbstractTestClass {
         .expectedReturnStatusCode(Response.Status.NOT_FOUND.getStatusCode())
         .format(CLUSTER_NAME, instanceName).post(this, entity);
   }
-
-  @Test(dependsOnMethods = "checkUpdateFails")
-  public void testCustomizedChecks() {
-    // TODO: This is fake testing. Only validate it returns true value of this function.
-    // For future, we need test: 1. mock the input of per participant API result to test
validate logic
-    //                           2. mock the input of per partition API result to test the
sibling
-    //                              check logic
-    System.out.println("Start test :" + TestHelper.getTestMethodName());
-    String instanceName = "TestInstance";
-    Assert.assertTrue(InstanceValidationUtil
-        .checkCustomizedHealthStatusForInstance(_configAccessor, CLUSTER_NAME, instanceName,
-            Collections.EMPTY_MAP, Collections.EMPTY_MAP, Collections.EMPTY_MAP));
-  }
-
-
 }
diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtil.java b/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtil.java
new file mode 100644
index 0000000..cbcd034
--- /dev/null
+++ b/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtil.java
@@ -0,0 +1,92 @@
+package org.apache.helix.rest.server.util;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.PropertyKey;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.MasterSlaveSMD;
+import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.util.InstanceValidationUtil;
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.*;
+
+public class TestInstanceValidationUtil{
+  private static final String RESOURCE_NAME = "TestResource";
+  private static final String TEST_CLUSTER = "TestCluster";
+
+  @Test
+  public void testPartitionLevelCheck() {
+    List<ExternalView> externalViews = new ArrayList<>(Arrays.asList(prepareExternalView()));
+    Mock mock = new Mock();
+    HelixDataAccessor accessor = mock.dataAccessor;
+
+    when(mock.dataAccessor.keyBuilder())
+        .thenReturn(new PropertyKey.Builder(TEST_CLUSTER));
+    when(mock.dataAccessor
+        .getProperty(new PropertyKey.Builder(TEST_CLUSTER).stateModelDef(MasterSlaveSMD.name)))
+        .thenReturn(mock.stateModel);
+    when(mock.stateModel.getTopState()).thenReturn("MASTER");
+    List<String> failedPartitions = InstanceValidationUtil
+        .perPartitionHealthCheck(externalViews, preparePartitionStateMap(), "h2", accessor);
+
+    Assert.assertTrue(failedPartitions.size() == 1);
+    Assert.assertEquals(failedPartitions.iterator().next(), "p2");
+  }
+
+  private ExternalView prepareExternalView() {
+    ExternalView externalView = new ExternalView(RESOURCE_NAME);
+    externalView.getRecord()
+        .setSimpleField(ExternalView.ExternalViewProperty.STATE_MODEL_DEF_REF.toString(),
+            MasterSlaveSMD.name);
+    externalView.setState("p1", "h1", "MASTER");
+    externalView.setState("p1", "h2", "SLAVE");
+    externalView.setState("p1", "h3", "SLAVE");
+
+    externalView.setState("p2", "h1", "SLAVE");
+    externalView.setState("p2", "h2", "MASTER");
+    externalView.setState("p2", "h3", "SLAVE");
+
+    externalView.setState("p3", "h1", "SLAVE");
+    externalView.setState("p3", "h2", "MASTER");
+    externalView.setState("p3", "h3", "SLAVE");
+
+    return externalView;
+  }
+
+  private Map<String, Map<String, Boolean>> preparePartitionStateMap() {
+    Map<String, Map<String, Boolean>> partitionStateMap = new HashMap<>();
+    partitionStateMap.put("h1", new HashMap<>());
+    partitionStateMap.put("h2", new HashMap<>());
+    partitionStateMap.put("h3", new HashMap<>());
+
+    // h1 holds master for p1 is unhealthy should not impact decision of shut down h2
+    // But h2 holds master for p2, shutdown h2 may cause unhealthy master on h3.
+    partitionStateMap.get("h1").put("p1", false);
+    partitionStateMap.get("h1").put("p2", true);
+    partitionStateMap.get("h1").put("p3", true);
+
+    partitionStateMap.get("h2").put("p1", true);
+    partitionStateMap.get("h2").put("p2", true);
+    partitionStateMap.get("h2").put("p3", true);
+
+    partitionStateMap.get("h3").put("p1", true);
+    partitionStateMap.get("h3").put("p2", false);
+    partitionStateMap.get("h3").put("p3", true);
+
+    return partitionStateMap;
+  }
+
+  private final class Mock {
+    private HelixDataAccessor dataAccessor = mock(HelixDataAccessor.class);
+    private StateModelDefinition stateModel = mock(StateModelDefinition.class);
+
+    Mock() {
+    }
+  }
+}


Mime
View raw message