helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From h...@apache.org
Subject [helix] 01/13: Add java api for enable/disable cluster pause mode (#1740)
Date Fri, 16 Jul 2021 21:03:08 GMT
This is an automated email from the ASF dual-hosted git repository.

hzlu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/helix.git

commit 113c9335761fbe34f10914003c1e529c91d68527
Author: Huizhi Lu <5187721+huizhilu@users.noreply.github.com>
AuthorDate: Wed May 19 15:45:11 2021 -0700

    Add java api for enable/disable cluster pause mode (#1740)
    
    Cluster pause mode feature is going to be added. This commit adds java api to set cluster pause mode: void setClusterManagementMode(ClusterManagementModeRequest request);
---
 .../src/main/java/org/apache/helix/HelixAdmin.java |  14 +++
 .../api/exceptions/HelixConflictException.java     |  33 +++++++
 .../helix/api/status/ClusterManagementMode.java    |  70 ++++++++++++++
 .../api/status/ClusterManagementModeRequest.java   | 102 +++++++++++++++++++++
 .../org/apache/helix/manager/zk/ZKHelixAdmin.java  |  80 +++++++++++++++-
 .../java/org/apache/helix/model/LiveInstance.java  |  28 +++++-
 .../java/org/apache/helix/model/PauseSignal.java   |  42 ++++++++-
 .../apache/helix/manager/zk/TestZkHelixAdmin.java  |  70 ++++++++++++++
 .../java/org/apache/helix/mock/MockHelixAdmin.java |   6 ++
 .../org/apache/helix/model/TestLiveInstance.java   |   8 ++
 10 files changed, 449 insertions(+), 4 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
index 3c456bb..d403571 100644
--- a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java
@@ -37,6 +37,8 @@ import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.ResourceConfig;
 import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.api.status.ClusterManagementMode;
+import org.apache.helix.api.status.ClusterManagementModeRequest;
 
 /*
  * Helix cluster management
@@ -368,6 +370,18 @@ public interface HelixAdmin {
   boolean isInMaintenanceMode(String clusterName);
 
   /**
+   * Requests to put a cluster into a management mode
+   * {@link ClusterManagementMode.Type}. When this method returns,
+   * it means the signal has been successfully sent, but it does not mean the cluster has
+   * fully entered the mode. Because the cluster can take some time to complete the request.
+   * <p>
+   * To check the cluster management mode status, call {@link #getClusterManagementMode(String)}.
+   *
+   * @param request request to set the cluster management mode. {@link ClusterManagementModeRequest}
+   */
+  void setClusterManagementMode(ClusterManagementModeRequest request);
+
+  /**
    * Reset a list of partitions in error state for an instance
    * The partitions are assume to be in error state and reset will bring them from error
    * to initial state. An error to initial state transition is required for reset.
diff --git a/helix-core/src/main/java/org/apache/helix/api/exceptions/HelixConflictException.java b/helix-core/src/main/java/org/apache/helix/api/exceptions/HelixConflictException.java
new file mode 100644
index 0000000..c626155
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/api/exceptions/HelixConflictException.java
@@ -0,0 +1,33 @@
+package org.apache.helix.api.exceptions;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.HelixException;
+
+/**
+ * Occurs when a conflict with a previous successful write is detected. This generally occurs when
+ * a write request is conflicted with the existing data. Eg. if a cluster is already in cluster
+ * pause mode, a request of enabling maintenance mode is a conflict.
+ */
+public class HelixConflictException extends HelixException {
+  public HelixConflictException(String message) {
+    super(message);
+  }
+}
diff --git a/helix-core/src/main/java/org/apache/helix/api/status/ClusterManagementMode.java b/helix-core/src/main/java/org/apache/helix/api/status/ClusterManagementMode.java
new file mode 100644
index 0000000..d7a1637
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/api/status/ClusterManagementMode.java
@@ -0,0 +1,70 @@
+package org.apache.helix.api.status;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Represents the management mode of the cluster:
+ * 1. what type of mode it targets to be;
+ * 2. what progress status it is now.
+ */
+public class ClusterManagementMode {
+    /** Represents the type of management mode the cluster targets to be in */
+    public enum Type {
+        /** Cluster is not in any pause or maintenance mode */
+        NORMAL,
+
+        /**
+         * Puts a cluster into pause mode, which will pause controller and participants.
+         * This can be used to retain the cluster state.
+         */
+        CLUSTER_PAUSE,
+
+        /** Pause controller only, but not participants. */
+        CONTROLLER_PAUSE,
+
+        /** Put cluster into maintenance mode. */
+        MAINTENANCE
+    }
+
+    /** Current status of the cluster mode */
+    public enum Status {
+        /** Cluster is in progress to the target {@link Type} of mode */
+        IN_PROGRESS,
+
+        /** Cluster is fully stable in the target {@link Type} of mode */
+        COMPLETED
+    }
+
+    private final Type mode;
+    private final Status status;
+
+    public ClusterManagementMode(Type mode, Status status) {
+        this.mode = mode;
+        this.status = status;
+    }
+
+    public Status getStatus() {
+        return status;
+    }
+
+    public Type getMode() {
+        return mode;
+    }
+}
diff --git a/helix-core/src/main/java/org/apache/helix/api/status/ClusterManagementModeRequest.java b/helix-core/src/main/java/org/apache/helix/api/status/ClusterManagementModeRequest.java
new file mode 100644
index 0000000..dd2fe58
--- /dev/null
+++ b/helix-core/src/main/java/org/apache/helix/api/status/ClusterManagementModeRequest.java
@@ -0,0 +1,102 @@
+package org.apache.helix.api.status;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Represents a request to set the cluster management mode {@link ClusterManagementMode}
+ */
+public class ClusterManagementModeRequest {
+  private final ClusterManagementMode.Type _mode;
+  private final String _clusterName;
+  private final String _reason;
+  private final boolean _cancelPendingST;
+
+  public ClusterManagementMode.Type getMode() {
+    return _mode;
+  }
+
+  public String getClusterName() {
+    return _clusterName;
+  }
+
+  public String getReason() {
+    return _reason;
+  }
+
+  public boolean isCancelPendingST() {
+    return _cancelPendingST;
+  }
+
+  public ClusterManagementModeRequest(Builder builder) {
+    _mode = builder.mode;
+    _clusterName = builder.clusterName;
+    _reason = builder.reason;
+    _cancelPendingST = builder.cancelPendingST;
+  }
+
+  public static Builder newBuilder() {
+    return new Builder();
+  }
+
+  public static final class Builder {
+    private ClusterManagementMode.Type mode;
+    private String clusterName;
+    private String reason = "";
+    private boolean cancelPendingST;
+
+    public Builder withMode(ClusterManagementMode.Type mode) {
+      this.mode = mode;
+      return this;
+    }
+
+    public Builder withClusterName(String clusterName) {
+      this.clusterName = clusterName;
+      return this;
+    }
+
+    public Builder withReason(String reason) {
+      this.reason = reason;
+      return this;
+    }
+
+    /**
+     * If mode is not CLUSTER_PAUSE, this should not be set to true.
+     *
+     * @param cancelPendingST whether or not cancel pending ST for CLUSTER_PAUSE mode.
+     * @return {@link Builder}
+     */
+    public Builder withCancelPendingST(boolean cancelPendingST) {
+      this.cancelPendingST = cancelPendingST;
+      return this;
+    }
+
+    public ClusterManagementModeRequest build() {
+      validate();
+      return new ClusterManagementModeRequest(this);
+    }
+
+    private void validate() {
+      Preconditions.checkNotNull(mode, "Mode not set");
+      Preconditions.checkNotNull(clusterName, "Cluster name not set");
+    }
+  }
+}
diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
index 62840b6..2545139 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java
@@ -25,6 +25,7 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -51,9 +52,9 @@ import org.apache.helix.PropertyKey;
 import org.apache.helix.PropertyPathBuilder;
 import org.apache.helix.PropertyType;
 import org.apache.helix.SystemPropertyKeys;
+import org.apache.helix.api.exceptions.HelixConflictException;
+import org.apache.helix.api.status.ClusterManagementMode;
 import org.apache.helix.api.topology.ClusterTopology;
-import org.apache.helix.controller.rebalancer.DelayedAutoRebalancer;
-import org.apache.helix.controller.rebalancer.strategy.CrushEdRebalanceStrategy;
 import org.apache.helix.controller.rebalancer.strategy.RebalanceStrategy;
 import org.apache.helix.controller.rebalancer.util.WagedValidationUtil;
 import org.apache.helix.controller.rebalancer.waged.WagedRebalancer;
@@ -80,6 +81,7 @@ import org.apache.helix.model.ParticipantHistory;
 import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.ResourceConfig;
 import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.api.status.ClusterManagementModeRequest;
 import org.apache.helix.msdcommon.exception.InvalidRoutingDataException;
 import org.apache.helix.tools.DefaultIdealStateCalculator;
 import org.apache.helix.util.HelixUtil;
@@ -93,6 +95,7 @@ import org.apache.helix.zookeeper.impl.client.FederatedZkClient;
 import org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory;
 import org.apache.helix.zookeeper.routing.RoutingDataManager;
 import org.apache.helix.zookeeper.zkclient.DataUpdater;
+import org.apache.helix.zookeeper.zkclient.NetworkUtil;
 import org.apache.helix.zookeeper.zkclient.exception.ZkException;
 import org.apache.helix.zookeeper.zkclient.exception.ZkNoNodeException;
 import org.apache.zookeeper.KeeperException;
@@ -498,6 +501,79 @@ public class ZKHelixAdmin implements HelixAdmin {
   }
 
   @Override
+  public void setClusterManagementMode(ClusterManagementModeRequest request) {
+    ClusterManagementMode.Type mode = request.getMode();
+    String clusterName = request.getClusterName();
+    String reason = request.getReason();
+
+    // TODO: support other modes
+    switch (mode) {
+      case CLUSTER_PAUSE:
+        enableClusterPauseMode(clusterName, request.isCancelPendingST(), reason);
+        break;
+      case NORMAL:
+        // If from other modes, should check what mode it is in and call the api accordingly.
+        // If we put all mode config in one znode, one generic method is good enough.
+        disableClusterPauseMode(clusterName);
+        break;
+      default:
+        throw new IllegalArgumentException("ClusterManagementMode " + mode + " is not supported");
+    }
+  }
+
+  private void enableClusterPauseMode(String clusterName, boolean cancelPendingST, String reason) {
+    String hostname = NetworkUtil.getLocalhostName();
+    logger.info(
+        "Enable cluster pause mode for cluster: {}. CancelPendingST: {}. Reason: {}. From Host: {}",
+        clusterName, cancelPendingST, reason, hostname);
+
+    BaseDataAccessor<ZNRecord> baseDataAccessor = new ZkBaseDataAccessor<>(_zkClient);
+    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, baseDataAccessor);
+
+    if (baseDataAccessor.exists(accessor.keyBuilder().pause().getPath(), AccessOption.PERSISTENT)) {
+      throw new HelixConflictException(clusterName + " pause signal already exists");
+    }
+    if (baseDataAccessor.exists(accessor.keyBuilder().maintenance().getPath(), AccessOption.PERSISTENT)) {
+      throw new HelixConflictException(clusterName + " maintenance signal already exists");
+    }
+
+    // check whether cancellation is enabled
+    ClusterConfig config = accessor.getProperty(accessor.keyBuilder().clusterConfig());
+    if (cancelPendingST && !config.isStateTransitionCancelEnabled()) {
+      throw new HelixConflictException(
+          "State transition cancellation not enabled in " + clusterName);
+    }
+
+    PauseSignal pauseSignal = new PauseSignal();
+    pauseSignal.setClusterPause(true);
+    pauseSignal.setCancelPendingST(cancelPendingST);
+    pauseSignal.setFromHost(hostname);
+    pauseSignal.setTriggerTime(Instant.now().toEpochMilli());
+    if (reason != null && !reason.isEmpty()) {
+      pauseSignal.setReason(reason);
+    }
+    // TODO: merge management status signal into one znode to avoid race condition
+    if (!accessor.createPause(pauseSignal)) {
+      throw new HelixException("Failed to create pause signal");
+    }
+  }
+
+  private void disableClusterPauseMode(String clusterName) {
+    logger.info("Disable cluster pause mode for cluster: {}", clusterName);
+    HelixDataAccessor accessor =
+        new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_zkClient));
+    PropertyKey pausePropertyKey = accessor.keyBuilder().pause();
+    PauseSignal pauseSignal = accessor.getProperty(pausePropertyKey);
+    if (pauseSignal == null || !pauseSignal.isClusterPause()) {
+      throw new HelixException("Cluster pause mode is not enabled for cluster " + clusterName);
+    }
+
+    if (!accessor.removeProperty(pausePropertyKey)) {
+      throw new HelixException("Failed to disable cluster pause mode for cluster: " + clusterName);
+    }
+  }
+
+  @Override
   @Deprecated
   public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) {
     manuallyEnableMaintenanceMode(clusterName, enabled, reason, null);
diff --git a/helix-core/src/main/java/org/apache/helix/model/LiveInstance.java b/helix-core/src/main/java/org/apache/helix/model/LiveInstance.java
index 0adccdd..7a9671d 100644
--- a/helix-core/src/main/java/org/apache/helix/model/LiveInstance.java
+++ b/helix-core/src/main/java/org/apache/helix/model/LiveInstance.java
@@ -40,7 +40,17 @@ public class LiveInstance extends HelixProperty {
     LIVE_INSTANCE,
     ZKPROPERTYTRANSFERURL,
     RESOURCE_CAPACITY,
-    CURRENT_TASK_THREAD_POOL_SIZE
+    CURRENT_TASK_THREAD_POOL_SIZE,
+
+    /** Represents the status of live instance, eg. PAUSED */
+    STATUS
+  }
+
+  /**
+   * Saved values for the {@link LiveInstanceProperty#STATUS} field
+   */
+  public enum LiveInstanceStatus {
+    PAUSED
   }
 
   /**
@@ -131,6 +141,22 @@ public class LiveInstance extends HelixProperty {
   }
 
   /**
+   * Gets the live instance's status. Returns null if the status field is not set.
+   */
+  public LiveInstanceStatus getStatus() {
+    return _record.getEnumField(LiveInstanceProperty.STATUS.name(), LiveInstanceStatus.class, null);
+  }
+
+  /**
+   * Sets the status in simple field.
+   *
+   * @param status status value
+   */
+  public void setStatus(LiveInstanceStatus status) {
+    _record.setEnumField(LiveInstanceProperty.STATUS.name(), status);
+  }
+
+  /**
    * Get an identifier that represents the instance and where it is located
    * @return identifier, e.g. process_id@host
    */
diff --git a/helix-core/src/main/java/org/apache/helix/model/PauseSignal.java b/helix-core/src/main/java/org/apache/helix/model/PauseSignal.java
index e68345d..bcfd17d 100644
--- a/helix-core/src/main/java/org/apache/helix/model/PauseSignal.java
+++ b/helix-core/src/main/java/org/apache/helix/model/PauseSignal.java
@@ -19,6 +19,8 @@ package org.apache.helix.model;
  * under the License.
  */
 
+import java.time.Instant;
+
 import org.apache.helix.HelixProperty;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 
@@ -26,9 +28,18 @@ import org.apache.helix.zookeeper.datamodel.ZNRecord;
  * Represent a pause in the cluster
  */
 public class PauseSignal extends HelixProperty {
+  private static final String DEFAULT_PAUSE_ID = "pause";
 
   public enum PauseSignalProperty {
-    REASON
+    REASON,
+    CLUSTER_PAUSE,
+    FROM_HOST,
+    CANCEL_PENDING_ST,
+    TRIGGER_TIME
+  }
+
+  public PauseSignal() {
+    this(DEFAULT_PAUSE_ID);
   }
 
   /**
@@ -63,4 +74,33 @@ public class PauseSignal extends HelixProperty {
   public boolean isValid() {
     return true;
   }
+
+  public void setClusterPause(boolean pause) {
+    _record.setBooleanField(PauseSignalProperty.CLUSTER_PAUSE.name(), pause);
+  }
+
+  public boolean isClusterPause() {
+    return _record.getBooleanField(PauseSignalProperty.CLUSTER_PAUSE.name(), false);
+  }
+
+  public void setFromHost(String host) {
+    _record.setSimpleField(PauseSignalProperty.FROM_HOST.name(), host);
+  }
+
+  public String getFromHost() {
+    return _record.getSimpleField(PauseSignalProperty.FROM_HOST.name());
+  }
+
+  public void setCancelPendingST(boolean cancel) {
+    _record.setBooleanField(PauseSignalProperty.CANCEL_PENDING_ST.name(), cancel);
+  }
+
+  public boolean getCancelPendingST() {
+    return _record.getBooleanField(PauseSignalProperty.CANCEL_PENDING_ST.name(), false);
+  }
+
+  public void setTriggerTime(long time) {
+    _record.setSimpleField(PauseSignalProperty.TRIGGER_TIME.name(),
+        Instant.ofEpochMilli(time).toString());
+  }
 }
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
index ce6ea9d..7ddf677 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java
@@ -45,10 +45,13 @@ import org.apache.helix.PropertyPathBuilder;
 import org.apache.helix.PropertyType;
 import org.apache.helix.TestHelper;
 import org.apache.helix.ZkUnitTestBase;
+import org.apache.helix.api.exceptions.HelixConflictException;
+import org.apache.helix.api.status.ClusterManagementMode;
 import org.apache.helix.api.topology.ClusterTopology;
 import org.apache.helix.cloud.constants.CloudProvider;
 import org.apache.helix.controller.rebalancer.waged.WagedRebalancer;
 import org.apache.helix.examples.MasterSlaveStateModelFactory;
+import org.apache.helix.integration.manager.ClusterControllerManager;
 import org.apache.helix.integration.manager.MockParticipantManager;
 import org.apache.helix.model.CloudConfig;
 import org.apache.helix.model.ClusterConfig;
@@ -65,15 +68,18 @@ import org.apache.helix.model.IdealState;
 import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.LiveInstance;
 import org.apache.helix.model.MasterSlaveSMD;
+import org.apache.helix.model.PauseSignal;
 import org.apache.helix.model.ResourceConfig;
 import org.apache.helix.model.StateModelDefinition;
 import org.apache.helix.model.builder.ConstraintItemBuilder;
 import org.apache.helix.model.builder.HelixConfigScopeBuilder;
+import org.apache.helix.api.status.ClusterManagementModeRequest;
 import org.apache.helix.participant.StateMachineEngine;
 import org.apache.helix.tools.StateModelConfigGenerator;
 import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 import org.apache.helix.zookeeper.exception.ZkClientException;
+import org.apache.helix.zookeeper.zkclient.NetworkUtil;
 import org.apache.helix.zookeeper.zkclient.exception.ZkException;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.data.Stat;
@@ -1060,4 +1066,68 @@ public class TestZkHelixAdmin extends ZkUnitTestBase {
     tool.dropCluster(clusterName);
     System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
   }
+
+  /*
+   * Tests 2 APIs: enable and disable cluster pause mode.
+   */
+  @Test
+  public void testEnableDisableClusterPauseMode() {
+    String className = TestHelper.getTestClassName();
+    String methodName = TestHelper.getTestMethodName();
+    String clusterName = className + "_" + methodName;
+
+    _gSetupTool.setupTestCluster(clusterName);
+    ClusterControllerManager controller =
+        new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
+    controller.syncStart();
+    _gSetupTool.activateCluster(clusterName, controller.getClusterName(), true);
+
+    try {
+      // Should not create pause with pending cancel ST enabled because cancellation is not enabled
+      try {
+        ClusterManagementModeRequest request = ClusterManagementModeRequest.newBuilder()
+            .withClusterName(clusterName)
+            .withMode(ClusterManagementMode.Type.CLUSTER_PAUSE)
+            .withCancelPendingST(true)
+            .withReason(methodName)
+            .build();
+        _gSetupTool.getClusterManagementTool().setClusterManagementMode(request);
+        Assert.fail("Should not create pause with pending cancel ST enabled because "
+            + "cancellation is not enabled");
+      } catch (HelixConflictException e) {
+        Assert.assertTrue(e.getMessage().startsWith("State transition cancellation not enabled"));
+      }
+
+      ClusterManagementModeRequest request = ClusterManagementModeRequest.newBuilder()
+          .withClusterName(clusterName)
+          .withMode(ClusterManagementMode.Type.CLUSTER_PAUSE)
+          .withReason(methodName)
+          .build();
+      _gSetupTool.getClusterManagementTool().setClusterManagementMode(request);
+      HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(clusterName, _baseAccessor);
+      PauseSignal pauseSignal = dataAccessor.getProperty(dataAccessor.keyBuilder().pause());
+
+      // Verify pause signal is correctly written
+      Assert.assertNotNull(pauseSignal);
+      Assert.assertTrue(pauseSignal.isClusterPause());
+      Assert.assertFalse(pauseSignal.getCancelPendingST());
+      Assert.assertEquals(pauseSignal.getFromHost(), NetworkUtil.getLocalhostName());
+      Assert.assertEquals(pauseSignal.getReason(), methodName);
+
+      // Disable pause mode
+      request = ClusterManagementModeRequest.newBuilder()
+          .withClusterName(clusterName)
+          .withMode(ClusterManagementMode.Type.NORMAL)
+          .build();
+      _gSetupTool.getClusterManagementTool().setClusterManagementMode(request);
+      pauseSignal = dataAccessor.getProperty(dataAccessor.keyBuilder().pause());
+
+      // Verify pause signal has been deleted.
+      Assert.assertNull(pauseSignal);
+    } finally {
+      _gSetupTool.activateCluster(clusterName, controller.getClusterName(), false);
+      controller.syncStop();
+      _gSetupTool.deleteCluster(clusterName);
+    }
+  }
 }
diff --git a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
index 42633c1..2bacc74 100644
--- a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
+++ b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java
@@ -44,6 +44,7 @@ import org.apache.helix.model.InstanceConfig;
 import org.apache.helix.model.MaintenanceSignal;
 import org.apache.helix.model.ResourceConfig;
 import org.apache.helix.model.StateModelDefinition;
+import org.apache.helix.api.status.ClusterManagementModeRequest;
 import org.apache.helix.zookeeper.datamodel.ZNRecord;
 
 public class MockHelixAdmin implements HelixAdmin {
@@ -325,6 +326,11 @@ public class MockHelixAdmin implements HelixAdmin {
     return false;
   }
 
+  @Override
+  public void setClusterManagementMode(ClusterManagementModeRequest request) {
+
+  }
+
   @Override public void resetPartition(String clusterName, String instanceName, String resourceName,
       List<String> partitionNames) {
 
diff --git a/helix-core/src/test/java/org/apache/helix/model/TestLiveInstance.java b/helix-core/src/test/java/org/apache/helix/model/TestLiveInstance.java
index 39f0a57..18285f5 100644
--- a/helix-core/src/test/java/org/apache/helix/model/TestLiveInstance.java
+++ b/helix-core/src/test/java/org/apache/helix/model/TestLiveInstance.java
@@ -146,4 +146,12 @@ public class TestLiveInstance extends ZkUnitTestBase {
 
     Assert.assertEquals(testLiveInstance.getCurrentTaskThreadPoolSize(), 100);
   }
+
+  @Test
+  public void testLiveInstanceStatus() {
+    LiveInstance testLiveInstance = new LiveInstance("testLiveInstanceStatus");
+    Assert.assertNull(testLiveInstance.getStatus());
+    testLiveInstance.setStatus(LiveInstance.LiveInstanceStatus.PAUSED);
+    Assert.assertEquals(testLiveInstance.getStatus(), LiveInstance.LiveInstanceStatus.PAUSED);
+  }
 }

Mime
View raw message