flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tillrohrmann <...@git.apache.org>
Subject [GitHub] flink pull request #2410: [FLINK-4449] [cluster management] Heartbeat Manage...
Date Wed, 24 Aug 2016 13:04:19 GMT
Github user tillrohrmann commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2410#discussion_r76051833
  
    --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/rpc/resourcemanager/ResourceManagerToTaskExecutorHeartbeatManager.java
---
    @@ -0,0 +1,177 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.runtime.rpc.resourcemanager;
    +
    +import org.apache.flink.runtime.clusterframework.types.ResourceID;
    +import org.apache.flink.runtime.rpc.heartbeat.HeartbeatScheduler;
    +import org.apache.flink.runtime.rpc.taskexecutor.SlotReport;
    +import org.apache.flink.runtime.rpc.taskexecutor.TaskExecutorGateway;
    +import org.slf4j.Logger;
    +import scala.concurrent.Future;
    +import scala.concurrent.duration.FiniteDuration;
    +
    +import java.util.HashMap;
    +import java.util.Map;
    +import java.util.UUID;
    +
    +import static org.apache.flink.util.Preconditions.checkNotNull;
    +
    +/**
    + * ResourceManagerToTaskExecutorHeartbeatManager is responsible for trigger heartbeat
between resourceManager and all registered taskExecutors,
    + * notify ResourceManager about failure of taskManager instance which lost heartbeat
    + */
    +public class ResourceManagerToTaskExecutorHeartbeatManager {
    +	/** active heartbeat schedulers between resourceManager and TaskExecutors */
    +	private final Map<ResourceID, ResourceManagerToTaskExecutorHeartbeatScheduler>
activeHeartbeatSchedulers;
    +
    +	/** resourceManager which send heartbeat */
    +	private final ResourceManager resourceManager;
    +
    +	/** leader session id of current resourceManager */
    +	private final UUID leaderID;
    +	private final Logger log;
    +
    +	/**
    +	 * @param resourceManager                resourceManager which handles heartbeat communication
with taskManager
    +	 * @param resourceManagerLeaderSessionID leader session id of current resourceManager
    +	 * @param log
    +	 */
    +	public ResourceManagerToTaskExecutorHeartbeatManager(ResourceManager resourceManager,
    +		UUID resourceManagerLeaderSessionID, Logger log) {
    +		this.resourceManager = resourceManager;
    +		this.leaderID = resourceManagerLeaderSessionID;
    +		this.log = log;
    +		this.activeHeartbeatSchedulers = new HashMap<>();
    +	}
    +
    +	/**
    +	 * register heartbeat target
    +	 * @param resourceID target taskExecutor resourceID
    +	 * @param targetTarget target taskExecutor gateway
    +	 * @param targetAddress target taskExecutor address
    +	 * @return heartbeat interval in millisecond
    +	 */
    +	public long registerTarget(ResourceID resourceID, TaskExecutorGateway targetTarget,
String targetAddress) {
    +		if (activeHeartbeatSchedulers.containsKey(resourceID)) {
    +			log.warn("Ignore taskExecutor registration with resource id {} from {} because it
is already registered,", resourceID, targetAddress);
    +			return -1;
    +		} else {
    +			ResourceManagerToTaskExecutorHeartbeatScheduler heartbeatScheduler = new ResourceManagerToTaskExecutorHeartbeatScheduler(
    +				resourceManager, leaderID, targetTarget, targetAddress, resourceID, log);
    +			heartbeatScheduler.start();
    +			activeHeartbeatSchedulers.put(resourceID, heartbeatScheduler);
    +			return heartbeatScheduler.getHeartbeatInterval();
    +		}
    +	}
    +
    +	/**
    +	 * stop and clean all active heartbeat scheduler
    +	 */
    +	public void stopHeartbeatToAllTaskExecutor() {
    +		for (ResourceManagerToTaskExecutorHeartbeatScheduler heartbeatScheduler : activeHeartbeatSchedulers.values())
{
    +			heartbeatScheduler.close();
    +		}
    +		activeHeartbeatSchedulers.clear();
    +	}
    +
    +	/**
    +	 * stop and clean heartbeat scheduler to the specified taskExecutor, usually happens
when taskExecutor marked failed
    +	 * @param resourceID taskExecutor's resourceID which to stop heartbeat
    +	 */
    +	public void stopHeartbeatToTaskExecutor(ResourceID resourceID) {
    +		if (activeHeartbeatSchedulers.containsKey(resourceID)) {
    +			ResourceManagerToTaskExecutorHeartbeatScheduler heartbeatManager = activeHeartbeatSchedulers.get(resourceID);
    +			heartbeatManager.close();
    +			activeHeartbeatSchedulers.remove(resourceID);
    +		}
    +	}
    +
    +	/**
    +	 * Heartbeat scheduler from ResourceManager to TaskExecutor
    +	 */
    +	class ResourceManagerToTaskExecutorHeartbeatScheduler extends HeartbeatScheduler<TaskExecutorGateway,
SlotReport> {
    +
    +		/** identify the taskManager resourceID */
    +		private final ResourceID resourceID;
    +
    +		/** identify the resourceManager rpc endpoint */
    +		private final ResourceManager resourceManager;
    +
    +
    +		/**
    +		 * @param resourceManager                resourceManager which handles heartbeat communication
with taskManager
    +		 * @param resourceManagerLeaderSessionID leader session id of current resourceManager
    +		 * @param taskExecutorGateway            taskManager which receives heartbeat from
resourceManager and report its slot
    +		 *                                       allocation to resourceManager
    +		 * @param taskExecutorAddress            taskManager's address
    +		 * @param taskExecutorResourceID         taskManager's resourceID
    +		 * @param log                            log
    +		 */
    +		public ResourceManagerToTaskExecutorHeartbeatScheduler(
    +			ResourceManager resourceManager, UUID resourceManagerLeaderSessionID,
    +			TaskExecutorGateway taskExecutorGateway,
    +			String taskExecutorAddress, ResourceID taskExecutorResourceID, Logger log) {
    +			super(resourceManager.getRpcService(), resourceManagerLeaderSessionID, taskExecutorGateway,
taskExecutorAddress,
    +				"taskExecutor " + taskExecutorResourceID.toString(), log);
    +			this.resourceManager = resourceManager;
    --- End diff --
    
    Please wrap this assignment in `checkNotNull` so that a null `resourceManager` is rejected.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message