storm-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kabh...@apache.org
Subject [2/6] storm git commit: STORM-2175: Retry shutdown longer and timeout if too long.
Date Thu, 03 Nov 2016 00:21:08 GMT
STORM-2175: Retry shutdown longer and timeout if too long.


Project: http://git-wip-us.apache.org/repos/asf/storm/repo
Commit: http://git-wip-us.apache.org/repos/asf/storm/commit/9747e5f2
Tree: http://git-wip-us.apache.org/repos/asf/storm/tree/9747e5f2
Diff: http://git-wip-us.apache.org/repos/asf/storm/diff/9747e5f2

Branch: refs/heads/master
Commit: 9747e5f2f8072ba82b27c9fb96993270c114e034
Parents: 94d61c7
Author: Robert (Bobby) Evans <evans@yahoo-inc.com>
Authored: Mon Oct 31 11:13:25 2016 -0500
Committer: Robert (Bobby) Evans <evans@yahoo-inc.com>
Committed: Mon Oct 31 11:13:25 2016 -0500

----------------------------------------------------------------------
 .../daemon/supervisor/ReadClusterState.java     | 32 ++++++++++++++------
 1 file changed, 22 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/storm/blob/9747e5f2/storm-core/src/jvm/org/apache/storm/daemon/supervisor/ReadClusterState.java
----------------------------------------------------------------------
diff --git a/storm-core/src/jvm/org/apache/storm/daemon/supervisor/ReadClusterState.java b/storm-core/src/jvm/org/apache/storm/daemon/supervisor/ReadClusterState.java
index 27f18b6..981d619 100644
--- a/storm-core/src/jvm/org/apache/storm/daemon/supervisor/ReadClusterState.java
+++ b/storm-core/src/jvm/org/apache/storm/daemon/supervisor/ReadClusterState.java
@@ -286,30 +286,42 @@ public class ReadClusterState implements Runnable, AutoCloseable {
         return portTasks;
     }
 
+    private static final long WARN_MILLIS = 1_000; //Warn about a shutdown that takes longer than 1 second (default timeout)
+    private static final long ERROR_MILLIS = 10_000; //Throw an exception if after 10 seconds.
+    
     public synchronized void shutdownAllWorkers() {
         for (Slot slot: slots.values()) {
             slot.setNewAssignment(null);
         }
-
+        
+        long startTime = Time.currentTimeMillis();
+        Exception exp = null;
         for (Slot slot: slots.values()) {
             try {
-                int count = 0;
                 while (slot.getMachineState() != MachineState.EMPTY) {
-                    if (count > 10) {
-                        LOG.warn("DONE waiting for {} to finish {}", slot, slot.getMachineState());
-                        break;
+                    long timeSpentMillis = Time.currentTimeMillis() - startTime;
+                    if (timeSpentMillis > ERROR_MILLIS) {
+                        throw new IllegalStateException("It took over " + timeSpentMillis + "ms to shut down slot " + slot);
+                    }
+                    
+                    if (timeSpentMillis > WARN_MILLIS) {
+                        LOG.warn("It has taken {}ms so far and {} is still not shut down.", timeSpentMillis, slot);
                     }
                     if (Time.isSimulating()) {
-                        Time.advanceTime(1000);
-                        Thread.sleep(100);
-                    } else {
-                        Time.sleep(100);
+                        Time.advanceTime(100);
                     }
-                    count++;
+                    Thread.sleep(100);
                 }
             } catch (Exception e) {
                 LOG.error("Error trying to shutdown workers in {}", slot, e);
+                exp = e;
+            }
+        }
+        if (exp != null) {
+            if (exp instanceof RuntimeException) {
+                throw (RuntimeException)exp;
             }
+            throw new RuntimeException(exp);
         }
     }
     


Mime
View raw message