Author: tucu
Date: Thu Jan 16 17:12:31 2014
New Revision: 1558853
URL: http://svn.apache.org/r1558853
Log:
MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. (tucu)
Added:
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java
- copied unchanged from r1558852, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java
Modified:
hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1558853&r1=1558852&r2=1558853&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Thu Jan 16 17:12:31
2014
@@ -126,6 +126,9 @@ Release 2.4.0 - UNRELEASED
MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps
cannot be fulfilled. (lohit via kasha)
+ MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running.
+ (tucu)
+
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java?rev=1558853&r1=1558852&r2=1558853&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
Thu Jan 16 17:12:31 2014
@@ -78,6 +78,13 @@ public class JHAdminConfig {
MR_HISTORY_PREFIX + "done-dir";
/**
+ * Maximum time the History server will wait for the FileSystem for History
+ * files to become available. Default value is -1, forever.
+ */
+ public static final String MR_HISTORY_MAX_START_WAIT_TIME =
+ MR_HISTORY_PREFIX + "maximum-start-wait-time-millis";
+ public static final long DEFAULT_MR_HISTORY_MAX_START_WAIT_TIME = -1;
+ /**
* Path where history files should be stored after a job finished and before
* they are pulled into the job history server.
**/
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml?rev=1558853&r1=1558852&r2=1558853&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml
Thu Jan 16 17:12:31 2014
@@ -35,6 +35,10 @@
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
</dependency>
<dependency>
@@ -53,6 +57,12 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java?rev=1558853&r1=1558852&r2=1558853&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
Thu Jan 16 17:12:31 2014
@@ -20,6 +20,7 @@ package org.apache.hadoop.mapreduce.v2.h
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.net.ConnectException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -69,6 +70,8 @@ import org.apache.hadoop.yarn.exceptions
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
/**
* This class provides a way to interact with history files in a thread safe
@@ -464,7 +467,8 @@ public class HistoryFileManager extends
private JobACLsManager aclsMgr;
- private Configuration conf;
+ @VisibleForTesting
+ Configuration conf;
private String serialNumberFormat;
@@ -491,36 +495,10 @@ public class HistoryFileManager extends
+ (JobHistoryUtils.SERIAL_NUMBER_DIRECTORY_DIGITS + serialNumberLowDigits)
+ "d");
- String doneDirPrefix = null;
- doneDirPrefix = JobHistoryUtils
- .getConfiguredHistoryServerDoneDirPrefix(conf);
- try {
- doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified(
- new Path(doneDirPrefix));
- doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf);
- doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK);
- mkdir(doneDirFc, doneDirPrefixPath, new FsPermission(
- JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION));
- } catch (IOException e) {
- throw new YarnRuntimeException("Error creating done directory: ["
- + doneDirPrefixPath + "]", e);
- }
-
- String intermediateDoneDirPrefix = null;
- intermediateDoneDirPrefix = JobHistoryUtils
- .getConfiguredHistoryIntermediateDoneDirPrefix(conf);
- try {
- intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified(
- new Path(intermediateDoneDirPrefix));
- intermediateDoneDirFc = FileContext.getFileContext(
- intermediateDoneDirPath.toUri(), conf);
- mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission(
- JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
- } catch (IOException e) {
- LOG.info("error creating done directory on dfs " + e);
- throw new YarnRuntimeException("Error creating intermediate done directory: ["
- + intermediateDoneDirPath + "]", e);
- }
+ long maxFSWaitTime = conf.getLong(
+ JHAdminConfig.MR_HISTORY_MAX_START_WAIT_TIME,
+ JHAdminConfig.DEFAULT_MR_HISTORY_MAX_START_WAIT_TIME);
+ createHistoryDirs(new SystemClock(), 10 * 1000, maxFSWaitTime);
this.aclsMgr = new JobACLsManager(conf);
@@ -544,6 +522,107 @@ public class HistoryFileManager extends
super.serviceInit(conf);
}
+ @VisibleForTesting
+ void createHistoryDirs(Clock clock, long intervalCheckMillis,
+ long timeOutMillis) throws IOException {
+ long start = clock.getTime();
+ boolean done = false;
+ int counter = 0;
+ while (!done &&
+ ((timeOutMillis == -1) || (clock.getTime() - start < timeOutMillis))) {
+ done = tryCreatingHistoryDirs(counter++ % 3 == 0); // log every 3 attempts, 30sec
+ try {
+ Thread.sleep(intervalCheckMillis);
+ } catch (InterruptedException ex) {
+ throw new YarnRuntimeException(ex);
+ }
+ }
+ if (!done) {
+ throw new YarnRuntimeException("Timed out '" + timeOutMillis +
+ "ms' waiting for FileSystem to become available");
+ }
+ }
+
+ /**
+ * DistributedFileSystem returns a RemoteException with a message stating
+ * SafeModeException in it. So this is the only way to check it is because of
+ * being in safe mode.
+ */
+ private boolean isBecauseSafeMode(Throwable ex) {
+ return ex.toString().contains("SafeModeException");
+ }
+
+ /**
+ * Returns TRUE if the history dirs were created, FALSE if they could not
+ * be created because the FileSystem is not reachable or in safe mode and
+ * throws an exception otherwise.
+ */
+ @VisibleForTesting
+ boolean tryCreatingHistoryDirs(boolean logWait) throws IOException {
+ boolean succeeded = true;
+ String doneDirPrefix = JobHistoryUtils.
+ getConfiguredHistoryServerDoneDirPrefix(conf);
+ try {
+ doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified(
+ new Path(doneDirPrefix));
+ doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf);
+ doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK);
+ mkdir(doneDirFc, doneDirPrefixPath, new FsPermission(
+ JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION));
+ } catch (ConnectException ex) {
+ if (logWait) {
+ LOG.info("Waiting for FileSystem at " +
+ doneDirPrefixPath.toUri().getAuthority() + " to be available");
+ }
+ succeeded = false;
+ } catch (IOException e) {
+ if (isBecauseSafeMode(e)) {
+ succeeded = false;
+ if (logWait) {
+ LOG.info("Waiting for FileSystem at " +
+ doneDirPrefixPath.toUri().getAuthority() +
+ " to be out of safe mode");
+ }
+ } else {
+ throw new YarnRuntimeException("Error creating done directory: ["
+ + doneDirPrefixPath + "]", e);
+ }
+ }
+ if (succeeded) {
+ String intermediateDoneDirPrefix = JobHistoryUtils.
+ getConfiguredHistoryIntermediateDoneDirPrefix(conf);
+ try {
+ intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified(
+ new Path(intermediateDoneDirPrefix));
+ intermediateDoneDirFc = FileContext.getFileContext(
+ intermediateDoneDirPath.toUri(), conf);
+ mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission(
+ JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort()));
+ } catch (ConnectException ex) {
+ succeeded = false;
+ if (logWait) {
+ LOG.info("Waiting for FileSystem at " +
+ intermediateDoneDirPath.toUri().getAuthority() +
+ " to be available");
+ }
+ } catch (IOException e) {
+ if (isBecauseSafeMode(e)) {
+ succeeded = false;
+ if (logWait) {
+ LOG.info("Waiting for FileSystem at " +
+ intermediateDoneDirPath.toUri().getAuthority() +
+ " to be out of safe mode");
+ }
+ } else {
+ throw new YarnRuntimeException(
+ "Error creating intermediate done directory: ["
+ + intermediateDoneDirPath + "]", e);
+ }
+ }
+ }
+ return succeeded;
+ }
+
@Override
public void serviceStop() throws Exception {
ShutdownThreadsHelper.shutdownExecutorService(moveToDoneExecutor);
|