sqoop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From venkatran...@apache.org
Subject sqoop git commit: SQOOP-2055: Run only one map task attempt during export
Date Thu, 29 Jan 2015 06:45:21 GMT
Repository: sqoop
Updated Branches:
  refs/heads/trunk 6e555218f -> 420fc3d53


SQOOP-2055:  Run only one map task attempt during export

(Jarek Jarcec Cecho via Venkat Ranganathan)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/420fc3d5
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/420fc3d5
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/420fc3d5

Branch: refs/heads/trunk
Commit: 420fc3d53f1db62710710b93b9801cff5e4d1b53
Parents: 6e55521
Author: Venkat Ranganathan <venkat@hortonworks.com>
Authored: Wed Jan 28 22:45:05 2015 -0800
Committer: Venkat Ranganathan <venkat@hortonworks.com>
Committed: Wed Jan 28 22:45:05 2015 -0800

----------------------------------------------------------------------
 .../apache/sqoop/mapreduce/ExportJobBase.java   | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/420fc3d5/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java b/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java
index cb846e8..f9fa7f3 100644
--- a/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java
+++ b/src/java/org/apache/sqoop/mapreduce/ExportJobBase.java
@@ -82,6 +82,18 @@ public class ExportJobBase extends JobBase {
   public static final String EXPORT_MAP_TASKS_KEY =
       "sqoop.mapreduce.export.map.tasks";
 
+  /**
+   *  Maximal number of attempts for map task during export
+   *
+   *  Sqoop will default to "1" if this property is not set, regardless of what is
+   *  configured directly in your Hadoop configuration.
+   */
+  public static final String SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS =
+    "sqoop.export.mapred.map.max.attempts";
+
+  private static final String HADOOP_MAP_TASK_MAX_ATTEMTPS =
+    "mapred.map.max.attempts";
+
   protected ExportJobContext context;
 
 
@@ -510,4 +522,25 @@ public class ExportJobBase extends JobBase {
    */
   protected void jobTeardown(Job job) throws IOException, ExportException {
   }
+
+  @Override
+  protected void propagateOptionsToJob(Job job) {
+    super.propagateOptionsToJob(job);
+    Configuration conf = job.getConfiguration();
+
+    // Re-trying a failed mapper mostly doesn't make sense for an export job, so by
+    // default we force MR to run only one attempt per mapper. Users or connector
+    // developers can override this by setting SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS:
+    //
+    // * Positive number - we will allow the specified number of attempts
+    // * Zero or negative - we leave Hadoop's configured number of attempts untouched
+    //
+    // Most connectors commit data directly to the final table, so re-running a
+    // mapper leads to misleading errors about inserting duplicate rows.
+    // The check must be "> 0", not "> 1": the default of 1 has to be written too.
+    int sqoopMaxAttempts = conf.getInt(SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS, 1);
+    if (sqoopMaxAttempts > 0) {
+      conf.setInt(HADOOP_MAP_TASK_MAX_ATTEMTPS, sqoopMaxAttempts);
+    }
+  }
 }


Mime
View raw message