avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1556069 - in /avro/trunk: CHANGES.txt doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java doc/src/content/xdocs/mr.xml
Date Mon, 06 Jan 2014 23:33:38 GMT
Author: cutting
Date: Mon Jan  6 23:33:37 2014
New Revision: 1556069

URL: http://svn.apache.org/r1556069
Log:
AVRO-1426. Java: Add mapreduce word count example.  Contributed by Jesse Anderson.

Added:
    avro/trunk/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java  (with props)
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/doc/src/content/xdocs/mr.xml

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1556069&r1=1556068&r2=1556069&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Mon Jan  6 23:33:37 2014
@@ -67,6 +67,9 @@ Trunk (not yet released)
 
     AVRO-1225. Java: Add guide for MapReduce API. (Brock Noland via cutting)
 
+    AVRO-1426. Java: Add mapreduce word count example.
+    (Jesse Anderson via cutting)
+
   BUG FIXES
 
     AVRO-1368. Fix SpecificDatumWriter to, when writing a string

Added: avro/trunk/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java
URL: http://svn.apache.org/viewvc/avro/trunk/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java?rev=1556069&view=auto
==============================================================================
--- avro/trunk/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java (added)
+++ avro/trunk/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java Mon Jan  6 23:33:37 2014
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package example;
+
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.avro.mapred.Pair;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * The classic WordCount example modified to output Avro Pair<CharSequence,
+ * Integer> records instead of text.
+ */
+public class MapReduceAvroWordCount extends Configured implements Tool {
+
+  public static class Map
+    extends Mapper<LongWritable, Text, Text, IntWritable> {
+
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+
+    public void map(LongWritable key, Text value, Context context)
+      throws IOException, InterruptedException {
+      String line = value.toString();
+      StringTokenizer tokenizer = new StringTokenizer(line);
+      while (tokenizer.hasMoreTokens()) {
+        word.set(tokenizer.nextToken());
+        context.write(word, one);
+      }
+    }
+  }
+
+  public static class Reduce
+    extends Reducer<Text, IntWritable,
+            AvroWrapper<Pair<CharSequence, Integer>>, NullWritable> {
+
+    public void reduce(Text key, Iterable<IntWritable> values,
+                       Context context)
+      throws IOException, InterruptedException {
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      context.write(new AvroWrapper<Pair<CharSequence, Integer>>
+                    (new Pair<CharSequence, Integer>(key.toString(), sum)),
+                    NullWritable.get());
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: AvroWordCount <input path> <output path>");
+      return -1;
+    }
+
+    Job job = new Job(getConf());
+    job.setJarByClass(MapReduceAvroWordCount.class);
+    job.setJobName("wordcount");
+
+    // We call setOutputSchema first so we can override the configuration
+    // parameters it sets
+    AvroJob.setOutputKeySchema(job,
+                               Pair.getPairSchema(Schema.create(Type.STRING),
+                                                  Schema.create(Type.INT)));
+    job.setOutputValueClass(NullWritable.class);
+
+    job.setMapperClass(Map.class);
+    job.setReducerClass(Reduce.class);
+
+    job.setInputFormatClass(TextInputFormat.class);
+
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(IntWritable.class);
+    job.setSortComparatorClass(Text.Comparator.class);
+
+    FileInputFormat.setInputPaths(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+    job.waitForCompletion(true);
+    
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res =
+      ToolRunner.run(new Configuration(), new MapReduceAvroWordCount(), args);
+    System.exit(res);
+  }
+}

Propchange: avro/trunk/doc/examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: avro/trunk/doc/src/content/xdocs/mr.xml
URL: http://svn.apache.org/viewvc/avro/trunk/doc/src/content/xdocs/mr.xml?rev=1556069&r1=1556068&r2=1556069&view=diff
==============================================================================
--- avro/trunk/doc/src/content/xdocs/mr.xml (original)
+++ avro/trunk/doc/src/content/xdocs/mr.xml Mon Jan  6 23:33:37 2014
@@ -39,7 +39,8 @@
       See the <a href="http://hadoop.apache.org/docs/current/">Hadoop
       documentation</a> and the <a href="gettingstartedjava.html">Avro getting
       started guide</a> for introductions to these projects.  This guide uses
-      the old MapReduce API (<code>org.apache.hadoop.mapred</code>).
+      the old MapReduce API (<code>org.apache.hadoop.mapred</code>) and the new
+      MapReduce API (<code>org.apache.hadoop.mapreduce</code>).
     </p>
     <section>
       <title>Setup</title>
@@ -289,7 +290,10 @@ public class MapReduceColorCount extends
         ColorCount reads in data files containing <code>User</code> records,
         defined in <em>examples/user.avsc</em>, and counts the number of
         instances of each favorite color.  (This example draws inspiration from
-        the canonical WordCount MapReduce application.)  The <code>User</code>
+        the canonical WordCount MapReduce application.)  This example uses the 
+        old MapReduce API.  See MapReduceAvroWordCount, found under 
+        <em>doc/examples/mr-example/src/main/java/example/</em> to see the new MapReduce 
+        API example.  The <code>User</code>
         schema is defined as follows:
       </p>
       <source>
@@ -547,7 +551,7 @@ AvroJob.setOutputSchema(conf, Pair.getPa
       </p>
 
       <p>
-        The mapred package has api <a
+        The mapred package has API <a
         href="http://avro.apache.org/docs/current/api/java/org/apache/avro/mapred/package-summary.html">
         <code>org.apache.avro.mapred</code> documentation</a> as does the <a
         href="http://avro.apache.org/docs/current/api/java/org/apache/avro/mapreduce/package-summary.html">
@@ -558,7 +562,11 @@ AvroJob.setOutputSchema(conf, Pair.getPa
         these libraries.  See the AvroWordCount application, found under
         <em>examples/mr-example/src/main/java/example/AvroWordCount.java</em> in
         the Avro documentation, for an example of implementing a
-        <code>Reducer</code> that outputs Avro data.
+        <code>Reducer</code> that outputs Avro data using the old MapReduce API.
+        See the MapReduceAvroWordCount application, found under
+        <em>examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java</em> in
+        the Avro documentation, for an example of implementing a
+        <code>Reducer</code> that outputs Avro data using the new MapReduce API.
       </p>
     </section>
   </body>



Mime
View raw message