avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1383626 [2/3] - in /avro/trunk: ./ doc/src/content/xdocs/ lang/java/ lang/java/tools/ lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/trevni/ lang/java/trevni/avro/ lang/java/trevni/avro/src/ lang/java/trevni/avro/src/main/ l...
Date Tue, 11 Sep 2012 21:35:59 GMT
Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni.avro;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.trevni.ValueType;
+import org.apache.trevni.ColumnMetaData;
+import org.apache.trevni.ColumnFileMetaData;
+
+import org.apache.avro.Schema;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Checks that {@link AvroColumnator} shreds Avro schemas into the expected
+ * columns, and that random data for each schema can be written with
+ * {@link AvroColumnWriter} and read back with {@link AvroColumnReader}.
+ */
+public class TestShredder {
+
+  /** Number of random datums written and read back per schema. */
+  private static final int COUNT = 100;
+  /** Scratch column file, overwritten by every call to {@link #check}. */
+  private static final File FILE = new File("target", "test.trv");
+
+  @Test public void testPrimitives() throws Exception {
+    check(Schema.create(Schema.Type.NULL),
+          new ColumnMetaData("null", ValueType.NULL));
+
+    check(Schema.create(Schema.Type.INT),
+          new ColumnMetaData("int", ValueType.INT));
+    check(Schema.create(Schema.Type.LONG),
+          new ColumnMetaData("long", ValueType.LONG));
+
+    check(Schema.create(Schema.Type.FLOAT),
+          new ColumnMetaData("float", ValueType.FLOAT));
+    check(Schema.create(Schema.Type.DOUBLE),
+          new ColumnMetaData("double", ValueType.DOUBLE));
+
+    check(Schema.create(Schema.Type.BYTES),
+          new ColumnMetaData("bytes", ValueType.BYTES));
+    check(Schema.create(Schema.Type.STRING),
+          new ColumnMetaData("string", ValueType.STRING));
+
+    // enums are expected as INT columns, fixed as BYTES columns
+    check(Schema.createEnum("E", null, null, Arrays.asList("X","Y","Z")),
+          new ColumnMetaData("E", ValueType.INT));
+    check(Schema.createFixed("F", null, null, 5),
+          new ColumnMetaData("F", ValueType.BYTES));
+  }
+
+  /** JSON for a record R with an int field x and a string field y. */
+  private static final String SIMPLE_RECORD =
+    "{\"type\":\"record\",\"name\":\"R\",\"fields\":["
+    +"{\"name\":\"x\",\"type\":\"int\"},"
+    +"{\"name\":\"y\",\"type\":\"string\"}"
+    +"]}";
+
+  @Test public void testSimpleRecord() throws Exception {
+    check(Schema.parse(SIMPLE_RECORD),
+          new ColumnMetaData("x", ValueType.INT),
+          new ColumnMetaData("y", ValueType.STRING));
+  }
+
+  @Test public void testNestedRecord() throws Exception {
+    String s =
+      "{\"type\":\"record\",\"name\":\"S\",\"fields\":["
+      +"{\"name\":\"x\",\"type\":\"int\"},"
+      +"{\"name\":\"R\",\"type\":"+SIMPLE_RECORD+"},"
+      +"{\"name\":\"y\",\"type\":\"string\"}"
+      +"]}";
+    check(Schema.parse(s),
+          new ColumnMetaData("x", ValueType.INT),
+          new ColumnMetaData("R#x", ValueType.INT),
+          new ColumnMetaData("R#y", ValueType.STRING),
+          new ColumnMetaData("y", ValueType.STRING));
+  }
+
+  @Test public void testSimpleArray() throws Exception {
+    String s = "{\"type\":\"array\",\"items\":\"long\"}";
+    check(Schema.parse(s),
+          new ColumnMetaData("[]", ValueType.LONG).isArray(true));
+  }
+
+  /** JSON for an array whose items are {@link #SIMPLE_RECORD}. */
+  private static final String RECORD_ARRAY =
+    "{\"type\":\"array\",\"items\":"+SIMPLE_RECORD+"}";
+
+  @Test public void testArray() throws Exception {
+    ColumnMetaData p = new ColumnMetaData("[]", ValueType.NULL).isArray(true);
+    check(Schema.parse(RECORD_ARRAY),
+          p,
+          new ColumnMetaData("[]#x", ValueType.INT).setParent(p),
+          new ColumnMetaData("[]#y", ValueType.STRING).setParent(p));
+  }
+
+  @Test public void testSimpleUnion() throws Exception {
+    String s = "[\"int\",\"string\"]";
+    check(Schema.parse(s),
+          new ColumnMetaData("int", ValueType.INT).isArray(true),
+          new ColumnMetaData("string", ValueType.STRING).isArray(true));
+  }
+
+  @Test public void testSimpleOptional() throws Exception {
+    // no column is expected for the null branch of the union
+    String s = "[\"null\",\"string\"]";
+    check(Schema.parse(s),
+          new ColumnMetaData("string", ValueType.STRING).isArray(true));
+  }
+
+  /** JSON for a union of null, int and {@link #SIMPLE_RECORD}. */
+  private static final String UNION = "[\"null\",\"int\","+SIMPLE_RECORD+"]";
+
+  @Test public void testUnion() throws Exception {
+    ColumnMetaData p = new ColumnMetaData("R", ValueType.NULL).isArray(true);
+    check(Schema.parse(UNION),
+          new ColumnMetaData("int", ValueType.INT).isArray(true),
+          p,
+          new ColumnMetaData("R#x", ValueType.INT).setParent(p),
+          new ColumnMetaData("R#y", ValueType.STRING).setParent(p));
+  }
+
+  @Test public void testNestedArray() throws Exception {
+    String s =
+      "{\"type\":\"record\",\"name\":\"S\",\"fields\":["
+      +"{\"name\":\"x\",\"type\":\"int\"},"
+      +"{\"name\":\"A\",\"type\":"+RECORD_ARRAY+"},"
+      +"{\"name\":\"y\",\"type\":\"string\"}"
+      +"]}";
+    ColumnMetaData p = new ColumnMetaData("A[]", ValueType.NULL).isArray(true);
+    check(Schema.parse(s),
+          new ColumnMetaData("x", ValueType.INT),
+          p,
+          new ColumnMetaData("A[]#x", ValueType.INT).setParent(p),
+          new ColumnMetaData("A[]#y", ValueType.STRING).setParent(p),
+          new ColumnMetaData("y", ValueType.STRING));
+  }
+
+  @Test public void testNestedUnion() throws Exception {
+    String s =
+      "{\"type\":\"record\",\"name\":\"S\",\"fields\":["
+      +"{\"name\":\"x\",\"type\":\"int\"},"
+      +"{\"name\":\"u\",\"type\":"+UNION+"},"
+      +"{\"name\":\"y\",\"type\":\"string\"}"
+      +"]}";
+    ColumnMetaData p = new ColumnMetaData("u/R", ValueType.NULL).isArray(true);
+    check(Schema.parse(s),
+          new ColumnMetaData("x", ValueType.INT),
+          new ColumnMetaData("u/int", ValueType.INT).isArray(true),
+          p,
+          new ColumnMetaData("u/R#x", ValueType.INT).setParent(p),
+          new ColumnMetaData("u/R#y", ValueType.STRING).setParent(p),
+          new ColumnMetaData("y", ValueType.STRING));
+  }
+
+  @Test public void testUnionInArray() throws Exception {
+    String s =
+      "{\"type\":\"record\",\"name\":\"S\",\"fields\":["
+      +"{\"name\":\"a\",\"type\":{\"type\":\"array\",\"items\":"+UNION+"}}"
+      +"]}";
+    ColumnMetaData p = new ColumnMetaData("a[]",ValueType.NULL).isArray(true);
+    ColumnMetaData r = new ColumnMetaData("a[]/R", ValueType.NULL)
+      .setParent(p)
+      .isArray(true);
+    check(Schema.parse(s),
+          p,
+          new ColumnMetaData("a[]/int", ValueType.INT)
+            .setParent(p)
+            .isArray(true),
+          r,
+          new ColumnMetaData("a[]/R#x", ValueType.INT).setParent(r),
+          new ColumnMetaData("a[]/R#y", ValueType.STRING).setParent(r));
+  }
+
+  @Test public void testArrayInUnion() throws Exception {
+    String s =
+      "{\"type\":\"record\",\"name\":\"S\",\"fields\":["
+      +"{\"name\":\"a\",\"type\":[\"int\","+RECORD_ARRAY+"]}]}";
+    ColumnMetaData q = new ColumnMetaData("a/array",ValueType.NULL)
+      .isArray(true);
+    ColumnMetaData r = new ColumnMetaData("a/array[]", ValueType.NULL)
+      .setParent(q)
+      .isArray(true);
+    check(Schema.parse(s),
+          new ColumnMetaData("a/int", ValueType.INT).isArray(true),
+          q,
+          r,
+          new ColumnMetaData("a/array[]#x", ValueType.INT).setParent(r),
+          new ColumnMetaData("a/array[]#y", ValueType.STRING).setParent(r));
+  }
+
+  @Test public void testSimpleMap() throws Exception {
+    String s = "{\"type\":\"map\",\"values\":\"long\"}";
+    ColumnMetaData p = new ColumnMetaData(">", ValueType.NULL).isArray(true);
+    check(Schema.parse(s),
+          p,
+          new ColumnMetaData(">key", ValueType.STRING).setParent(p),
+          new ColumnMetaData(">value", ValueType.LONG).setParent(p));
+  }
+
+  @Test public void testMap() throws Exception {
+    String s = "{\"type\":\"map\",\"values\":"+SIMPLE_RECORD+"}";
+    ColumnMetaData p = new ColumnMetaData(">", ValueType.NULL).isArray(true);
+    check(Schema.parse(s),
+          p,
+          new ColumnMetaData(">key", ValueType.STRING).setParent(p),
+          new ColumnMetaData(">value#x", ValueType.INT).setParent(p),
+          new ColumnMetaData(">value#y", ValueType.STRING).setParent(p));
+  }
+
+  /**
+   * Asserts that shredding <code>s</code> yields columns whose string forms
+   * equal those of <code>expected</code>, in order, then writes random data
+   * for the schema to {@link #FILE} and reads it back.
+   */
+  private void check(Schema s, ColumnMetaData... expected) throws Exception {
+    ColumnMetaData[] shredded = new AvroColumnator(s).getColumns();
+    assertEquals(expected.length, shredded.length);
+    for (int i = 0; i < expected.length; i++)
+      assertEquals(expected[i].toString(), shredded[i].toString());
+    checkWrite(s);
+    checkRead(s);
+  }
+
+  /** Writes {@link #COUNT} random datums of the given schema to {@link #FILE}. */
+  private void checkWrite(Schema schema) throws IOException {
+    AvroColumnWriter<Object> writer =
+      new AvroColumnWriter<Object>(schema, new ColumnFileMetaData());
+    for (Object datum : new RandomData(schema, COUNT))
+      writer.write(datum);
+    writer.writeTo(FILE);
+  }
+
+  /**
+   * Reads {@link #FILE} back and compares each datum with a freshly generated
+   * random sequence for the same schema.
+   * NOTE(review): this assumes RandomData yields the same sequence for equal
+   * arguments -- confirm against RandomData.
+   */
+  private void checkRead(Schema schema) throws IOException {
+    AvroColumnReader<Object> reader =
+      new AvroColumnReader<Object>(new AvroColumnReader.Params(FILE));
+    try {
+      for (Object expected : new RandomData(schema, COUNT))
+        assertEquals(expected, reader.next());
+    } finally {
+      reader.close();                             // close even when an assertion fails
+    }
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestShredder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+import java.io.File;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.Pair;
+import org.apache.avro.mapred.AvroMapper;
+import org.apache.avro.mapred.AvroReducer;
+import org.apache.avro.mapred.AvroCollector;
+
+import org.apache.avro.Schema;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+import static org.apache.trevni.avro.WordCountUtil.DIR;
+
+/**
+ * Runs a word-count job that writes its output through
+ * {@link AvroTrevniOutputFormat}, then a map-only job that reads that output
+ * back through {@link AvroTrevniInputFormat} and sums the counts.
+ */
+public class TestWordCount {
+
+  /** Emits a (token, 1) pair for every token of the input text. */
+  public static class MapImpl extends AvroMapper<String, Pair<String, Long>> {
+    @Override
+    public void map(String text, AvroCollector<Pair<String, Long>> collector,
+                    Reporter reporter) throws IOException {
+      StringTokenizer words = new StringTokenizer(text.toString());
+      while (words.hasMoreTokens()) {
+        String word = words.nextToken();
+        collector.collect(new Pair<String, Long>(word, 1L));
+      }
+    }
+  }
+
+  /** Sums the counts collected for a word and emits (word, sum). */
+  public static class ReduceImpl
+    extends AvroReducer<String, Long, Pair<String, Long>> {
+    @Override
+    public void reduce(String word, Iterable<Long> counts,
+                       AvroCollector<Pair<String, Long>> collector,
+                       Reporter reporter) throws IOException {
+      long tally = 0;
+      for (long n : counts) {
+        tally += n;
+      }
+      Pair<String, Long> result = new Pair<String, Long>(word, tally);
+      collector.collect(result);
+    }
+  }
+
+  /** Single JUnit entry point: the input test reads what the output test wrote. */
+  @Test public void runTestsInOrder() throws Exception {
+    testOutputFormat();
+    testInputFormat();
+  }
+
+  private static final Schema STRING = Schema.create(Schema.Type.STRING);
+  static {
+    GenericData.setStringType(STRING, GenericData.StringType.String);
+  }
+  private static final Schema LONG = Schema.create(Schema.Type.LONG);
+
+  /** Runs the word-count job and validates the counts file it produces. */
+  public void testOutputFormat() throws Exception {
+    JobConf job = new JobConf();
+
+    WordCountUtil.writeLinesFile();
+
+    AvroJob.setInputSchema(job, STRING);
+    Schema pairSchema = Pair.getPairSchema(STRING, LONG);
+    AvroJob.setOutputSchema(job, pairSchema);
+
+    AvroJob.setMapperClass(job, MapImpl.class);
+    AvroJob.setCombinerClass(job, ReduceImpl.class);
+    AvroJob.setReducerClass(job, ReduceImpl.class);
+
+    Path inputDir = new Path(DIR + "/in");
+    FileInputFormat.setInputPaths(job, inputDir);
+    Path outputDir = new Path(DIR + "/out");
+    FileOutputFormat.setOutputPath(job, outputDir);
+    FileOutputFormat.setCompressOutput(job, true);
+
+    job.setOutputFormat(AvroTrevniOutputFormat.class);
+
+    JobClient.runJob(job);
+
+    WordCountUtil.validateCountsFile();
+  }
+
+  /**
+   * Sum of all "value" fields seen by {@link Counter}.
+   * NOTE(review): only observable here if the mapper runs in the test's own
+   * JVM (presumably the local job runner) -- confirm.
+   */
+  private static long total;
+
+  /** Adds each record's "value" field to {@link #total}; collects nothing. */
+  public static class Counter extends AvroMapper<GenericRecord, Void> {
+    @Override
+    public void map(GenericRecord r, AvroCollector<Void> collector,
+                    Reporter reporter) throws IOException {
+      Long value = (Long)r.get("value");
+      total += value;
+    }
+  }
+
+  /** Reads only the "value" field of the job output and checks the sum. */
+  public void testInputFormat() throws Exception {
+    JobConf job = new JobConf();
+
+    String subSchemaJson =
+      "{\"type\":\"record\"," +
+      "\"name\":\"PairValue\","+
+      "\"fields\": [ " +
+      "{\"name\":\"value\", \"type\":\"long\"}" +
+      "]}";
+    Schema subSchema = Schema.parse(subSchemaJson);
+    AvroJob.setInputSchema(job, subSchema);
+    AvroJob.setMapperClass(job, Counter.class);
+    FileInputFormat.setInputPaths(job, new Path(DIR + "/out/*"));
+    job.setInputFormat(AvroTrevniInputFormat.class);
+
+    job.setNumReduceTasks(0);                     // map-only
+    job.setOutputFormat(NullOutputFormat.class);  // ignore output
+
+    total = 0;
+    JobClient.runJob(job);
+    assertEquals(WordCountUtil.TOTAL, total);
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/TestWordCount.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java (added)
+++ avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni.avro;
+
+import static org.junit.Assert.*;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.File;
+import java.io.InputStream;
+import java.io.FileInputStream;
+import java.io.BufferedInputStream;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.StringTokenizer;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.mapred.JobConf;
+
+import org.apache.avro.Schema;
+import org.apache.avro.io.DatumReader;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.specific.SpecificData;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.mapred.Pair;
+
+/** Fixture data and file helpers shared by the word-count tests. */
+public class WordCountUtil {
+
+  /** Working directory for the word-count job. */
+  public static final File DIR = new File("target", "wc");
+  /** Avro data file holding {@link #LINES}, the job input. */
+  public static final File LINES_FILE
+    = new File(new File(DIR, "in"), "lines.avro");
+  /** Column file checked by {@link #validateCountsFile()}. */
+  static final File COUNTS_FILE
+    = new File(new File(DIR, "out"), "part-00000/part-0.trv");
+
+  public static final String[] LINES = new String[] {
+    "the quick brown fox jumps over the lazy dog",
+    "the cow jumps over the moon",
+    "the rain in spain falls mainly on the plains"
+  };
+
+  /** Expected number of occurrences of each word in {@link #LINES}. */
+  public static final Map<String,Long> COUNTS = new TreeMap<String,Long>();
+  /** Total number of word occurrences in {@link #LINES}. */
+  public static final long TOTAL;
+  static {
+    long total = 0;
+    for (String line : LINES) {
+      StringTokenizer tokens = new StringTokenizer(line);
+      while (tokens.hasMoreTokens()) {
+        String word = tokens.nextToken();
+        long count = COUNTS.containsKey(word) ? COUNTS.get(word) : 0L;
+        count++;
+        total++;
+        COUNTS.put(word, count);
+      }
+    }
+    TOTAL = total;
+  }
+
+  /**
+   * Deletes {@link #DIR} and writes {@link #LINES} to {@link #LINES_FILE} as
+   * an Avro data file of strings.
+   *
+   * @throws IOException if the input directory cannot be created or the file
+   *         cannot be written
+   */
+  public static void writeLinesFile() throws IOException {
+    FileUtil.fullyDelete(DIR);
+    DatumWriter<String> writer = new GenericDatumWriter<String>();
+    DataFileWriter<String> out = new DataFileWriter<String>(writer);
+    File parent = LINES_FILE.getParentFile();
+    // mkdirs() also returns false when the directory already exists
+    if (!parent.mkdirs() && !parent.isDirectory())
+      throw new IOException("Cannot create directory: "+parent);
+    out.create(Schema.create(Schema.Type.STRING), LINES_FILE);
+    try {
+      for (String line : LINES)
+        out.append(line);
+    } finally {
+      out.close();                                // release file even if append fails
+    }
+  }
+
+  /**
+   * Reads {@link #COUNTS_FILE} and asserts that every (word, count) pair
+   * matches {@link #COUNTS} and that the number of pairs equals its size.
+   */
+  public static void validateCountsFile() throws Exception {
+    AvroColumnReader<Pair<String,Long>> reader =
+      new AvroColumnReader<Pair<String,Long>>
+      (new AvroColumnReader.Params(COUNTS_FILE).setModel(SpecificData.get()));
+    int numWords = 0;
+    try {
+      for (Pair<String,Long> wc : reader) {
+        assertEquals(wc.key(), COUNTS.get(wc.key()), wc.value());
+        numWords++;
+      }
+    } finally {
+      reader.close();                             // close even when an assertion fails
+    }
+    assertEquals(COUNTS.size(), numWords);
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/avro/src/test/java/org/apache/trevni/avro/WordCountUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/checkstyle.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/checkstyle.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/checkstyle.xml (added)
+++ avro/trunk/lang/java/trevni/checkstyle.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!DOCTYPE module PUBLIC
+    "-//Puppy Crawl//DTD Check Configuration 1.2//EN"
+    "http://www.puppycrawl.com/dtds/configuration_1_2.dtd">
+
+<module name="Checker">
+    <!-- Whole-file checks: no tab characters; files end with an LF newline. -->
+    <module name="FileTabCharacter"/>
+    <module name="NewlineAtEndOfFile">
+        <property name="lineSeparator" value="lf"/>
+    </module>
+
+    <!-- Checks applied to each parsed Java source file. -->
+    <module name="TreeWalker">
+        <!-- Naming conventions -->
+        <module name="ConstantName"/>
+        <module name="LocalFinalVariableName"/>
+        <module name="LocalVariableName"/>
+        <module name="MemberName"/>
+        <module name="MethodName"/>
+        <module name="PackageName"/>
+        <module name="ParameterName"/>
+        <module name="StaticVariableName"/>
+        <module name="TypeName"/>
+
+        <!-- Imports -->
+        <module name="AvoidStarImport"/>
+        <module name="RedundantImport"/>
+        <module name="UnusedImports"/>
+
+        <!-- Modifiers -->
+        <module name="RedundantModifier"/>
+
+        <!-- Coding problems -->
+        <module name="EmptyStatement"/>
+        <module name="IllegalInstantiation"/>
+        <module name="RedundantThrows"/>
+        <module name="SimplifyBooleanExpression"/>
+        <module name="SimplifyBooleanReturn"/>
+
+        <!-- Class design -->
+        <module name="InterfaceIsType"/>
+
+        <!-- Miscellaneous -->
+        <module name="ArrayTypeStyle"/>
+        <module name="UpperEll"/>
+
+    </module>
+</module>

Propchange: avro/trunk/lang/java/trevni/checkstyle.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: avro/trunk/lang/java/trevni/core/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target

Added: avro/trunk/lang/java/trevni/core/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/pom.xml (added)
+++ avro/trunk/lang/java/trevni/core/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+  xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <modelVersion>4.0.0</modelVersion>
+  <!-- Parent POM is resolved from the directory above this module. -->
+  <parent>
+    <artifactId>trevni-java</artifactId>
+    <groupId>org.apache.trevni</groupId>
+    <version>1.7.2-SNAPSHOT</version>
+    <relativePath>../</relativePath>
+  </parent>
+
+  <artifactId>trevni-core</artifactId>
+  <name>Trevni Java Core</name>
+  <url>http://avro.apache.org/</url>
+  <description>Trevni Java Core</description>
+
+  <!-- junit.version and snappy.version are not defined in this file;
+       presumably they are inherited from a parent POM (to be confirmed). -->
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.xerial.snappy</groupId>
+      <artifactId>snappy-java</artifactId>
+      <version>${snappy.version}</version>
+      <scope>compile</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <!-- No version or configuration is given here; presumably both come
+           from the parent POM (to be confirmed). -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <!-- No profiles are defined for this module. -->
+  <profiles>
+  </profiles>
+
+</project>
+

Propchange: avro/trunk/lang/java/trevni/core/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+
+/**
+ * Output buffer for array columns: a length is written first, followed by
+ * that many values.
+ */
+class ArrayColumnOutputBuffer extends ColumnOutputBuffer {
+  /** Number of values still to be written for the current array. */
+  private int remaining;
+
+  public ArrayColumnOutputBuffer(ColumnFileWriter writer, ColumnMetaData meta)
+    throws IOException {
+    super(writer, meta);
+    // the column must be an array or have a parent, and must not have index values
+    assert getMeta().isArray() || getMeta().getParent() != null;
+    assert !getMeta().hasIndexValues();
+  }
+
+  /** Starts a new array of <code>length</code> values and writes its length. */
+  @Override
+  public void writeLength(int length) throws IOException {
+    assert remaining == 0;
+    remaining = length;
+    getBuffer().writeInt(length);
+  }
+
+  /** Writes one value of the current array using the column's value type. */
+  @Override
+  public void writeValue(Object value) throws IOException {
+    assert remaining > 0;
+    getBuffer().writeValue(value, getMeta().getType());
+    remaining--;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ArrayColumnOutputBuffer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+
+class BlockDescriptor {
+  int rowCount;
+  int uncompressedSize;
+  int compressedSize;
+
+  BlockDescriptor() {}
+  
+  BlockDescriptor(int rowCount, int uncompressedSize, int compressedSize) {
+    this.rowCount = rowCount;
+    this.uncompressedSize = uncompressedSize;
+    this.compressedSize = compressedSize;
+  }
+  
+  public void writeTo(OutputBuffer out) throws IOException {
+    out.writeFixed32(rowCount);
+    out.writeFixed32(uncompressedSize);
+    out.writeFixed32(compressedSize);
+  }
+
+  public static BlockDescriptor read(InputBuffer in) throws IOException {
+    BlockDescriptor result = new BlockDescriptor();
+    result.rowCount = in.readFixed32();
+    result.uncompressedSize = in.readFixed32();
+    result.compressedSize = in.readFixed32();
+    return result;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BlockDescriptor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.nio.ByteBuffer;
+
+/** Base class for checksum algorithms. */
+abstract class Checksum {
+
+  /** Returns the checksum implementation named by the metadata.  A missing
+   * name or the name "null" selects {@link NullChecksum}; "crc32" selects
+   * {@link Crc32Checksum}.
+   * @throws TrevniRuntimeException if the name is anything else */
+  public static Checksum get(MetaData meta) {
+    final String algorithm = meta.getChecksum();
+    if (algorithm == null || "null".equals(algorithm)) {
+      return new NullChecksum();
+    }
+    if ("crc32".equals(algorithm)) {
+      return new Crc32Checksum();
+    }
+    throw new TrevniRuntimeException("Unknown checksum: "+algorithm);
+  }
+
+  /** The number of bytes per checksum. */
+  public abstract int size();
+
+  /** Compute a checksum. */
+  public abstract ByteBuffer compute(ByteBuffer data);
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Checksum.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/** Base class for compression codecs. */
+abstract class Codec {
+
+  /** Returns the codec named by the metadata.  A missing name or the name
+   * "null" selects {@link NullCodec}; "deflate" selects {@link DeflateCodec};
+   * "snappy" selects {@link SnappyCodec}.
+   * @throws TrevniRuntimeException if the name is anything else */
+  public static Codec get(MetaData meta) {
+    final String codecName = meta.getCodec();
+    if (codecName == null || "null".equals(codecName)) {
+      return new NullCodec();
+    }
+    if ("deflate".equals(codecName)) {
+      return new DeflateCodec();
+    }
+    if ("snappy".equals(codecName)) {
+      return new SnappyCodec();
+    }
+    throw new TrevniRuntimeException("Unknown codec: "+codecName);
+  }
+
+  /** Compress data */
+  abstract ByteBuffer compress(ByteBuffer uncompressedData) throws IOException;
+
+  /** Decompress data  */
+  abstract ByteBuffer decompress(ByteBuffer compressedData) throws IOException;
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+
+import java.util.Arrays;
+
+/** Locates one column within a file.  The per-block arrays stay unset until
+ * {@link #ensureBlocksRead()} has run. */
+class ColumnDescriptor<T extends Comparable> {
+  final Input file;
+  final ColumnMetaData metaData;
+
+  long start;
+  long dataStart;
+
+  BlockDescriptor[] blocks;
+
+  long[] blockStarts;                             // for random access
+  long[] firstRows;                               // for binary searches
+  T[] firstValues;                                // for binary searches
+
+  public ColumnDescriptor(Input file, ColumnMetaData metaData) {
+    this.file = file;
+    this.metaData = metaData;
+  }
+
+  /** Returns the index of the last block whose first row is at or before
+   * the given row, or -1 when the row precedes every block. */
+  public int findBlock(long row) {
+    return toBlockIndex(Arrays.binarySearch(firstRows, row));
+  }
+
+  /** Returns the index of the last block whose first value is at or before
+   * the given value, or -1 when the value precedes every block. */
+  public int findBlock(T value) {
+    return toBlockIndex(Arrays.binarySearch(firstValues, value));
+  }
+
+  /** Maps a binary-search result to a block index: an exact hit is used as
+   * is; a miss, reported as -(insertion point) - 1, becomes the entry just
+   * before the insertion point. */
+  private static int toBlockIndex(int searchResult) {
+    return searchResult >= 0 ? searchResult : -searchResult - 2;
+  }
+
+  public int blockCount() { return blocks.length; }
+
+  /** Returns the first row of the block plus its row count, or 0 when there
+   * are no blocks or the block index is negative. */
+  public long lastRow(int block) {
+    if (blocks.length == 0 || block < 0) {
+      return 0;
+    }
+    return firstRows[block] + blocks[block].rowCount;
+  }
+
+  /** Loads the block descriptors and derives each block's start position
+   * and first row.  Does nothing once the blocks have been read. */
+  public void ensureBlocksRead() throws IOException {
+    if (blocks != null) {
+      return;
+    }
+
+    // the column begins with a block count, then one descriptor per block,
+    // each followed by the block's first value when the column is indexed
+    InputBuffer header = new InputBuffer(file, start);
+    final int count = header.readFixed32();
+    BlockDescriptor[] descriptors = new BlockDescriptor[count];
+    if (metaData.hasIndexValues()) {
+      firstValues = (T[])new Comparable[count];
+    }
+    for (int i = 0; i < count; i++) {
+      descriptors[i] = BlockDescriptor.read(header);
+      if (metaData.hasIndexValues()) {
+        firstValues[i] = header.<T>readValue(metaData.getType());
+      }
+    }
+    dataStart = header.tell();
+
+    // block data follows the descriptors; each block occupies its
+    // compressed size plus one checksum
+    Checksum checksum = Checksum.get(metaData);
+    blockStarts = new long[descriptors.length];
+    firstRows = new long[descriptors.length];
+    long position = dataStart;
+    long firstRow = 0;
+    for (int i = 0; i < count; i++) {
+      BlockDescriptor descriptor = descriptors[i];
+      blockStarts[i] = position;
+      firstRows[i] = firstRow;
+      position += descriptor.compressedSize + checksum.size();
+      firstRow += descriptor.rowCount;
+    }
+    blocks = descriptors;           // set last: non-null means "already read"
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnDescriptor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+
+/** File-level metadata. */
+public class ColumnFileMetaData extends MetaData<ColumnFileMetaData> {
+
+  /** Reads metadata from the buffer into a newly created instance. */
+  static ColumnFileMetaData read(InputBuffer in) throws IOException {
+    final ColumnFileMetaData fileMeta = new ColumnFileMetaData();
+    MetaData.read(in, fileMeta);
+    return fileMeta;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileMetaData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.io.Closeable;
+import java.io.File;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+/** Reads data from a column file. */
+public class ColumnFileReader implements Closeable {
+  private Input file;
+
+  private long rowCount;
+  private int columnCount;
+  private ColumnFileMetaData metaData;
+  private ColumnDescriptor[] columns;
+  private Map<String,ColumnDescriptor> columnsByName;
+
+  /** Construct reading from the named file.
+   * @throws IOException if the header cannot be read or has a bad magic */
+  public ColumnFileReader(File file) throws IOException {
+    this(new InputFile(file));
+  }
+
+  /** Construct reading from the provided input.
+   * @throws IOException if the header cannot be read or has a bad magic */
+  public ColumnFileReader(Input file) throws IOException {
+    this.file = file;
+    readHeader();
+  }
+
+  /** Return the number of rows in this file. */
+  public long getRowCount() { return rowCount; }
+
+  /** Return the number of columns in this file. */
+  public long getColumnCount() { return columnCount; }
+
+  /** Return this file's metadata. */
+  public ColumnFileMetaData getMetaData() { return metaData; }
+
+  /** Return all columns' metadata, in column order, as a new array. */
+  public ColumnMetaData[] getColumnMetaData() {
+    ColumnMetaData[] result = new ColumnMetaData[columnCount];
+    for (int i = 0; i < columnCount; i++)
+      result[i] = columns[i].metaData;
+    return result;
+  }
+
+  /** Return root columns' metadata.  Roots are columns that have no parent. */
+  public List<ColumnMetaData> getRoots() {
+    List<ColumnMetaData> result = new ArrayList<ColumnMetaData>();
+    for (int i = 0; i < columnCount; i++)
+      if (columns[i].metaData.getParent() == null)
+        result.add(columns[i].metaData);
+    return result;
+  }
+
+  /** Return the metadata of the column with the given number. */
+  public ColumnMetaData getColumnMetaData(int number) {
+    return columns[number].metaData;
+  }
+
+  /** Return the metadata of the column with the given name.
+   * @throws TrevniRuntimeException if no column has that name */
+  public ColumnMetaData getColumnMetaData(String name) {
+    return getColumn(name).metaData;
+  }
+
+  /** Look up a column descriptor by name.
+   * @throws TrevniRuntimeException if no column has that name */
+  private <T extends Comparable> ColumnDescriptor<T> getColumn(String name) {
+    ColumnDescriptor column = columnsByName.get(name);
+    if (column == null)
+      throw new TrevniRuntimeException("No column named: "+name);
+    return (ColumnDescriptor<T>)column;
+  }
+
+  /** Read the file header: magic, row count, column count, file metadata,
+   * each column's metadata and finally each column's start position. */
+  private void readHeader() throws IOException {
+    InputBuffer in = new InputBuffer(file, 0);
+    readMagic(in);
+    this.rowCount = in.readFixed64();
+    this.columnCount = in.readFixed32();
+    this.metaData = ColumnFileMetaData.read(in);
+    this.columnsByName = new HashMap<String,ColumnDescriptor>(columnCount);
+
+    columns = new ColumnDescriptor[columnCount];
+    readColumnMetaData(in);
+    readColumnStarts(in);
+  }
+
+  /** Read the leading magic bytes and verify that they match one of the
+   * magic values declared by {@link ColumnFileWriter}.
+   * @throws IOException if the bytes cannot be read or match neither value */
+  private void readMagic(InputBuffer in) throws IOException {
+    byte[] magic = new byte[ColumnFileWriter.MAGIC.length];
+    try {
+      in.readFully(magic);
+    } catch (IOException e) {
+      // keep the underlying failure (e.g. a truncated file) as the cause
+      throw new IOException("Not a data file.", e);
+    }
+    // Reject only when the bytes match neither known magic.  The previous
+    // test, !(A || !B), let arbitrary bytes through and refused MAGIC_0.
+    if (!Arrays.equals(ColumnFileWriter.MAGIC, magic)
+        && !Arrays.equals(ColumnFileWriter.MAGIC_0, magic))
+      throw new IOException("Not a data file.");
+  }
+
+  /** Read every column's metadata, apply the file-level defaults, and
+   * register the column under its number and its name. */
+  private void readColumnMetaData(InputBuffer in) throws IOException {
+    for (int i = 0; i < columnCount; i++) {
+      ColumnMetaData meta = ColumnMetaData.read(in, this);
+      meta.setDefaults(this.metaData);
+      ColumnDescriptor column = new ColumnDescriptor(file, meta);
+      columns[i] = column;
+      meta.setNumber(i);
+      columnsByName.put(meta.getName(), column);
+    }
+  }
+
+  /** Read one fixed 64-bit start position per column, in column order. */
+  private void readColumnStarts(InputBuffer in) throws IOException {
+    for (int i = 0; i < columnCount; i++)
+      columns[i].start = in.readFixed64();
+  }
+
+  /** Return an iterator over values in the named column.
+   * @throws TrevniRuntimeException if no column has that name */
+  public <T extends Comparable> ColumnValues<T> getValues(String columnName)
+    throws IOException {
+    return new ColumnValues<T>(getColumn(columnName));
+  }
+
+  /** Return an iterator over values in a column. */
+  public <T extends Comparable> ColumnValues<T> getValues(int column)
+    throws IOException {
+    return new ColumnValues<T>(columns[column]);
+  }
+
+  /** Close the underlying input. */
+  @Override public void close() throws IOException {
+    file.close();
+  }
+
+}
+

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.util.Set;
+import java.util.HashSet;
+
+/** Writes data to a column file.
+ * All data is buffered until {@link #writeTo(File)} is called.
+ */
+public class ColumnFileWriter {
+
+  static final byte[] MAGIC_0 = new byte[] {'T', 'r', 'v', 0};
+  static final byte[] MAGIC = new byte[] {'T', 'r', 'v', 1};
+
+  private ColumnFileMetaData metaData;
+  private ColumnOutputBuffer[] columns;
+
+  private long rowCount;
+  private int columnCount;
+  private long size;
+
+  /** Construct given metadata for each column in the file. */
+  public ColumnFileWriter(ColumnFileMetaData fileMeta,
+                          ColumnMetaData... columnMeta) throws IOException {
+    checkColumns(columnMeta);
+    this.metaData = fileMeta;
+    this.columnCount = columnMeta.length;
+    this.columns = new ColumnOutputBuffer[columnCount];
+    for (int i = 0; i < columnCount; i++) {
+      ColumnMetaData c = columnMeta[i];
+      c.setDefaults(metaData);
+      columns[i] = c.isArray()
+        ? new ArrayColumnOutputBuffer(this, c)
+        : new ColumnOutputBuffer(this, c);
+      size += OutputBuffer.BLOCK_SIZE;            // over-estimate
+    }
+  }
+
+  private void checkColumns(ColumnMetaData[] columnMeta) {
+    Set<String> seen = new HashSet<String>();
+    for (int i = 0; i < columnMeta.length; i++) {
+      ColumnMetaData c = columnMeta[i];
+      String name = c.getName();
+      if (seen.contains(name))
+        throw new TrevniRuntimeException("Duplicate column name: "+name);
+      ColumnMetaData parent = c.getParent();
+      if (parent != null && !seen.contains(parent.getName()))
+        throw new TrevniRuntimeException("Parent must precede child: "+name);
+      seen.add(name);
+    }          
+  }
+
+  void incrementSize(int n) { size += n; }
+
+  /** Return the approximate size of the file that will be written.  Tries to
+   * slightly over-estimate.  Indicates both the size in memory of the buffered
+   * data as well as the size of the file that will be written by {@link
+   * #writeTo(OutputStream)}. */
+  public long sizeEstimate() { return size; }
+
+  /** Return this file's metadata. */
+  public ColumnFileMetaData getMetaData() { return metaData; }
+
+  /** Return the number of columns in the file. */
+  public int getColumnCount() { return columnCount; }
+
+  /** Add a row to the file. */
+  public void writeRow(Object... row) throws IOException {
+    startRow();
+    for (int column = 0; column < columnCount; column++)
+      writeValue(row[column], column);
+    endRow();
+  }
+
+  /** Expert: Called before any values are written to a row. */
+  public void startRow() throws IOException {
+    for (int column = 0; column < columnCount; column++)
+      columns[column].startRow();
+  }
+
+  /** Expert: Declare a count of items to be written to an array column or a
+   * column whose parent is an array. */
+  public void writeLength(int length, int column) throws IOException {
+    columns[column].writeLength(length);
+  }
+
+  /** Expert: Add a value to a row.  For values in array columns or whose
+   * parents are array columns, this must be preceded by a call to {@link
+   * #writeLength(int, int)} and must be called that many times.   For normal
+   * columns this is called once for each row in the column. */
+  public void writeValue(Object value, int column) throws IOException {
+    columns[column].writeValue(value);
+  }
+
+  /** Expert: Called after all values are written to a row. */
+  public void endRow() throws IOException {
+    for (int column = 0; column < columnCount; column++)
+      columns[column].endRow();
+    rowCount++;
+  }
+
+  /** Write all rows added to the named file. */
+  public void writeTo(File file) throws IOException {
+    OutputStream out = new FileOutputStream(file);
+    try {
+      writeTo(out);
+    } finally {
+      out.close();
+    }
+  }
+
+  /** Write all rows added to the named output stream. */
+  public void writeTo(OutputStream out) throws IOException {
+    writeHeader(out);
+    
+    for (int column = 0; column < columnCount; column++)
+      columns[column].writeTo(out);
+  }
+
+  private void writeHeader(OutputStream out) throws IOException {
+    OutputBuffer header = new OutputBuffer();
+
+    header.write(MAGIC);                          // magic
+
+    header.writeFixed64(rowCount);                // row count
+
+    header.writeFixed32(columnCount);             // column count
+
+    metaData.write(header);                       // file metadata
+
+    for (ColumnOutputBuffer column : columns)
+      column.getMeta().write(header);             // column metadata
+
+    for (long start : computeStarts(header.size()))
+      header.writeFixed64(start);                 // column starts
+
+    header.writeTo(out);
+
+  }
+
+  private long[] computeStarts(long start) throws IOException {
+    long[] result = new long[columnCount];
+    start += columnCount * 8;                     // room for starts
+    for (int column = 0; column < columnCount; column++) {
+      result[column] = start;
+      start += columns[column].size();
+    }
+    return result;
+  }
+
+}
+

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnFileWriter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/** Metadata for a column. */
+public class ColumnMetaData extends MetaData<ColumnMetaData> {
+
+  static final String NAME_KEY = RESERVED_KEY_PREFIX + "name";
+  static final String TYPE_KEY = RESERVED_KEY_PREFIX + "type";
+  static final String VALUES_KEY = RESERVED_KEY_PREFIX + "values";
+  static final String PARENT_KEY = RESERVED_KEY_PREFIX + "parent";
+  static final String ARRAY_KEY = RESERVED_KEY_PREFIX + "array";
+
+  // copies of the reserved entries, kept in fields for faster access
+  private String name;
+  private ValueType type;
+  private boolean values;
+  private ColumnMetaData parent;
+  private boolean isArray;
+
+  private transient List<ColumnMetaData> children =
+    new ArrayList<ColumnMetaData>(0);
+  private transient int number = -1;
+
+  private ColumnMetaData() {}                     // used only by read()
+
+  /** Construct given a name and type. */
+  public ColumnMetaData(String name, ValueType type) {
+    this.name = name;
+    setReserved(NAME_KEY, name);
+    this.type = type;
+    setReserved(TYPE_KEY, type.getName());
+  }
+
+  /** Return this column's name. */
+  public String getName() { return name; }
+
+  /** Return this column's type. */
+  public ValueType getType() { return type; }
+
+  /** Return this column's parent or null. */
+  public ColumnMetaData getParent() { return parent; }
+
+  /** Return the columns that named this column as their parent; the list is
+   * empty when there are none. */
+  public List<ColumnMetaData> getChildren() { return children; }
+
+  /** Return true if this column is an array. */
+  public boolean isArray() { return isArray; }
+
+  /** Return this column's number in a file, or -1 if none has been set. */
+  public int getNumber() { return number; }
+
+  void setNumber(int number) { this.number = number; }
+
+  /** Set whether this column has an index of blocks by value.  This only makes
+   * sense for sorted columns and permits one to seek into a column by value.
+   * @throws TrevniRuntimeException if this column is an array
+   */
+  public ColumnMetaData hasIndexValues(boolean values) {
+    if (isArray) {
+      throw new TrevniRuntimeException("Array column cannot have index: "+this);
+    }
+    this.values = values;
+    return setReservedBoolean(VALUES_KEY, values);
+  }
+
+  /** Set this column's parent.  A parent must be a preceding array column.
+   * This column is also added to the parent's list of children.
+   * @throws TrevniRuntimeException if the parent is not an array, or if this
+   * column has an index of values */
+  public ColumnMetaData setParent(ColumnMetaData parent) {
+    if (!parent.isArray()) {
+      throw new TrevniRuntimeException("Parent is not an array: "+parent);
+    }
+    if (values) {
+      throw new TrevniRuntimeException("Array column cannot have index: "+this);
+    }
+    this.parent = parent;
+    parent.children.add(this);
+    return setReserved(PARENT_KEY, parent.getName());
+  }
+
+  /** Set whether this column is an array.
+   * @throws TrevniRuntimeException if this column has an index of values */
+  public ColumnMetaData isArray(boolean isArray) {
+    if (values) {
+      throw new TrevniRuntimeException("Array column cannot have index: "+this);
+    }
+    this.isArray = isArray;
+    return setReservedBoolean(ARRAY_KEY, isArray);
+  }
+
+  /** Get whether this column has an index of blocks by value. */
+  public boolean hasIndexValues() { return getBoolean(VALUES_KEY); }
+
+  /** Reads one column's metadata from the buffer, fills the cached fields
+   * from the reserved entries, and, when a parent name is recorded, links
+   * the column to that parent as looked up in the given reader. */
+  static ColumnMetaData read(InputBuffer in, ColumnFileReader file)
+    throws IOException {
+    final ColumnMetaData column = new ColumnMetaData();
+    MetaData.read(in, column);
+
+    column.name = column.getString(NAME_KEY);
+    column.type = ValueType.forName(column.getString(TYPE_KEY));
+    column.values = column.getBoolean(VALUES_KEY);
+    column.isArray = column.getBoolean(ARRAY_KEY);
+
+    final String parentName = column.getString(PARENT_KEY);
+    if (parentName != null) {
+      column.setParent(file.getColumnMetaData(parentName));
+    }
+    return column;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnMetaData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Buffers the values written to one column.  Values accumulate in an
+ * {@link OutputBuffer}; when it fills, the contents are compressed,
+ * checksummed and kept in memory as a block until {@link #writeTo} is called.
+ */
+class ColumnOutputBuffer {
+  private ColumnFileWriter writer;
+  private ColumnMetaData meta;
+  private Codec codec;
+  private Checksum checksum;
+  private OutputBuffer buffer;
+  private List<BlockDescriptor> blockDescriptors;
+  private List<byte[]> blockData;
+  private List<byte[]> firstValues;
+  private int rowCount;
+  private long size = 4;                          // room for block count
+
+  /** Creates a buffer for the column described by the metadata, choosing the
+   * codec and checksum it names.  The list of first values is only created
+   * when the column has an index of values. */
+  public ColumnOutputBuffer(ColumnFileWriter writer, ColumnMetaData meta)
+    throws IOException {
+    this.writer = writer;
+    this.meta = meta;
+    this.codec = Codec.get(meta);
+    this.checksum = Checksum.get(meta);
+    this.buffer = new OutputBuffer();
+    this.blockDescriptors = new ArrayList<BlockDescriptor>();
+    this.blockData = new ArrayList<byte[]>();
+    if (meta.hasIndexValues())
+      this.firstValues = new ArrayList<byte[]>();
+  }
+
+  public ColumnMetaData getMeta() { return meta; }
+  public OutputBuffer getBuffer() { return buffer; }
+
+  /** Called before a row's values are written.  Flushing happens here, so a
+   * block is only ever closed between rows. */
+  public void startRow() throws IOException {
+    if (buffer.isFull())
+      flushBuffer();
+  }
+
+  /** Always throws: this class does not accept lengths.
+   * NOTE(review): presumably overridden by ArrayColumnOutputBuffer - confirm. */
+  public void writeLength(int length) throws IOException {
+    throw new TrevniRuntimeException("Not an array column: "+meta);
+  }
+
+  /** Appends a value to the current block.  For a column with an index of
+   * values, while no row has been completed in the block the buffer's
+   * contents are recorded as the block's first value.
+   * NOTE(review): assumes a single value is written per row - confirm. */
+  public void writeValue(Object value) throws IOException {
+    buffer.writeValue(value, meta.getType());
+    if (meta.hasIndexValues() && rowCount == 0)
+      firstValues.add(buffer.toByteArray());
+  }
+
+  /** Called after a row's values are written; counts the row. */
+  public void endRow() throws IOException {
+    rowCount++;
+  }
+
+  /** Closes the current block: compresses the buffered bytes, records a
+   * descriptor, stores the compressed bytes followed by a checksum of the
+   * uncompressed bytes, and starts a fresh buffer.  Does nothing when no row
+   * has been completed since the last flush. */
+  private void flushBuffer() throws IOException {
+    if (rowCount == 0) return;
+    ByteBuffer raw = buffer.asByteBuffer();
+    ByteBuffer c = codec.compress(raw);
+
+    // NOTE(review): raw.remaining() is read after compress(raw) and raw is
+    // reused for the checksum below; this relies on compress() leaving raw's
+    // position untouched - confirm for every codec.
+    blockDescriptors.add(new BlockDescriptor(rowCount,
+                                             raw.remaining(),
+                                             c.remaining()));
+
+    ByteBuffer data = ByteBuffer.allocate(c.remaining() + checksum.size());
+    data.put(c);
+    data.put(checksum.compute(raw));
+    blockData.add(data.array());
+
+    // bytes this block adds to the column: its descriptor (three 32-bit
+    // fields), its first value when indexed, and the data just stored
+    int sizeIncrement =
+      (4*3)                                       // descriptor
+      + (firstValues != null                      // firstValue
+         ? firstValues.get(firstValues.size()-1).length
+         : 0)
+      + data.position();                         // data
+    
+    writer.incrementSize(sizeIncrement);
+    size += sizeIncrement;                         
+
+    buffer = new OutputBuffer();
+    rowCount = 0;
+  }
+
+  /** Returns the number of bytes this column will occupy.  Any pending rows
+   * are flushed into a block first so that they are included. */
+  public long size() throws IOException {
+    flushBuffer();
+    return size;
+  }
+
+  /** Writes the block count, then each block's descriptor (followed by its
+   * first value when the column is indexed), then every block's data.
+   * This method does not flush: rows still in the buffer are only written
+   * if {@link #size()} was called beforehand. */
+  public void writeTo(OutputStream out) throws IOException {
+    OutputBuffer header = new OutputBuffer();
+    header.writeFixed32(blockDescriptors.size());
+    for (int i = 0; i < blockDescriptors.size(); i++) {
+      blockDescriptors.get(i).writeTo(header);
+      if (meta.hasIndexValues())
+        header.write(firstValues.get(i));
+    }
+    header.writeTo(out);
+
+    for (byte[] data : blockData)
+      out.write(data);
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnOutputBuffer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+
+/** An iterator over column values. */
+public class ColumnValues<T extends Comparable>
+  implements Iterator<T>, Iterable<T> {
+
+  private final ColumnDescriptor column;
+  private final ValueType type;
+  private final Codec codec;
+  private final Checksum checksum;
+  private final InputBuffer in;
+
+  private InputBuffer values;
+  private int block = -1;
+  private long row = 0;
+  private T previous; 
+
+  private int arrayLength;
+
+  ColumnValues(ColumnDescriptor column) throws IOException {
+    this.column = column;
+    this.type = column.metaData.getType();
+    this.codec = Codec.get(column.metaData);
+    this.checksum = Checksum.get(column.metaData);
+    this.in = new InputBuffer(column.file);
+
+    column.ensureBlocksRead();
+  }
+
+  /** Return the current row number within this file. */
+  public long getRow() { return row; }
+
+  /** Seek to the named row. */
+  public void seek(long r) throws IOException {
+    if (r < row || r >= column.lastRow(block))    // not in current block
+      startBlock(column.findBlock(r));            // seek to block start
+    while (r > row && hasNext()) {                // skip within block
+      values.skipValue(type);
+      row++;
+    }
+    previous = null;
+  }
+
+  /** Seek to the named value. */
+  public void seek(T v) throws IOException {
+    if (!column.metaData.hasIndexValues())
+      throw new TrevniRuntimeException
+        ("Column does not have value index: " +column.metaData.getName());
+
+    if (previous == null                          // not in current block?
+        || previous.compareTo(v) > 0
+        || (block != column.blockCount()-1
+            && column.firstValues[block+1].compareTo(v) <= 0))
+      startBlock(column.findBlock(v));            // seek to block start
+
+    while (hasNext()) {                           // scan block
+      long savedPosition = values.tell();
+      T savedPrevious = previous;
+      if (next().compareTo(v) >= 0) {
+        values.seek(savedPosition);
+        previous = savedPrevious;
+        row--;
+        return;
+      }
+    }
+  }
+
+  private void startBlock(int block) throws IOException {
+    this.block = block;
+    this.row = column.firstRows[block];
+
+    in.seek(column.blockStarts[block]);
+    int end = column.blocks[block].compressedSize;
+    byte[] raw = new byte[end+checksum.size()];
+    in.readFully(raw);
+    ByteBuffer data = codec.decompress(ByteBuffer.wrap(raw, 0, end));
+    if (!checksum.compute(data).equals
+        (ByteBuffer.wrap(raw, end, checksum.size())))
+      throw new IOException("Checksums mismatch.");
+    values = new InputBuffer(new InputBytes(data));
+  }
+
+  @Override public Iterator<T> iterator() { return this; } // returns this same cursor, not a copy
+
+  @Override public boolean hasNext() {
+    return block < column.blockCount()-1 || row < column.lastRow(block);
+  }
+
+  @Override public T next() {
+    if (column.metaData.isArray() || column.metaData.getParent() != null)
+      throw new TrevniRuntimeException
+        ("Column is array: " +column.metaData.getName());
+    try {
+      startRow();
+      return nextValue();
+    } catch (IOException e) {
+      throw new TrevniRuntimeException(e);
+    }
+  }
+
+  /** Expert: Must be called before any calls to {@link #nextLength()} or
+   * {@link #nextValue()}.  Advances to the next block when needed. */
+  public void startRow() throws IOException {
+    if (row >= column.lastRow(block)) {           // current block exhausted
+      if (block >= column.blockCount()-1)         // no block left to advance to
+        throw new TrevniRuntimeException("Read past end of column.");
+      startBlock(block+1);
+    }
+    row++;
+  }
+
+  /** Expert: Returns the next length in an array column. */
+  public int nextLength() throws IOException {
+    if (!column.metaData.isArray())
+      throw new TrevniRuntimeException
+        ("Column is not array: " +column.metaData.getName());
+    assert arrayLength == 0;
+    return arrayLength = values.readInt();
+  }
+
+  /** Expert: Returns the next value in a column. */
+  public T nextValue() throws IOException {
+    arrayLength--;
+    return previous = values.<T>readValue(type);
+  }
+
+  @Override public void remove() { throw new UnsupportedOperationException(); }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ColumnValues.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.nio.ByteBuffer;
+import java.util.zip.CRC32;
+
+/** Implements CRC32 checksum. */
+final class Crc32Checksum extends Checksum {
+  private CRC32 crc32 = new CRC32();
+
+  @Override public int size() { return 4; }
+
+  @Override public ByteBuffer compute(ByteBuffer data) {
+    crc32.reset();                                // CRC32 instance is reused
+    // position() is relative to arrayOffset() within the backing array
+    crc32.update(data.array(), data.arrayOffset()+data.position(), data.remaining());
+    ByteBuffer result = ByteBuffer.allocate(size());
+    result.putInt((int)crc32.getValue());         // low 32 bits of the CRC
+    result.flip();                                // make the bytes readable
+    return result;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Crc32Checksum.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.zip.Deflater;
+import java.util.zip.DeflaterOutputStream;
+import java.util.zip.Inflater;
+import java.util.zip.InflaterOutputStream;
+
+/** Implements DEFLATE (RFC1951) compression and decompression. */
+class DeflateCodec extends Codec {
+  private ByteArrayOutputStream outputBuffer;
+  private Deflater deflater;
+  private Inflater inflater;
+
+  @Override
+  ByteBuffer compress(ByteBuffer data) throws IOException {
+    ByteArrayOutputStream baos = getOutputBuffer(data.remaining());
+    writeAndClose(data, new DeflaterOutputStream(baos, getDeflater()));
+    return ByteBuffer.wrap(baos.toByteArray());
+  }
+
+  @Override
+  ByteBuffer decompress(ByteBuffer data) throws IOException {
+    ByteArrayOutputStream baos = getOutputBuffer(data.remaining());
+    writeAndClose(data, new InflaterOutputStream(baos, getInflater()));
+    return ByteBuffer.wrap(baos.toByteArray());
+  }
+  
+  private void writeAndClose(ByteBuffer data, OutputStream out)
+    throws IOException {                          // honors the buffer's arrayOffset
+    out.write(data.array(), data.arrayOffset()+data.position(), data.remaining());
+    out.close();                                  // flushes remaining output
+  }
+  
+  private Inflater getInflater() {
+    if (null == inflater)
+      inflater = new Inflater(true);
+    inflater.reset();
+    return inflater;
+  }
+
+  private Deflater getDeflater() {
+    if (null == deflater)
+      deflater = new Deflater(Deflater.DEFAULT_COMPRESSION, true);
+    deflater.reset();
+    return deflater;
+  }
+  
+  private ByteArrayOutputStream getOutputBuffer(int suggestedLength) {
+    if (null == outputBuffer)
+      outputBuffer = new ByteArrayOutputStream(suggestedLength);
+    outputBuffer.reset();
+    return outputBuffer;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/DeflateCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.io.Closeable;
+
+/** A byte source that supports positioned read and length. */
+public interface Input extends Closeable {
+  /** Return the total length of the input. */
+  long length() throws IOException;
+
+  /** Positioned read. */
+  int read(long position, byte[] b, int start, int len) throws IOException;
+}
+

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Input.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,340 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+
+/** Used to read values. */
+class InputBuffer {
+  private Input in;
+
+  private long inLength;
+  private long offset;                            // pos of next read from in
+
+  private byte[] buf;                             // data from input
+  private int pos;                                // position within buffer
+  private int limit;                              // end of valid buffer data
+
+  public InputBuffer(Input in) throws IOException { this(in, 0); }
+
+  public InputBuffer(Input in, long position) throws IOException {
+    this.in = in;
+    this.inLength = in.length();
+    this.offset = position;
+    if (in instanceof InputBytes) {               // use buffer directly
+      this.buf = ((InputBytes)in).getBuffer();
+      this.limit = (int)in.length();
+      this.offset = limit;                        // buffer spans the whole input
+      this.pos = (int)position;                   // so start at position within it
+    } else {                                      // create new buffer
+      this.buf = new byte[8192];                  // big enough for primitives
+    }
+  }
+
+  public void seek(long position) throws IOException {
+    if (position >= (offset-limit) && position <= offset) {
+      pos = (int)(limit - (offset - position));   // seek in buffer;
+      return;
+    }
+    pos = 0;
+    limit = 0;
+    offset = position;
+  }
+
+  public long tell() { return (offset-limit)+pos; }
+
+  public long length() { return inLength; }
+
+  public <T extends Comparable> T readValue(ValueType type) throws IOException {
+    switch (type) {
+    case NULL:
+      return (T)null;
+    case INT:
+      return (T)Integer.valueOf(readInt());
+    case LONG:
+      return (T)Long.valueOf(readLong());
+    case FIXED32:
+      return (T)Integer.valueOf(readFixed32());
+    case FIXED64:
+      return (T)Long.valueOf(readFixed64());
+    case FLOAT:
+      return (T)Float.valueOf(readFloat());
+    case DOUBLE:
+      return (T)Double.valueOf(readDouble());
+    case STRING:
+      return (T)readString();
+    case BYTES:
+      return (T)readBytes(null);
+    default:
+      throw new TrevniRuntimeException("Unknown value type: "+type);
+    }
+  }
+
+  public void skipValue(ValueType type) throws IOException {
+    switch (type) {
+    case NULL:
+                    break;
+    case INT:
+      readInt();    break;
+    case LONG:
+      readLong();   break;
+    case FIXED32:
+    case FLOAT:
+      skip(4);      break;
+    case FIXED64:
+    case DOUBLE:
+      skip(8);      break;
+    case STRING:
+    case BYTES:
+      skipBytes();  break;
+    default:
+      throw new TrevniRuntimeException("Unknown value type: "+type);
+    }
+  }
+
+  public int readInt() throws IOException {
+    if ((limit - pos) < 5) {                      // maybe not in buffer
+      int b = read();
+      int n = b & 0x7f;
+      for (int shift = 7; b > 0x7f; shift += 7) {
+        b = read();
+        n ^= (b & 0x7f) << shift;
+      }
+      return (n >>> 1) ^ -(n & 1);                  // back to two's-complement
+    }
+    int len = 1;
+    int b = buf[pos] & 0xff;
+    int n = b & 0x7f;
+    if (b > 0x7f) {
+      b = buf[pos + len++] & 0xff;
+      n ^= (b & 0x7f) << 7;
+      if (b > 0x7f) {
+        b = buf[pos + len++] & 0xff;
+        n ^= (b & 0x7f) << 14;
+        if (b > 0x7f) {
+          b = buf[pos + len++] & 0xff;
+          n ^= (b & 0x7f) << 21;
+          if (b > 0x7f) {
+            b = buf[pos + len++] & 0xff;
+            n ^= (b & 0x7f) << 28;
+            if (b > 0x7f) {
+              throw new IOException("Invalid int encoding");
+            }
+          }
+        }
+      }
+    }
+    pos += len;
+    if (pos > limit)
+      throw new EOFException();
+    return (n >>> 1) ^ -(n & 1);                  // back to two's-complement
+  }
+
+  public long readLong() throws IOException {
+    if ((limit - pos) < 10) {                     // maybe not in buffer
+      int b = read();
+      long n = b & 0x7f;
+      for (int shift = 7; b > 0x7f; shift += 7) {
+        b = read();
+        n ^= (b & 0x7fL) << shift;
+      }
+      return (n >>> 1) ^ -(n & 1);                // back to two's-complement
+    }
+
+    int b = buf[pos++] & 0xff;
+    int n = b & 0x7f;
+    long l;
+    if (b > 0x7f) {
+      b = buf[pos++] & 0xff;
+      n ^= (b & 0x7f) << 7;
+      if (b > 0x7f) {
+        b = buf[pos++] & 0xff;
+        n ^= (b & 0x7f) << 14;
+        if (b > 0x7f) {
+          b = buf[pos++] & 0xff;
+          n ^= (b & 0x7f) << 21;
+          if (b > 0x7f) {
+            // only the low 28 bits can be set, so this won't carry
+            // the sign bit to the long
+            l = innerLongDecode((long)n);
+          } else {
+            l = n;
+          }
+        } else {
+          l = n;
+        }
+      } else {
+        l = n;
+      }
+    } else {
+      l = n;
+    }
+    if (pos > limit) {
+      throw new EOFException();
+    }
+    return (l >>> 1) ^ -(l & 1); // back to two's-complement
+  }
+  
+  // splitting readLong up makes it faster because the JVM does more
+  // optimizations on small methods
+  private long innerLongDecode(long l) throws IOException {
+    int len = 1;
+    int b = buf[pos] & 0xff;
+    l ^= (b & 0x7fL) << 28;
+    if (b > 0x7f) {
+      b = buf[pos + len++] & 0xff;
+      l ^= (b & 0x7fL) << 35;
+      if (b > 0x7f) {
+        b = buf[pos + len++] & 0xff;
+        l ^= (b & 0x7fL) << 42;
+        if (b > 0x7f) {
+          b = buf[pos + len++] & 0xff;
+          l ^= (b & 0x7fL) << 49;
+          if (b > 0x7f) {
+            b = buf[pos + len++] & 0xff;
+            l ^= (b & 0x7fL) << 56;
+            if (b > 0x7f) {
+              b = buf[pos + len++] & 0xff;
+              l ^= (b & 0x7fL) << 63;
+              if (b > 0x7f) {
+                throw new IOException("Invalid long encoding");
+              }
+            }
+          }
+        }
+      }
+    }
+    pos += len;
+    return l;
+  }
+
+  public float readFloat() throws IOException {
+    return Float.intBitsToFloat(readFixed32());
+  }
+
+  public int readFixed32() throws IOException {
+    if ((limit - pos) < 4)                        // maybe not in buffer
+      return read() | (read() << 8) | (read() << 16) | (read() << 24);
+
+    int len = 1;
+    int n = (buf[pos] & 0xff) | ((buf[pos + len++] & 0xff) << 8)
+        | ((buf[pos + len++] & 0xff) << 16) | ((buf[pos + len++] & 0xff) << 24);
+    if ((pos + 4) > limit)
+      throw new EOFException();
+    pos += 4;
+    return n;
+  }
+
+  public double readDouble() throws IOException {
+    return Double.longBitsToDouble(readFixed64());
+  }
+
+  public long readFixed64() throws IOException {
+    return (readFixed32() & 0xFFFFFFFFL) | (((long)readFixed32()) << 32);
+  }
+
+  private static final Charset UTF8 = Charset.forName("UTF-8");
+
+  public String readString() throws IOException {
+    int length = readInt();
+    if (length <= (limit - pos)) {                        // in buffer
+      String result = new String(buf, pos, length, UTF8); // read directly
+      pos += length;
+      return result;
+    }
+    byte[] bytes = new byte[length];
+    readFully(bytes, 0, length);
+    return new String(bytes, 0, length, UTF8);
+  }  
+
+  public byte[] readBytes() throws IOException {
+    byte[] result = new byte[readInt()];
+    readFully(result);
+    return result;
+  }
+
+  public ByteBuffer readBytes(ByteBuffer old) throws IOException {
+    int length = readInt();
+    ByteBuffer result;
+    if (old != null && length <= old.capacity()) {
+      result = old;
+      result.clear();
+    } else {
+      result = ByteBuffer.allocate(length);
+    }
+    readFully(result.array(), result.position(), length);
+    result.limit(length);
+    return result;
+  }
+
+  public void skipBytes() throws IOException {
+    skip(readInt());
+  }
+
+  private void skip(long length) throws IOException {
+    seek(tell()+length);
+  }
+
+  public int read() throws IOException {
+    if (pos >= limit) {
+      limit = readInput(buf, 0, buf.length);
+      pos = 0;
+    }
+    return buf[pos++] & 0xFF;
+  }
+
+  public void readFully(byte[] bytes) throws IOException {
+    readFully(bytes, 0, bytes.length);
+  }
+
+  public void readFully(byte[] bytes, int start, int len) throws IOException {
+    int buffered = limit - pos;
+    while (len > buffered) {                     // buffer is insufficient
+      System.arraycopy(buf, pos, bytes, start, buffered); // consume buffer
+      start += buffered;
+      len -= buffered;
+      pos += buffered;
+      if (len > buf.length) {                     // bigger than buffer
+        do {
+          int read = readInput(bytes, start, len); // read directly into result
+          len -= read;
+          start += read;
+        } while (len > 0);
+        return;
+      }
+      // NOTE: like the loop above, this assumes readInput() makes progress
+      limit = readInput(buf, 0, buf.length);      // refill; may be a short read
+      pos = 0;
+      buffered = limit;                           // re-check: loop until covered
+    }
+
+    System.arraycopy(buf, pos, bytes, start, len); // copy from buffer
+    pos += len;
+  }
+
+  private int readInput(byte[] b, int start, int len) throws IOException {
+    int read = in.read(offset, b, start, len);
+    if (read < 0) throw new EOFException();
+    offset += read;
+    return read;
+ }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBuffer.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message