avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r1383626 [3/3] - in /avro/trunk: ./ doc/src/content/xdocs/ lang/java/ lang/java/tools/ lang/java/tools/src/main/java/org/apache/avro/tool/ lang/java/trevni/ lang/java/trevni/avro/ lang/java/trevni/avro/src/ lang/java/trevni/avro/src/main/ l...
Date Tue, 11 Sep 2012 21:35:59 GMT
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * An {@link Input} backed with data in a byte array.
+ *
+ * <p>Positions passed to {@link #read(long, byte[], int, int)} are assigned
+ * directly to the inherited {@code pos} field, so they index the backing
+ * array itself, and {@link #length()} reports the inherited {@code count}
+ * (the index one past the last readable byte of that array).
+ */
+public class InputBytes extends ByteArrayInputStream implements Input {
+
+  /** Construct for the given bytes. */
+  public InputBytes(byte[] data) { super(data); }
+
+  /**
+   * Construct for the bytes between the buffer's position and its limit.
+   *
+   * @param data an array-backed buffer; {@link ByteBuffer#array()} throws
+   *   if the buffer has no accessible backing array
+   */
+  public InputBytes(ByteBuffer data) {
+    // ByteArrayInputStream expects (array, offset, length), not
+    // (array, offset, limit): pass the number of remaining bytes, and add
+    // arrayOffset() so that sliced buffers start at the right array index.
+    super(data.array(),
+          data.arrayOffset() + data.position(),
+          data.remaining());
+  }
+
+  /** Return the inherited {@code count} field as the length. */
+  @Override
+  public long length() throws IOException { return this.count; }
+
+  /**
+   * Move the stream to {@code pos}, then read up to {@code len} bytes into
+   * {@code b} starting at {@code start}.
+   *
+   * @return the value returned by
+   *   {@link ByteArrayInputStream#read(byte[], int, int)}
+   */
+  @Override
+  public synchronized int read(long pos, byte[] b, int start, int len)
+    throws IOException {
+    this.pos = (int)pos;            // narrowing: the backing array is int-indexed
+    return read(b, start, len);
+  }
+
+  /** Return the backing array itself, without copying. */
+  byte[] getBuffer() { return buf; }
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputBytes.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.nio.channels.FileChannel;
+import java.nio.ByteBuffer;
+import java.io.IOException;
+
+/** An {@link Input} that reads from a file through a {@link FileChannel}. */
+public class InputFile implements Input {
+
+  private FileChannel channel;
+
+  /**
+   * Open the given file for reading.
+   *
+   * @param file the file to read
+   * @throws IOException if the file cannot be opened
+   */
+  public InputFile(File file) throws IOException {
+    FileInputStream stream = new FileInputStream(file);
+    this.channel = stream.getChannel();
+  }
+
+  /** Return the current size of the underlying channel. */
+  @Override
+  public long length() throws IOException {
+    return channel.size();
+  }
+
+  /**
+   * Read up to {@code len} bytes at the given absolute file position into
+   * {@code b}, beginning at index {@code start}.
+   *
+   * @return whatever {@link FileChannel#read(ByteBuffer, long)} returns
+   */
+  @Override
+  public int read(long position, byte[] b, int start, int len)
+    throws IOException {
+    ByteBuffer target = ByteBuffer.wrap(b, start, len);
+    return channel.read(target, position);
+  }
+
+  /** Close the underlying channel. */
+  @Override
+  public void close() throws IOException {
+    channel.close();
+  }
+
+}
+

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/InputFile.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,145 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Map;
+import java.util.LinkedHashMap;
+
+/** Base class for metadata. */
+public class MetaData<T extends MetaData> extends LinkedHashMap<String,byte[]> {
+
+  static final String RESERVED_KEY_PREFIX = "trevni.";
+
+  static final String CODEC_KEY = RESERVED_KEY_PREFIX + "codec";
+  static final String CHECKSUM_KEY = RESERVED_KEY_PREFIX + "checksum";
+
+  public static final Charset UTF8 = Charset.forName("UTF-8");
+
+  private MetaData<?> defaults;
+
+  void setDefaults(MetaData defaults) { this.defaults = defaults; }
+
+  /** Return the compression codec name. */
+  public String getCodec() { return getString(CODEC_KEY); }
+
+  /** Set the compression codec name. */
+  public T setCodec(String codec) {
+    setReserved(CODEC_KEY, codec);
+    return (T)this;
+  }
+   
+  /** Return the checksum algorithm name. */
+  public String getChecksum() { return getString(CHECKSUM_KEY); }
+
+  /** Set the checksum algorithm name. */
+  public T setChecksum(String checksum) {
+    setReserved(CHECKSUM_KEY, checksum);
+    return (T)this;
+  }
+
+  /** Return the value of a metadata property as a String. */
+  public String getString(String key) {
+    byte[] value = get(key);
+    if (value == null && defaults != null)
+      value = defaults.get(key);
+    if (value == null)
+      return null;
+    return new String(value, UTF8);
+  }
+
+  /** Return the value of a metadata property as a long. */
+  public long getLong(String key) {
+    return Long.parseLong(getString(key));
+  }
+
+  /** Return true iff a key has any value, false if it is not present. */
+  public boolean getBoolean(String key) {
+    return get(key) != null;
+  }
+
+  /** Set a metadata property to a binary value. */
+  public T set(String key, byte[] value) {
+    if (isReserved(key)) {
+      throw new TrevniRuntimeException("Cannot set reserved key: " + key);
+    }
+    put(key, value);
+    return (T)this;
+  }
+
+  /** Test if a metadata key is reserved. */
+  public static boolean isReserved(String key) {
+    return key.startsWith(RESERVED_KEY_PREFIX);
+  }
+
+  /** Set a metadata property to a String value. */
+  public T set(String key, String value) {
+    return set(key, value.getBytes(UTF8));
+  }
+
+  T setReserved(String key, String value) {
+    put(key, value.getBytes(UTF8));
+    return (T)this;
+  }
+
+  T setReservedBoolean(String key, boolean value) {
+    if (value)
+      setReserved(key, "");
+    else
+      remove(key);
+    return (T)this;
+  }
+
+  /** Set a metadata property to a long value. */
+  public T set(String key, long value) {
+    return set(key, Long.toString(value));
+  }
+
+  void write(OutputBuffer out) throws IOException {
+    out.writeInt(size());
+    for (Map.Entry<String,byte[]> e : entrySet()) {
+      out.writeString(e.getKey());
+      out.writeBytes(e.getValue());
+    }
+  }
+
+  static void read(InputBuffer in, MetaData<?> metaData) throws IOException {
+    int size = in.readInt();
+    for (int i = 0; i < size; i++)
+      metaData.put(in.readString(), in.readBytes());
+  }
+
+  @Override public String toString() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append("{ ");
+    for (Map.Entry<String,byte[]> e : entrySet()) {
+      buffer.append(e.getKey());
+      buffer.append("=");
+      try {
+        buffer.append(new String(e.getValue(), "ISO-8859-1"));
+      } catch (java.io.UnsupportedEncodingException error) {
+        throw new TrevniRuntimeException(error);
+      }
+      buffer.append(" ");
+    }
+    buffer.append("}");
+    return buffer.toString();
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/MetaData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.nio.ByteBuffer;
+
+/** Implements "null" (empty) checksum: zero bytes wide, always empty. */
+final class NullChecksum extends Checksum {
+
+  /** The null checksum occupies no bytes. */
+  @Override
+  public int size() {
+    return 0;
+  }
+
+  /** Ignore the data and return a fresh zero-capacity buffer. */
+  @Override
+  public ByteBuffer compute(ByteBuffer data) {
+    final ByteBuffer empty = ByteBuffer.allocate(0);
+    return empty;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullChecksum.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/** Implements "null" (pass through) codec: both directions are the identity. */
+final class NullCodec extends Codec {
+
+  /** Return the very buffer that was passed in, untouched. */
+  @Override
+  ByteBuffer compress(ByteBuffer buffer) throws IOException {
+    final ByteBuffer unchanged = buffer;
+    return unchanged;
+  }
+
+  /** Return the very buffer that was passed in, untouched. */
+  @Override
+  ByteBuffer decompress(ByteBuffer data) throws IOException {
+    final ByteBuffer unchanged = data;
+    return unchanged;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/NullCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.io.ByteArrayOutputStream;
+import java.nio.charset.Charset;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+/**
+ * Used to write values.
+ *
+ * <p>Extends {@link ByteArrayOutputStream} and writes directly into its
+ * {@code buf}/{@code count} fields.  The static {@code size} methods report
+ * how many bytes the corresponding {@code write} method appends.
+ */
+class OutputBuffer extends ByteArrayOutputStream {
+  static final int BLOCK_SIZE = 64 * 1024;
+
+  /** Start with room for one block plus a quarter block of headroom. */
+  public OutputBuffer() {
+    // '+' binds tighter than '>>', so the shift must be parenthesized;
+    // unparenthesized this evaluated to (2 * BLOCK_SIZE) >> 2, half a block.
+    super(BLOCK_SIZE + (BLOCK_SIZE >> 2));
+  }
+
+  /** Return true once at least {@link #BLOCK_SIZE} bytes have been written. */
+  public boolean isFull() { return size() >= BLOCK_SIZE; }
+
+  /** Wrap the written bytes without copying; the array is shared. */
+  public ByteBuffer asByteBuffer() { return ByteBuffer.wrap(buf, 0, count); }
+
+  /**
+   * Write a value using the encoding of the given type.
+   *
+   * @throws ClassCastException if the value's class does not match the type
+   * @throws TrevniRuntimeException if the type is not handled
+   */
+  public void writeValue(Object value, ValueType type)
+    throws IOException {
+    switch (type) {
+    case NULL:
+                                              break;
+    case INT:
+      writeInt((Integer)value);               break;
+    case LONG:
+      writeLong((Long)value);                 break;
+    case FIXED32:
+      writeFixed32((Integer)value);           break;
+    case FIXED64:
+      writeFixed64((Long)value);              break;
+    case FLOAT:
+      writeFloat((Float)value);               break;
+    case DOUBLE:
+      writeDouble((Double)value);             break;
+    case STRING:
+      writeString((String)value);             break;
+    case BYTES:
+      if (value instanceof ByteBuffer)
+        writeBytes((ByteBuffer)value);
+      else
+        writeBytes((byte[])value);
+      break;
+    default:
+      throw new TrevniRuntimeException("Unknown value type: "+type);
+    }
+  }
+
+  private static final Charset UTF8 = Charset.forName("UTF-8");
+
+  /** Write the UTF-8 byte length as a variable-length int, then the bytes. */
+  public void writeString(String string) throws IOException {
+    byte[] bytes = string.getBytes(UTF8);
+    writeInt(bytes.length);
+    write(bytes, 0, bytes.length);
+  }
+
+  /**
+   * Write the bytes between the buffer's position and limit, length-prefixed.
+   * Requires an array-backed buffer; the buffer's position is not advanced.
+   */
+  public void writeBytes(ByteBuffer bytes) throws IOException {
+    int pos = bytes.position();
+    int start = bytes.arrayOffset() + pos;
+    int len = bytes.limit() - pos;
+    writeBytes(bytes.array(), start, len);
+  }
+
+  /** Write the whole array, length-prefixed. */
+  public void writeBytes(byte[] bytes) throws IOException {
+    writeBytes(bytes, 0, bytes.length);
+  }
+
+  /** Write {@code len} as a variable-length int, then {@code len} bytes. */
+  public void writeBytes(byte[] bytes, int start, int len) throws IOException {
+    writeInt(len);
+    write(bytes, start, len);
+  }
+
+  /** Write the raw IEEE 754 bits of a float as four little-endian bytes. */
+  public void writeFloat(float f) throws IOException {
+    writeFixed32(Float.floatToRawIntBits(f));
+  }
+
+  /** Write the raw IEEE 754 bits of a double as eight little-endian bytes. */
+  public void writeDouble(double d) throws IOException {
+    writeFixed64(Double.doubleToRawLongBits(d));
+  }
+
+  /** Write four bytes, least-significant first. */
+  public void writeFixed32(int i) throws IOException {
+    ensure(4);
+    for (int shift = 0; shift < 32; shift += 8)
+      buf[count++] = (byte)(i >>> shift);
+  }
+
+  /** Write eight bytes, least-significant first. */
+  public void writeFixed64(long l) throws IOException {
+    ensure(8);
+    for (int shift = 0; shift < 64; shift += 8)
+      buf[count++] = (byte)(l >>> shift);
+  }
+
+  /** Write an int zig-zag encoded in one to five variable-length bytes. */
+  public void writeInt(int n) throws IOException {
+    ensure(5);
+    n = (n << 1) ^ (n >> 31);                     // move sign to low-order bit
+    while ((n & ~0x7F) != 0) {                    // more than 7 bits remain
+      buf[count++] = (byte)((n | 0x80) & 0xFF);   // low 7 bits + continue flag
+      n >>>= 7;
+    }
+    buf[count++] = (byte) n;
+  }
+
+  /** Write a long zig-zag encoded in one to ten variable-length bytes. */
+  public void writeLong(long n) throws IOException {
+    ensure(10);
+    n = (n << 1) ^ (n >> 63);                     // move sign to low-order bit
+    while ((n & ~0x7FL) != 0) {                   // more than 7 bits remain
+      buf[count++] = (byte)((n | 0x80) & 0xFF);   // low 7 bits + continue flag
+      n >>>= 7;
+    }
+    buf[count++] = (byte) n;
+  }
+
+  /** Grow the backing array, at least doubling, to fit n more bytes. */
+  private void ensure(int n) {
+    if (count + n > buf.length)
+      buf = Arrays.copyOf(buf, Math.max(buf.length << 1, count + n));
+  }
+
+  /**
+   * Return the number of bytes {@link #writeValue(Object, ValueType)} would
+   * append for the given value.
+   *
+   * @throws TrevniRuntimeException if the type is not handled
+   */
+  public static int size(Object value, ValueType type) {
+    switch (type) {
+    case NULL:
+      return 0;
+    case INT:
+      return size((Integer)value);
+    case LONG:
+      return size((Long)value);
+    case FIXED32:
+    case FLOAT:
+      return 4;
+    case FIXED64:
+    case DOUBLE:
+      return 8;
+    case STRING:
+      return size((String)value);
+    case BYTES:
+      if (value instanceof ByteBuffer)
+        return size((ByteBuffer)value);
+      return size((byte[])value);
+    default:
+      throw new TrevniRuntimeException("Unknown value type: "+type);
+    }
+  }
+
+  /** Return the number of bytes {@link #writeInt(int)} appends for n. */
+  public static int size(int n) {
+    n = (n << 1) ^ (n >> 31);                     // move sign to low-order bit
+    // The zig-zag value is an unsigned quantity whose top bit may be set,
+    // so count 7-bit groups with unsigned shifts rather than comparing it
+    // as a signed number (which reported 1 byte for large magnitudes).
+    int bytes = 1;
+    while ((n & ~0x7F) != 0) {
+      bytes++;
+      n >>>= 7;
+    }
+    return bytes;
+  }
+
+  /** Return the number of bytes {@link #writeLong(long)} appends for n. */
+  public static int size(long n) {
+    n = (n << 1) ^ (n >> 63);                     // move sign to low-order bit
+    // Counted with unsigned long shifts: int-typed thresholds such as
+    // 1<<35 wrap around, and signed comparison mishandles a set top bit.
+    int bytes = 1;
+    while ((n & ~0x7FL) != 0) {
+      bytes++;
+      n >>>= 7;
+    }
+    return bytes;
+  }
+
+  /** Return the encoded size of the buffer's remaining bytes plus prefix. */
+  public static int size(ByteBuffer bytes) {
+    int length = bytes.remaining();
+    return size(length) + length;
+  }
+
+  /** Return the encoded size of the array plus its length prefix. */
+  public static int size(byte[] bytes) {
+    int length = bytes.length;
+    return size(length) + length;
+  }
+
+  /** Return the encoded size of the string's UTF-8 form plus length prefix. */
+  public static int size(String string) {
+    int length = utf8Length(string);
+    return size(length) + length;
+  }
+
+  /** Count the bytes {@code string.getBytes(UTF8)} produces, without encoding. */
+  private static int utf8Length(String string) {
+    int stringLength = string.length();
+    int utf8Length = 0;
+    for (int i = 0; i < stringLength; i++) {
+      char c = string.charAt(i);
+      if (c <= 0x007F) {
+        utf8Length += 1;
+      } else if (c <= 0x07FF) {
+        utf8Length += 2;
+      } else if (Character.isHighSurrogate(c)     // surrogate pair
+                 && i != stringLength-1
+                 && Character.isLowSurrogate(string.charAt(i+1))) {
+        utf8Length += 4;                          // supplementary code point
+        i++;                                      // consume the low surrogate
+      } else if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
+        // Unpaired surrogate: String.getBytes(Charset) substitutes the
+        // charset's replacement, a single '?' byte for UTF-8.
+        utf8Length += 1;
+      } else {
+        utf8Length += 3;
+      }
+    }
+    return utf8Length;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/OutputBuffer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import org.xerial.snappy.Snappy;
+
+/**
+ * Implements <a href="http://code.google.com/p/snappy/">Snappy</a> codec.
+ *
+ * <p>Both directions read the bytes between the input buffer's position and
+ * limit through its backing array, so the input must be array-backed.  The
+ * input buffer's position is not advanced.
+ */
+final class SnappyCodec extends Codec {
+
+  /** Return a new buffer holding the compressed form of {@code in}. */
+  @Override ByteBuffer compress(ByteBuffer in) throws IOException {
+    // A buffer's position is relative to arrayOffset(), which is non-zero
+    // for slices and offset wraps; add it to get the true array index.
+    int start = in.arrayOffset() + in.position();
+    int length = in.remaining();
+    ByteBuffer out =
+      ByteBuffer.allocate(Snappy.maxCompressedLength(length));
+    int size = Snappy.compress(in.array(), start, length,
+                               out.array(), 0);
+    out.limit(size);
+    return out;
+  }
+
+  /** Return a new buffer holding the uncompressed form of {@code in}. */
+  @Override ByteBuffer decompress(ByteBuffer in) throws IOException {
+    // See compress(): index the backing array via arrayOffset() + position().
+    int start = in.arrayOffset() + in.position();
+    int length = in.remaining();
+    ByteBuffer out = ByteBuffer.allocate
+      (Snappy.uncompressedLength(in.array(), start, length));
+    int size = Snappy.uncompress(in.array(), start, length,
+                                 out.array(), 0);
+    out.limit(size);
+    return out;
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/SnappyCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.trevni;
+
+/** Base runtime exception thrown by Trevni. */
+public class TrevniRuntimeException extends RuntimeException {
+
+  /** Wrap an underlying cause; the message is derived from the cause. */
+  public TrevniRuntimeException(Throwable cause) {
+    super(cause);
+  }
+
+  /** Create with a descriptive message and no cause. */
+  public TrevniRuntimeException(String message) {
+    super(message);
+  }
+
+  /** Create with a descriptive message and an underlying cause. */
+  public TrevniRuntimeException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+}
+

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/TrevniRuntimeException.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+/** The datatypes that may be stored in a column. */
+public enum ValueType {
+  NULL, INT, LONG, FIXED32, FIXED64, FLOAT, DOUBLE, STRING, BYTES;
+
+  /** Lower-case form of the constant name, e.g. "fixed32". */
+  private final String name;
+
+  private ValueType() {
+    // Locale.ROOT keeps the mapping independent of the default locale;
+    // under a Turkish locale "INT".toLowerCase() would yield a dotless i.
+    this.name = this.name().toLowerCase(java.util.Locale.ROOT);
+  }
+
+  /** Return the name of this type. */
+  public String getName() { return name; }
+
+  /**
+   * Return a type given its name, matched case-insensitively.
+   *
+   * @throws IllegalArgumentException if no type has that name
+   * @throws NullPointerException if name is null
+   */
+  public static ValueType forName(String name) {
+    // Locale.ROOT for the same reason as in the constructor: the default
+    // locale must not change which constant a name resolves to.
+    return valueOf(name.toUpperCase(java.util.Locale.ROOT));
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/ValueType.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html Tue Sep 11 21:35:56 2012
@@ -0,0 +1,23 @@
+<html>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<body>
+A column file format.
+</body>
+</html>

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/main/java/overview.html
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/overview.html?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/overview.html (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/overview.html Tue Sep 11 21:35:56 2012
@@ -0,0 +1,88 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<html>
+<head>
+   <title>Avro</title>
+</head>
+<body>Avro is a data serialization system.
+
+  <h2>Overview</h2>
+
+  <p>Avro provides:
+    <ul>
+      <li>Rich data structures.
+      <li>A compact, fast, binary data format.
+      <li>A container file, to store persistent data.
+      <li>Remote procedure call (RPC).
+      <li>Simple integration with dynamic languages.  Code generation
+      is not required to read or write data files nor to use or
+      implement RPC protocols.  Code generation is an optional
+      optimization, only worth implementing for statically typed
+      languages.
+    </ul>  
+
+  <h2>Schemas</h2>
+
+  <p>Avro relies on <i>{@link org.apache.avro.Schema schemas}</i>.
+  When Avro data is read, the schema used when writing it is always
+  present.  This permits each datum to be written with no per-value
+  overheads, making serialization both fast and small.  This also
+  facilitates use with dynamic, scripting languages, since data,
+  together with its schema, is fully self-describing.
+
+  <p>When Avro data is stored in a {@link
+  org.apache.avro.file.DataFileWriter file}, its schema is stored with
+  it, so that files may be processed later by any program.  If the
+  program reading the data expects a different schema this can be
+  easily resolved, since both schemas are present.
+
+  <p>When Avro is used in {@link org.apache.avro.ipc RPC}, the client
+    and server exchange schemas in the connection handshake.  (This
+    can be optimized so that, for most calls, no schemas are actually
+    transmitted.)  Since client and server both have the other's
+    full schema, correspondence between same named fields, missing
+    fields, extra fields, etc. can all be easily resolved.
+
+  <p>Avro schemas are defined
+  with <a href="http://www.json.org/">JSON</a>.  This facilitates
+  implementation in languages that already have JSON libraries.
+
+  <h2>Comparison with other systems</h2>
+
+  Avro provides functionality similar to systems such
+  as <a href="http://incubator.apache.org/thrift/">Thrift</a>,
+  <a href="http://code.google.com/protobuf/">Protocol Buffers</a>,
+  etc.  Avro differs from these systems in the following fundamental
+  aspects.
+  <ul>
+    <li><i>Dynamic typing</i>: Avro does not require that code be
+    generated.  Data is always accompanied by a schema that permits
+    full processing of that data without code generation, static
+    datatypes, etc.  This facilitates construction of generic
+    data-processing systems and languages.
+    <li><i>Untagged data</i>: Since the schema is present when data is
+    read, considerably less type information need be encoded with
+    data, resulting in smaller serialization size.</li>
+    <li><i>No manually-assigned field IDs</i>: When a schema changes,
+    both the old and new schema are always present when processing
+    data, so differences may be resolved symbolically, using field
+    names.
+  </ul>  
+
+</body>
+</html>

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.Collection;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(value = Parameterized.class)
+public class TestColumnFile {
+
+  private static final File FILE = new File("target", "test.trv");
+  private static final int COUNT = 1024*64;
+
+  private String codec;
+  private String checksum;
+
+  public TestColumnFile(String codec, String checksum) {
+    this.codec = codec;
+    this.checksum = checksum;
+  }
+
+  @Parameters public static Collection<Object[]> codecs() {
+    Object[][] data = new Object[][] {{"null", "null"},
+                                      {"snappy", "crc32"},
+                                      {"deflate", "crc32"}};
+    return Arrays.asList(data);
+  }
+
+  private ColumnFileMetaData createFileMeta() {
+    return new ColumnFileMetaData()
+      .setCodec(codec)
+      .setChecksum(checksum);
+  }
+
+  @Test public void testEmptyFile() throws Exception {
+    FILE.delete();
+    ColumnFileWriter out = new ColumnFileWriter(createFileMeta());
+    out.writeTo(FILE);
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    Assert.assertEquals(0, in.getRowCount());
+    Assert.assertEquals(0, in.getColumnCount());
+    in.close();
+  }
+
+  @Test public void testEmptyColumn() throws Exception {
+    FILE.delete();
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("test", ValueType.INT));
+    out.writeTo(FILE);
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    Assert.assertEquals(0, in.getRowCount());
+    Assert.assertEquals(1, in.getColumnCount());
+    ColumnValues<Integer> values = in.getValues("test");
+    for (int i : values)
+      throw new Exception("no value should be found");
+    in.close();
+  }
+
+  @Test public void testInts() throws Exception {
+    FILE.delete();
+
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("test", ValueType.INT));
+    Random random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      out.writeRow(TestUtil.randomLength(random));
+    out.writeTo(FILE);
+
+    random = TestUtil.createRandom();
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    Assert.assertEquals(COUNT, in.getRowCount());
+    Assert.assertEquals(1, in.getColumnCount());
+    Iterator<Integer> i = in.getValues("test");
+    int count = 0;
+    while (i.hasNext()) {
+      Assert.assertEquals(TestUtil.randomLength(random), (int)i.next());
+      count++;
+    }
+    Assert.assertEquals(COUNT, count);
+  }
+
+  @Test public void testLongs() throws Exception {
+    FILE.delete();
+
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("test", ValueType.LONG));
+    Random random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      out.writeRow(random.nextLong());
+    out.writeTo(FILE);
+
+    random = TestUtil.createRandom();
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    Assert.assertEquals(COUNT, in.getRowCount());
+    Assert.assertEquals(1, in.getColumnCount());
+    Iterator<Long> i = in.getValues("test");
+    int count = 0;
+    while (i.hasNext()) {
+      Assert.assertEquals(random.nextLong(), (long)i.next());
+      count++;
+    }
+    Assert.assertEquals(COUNT, count);
+  }
+
+  @Test public void testStrings() throws Exception {
+    FILE.delete();
+
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("test", ValueType.STRING));
+    Random random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      out.writeRow(TestUtil.randomString(random));
+    out.writeTo(FILE);
+
+    random = TestUtil.createRandom();
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    Assert.assertEquals(COUNT, in.getRowCount());
+    Assert.assertEquals(1, in.getColumnCount());
+    Iterator<String> i = in.getValues("test");
+    int count = 0;
+    while (i.hasNext()) {
+      Assert.assertEquals(TestUtil.randomString(random), i.next());
+      count++;
+    }
+    Assert.assertEquals(COUNT, count);
+  }
+
+  /** Writes COUNT rows of (FIXED32, STRING) values and checks that both
+   * columns read back, in lock-step, the values regenerated from the
+   * same random seed. */
+  @Test public void testTwoColumn() throws Exception {
+    FILE.delete();
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("a", ValueType.FIXED32),
+                           new ColumnMetaData("b", ValueType.STRING));
+    Random random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      out.writeRow(random.nextInt(), TestUtil.randomString(random));
+    out.writeTo(FILE);
+
+    // re-create the generator so expected values replay in written order
+    random = TestUtil.createRandom();
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    Assert.assertEquals(COUNT, in.getRowCount());
+    Assert.assertEquals(2, in.getColumnCount());
+    // column "a" was written with ints, so iterate it as Integer
+    Iterator<Integer> i = in.getValues("a");
+    Iterator<String> j = in.getValues("b");
+    int count = 0;
+    while (i.hasNext() && j.hasNext()) {
+      // unbox explicitly, as testInts does: assertEquals(int, Integer)
+      // matches both the (long,long) and (Object,Object) overloads
+      Assert.assertEquals(random.nextInt(), (int)i.next());
+      Assert.assertEquals(TestUtil.randomString(random), j.next());
+      count++;
+    }
+    Assert.assertEquals(COUNT, count);
+    in.close();
+  }
+
+  @Test public void testSeekLongs() throws Exception {
+    FILE.delete();
+
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("test", ValueType.LONG));
+    Random random = TestUtil.createRandom();
+
+    int seekCount = COUNT/1024;
+    int[] seekRows = new int[seekCount];
+    Map<Integer,Integer> seekRowMap = new HashMap<Integer,Integer>(seekCount);
+    while (seekRowMap.size() < seekCount) {
+      int row = random.nextInt(COUNT);
+      if (!seekRowMap.containsKey(row)) {
+        seekRows[seekRowMap.size()] = row;
+        seekRowMap.put(row, seekRowMap.size());
+      }
+    }
+
+    Long[] seekValues = new Long[seekCount];
+    for (int i = 0; i < COUNT; i++) {
+      long l = random.nextLong();
+      out.writeRow(l);
+      if (seekRowMap.containsKey(i))
+        seekValues[seekRowMap.get(i)] = l;
+    }
+    out.writeTo(FILE);
+
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    ColumnValues<Long> v = in.getValues("test");
+
+    for (int i = 0; i < seekCount; i++) {
+      v.seek(seekRows[i]);
+      Assert.assertEquals(seekValues[i], v.next());
+    }
+
+  }
+
+  @Test public void testSeekStrings() throws Exception {
+    FILE.delete();
+
+    ColumnFileWriter out =
+      new ColumnFileWriter(createFileMeta(),
+                           new ColumnMetaData("test", ValueType.STRING)
+                           .hasIndexValues(true));
+
+    Random random = TestUtil.createRandom();
+
+    int seekCount = COUNT/1024;
+    Map<Integer,Integer> seekRowMap = new HashMap<Integer,Integer>(seekCount);
+    while (seekRowMap.size() < seekCount) {
+      int row = random.nextInt(COUNT);
+      if (!seekRowMap.containsKey(row))
+        seekRowMap.put(row, seekRowMap.size());
+    }
+
+    String[] values = new String[COUNT];
+    for (int i = 0; i < COUNT; i++)
+      values[i] = TestUtil.randomString(random);
+    Arrays.sort(values);
+
+    String[] seekValues = new String[seekCount];
+    for (int i = 0; i < COUNT; i++) {
+      out.writeRow(values[i]);
+      if (seekRowMap.containsKey(i))
+        seekValues[seekRowMap.get(i)] = values[i];
+    }
+    out.writeTo(FILE);
+
+    ColumnFileReader in = new ColumnFileReader(FILE);
+    ColumnValues<String> v = in.getValues("test");
+
+    for (int i = 0; i < seekCount; i++) {
+      v.seek(seekValues[i]);
+      Assert.assertEquals(seekValues[i], v.next());
+    }
+
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.util.Random;
+
+import java.io.ByteArrayOutputStream;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestIOBuffers {
+
+  private static final int COUNT = 1000;
+
+  /** An OutputBuffer with nothing written yields an input whose position
+   * and length are both zero. */
+  @Test public void testEmpty() throws Exception {
+    OutputBuffer out = new OutputBuffer();
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    Assert.assertEquals(0, in.tell());
+    Assert.assertEquals(0, in.length());
+  }
+
+  @Test public void testZero() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    out.writeInt(0);
+    byte[] bytes = out.toByteArray();
+    Assert.assertEquals(1, bytes.length);
+    Assert.assertEquals(0, bytes[0]);
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    Assert.assertEquals(0, in.readInt());
+  }
+
+  @Test public void testInt() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeInt(random.nextInt());
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(random.nextInt(), in.readInt());
+  }
+
+  @Test public void testLong() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeLong(random.nextLong());
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(random.nextLong(), in.readLong());
+  }
+
+  @Test public void testFixed32() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeFixed32(random.nextInt());
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(random.nextInt(), in.readFixed32());
+  }
+
+  @Test public void testFixed64() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeFixed64(random.nextLong());
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(random.nextLong(), in.readFixed64());
+  }
+  
+  @Test public void testFloat() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeFloat(random.nextFloat());
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(random.nextFloat(), in.readFloat(), 0);
+  }
+  
+  @Test public void testDouble() throws Exception {
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeDouble(Double.MIN_VALUE);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(Double.MIN_VALUE, in.readDouble(), 0);
+  }
+  
+  @Test public void testBytes() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeBytes(TestUtil.randomBytes(random));
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(TestUtil.randomBytes(random), in.readBytes(null));
+  }
+
+  @Test public void testString() throws Exception {
+    Random random = TestUtil.createRandom();
+    OutputBuffer out = new OutputBuffer();
+    for (int i = 0; i < COUNT; i++)
+      out.writeString(TestUtil.randomString(random));
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    random = TestUtil.createRandom();
+    for (int i = 0; i < COUNT; i++)
+      Assert.assertEquals(TestUtil.randomString(random), in.readString());
+  }
+  @Test public void testSkipNull() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(null, ValueType.NULL);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.NULL);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  @Test public void testSkipInt() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(Integer.MAX_VALUE, ValueType.INT);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.INT);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  @Test public void testSkipLong() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(Long.MAX_VALUE, ValueType.LONG);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.LONG);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  /** Checks that skipping a FIXED32 value leaves the buffer positioned at
+   * the sentinel written immediately after it. */
+  @Test public void testSkipFixed32() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(Integer.MAX_VALUE, ValueType.FIXED32);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    // skip using the type that was written, so the FIXED32 path is exercised
+    in.skipValue(ValueType.FIXED32);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  /** Checks that skipping a FIXED64 value leaves the buffer positioned at
+   * the sentinel written immediately after it. */
+  @Test public void testSkipFixed64() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(Long.MAX_VALUE, ValueType.FIXED64);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    // skip using the type that was written, so the FIXED64 path is exercised
+    in.skipValue(ValueType.FIXED64);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  @Test public void testSkipFloat() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(Float.MAX_VALUE, ValueType.FLOAT);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.FLOAT);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  @Test public void testSkipDouble() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue(Double.MAX_VALUE, ValueType.DOUBLE);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.DOUBLE);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  @Test public void testSkipString() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue("trevni", ValueType.STRING);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.STRING);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+  @Test public void testSkipBytes() throws Exception {
+    long sentinel = Long.MAX_VALUE;
+    OutputBuffer out = new OutputBuffer();
+    out.writeValue("trevni".getBytes(), ValueType.BYTES);
+    out.writeLong(sentinel);
+    
+    InputBuffer in = new InputBuffer(new InputBytes(out.toByteArray()));
+    in.skipValue(ValueType.BYTES);
+    Assert.assertEquals(sentinel, in.readLong());
+  }
+}

Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestIOBuffers.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.Arrays;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests positioned reads from an {@link InputBytes}. */
+public class TestInputBytes {
+
+  private static final int SIZE = 1000;
+  private static final int COUNT = 100;
+
+  /** Reads COUNT randomly placed slices and compares each one with the
+   * same range of the backing array. */
+  @Test public void testRandomReads() throws Exception {
+    // seeded through TestUtil so a failing run can be replayed via test.seed
+    Random random = TestUtil.createRandom();
+    // at least one byte: random.nextInt(length) below rejects a zero bound
+    int length = random.nextInt(SIZE) + 1;
+    byte[] data = new byte[length];
+    random.nextBytes(data);
+
+    Input in = new InputBytes(data);
+      
+    for (int i = 0; i < COUNT; i++) {
+      int p = random.nextInt(length);
+      // clamp the slice so it never runs past the end of the data
+      int l = Math.min(random.nextInt(SIZE/10), length-p);
+      byte[] buffer = new byte[l];
+      in.read(p, buffer, 0, l);
+      Assert.assertArrayEquals(Arrays.copyOfRange(data, p, p+l), buffer);
+    }
+  }
+}

Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestInputBytes.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java Tue Sep 11 21:35:56 2012
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.util.Random;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestUtil {
+
+  private static long seed;
+  private static boolean seedSet;
+
+  /** Returns the random seed for this test run.  By default uses the current
+   * time, but a test run can be replicated by specifying the "test.seed"
+   * system property.  The seed is printed the first time it's accessed so that
+   * failures can be replicated if needed. */
+  public static long getRandomSeed() {
+    if (!seedSet) {
+      String configured = System.getProperty("test.seed");
+      if (configured != null)
+        seed = Long.valueOf(configured);
+      else 
+        seed = System.currentTimeMillis();
+      System.out.println("test.seed="+seed);
+      seedSet = true;
+    }
+    return seed;
+ }
+
+  public static Random createRandom() {
+    return new Random(getRandomSeed());
+  }
+
+  public static ByteBuffer randomBytes(Random random) {
+    byte[] bytes = new byte[randomLength(random)];
+    random.nextBytes(bytes);
+    return ByteBuffer.wrap(bytes);
+  }
+
+  /** Returns a string of random lower-case letters in 'a'..'z' whose length
+   * is chosen by {@link #randomLength(Random)}. */
+  public static String randomString(Random random) {
+    int length = randomLength(random);
+    char[] chars = new char[length];
+    for (int i = 0; i < length; i++)
+      // nextInt's bound is exclusive; the +1 lets 'z' itself be generated
+      chars[i] = (char)('a'+random.nextInt('z'-'a'+1));
+    return new String(chars);
+  }
+
+  /** Returns [0-15] 15/16 times.
+   * Returns [0-255] 255/256 times.
+   * Returns [0-4095] 4095/4096 times.
+   * Returns [0-65535] every time. */
+  public static int randomLength(Random random) {
+    int n = random.nextInt();
+    if (n < 0) n = -n;
+    return n &
+      ((n & 0xF0000) != 0
+       ? 0xF
+       : ((n & 0xFF0000) != 0
+          ? 0xFF
+          : ((n & 0xFFF0000) != 0
+             ? 0xFFF
+             : 0xFFFF)));
+  }
+
+  @Test public void testRandomLength() {
+    long total = 0;
+    int count = 1024 * 1024;
+    int min = Short.MAX_VALUE;
+    int max = 0;
+    Random r = createRandom();
+    for (int i = 0; i < count; i++) {
+      int length = randomLength(r);
+      if (min > length) min = length;
+      if (max < length) max = length;
+      total += length;
+    }
+    Assert.assertEquals(0, min);
+    Assert.assertTrue(max > 1024 * 32);
+
+    float average = total / (float)count;
+    Assert.assertTrue(average > 16.0f);
+    Assert.assertTrue(average < 64.0f);
+
+  }
+
+}

Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: avro/trunk/lang/java/trevni/doc/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Sep 11 21:35:56 2012
@@ -0,0 +1 @@
+target

Added: avro/trunk/lang/java/trevni/doc/apt/spec.apt
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/apt/spec.apt?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/apt/spec.apt (added)
+++ avro/trunk/lang/java/trevni/doc/apt/spec.apt Tue Sep 11 21:35:56 2012
@@ -0,0 +1,467 @@
+~~ Licensed to the Apache Software Foundation (ASF) under one or more
+~~ contributor license agreements.  See the NOTICE file distributed with
+~~ this work for additional information regarding copyright ownership.
+~~ The ASF licenses this file to You under the Apache License, Version 2.0
+~~ (the "License"); you may not use this file except in compliance with
+~~ the License.  You may obtain a copy of the License at
+~~
+~~     http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License.
+  ---
+  Trevni: A Column File Format
+  ---
+
+Trevni: A Column File Format
+
+  Version 0.1
+
+  DRAFT
+
+  This document is the authoritative specification of a file format.
+  Its intent is to permit compatible, independent implementations that
+  read and/or write files in this format.
+
+Introduction
+
+  Data sets are often described as a <table> composed of <rows> and
+  <columns>.  Each record in the dataset is considered a row, with
+  each field of the record occupying a different column.  Writing
+  records to a file one-by-one as they are created results in a
+  <row-major> format, like Hadoop’s SequenceFile or Avro data files.
+
+  In many cases higher query performance may be achieved if the data
+  is instead organized in a <column-major> format, where multiple
+  values of a given column are stored adjacently.  This document
+  defines such a column-major file format for datasets.
+
+  To permit scalable, distributed query evaluation, datasets are
+  partitioned into row groups, containing distinct collections of
+  rows.  Each row group is organized in column-major order, while row
+  groups form a row-major partitioning of the entire dataset.
+
+Rationale
+
+* Goals
+
+  The format is meant to satisfy the following goals:
+
+  [[1]] Maximize the size of row groups.  Disc drives are used most
+  efficiently when sequentially accessing data.  Consider a drive that
+  takes 10ms to seek and transfers at 100MB/second.  If a 10-column
+  dataset whose values are all the same size is split into 10MB row
+  groups, then accessing a single column will require a sequence of
+  seek+1MB reads, for a cost of 20ms/MB processed.  If the same
+  dataset is split into 100MB row groups then this drops to 11ms/MB
+  processed.  This effect is exaggerated for datasets with larger
+  numbers of columns and with columns whose values are smaller than
+  average.  So we’d prefer row groups that are 100MB or greater.
+
+  [[1]] Permit random access within a row group.  Some queries will
+  first examine one column, and, only when certain relatively rare
+  criteria are met, examine other columns.  Rather than iterating
+  through selected columns of the row-group in parallel, one might
+  iterate through one column and randomly access another.  This is
+  called support for WHERE clauses, after the SQL operator of that
+  name.
+
+  [[1]] Minimize the number of files per dataset.  HDFS is a primary
+  intended deployment platform for these files.  The HDFS Namenode
+  requires memory for each file in the filesystem, thus for a format
+  to be HDFS-friendly it should strive to require the minimum number
+  of distinct files.
+
+  [[1]] Support co-location of columns within row-groups.  Row groups
+  are the unit of parallel operation on a column dataset.  For
+  efficient file i/o, the entirety of a row-group should ideally
+  reside on the host that is evaluating the query in order to avoid
+  network latencies and bottlenecks.
+
+  [[1]] Data integrity.  The format should permit applications to
+  detect data corruption.  Many file systems may prevent corruption,
+  but files may be moved between filesystems and be subject to
+  corruption at points in that process.  It is best if the data in a
+  file can be validated independently.
+
+  [[1]] Extensibility.  The format should permit applications to store
+  additional annotations about a dataset in the files, such as type
+  information, origin, etc.  Some environments may have metadata
+  stores for such information, but not all do, and files might be
+  moved among systems with different metadata systems.  The ability to
+  keep such information within the file simplifies the coordination of
+  such information.
+
+  [[1]] Minimal overhead.  The column format should not make datasets
+  appreciably larger.  Storage is a primary cost and a choice to use
+  this format should not require additional storage.
+
+  [[1]] Primary format.  The column format should be usable as a
+  primary format for datasets, not as an auxiliary, accelerated
+  format. Applications that process a dataset in row-major order
+  should be able to easily consume column files and applications that
+  produce datasets in row-major order should be able to easily
+  generate column files.
+
+* Design
+
+  To meet these goals we propose the following design.
+
+  [[1]] Each row group is a separate file.  All values of a column in
+  a file are written contiguously.  This maximizes the row group size,
+  optimizing performance when querying few and small columns.
+
+  [[1]] Each file occupies a single HDFS block.  A larger than normal
+  block size may be specified, e.g., ~1GB instead of the typical
+  ~100MB.  This guarantees co-location and eliminates network use when
+  query processing can be co-located with the file.  This also
+  moderates the memory impact on the HDFS Namenode since no small
+  files are written.
+
+  [[1]] Each column in a file is written as a sequence of ~64kB
+  compressed blocks.  The sequence is prefixed by a table describing
+  all of the blocks in the column to permit random access within the
+  column.
+
+  [[1]] Application-specific metadata may be added at the file,
+  column, and block levels.
+
+  [[1]] Checksums are included with each block, providing data integrity.
+
+* Discussion
+
+  The use of a single block per file achieves the same effect as the
+  custom block placement policy described in the {{CIF}} paper,
+  while still permitting HDFS rebalancing and not increasing the
+  number of files in the namespace.
+
+Format Specification
+
+  This section formally describes the proposed column file format.
+
+* Data Model
+
+  We assume a simple data model, where a record is a set of named
+  fields, and the value of each field is a sequence of untyped bytes.
+  A type system may be layered on top of this, as specified in the
+  Type Mapping section below.
+
+* Primitive Values
+
+  We define the following primitive value types:
+
+  * Signed 64-bit <<long>> values are written using a variable-length
+zig-zag coding, where the high-order bit in each byte determines
+whether subsequent bytes are present.  For example:
+
+*--------------*------*
+ decimal value | hex bytes
+*--------------*------*
+0              | 00
+*--------------*------*
+-1             | 01
+*--------------*------*
+1              | 02
+*--------------*------*
+...             
+*--------------*------*
+-64            | 7f
+*--------------*------*
+64             | 80 01
+*--------------*------*
+...             
+*--------------*------*
+
+  * <<bytes>> are encoded as a <long> followed by that many bytes of data.
+
+  * a <<string>> is encoded as a <long> followed by that many bytes of
+    UTF-8 encoded character data.
+
+  For example, the three-character string "foo" would be encoded as
+  the <long> value 3 (encoded as hex 06) followed by the UTF-8
+  encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): 06 66 6f 6f
+
+* Type Names
+
+  The following type names are used to describe column values:
+
+  * <<null>>, requires zero bytes.  Sometimes used in array columns.
+
+  * <<int>>, like <long>, but restricted to 32-bit signed values
+
+  * <<long>> 64-bit signed values, represented as above
+
+  * <<fixed32>> 32-bit values stored as four bytes, little-endian.
+
+  * <<fixed64>> 64-bit values stored as eight bytes, little-endian.
+
+  * <<float>> 32-bit IEEE floating point value, little-endian
+
+  * <<double>> 64-bit IEEE floating point value, little-endian
+
+  * <<string>> as above
+
+  * <<bytes>> as above, may be used to encapsulate more complex objects
+
+  []
+
+  Type names are represented as <strings> (UTF-8 encoded, length-prefixed).
+
+* Metadata
+
+  <<Metadata>> consists of:
+
+  * A <long> indicating the number of metadata key/value pairs.
+
+  * For each pair, a <string> key and <bytes> value.
+
+  []
+
+  All metadata properties that start with "trevni." are reserved.
+
+** File Metadata
+
+  The following file metadata properties are defined:
+
+  * <<trevni.codec>> the name of the default compression codec used to
+    compress blocks, as a <string>. Implementations are required to
+    support the "null" codec.  Optional.  If absent, it is assumed to
+    be "null".  Codecs are described in more detail below.
+
+  * <<trevni.checksum>> the name of the checksum algorithm used in this
+    file, as a <string>.  Implementations are required to support the
+    "crc-32" checksum.  Optional.  If absent, it is assumed to be
+    "null".  Checksums are described in more detail below.
+
+  []
+
+** Column Metadata
+
+  The following column metadata properties are defined:
+
+  * <<trevni.codec>> the name of the compression codec used to compress
+    the blocks of this column, as a <string>. Implementations are
+    required to support the "null" codec.  Optional.  If absent, it is
+    assumed to be "null".  Codecs are described in more detail below.
+
+  * <<trevni.name>> the name of the column, as a <string>.  Required.
+
+  * <<trevni.type>> the type of data in the column.  One of the type names
+    above.  Required.
+
+  * <<trevni.values>> if present, indicates that the initial value of each
+    block in this column will be stored in the block’s descriptor.
+    Not permitted for array columns or columns that specify a parent.
+
+  * <<trevni.array>> if present, indicates that each row in this column
+    contains a sequence of values of the named type rather than just a
+    single value.  An integer length precedes each sequence of values
+    indicating the count of values in the sequence.
+
+  * <<trevni.parent>> if present, the name of an <array> column whose
+    lengths are also used by this column.  Thus values of this column
+    are sequences but no lengths are stored in this column.
+
+  []
+
+  For example, consider the following row, as JSON, where all values
+  are primitive types, but one has multiple values.
+
+---
+{"id": 566, "date": 23423234234,
+ "from": "foo@bar.com",
+ "to": ["bar@baz.com", "bang@foo.com"],
+ "content": "Hi!"}
+---
+
+  The columns for this might be specified as:
+
+---
+name=id       type=int
+name=date     type=long
+name=from     type=string
+name=to       type=string  array=true
+name=content  type=string 
+---
+
+  If a row contains an array of records, e.g. "received" in the following:
+
+---
+{"id": 566, "date": 23423234234,
+ "from": "foo@bar.com",
+ "to": ["bar@baz.com", "bang@foo.com"],
+ "content": "Hi!",
+ "received": [{"date": 234234234234, "host": "192.168.0.0.1"},
+              {"date": 234234545645, "host": "192.168.0.0.2"}]
+}
+---
+
+  Then one can define a parent column followed by a column for each
+  field in the record, adding the following columns:
+
+---
+name=received  type=null    array=true
+name=date      type=long    parent=received
+name=host      type=string  parent=received
+---
+
+  If an array value itself contains an array, e.g. the "sigs" below:
+
+---
+{"id": 566, "date": 23423234234,
+ "from": "foo@bar.com",
+ "to": ["bar@baz.com", "bang@foo.com"],
+ "content": "Hi!",
+ "received": [{"date": 234234234234, "host": "192.168.0.0.1",
+               "sigs": [{"algo": "weak", "value": "0af345de"}]},
+              {"date": 234234545645, "host": "192.168.0.0.2",
+               "sigs": []}]
+}
+---
+
+  Then a parent column may be defined that itself has a parent column.
+
+---
+name=sigs   type=null    array=true  parent=received
+name=algo   type=string              parent=sigs
+name=value  type=string              parent=sigs
+---
+
+** Block Metadata
+
+  No block metadata properties are currently defined.
+
+* File Format
+
+  A <<file>> consists of:
+
+  * A <file header>, followed by
+
+  * one or more <columns>.
+
+  []
+
+  A <<file header>> consists of:
+
+  * Four bytes, ASCII 'T', 'r', 'v', followed by 1.
+
+  * a <fixed64> indicating the number of rows in the file
+
+  * a <fixed32> indicating the number of columns in the file
+
+  * file <metadata>.
+
+  * for each column, its <column metadata>
+
+  * for each column, its starting position in the file as a <fixed64>.
+
+  []
+
+  A <<column>> consists of:
+
+  * A <fixed32> indicating the number of blocks in this column.
+
+  * For each block, a <block descriptor>
+
+  * One or more <blocks>.
+
+  []
+
+  A <<block descriptor>> consists of:
+
+  * A <fixed32> indicating the number of rows in the block
+
+  * A <fixed32> indicating the size in bytes of the block before the
+    codec is applied (excluding checksum).
+
+  * A <fixed32> indicating the size in bytes of the block after the
+    codec is applied (excluding checksum).
+
+  * If this column’s metadata declares it to include values, the first
+    value in the block, serialized according to this column's type.
+
+  []
+
+  A <<block>> consists of:
+
+  * The serialized column values.  If a column is an array column then
+    value sequences are preceded by their length, as an <int>.  If a
+    codec is specified, the values and lengths are compressed by that
+    codec.
+
+  * The checksum, as determined by the file metadata.
+
+  []
+
+* Codecs
+
+  [null] The "null" codec simply passes data through uncompressed.
+
+  [deflate] The "deflate" codec writes the data block using the
+  deflate algorithm as specified in RFC 1951.
+
+  [snappy] The "snappy" codec uses Google's Snappy compression library.
+
+* Checksum algorithms
+
+  [null] The "null" checksum contains zero bytes.
+
+  [crc-32] Each "crc-32" checksum contains the four bytes of an ISO
+  3309 CRC-32 checksum of the uncompressed block data as a fixed32.
+
+* Type Mappings
+
+  We define a standard mapping for how types defined in various
+  serialization systems are represented in a column file.  Records
+  from these systems are <shredded> into columns.  When records are
+  nested, a depth-first recursive walk can assign a separate column
+  for each primitive value.
+
+** Avro
+
+** Protocol Buffers
+
+** Thrift
+
+Implementation Notes
+
+  Some possible techniques for writing column files include:
+
+  [[1]] Use a standard ~100MB block, buffer in memory up to the block
+  size, then flush the file directly to HDFS.  A single reduce task
+  might create multiple output files.  The namenode requires memory
+  proportional to the number of names and blocks*replication.  This
+  would increase the number of names but not blocks, so this should
+  still be much better than a file per column.
+
+  [[1]] Spill each column to a separate local, temporary file then,
+  when the file is closed, append these files, writing a single file
+  to HDFS whose block size is set to be that of the entire file.  This
+  would be a bit slower than (1) and may have trouble when the local disk
+  is full, but it would better use HDFS namespace and further reduce
+  seeks when processing columns whose values are small.
+
+  [[1]] Use a separate mapreduce job to convert row-major files to
+  column-major.  The map would output a (row#, column#,
+  value) tuple, partitioned by row# but sorted by column# then row#.
+  The reducer could directly write the column file.  But the column
+  file format would need to be changed to write counts, descriptors,
+  etc. at the end of files rather than at the front.
+
+  []
+
+  (1) is the simplest to implement and most implementations should
+  start with it.
+
+* References
+
+  {CIF} {{{http://arxiv.org/pdf/1105.4252.pdf}<Column-Oriented Storage
+  Techniques for MapReduce>}}, Floratou, Patel, Shekita, & Tata, VLDB
+  2011.
+
+  {DREMEL} {{{http://research.google.com/pubs/archive/36632.pdf}<Dremel:
+  Interactive Analysis of Web-Scale Datasets>}}, Melnik, Gubarev, Long,
+  Romer, Shivakumar, & Tolton, VLDB 2010.

Propchange: avro/trunk/lang/java/trevni/doc/apt/spec.apt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/doc/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/pom.xml (added)
+++ avro/trunk/lang/java/trevni/doc/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>trevni-java</artifactId>
+    <groupId>org.apache.trevni</groupId>
+    <version>1.7.2-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+
+  <groupId>org.apache.trevni</groupId>
+  <artifactId>trevni-doc</artifactId>
+  <version>1.7.2-SNAPSHOT</version>
+
+  <name>Trevni Specification</name>
+  <url>http://avro.apache.org/</url>
+
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-site-plugin</artifactId>
+          <version>${maven-site-plugin.version}</version>
+          <configuration>
+            <generateReports>false</generateReports>
+            <siteDirectory>.</siteDirectory>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+
+  </build>
+
+</project>

Propchange: avro/trunk/lang/java/trevni/doc/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/doc/resources/css/site.css
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/resources/css/site.css?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/resources/css/site.css (added)
+++ avro/trunk/lang/java/trevni/doc/resources/css/site.css Tue Sep 11 21:35:56 2012
@@ -0,0 +1,31 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#banner {
+  height: 93px;
+  background: none;
+}
+
+#bannerLeft img {
+  height: 90px;
+  margin-left: 30px;
+  margin-top: 4px;
+}
+
+#bannerRight img {
+  margin: 17px;
+}
+

Propchange: avro/trunk/lang/java/trevni/doc/resources/css/site.css
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/doc/site.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/doc/site.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/doc/site.xml (added)
+++ avro/trunk/lang/java/trevni/doc/site.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project>
+  <skin>
+    <groupId>org.apache.maven.skins</groupId>
+    <artifactId>maven-stylus-skin</artifactId>
+    <version>1.2</version>
+  </skin>
+  <body>
+    <menu name="Trevni">
+      <item name="Spec" href="spec.html" />
+    </menu>
+  </body>
+</project>

Propchange: avro/trunk/lang/java/trevni/doc/site.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: avro/trunk/lang/java/trevni/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/pom.xml?rev=1383626&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/pom.xml (added)
+++ avro/trunk/lang/java/trevni/pom.xml Tue Sep 11 21:35:56 2012
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+  xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>avro-parent</artifactId>
+    <groupId>org.apache.avro</groupId>
+    <version>1.7.2-SNAPSHOT</version>
+    <relativePath>../</relativePath>
+  </parent>
+
+  <artifactId>trevni-java</artifactId>
+  <name>Trevni Java</name>
+  <groupId>org.apache.trevni</groupId>
+  <description>Trevni Java</description>
+  <url>http://avro.apache.org/</url>
+  <packaging>pom</packaging>
+
+  <modules>
+    <module>core</module>
+    <module>avro</module>
+    <module>doc</module>
+  </modules>
+
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <configuration>
+            <failIfNoTests>false</failIfNoTests>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>${compiler-plugin.version}</version>
+          <configuration>
+            <source>1.6</source>
+            <target>1.6</target>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-checkstyle-plugin</artifactId>
+          <version>${checkstyle-plugin.version}</version>
+          <configuration>
+            <consoleOutput>true</consoleOutput>
+            <configLocation>checkstyle.xml</configLocation>
+          </configuration>
+          <executions>
+            <execution>
+              <id>checkstyle-check</id>
+              <phase>test</phase>
+              <goals>
+                <goal>check</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <executions>
+            <execution>
+              <goals>
+                <goal>test-jar</goal>
+              </goals>
+            </execution>
+          </executions>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+
+  <profiles>
+  </profiles>
+
+</project>
+

Propchange: avro/trunk/lang/java/trevni/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: avro/trunk/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/pom.xml?rev=1383626&r1=1383625&r2=1383626&view=diff
==============================================================================
--- avro/trunk/pom.xml (original)
+++ avro/trunk/pom.xml Tue Sep 11 21:35:56 2012
@@ -251,6 +251,9 @@
                 <copy todir="${avro.docDir}/java">
                   <fileset dir="lang/java/target/site/apidocs"/>
                 </copy>
+                <copy todir="build/avro-doc-${project.version}/trevni">
+                  <fileset dir="lang/java/trevni/doc/target/site"/>
+                </copy>
               </target>
             </configuration>
           </plugin>



Mime
View raw message