avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r811015 - in /hadoop/avro/trunk: ./ src/c++/api/ src/c++/impl/ src/c++/jsonschemas/ src/c++/parser/ src/c++/test/
Date Thu, 03 Sep 2009 16:22:49 GMT
Author: cutting
Date: Thu Sep  3 16:22:48 2009
New Revision: 811015

URL: http://svn.apache.org/viewvc?rev=811015&view=rev
Log:
AVRO-98.  Fix C++ schema parser to permit out-of-order attributes and ignore extra attributes.  Contributed by Scott Banachowski.

Added:
    hadoop/avro/trunk/src/c++/api/CompilerNode.hh
    hadoop/avro/trunk/src/c++/jsonschemas/int
    hadoop/avro/trunk/src/c++/jsonschemas/map
    hadoop/avro/trunk/src/c++/jsonschemas/verboseint
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/src/c++/api/Compiler.hh
    hadoop/avro/trunk/src/c++/api/NodeConcepts.hh
    hadoop/avro/trunk/src/c++/api/NodeImpl.hh
    hadoop/avro/trunk/src/c++/impl/Compiler.cc
    hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
    hadoop/avro/trunk/src/c++/impl/ValidSchema.cc
    hadoop/avro/trunk/src/c++/jsonschemas/array
    hadoop/avro/trunk/src/c++/jsonschemas/enum
    hadoop/avro/trunk/src/c++/jsonschemas/record
    hadoop/avro/trunk/src/c++/jsonschemas/record2
    hadoop/avro/trunk/src/c++/parser/avro.l
    hadoop/avro/trunk/src/c++/parser/avro.y
    hadoop/avro/trunk/src/c++/test/precompile.cc
    hadoop/avro/trunk/src/c++/test/testparser.cc

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Thu Sep  3 16:22:48 2009
@@ -66,6 +66,9 @@
 
     AVRO-83. In generated Java code, elide unions with null. (cutting)
 
+    AVRO-98. Fix C++ schema parser to permit JSON attributes in any
+    order and to ignore extra attributes. (Scott Banachowski via cutting)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/avro/trunk/src/c++/api/Compiler.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/Compiler.hh?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/Compiler.hh (original)
+++ hadoop/avro/trunk/src/c++/api/Compiler.hh Thu Sep  3 16:22:48 2009
@@ -19,12 +19,12 @@
 #ifndef avro_Compiler_hh__
 #define avro_Compiler_hh__
 
-#include <vector>
+#include <boost/ptr_container/ptr_vector.hpp>
 
 #include <FlexLexer.h>
 #include "Types.hh"
 #include "Node.hh"
-#include "SymbolMap.hh"
+#include "CompilerNode.hh"
 
 namespace avro {
 
@@ -35,12 +35,11 @@
     
 class CompilerContext {
 
+
   public:
 
     CompilerContext(std::istream &is) :
-        lexer_(&is),
-        size_(0),
-        inEnum_(false)
+        lexer_(&is)
     {}
 
     /// Called by the lexer whenever it encounters text that is not a symbol it recognizes
@@ -49,21 +48,23 @@
         text_ = text;
     }
 
-    void addRecord();
-    void addEnum();
-    void addArray();
-    void addMap();
-    void addUnion();
-    void addFixed();
-
-    void endCompound(avro::Type type);
-
-    void addPrimitive(avro::Type type);
-    void addSymbol();
-    void addSize();
+    void addNamedType();
+
+    void startType();
+    void stopType();
+
+    void addType(avro::Type type);
 
-    void addName();
-    void addFieldName();
+    void setSizeAttribute();
+    void setNameAttribute();
+    void setSymbolsAttribute();
+
+    void setFieldsAttribute();
+    void setItemsAttribute();
+    void setValuesAttribute();
+    void setTypesAttribute();
+
+    void textContainsFieldName();
 
     const FlexLexer &lexer() const {
         return lexer_;
@@ -78,18 +79,15 @@
 
   private:
 
+    typedef boost::ptr_vector<CompilerNode> Stack;
+
     void add(const NodePtr &node);
-    void addCompound(const NodePtr &node);
 
     yyFlexLexer lexer_;
     std::string text_;
-    std::string fieldName_;
-    int64_t     size_;
-    bool        inEnum_;
-    SymbolMap   map_;
     
     NodePtr root_;
-    std::vector<NodePtr> stack_;
+    Stack   stack_;
 };
 
 class ValidSchema;

Added: hadoop/avro/trunk/src/c++/api/CompilerNode.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/CompilerNode.hh?rev=811015&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/api/CompilerNode.hh (added)
+++ hadoop/avro/trunk/src/c++/api/CompilerNode.hh Thu Sep  3 16:22:48 2009
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef avro_CompilerNode_hh__
+#define avro_CompilerNode_hh__
+
+#include "NodeConcepts.hh"
+
+namespace avro {
+
+
+/// This is a generic "untyped" node that may store values for all possible
+/// attributes of Avro complex types.  This allows a Node to be assembled by
+/// the compiler, before it knows what attributes the Node actually contains.
+/// All the Avro types defined (see NodeImpl) may be copy constructed from a
+/// CompilerNode, at which time the attributes actually required by the Avro
+/// type are copied from the CompilerNode, and all unused attributes are
+/// dropped.
+
+class CompilerNode
+{
+
+  public:
+
+    enum AttributeType {
+        NONE,
+        FIELDS,
+        VALUES,
+        ITEMS,
+        TYPES
+    };
+
+    CompilerNode() :
+        type_(AVRO_NUM_TYPES),
+        attributeType_(NONE)
+    {}
+
+    CompilerNode(const CompilerNode &rhs) :
+        type_(rhs.type_),
+        attributeType_(rhs.attributeType_)
+    {}
+
+
+    AttributeType attributeType() const {
+        return attributeType_;
+    }
+
+    void setAttributeType(AttributeType attributeType) {
+        attributeType_ = attributeType;
+    }
+
+    Type type() const {
+        return type_;
+    }
+
+    void setType(Type type) {
+        type_ = type;
+    } 
+
+    void addNode(const NodePtr &node) {
+        switch(attributeType_) {
+          case FIELDS:
+            fieldsAttribute_.add(node);
+            break;
+          case VALUES:
+            valuesAttribute_.add(node);
+            break;
+          case ITEMS:
+            itemsAttribute_.add(node);
+            break;
+          case TYPES:
+            typesAttribute_.add(node);
+            break;
+
+          default:
+            throw Exception("Can't add node if the attribute type is not set");
+        }
+    }
+
+
+    // attribute used by records, enums, and fixed:
+    concepts::SingleAttribute<std::string> nameAttribute_;
+
+    // attribute used by fixed:
+    concepts::SingleAttribute<int> sizeAttribute_;
+
+  //private:
+    // attributes used by records:
+    concepts::MultiAttribute<NodePtr>     fieldsAttribute_;
+    concepts::MultiAttribute<std::string> fieldsNamesAttribute_;
+
+    // attribute used by enums:
+    concepts::MultiAttribute<std::string> symbolsAttribute_;
+
+    // attribute used by arrays:
+    concepts::SingleAttribute<NodePtr> itemsAttribute_;
+
+    // attribute used by maps:
+    concepts::SingleAttribute<NodePtr> valuesAttribute_;
+
+    // attribute used by unions:
+    concepts::MultiAttribute<NodePtr> typesAttribute_;
+
+    Type type_;
+    AttributeType attributeType_;
+
+};
+
+} // namespace avro
+
+#endif

Modified: hadoop/avro/trunk/src/c++/api/NodeConcepts.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeConcepts.hh?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeConcepts.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeConcepts.hh Thu Sep  3 16:22:48 2009
@@ -48,98 +48,125 @@
 {
     static const bool hasAttribute = false;
 
-    const Attribute &get() const {
-        static Attribute empty;
-        throw Exception("This type does not have attribute");
-        return empty;
-    }
+    NoAttribute()
+    {}
 
-    void set(const Attribute &value) {
-        throw Exception("This type does not have attribute");
-    }
-};
+    // copy constructing from any attribute type is a no-op
+    // template<typename T>
+    NoAttribute(const NoAttribute<Attribute> &rhs)
+    {}
 
-template <typename Attribute>
-struct HasAttribute
-{
-    static const bool hasAttribute = true;
+    size_t size() const {
+        return 0;
+    }
 
-    const Attribute &get() const {
-        return val_;
+    void add( const Attribute &attr) {
+        throw Exception("This type does not have attribute");
     }
 
-    void set(const Attribute &val) {
-        val_ = val;
+    const Attribute &get(size_t index = 0) const {
+        static const Attribute empty = Attribute();
+        throw Exception("This type does not have attribute");
+        return empty;
     }
 
-  private:
-    Attribute val_;
 };
 
-
-template<typename LeafType>
-struct NoLeafAttributes
+template<typename Attribute>
+struct SingleAttribute
 {
-    static const bool hasAttribute = false;
+    static const bool hasAttribute = true;
 
-    NoLeafAttributes(size_t min, size_t max) 
-    {}
+    SingleAttribute() : attr_(), size_(0)
+    { }
+
+    // copy constructing from another single attribute is allowed
+    SingleAttribute(const SingleAttribute<Attribute> &rhs) : 
+        attr_(rhs.attr_), size_(rhs.size_)
+    { }
+
+    SingleAttribute(const NoAttribute<Attribute> &rhs) : 
+        attr_(), size_(0)
+    { }
+
+    // copy constructing from any other type is a no-op
+    //template<typename T>
+    //SingleAttribute(T&) : attr_(), size_(0)
+    //{}
 
     size_t size() const {
-        return 0;
+        return size_;
     }
 
-    void add( const LeafType &newLeaf) {
-        throw Exception("This type does not have leaf types");
+    void add(const Attribute &attr) {
+        if(size_ == 0) {
+            size_ = 1;
+        }
+        else {
+            throw Exception("SingleAttribute can only be set once");
+        }
+        attr_ = attr;
     }
 
-    const LeafType &at(size_t index) const {
-        static LeafType null;
-        throw Exception("This type does not have leaf types");
-        return null;
+    const Attribute &get(size_t index = 0) const {
+        if(index != 0) {
+            throw Exception("SingleAttribute has only 1 value");
+        }
+        return attr_;
     }
 
-    bool inRange() const {
-        return true;
-    }
+  private:
+
+    template<typename T> friend class MultiAttribute;
+
+    Attribute attr_;
+    int       size_;
 };
 
-template<typename LeafType>
-struct HasLeafAttributes
+template<typename Attribute>
+struct MultiAttribute
 {
     static const bool hasAttribute = true;
 
-    HasLeafAttributes(size_t min, size_t max) :
-        minSize_(min), maxSize_(max)
-    {
-        attrs_.reserve(minSize_);
-    }
+    MultiAttribute() 
+    { }
+
+    // copy constructing from another single attribute is allowed, it
+    // pushes the attribute
+    MultiAttribute(const SingleAttribute<Attribute> &rhs) 
+    { 
+        // since map is the only type that does this, we know its
+        // final size will be two, so reserve 
+        attrs_.reserve(2);
+        attrs_.push_back(rhs.attr_);
+    }
+
+    MultiAttribute(const MultiAttribute<Attribute> &rhs)  :
+        attrs_(rhs.attrs_)
+    { }
+
+    MultiAttribute(const NoAttribute<Attribute> &rhs)
+    {}
 
     size_t size() const {
         return attrs_.size();
     }
 
-    void add(const LeafType &attr) {
-        if(attrs_.size() == maxSize_) {
-            throw Exception("Too many attributes");
-        }
+    void add(const Attribute &attr) {
         attrs_.push_back(attr); 
     }
 
-    const LeafType &at(size_t index) const {
+    const Attribute &get(size_t index = 0) const {
         return attrs_.at(index);
     }
 
-    bool inRange() const {
-        size_t size = attrs_.size();
-        return size >= minSize_ && size <= maxSize_;
+    Attribute &at(size_t index) {
+        return attrs_.at(index);
     }
 
   private:
 
-    std::vector<LeafType> attrs_;
-    const size_t minSize_;
-    const size_t maxSize_;
+    std::vector<Attribute> attrs_;
 };
 
 

Modified: hadoop/avro/trunk/src/c++/api/NodeImpl.hh
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/api/NodeImpl.hh?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/api/NodeImpl.hh (original)
+++ hadoop/avro/trunk/src/c++/api/NodeImpl.hh Thu Sep  3 16:22:48 2009
@@ -22,7 +22,7 @@
 #include <limits>
 
 #include "Node.hh"
-#include "NodeConcepts.hh"
+#include "CompilerNode.hh"
 
 namespace avro {
 
@@ -41,10 +41,22 @@
 
   protected:
 
-    NodeImpl(Type type, size_t minLeaves = 0, size_t maxLeaves = 0) :
+    NodeImpl(Type type) :
         Node(type),
-        leafAttributes_(minLeaves, maxLeaves),
-        leafNamesAttributes_(minLeaves, maxLeaves)
+        leafAttributes_(),
+        leafNameAttributes_()
+    { }
+
+    NodeImpl(Type type, 
+             const NameConcept &name, 
+             const LeavesConcept &leaves, 
+             const LeafNamesConcept &leafNames,
+             const SizeConcept &size) :
+        Node(type),
+        nameAttribute_(name),
+        leafAttributes_(leaves),
+        leafNameAttributes_(leafNames),
+        sizeAttribute_(size)
     { }
 
     bool hasName() const {
@@ -52,7 +64,7 @@
     }
 
     void doSetName(const std::string &name) {
-        nameAttribute_.set(name);
+        nameAttribute_.add(name);
     }
     
     const std::string &name() const {
@@ -68,32 +80,30 @@
     }
 
     const NodePtr &leafAt(int index) const { 
-        return leafAttributes_.at(index);
+        return leafAttributes_.get(index);
     }
 
     void doAddName(const std::string &name) { 
-        leafNamesAttributes_.add(name);
+        leafNameAttributes_.add(name);
     }
 
     size_t names() const {
-        return leafNamesAttributes_.size();
+        return leafNameAttributes_.size();
     }
 
     const std::string &nameAt(int index) const { 
-        return leafNamesAttributes_.at(index);
+        return leafNameAttributes_.get(index);
     }
 
     void doSetFixedSize(int size) {
-        sizeAttribute_.set(size);
+        sizeAttribute_.add(size);
     }
 
     int fixedSize() const {
         return sizeAttribute_.get();
     }
 
-    bool isValid() const {
-        return leafAttributes_.inRange() && leafAttributes_.inRange();
-    }
+    virtual bool isValid() const = 0;
 
     void printBasicInfo(std::ostream &os) const;
 
@@ -101,31 +111,32 @@
    
     NameConcept nameAttribute_;
     LeavesConcept leafAttributes_;
-    LeafNamesConcept leafNamesAttributes_;
+    LeafNamesConcept leafNameAttributes_;
     SizeConcept sizeAttribute_;
 };
 
-typedef concepts::NoAttribute<std::string>  NoName;
-typedef concepts::HasAttribute<std::string> HasName;
-
-typedef concepts::NoLeafAttributes<NodePtr>  NoLeaves;
-typedef concepts::HasLeafAttributes<NodePtr> HasLeaves;
+typedef concepts::NoAttribute<std::string>     NoName;
+typedef concepts::SingleAttribute<std::string> HasName;
 
-typedef concepts::NoLeafAttributes<std::string>  NoLeafNames;
-typedef concepts::HasLeafAttributes<std::string> HasLeafNames;
-
-typedef concepts::NoAttribute<int>  NoSize;
-typedef concepts::HasAttribute<int> HasSize;
-
-typedef NodeImpl< NoName,  NoLeaves,  NoLeafNames,  NoSize > NodeImplPrimitive;
-typedef NodeImpl< HasName, NoLeaves,  NoLeafNames,  NoSize > NodeImplSymbolic;
-
-typedef NodeImpl< HasName, HasLeaves, HasLeafNames, NoSize > NodeImplRecord;
-typedef NodeImpl< HasName, NoLeaves,  HasLeafNames, NoSize > NodeImplEnum;
-typedef NodeImpl< NoName,  HasLeaves, NoLeafNames,  NoSize > NodeImplArray;
-typedef NodeImpl< NoName,  HasLeaves, NoLeafNames,  NoSize > NodeImplMap;
-typedef NodeImpl< NoName,  HasLeaves, NoLeafNames,  NoSize > NodeImplUnion;
-typedef NodeImpl< HasName, NoLeaves,  NoLeafNames,  HasSize > NodeImplFixed;
+typedef concepts::NoAttribute<NodePtr>      NoLeaves;
+typedef concepts::SingleAttribute<NodePtr>  SingleLeaf;
+typedef concepts::MultiAttribute<NodePtr>   MultiLeaves;
+
+typedef concepts::NoAttribute<std::string>     NoLeafNames;
+typedef concepts::MultiAttribute<std::string>  LeafNames;
+
+typedef concepts::NoAttribute<int>     NoSize;
+typedef concepts::SingleAttribute<int> HasSize;
+
+typedef NodeImpl< NoName,  NoLeaves,    NoLeafNames,  NoSize  > NodeImplPrimitive;
+typedef NodeImpl< HasName, NoLeaves,    NoLeafNames,  NoSize  > NodeImplSymbolic;
+
+typedef NodeImpl< HasName, MultiLeaves, LeafNames,    NoSize  > NodeImplRecord;
+typedef NodeImpl< HasName, NoLeaves,    LeafNames,    NoSize  > NodeImplEnum;
+typedef NodeImpl< NoName,  SingleLeaf,  NoLeafNames,  NoSize  > NodeImplArray;
+typedef NodeImpl< NoName,  MultiLeaves, NoLeafNames,  NoSize  > NodeImplMap;
+typedef NodeImpl< NoName,  MultiLeaves, NoLeafNames,  NoSize  > NodeImplUnion;
+typedef NodeImpl< HasName, NoLeaves,    NoLeafNames,  HasSize > NodeImplFixed;
 
 class NodePrimitive : public NodeImplPrimitive
 {
@@ -134,7 +145,22 @@
     NodePrimitive(Type type) :
         NodeImplPrimitive(type)
     { }
+
+    NodePrimitive(const CompilerNode &compilerNode) :
+        NodeImplPrimitive(
+            compilerNode.type(), 
+            NoName(),
+            NoLeaves(), 
+            NoLeafNames(),
+            NoSize()
+        )
+    { }
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return true;
+    }
 };
 
 class NodeSymbolic : public NodeImplSymbolic
@@ -144,8 +170,23 @@
     NodeSymbolic() :
         NodeImplSymbolic(AVRO_SYMBOLIC)
     { }
+
+    NodeSymbolic(const CompilerNode &compilerNode) :
+        NodeImplSymbolic(
+            AVRO_SYMBOLIC, 
+            compilerNode.nameAttribute_,
+            NoLeaves(), 
+            NoLeafNames(),
+            NoSize()
+        )
+    { }
+
     void printJson(std::ostream &os, int depth) const;
 
+    bool isValid() const {
+        return (nameAttribute_.size() == 1);
+    }
+
 };
 
 class NodeRecord : public NodeImplRecord
@@ -153,9 +194,28 @@
   public:
 
     NodeRecord() :
-        NodeImplRecord(AVRO_RECORD, 1, std::numeric_limits<size_t>::max()) 
+        NodeImplRecord(AVRO_RECORD) 
+    { }
+
+    NodeRecord(const CompilerNode &compilerNode) :
+        NodeImplRecord(
+            AVRO_RECORD, 
+            compilerNode.nameAttribute_,
+            compilerNode.fieldsAttribute_, 
+            compilerNode.fieldsNamesAttribute_,
+            NoSize()
+        )
     { }
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return (
+                (nameAttribute_.size() == 1) && 
+                (leafAttributes_.size() > 0) &&
+                (leafAttributes_.size() == leafNameAttributes_.size())
+               );
+    }
 };
 
 class NodeEnum : public NodeImplEnum
@@ -163,9 +223,28 @@
   public:
 
     NodeEnum() :
-        NodeImplEnum(AVRO_ENUM, 1, std::numeric_limits<size_t>::max()) 
+        NodeImplEnum(AVRO_ENUM) 
+    { }
+
+    NodeEnum(const CompilerNode &compilerNode) :
+        NodeImplEnum(
+            AVRO_ENUM, 
+            compilerNode.nameAttribute_,
+            NoLeaves(), 
+            compilerNode.symbolsAttribute_,
+            NoSize()
+        )
     { }
+
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return (
+                (nameAttribute_.size() == 1) && 
+                (leafNameAttributes_.size() > 1) 
+               );
+    }
 };
 
 class NodeArray : public NodeImplArray
@@ -173,9 +252,25 @@
   public:
 
     NodeArray() :
-        NodeImplArray(AVRO_ARRAY, 1, 1)
+        NodeImplArray(AVRO_ARRAY)
+    { }
+
+    NodeArray(const CompilerNode &compilerNode) :
+        NodeImplArray(
+            AVRO_ARRAY, 
+            NoName(),
+            compilerNode.itemsAttribute_,
+            NoLeafNames(),
+            NoSize()
+        )
     { }
+
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return (leafAttributes_.size() == 1);
+    }
 };
 
 class NodeMap : public NodeImplMap
@@ -183,12 +278,34 @@
   public:
 
     NodeMap() :
-        NodeImplMap(AVRO_MAP, 2, 2)
+        NodeImplMap(AVRO_MAP)
     { 
          NodePtr key(new NodePrimitive(AVRO_STRING));
          doAddLeaf(key);
     }
+
+    NodeMap(const CompilerNode &compilerNode) :
+        NodeImplMap(
+            AVRO_MAP, 
+            NoName(),
+            compilerNode.valuesAttribute_, 
+            NoLeafNames(),
+            NoSize()
+        )
+    { 
+        // need to add the key for the map too
+        NodePtr key(new NodePrimitive(AVRO_STRING));
+        doAddLeaf(key);
+
+        // key goes before value
+        std::swap(leafAttributes_.at(0), leafAttributes_.at(1));
+    }
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return (leafAttributes_.size() == 2);
+    }
 };
 
 class NodeUnion : public NodeImplUnion
@@ -196,9 +313,25 @@
   public:
 
     NodeUnion() :
-        NodeImplUnion(AVRO_UNION, 2, std::numeric_limits<size_t>::max())
+        NodeImplUnion(AVRO_UNION)
     { }
+
+    NodeUnion(const CompilerNode &compilerNode) :
+        NodeImplUnion(
+            AVRO_UNION, 
+            NoName(),
+            compilerNode.typesAttribute_,
+            NoLeafNames(),
+            NoSize()
+        )
+    { }
+
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return (leafAttributes_.size() > 1);
+    }
 };
 
 class NodeFixed : public NodeImplFixed
@@ -208,7 +341,26 @@
     NodeFixed() :
         NodeImplFixed(AVRO_FIXED)
     { }
+
+    NodeFixed(const CompilerNode &compilerNode) :
+        NodeImplFixed(
+            AVRO_FIXED, 
+            compilerNode.nameAttribute_,
+            NoLeaves(), 
+            NoLeafNames(),
+            compilerNode.sizeAttribute_
+        )
+    { }
+
+
     void printJson(std::ostream &os, int depth) const;
+
+    bool isValid() const {
+        return (
+                (nameAttribute_.size() == 1) && 
+                (sizeAttribute_.size() == 1) 
+               );
+    }
 };
 
 template < class A, class B, class C, class D >
@@ -220,7 +372,7 @@
     } 
     NodePtr symbol(new NodeSymbolic);
 
-    NodePtr &node = const_cast<NodePtr &>(leafAttributes_.at(index));
+    NodePtr &node = const_cast<NodePtr &>(leafAttributes_.get(index));
     symbol->setName(node->name());
     node = symbol;
 }
@@ -240,7 +392,7 @@
     int count = leaves();
     count = count ? count : names();
     for(int i= 0; i < count; ++i) {
-        if( leafNamesAttributes_.hasAttribute ) {
+        if( leafNameAttributes_.hasAttribute ) {
             os << "name " << nameAt(i) << '\n';
         }
         if( leafAttributes_.hasAttribute) {
@@ -252,6 +404,8 @@
     }
 }
 
+NodePtr nodeFromCompilerNode(CompilerNode &compilerNode);
+
 } // namespace avro
 
 #endif

Modified: hadoop/avro/trunk/src/c++/impl/Compiler.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/Compiler.cc?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/Compiler.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/Compiler.cc Thu Sep  3 16:22:48 2009
@@ -26,7 +26,7 @@
 
 namespace avro {
 
-//#define DEBUG_VERBOSE
+ #define DEBUG_VERBOSE
 
 int
 compileJsonSchema(std::istream &is, ValidSchema &schema)
@@ -48,147 +48,122 @@
         root_ = node;
     }
     else {
-
-        NodePtr &owner = stack_.back();
-
-        owner->addLeaf(node);
-        if(owner->type() == AVRO_RECORD) {
-            owner->addName(fieldName_);
-        }   
+        stack_.back().addNode(node);
     }   
 }
 
-void 
-CompilerContext::addCompound(const NodePtr &node)
+void
+CompilerContext::startType()
 {
-    add(node);
-    stack_.push_back(node);
+#ifdef DEBUG_VERBOSE
+    std::cerr << "Start type definition\n";
+#endif
+    stack_.push_back(new CompilerNode());
 }
 
 void
-CompilerContext::endCompound(Type type)
+CompilerContext::stopType()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Got end of " << type << '\n';
+    std::cerr << "Stop type " << stack_.back().type() << '\n';
 #endif
+
     assert(!stack_.empty());
+    NodePtr nodePtr(nodeFromCompilerNode(stack_.back()));
     stack_.pop_back();
-    inEnum_ = false;
+    add(nodePtr);
 }
 
 void 
-CompilerContext::addRecord()
-{
+CompilerContext::addType(Type type)
+{    
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding record " << text_ << '\n';
+    std::cerr << "Setting type to " << type << '\n';
 #endif
-    NodePtr node(new NodeRecord());
-    node->setName(text_);
-    addCompound(node);
+    stack_.back().setType(type);
 }
 
 void 
-CompilerContext::addEnum()
+CompilerContext::setSizeAttribute()
 {
+    int size = atol(text_.c_str()); 
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding enum " << text_ << '\n';
+    std::cerr << "Setting size to " << size << '\n';
 #endif
-    NodePtr node(new NodeEnum());
-    node->setName(text_);
-    addCompound(node);
-    inEnum_ = true;
+    stack_.back().sizeAttribute_.add(size);
 }
 
 void 
-CompilerContext::addUnion()
+CompilerContext::addNamedType()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding union\n";
+    std::cerr << "Adding named type " << text_ << '\n';
 #endif
-    NodePtr node(new NodeUnion());
-    addCompound(node);
+    stack_.back().setType(AVRO_SYMBOLIC);
+    stack_.back().nameAttribute_.add(text_);
 }
 
 void 
-CompilerContext::addMap()
+CompilerContext::setNameAttribute()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding map\n";
+    std::cerr << "Setting name to " << text_ << '\n';
 #endif
-    NodePtr node(new NodeMap());
-    addCompound(node);
+    stack_.back().nameAttribute_.add(text_);
 }
 
 void 
-CompilerContext::addArray()
+CompilerContext::setSymbolsAttribute()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding array\n";
+    std::cerr << "Adding enum symbol " << text_ << '\n';
 #endif
-    NodePtr node(new NodeArray());
-    addCompound(node);
+    stack_.back().symbolsAttribute_.add(text_);
 }
 
 void 
-CompilerContext::addFixed()
+CompilerContext::setValuesAttribute()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding fixed " << text_ << '\n';
+    std::cerr << "Ready for map type\n";
 #endif
-    NodePtr node(new NodeFixed());
-    node->setName(text_);
-    node->setFixedSize(size_);
-    add(node);
-} 
-
-void 
-CompilerContext::addPrimitive(Type type)
-{    
-#ifdef DEBUG_VERBOSE
-    std::cout << "Adding " << type << '\n';
-#endif
-    NodePtr node(new NodePrimitive(type));
-    add(node);
+    stack_.back().setAttributeType(CompilerNode::VALUES);
 }
 
 void 
-CompilerContext::addSize()
+CompilerContext::setTypesAttribute()
 {
-    size_ = atol(text_.c_str()); 
 #ifdef DEBUG_VERBOSE
-    std::cout << "Got size " << size_ << '\n';
+    std::cerr << "Ready for union types\n";
 #endif
+    stack_.back().setAttributeType(CompilerNode::TYPES);
 }
 
 void 
-CompilerContext::addSymbol()
+CompilerContext::setItemsAttribute()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Adding symbol " << text_ << '\n';
+    std::cerr << "Ready for array type\n";
 #endif
-    NodePtr node(new NodeSymbolic());
-    node->setName(text_);
-    add(node);
+    stack_.back().setAttributeType(CompilerNode::ITEMS);
 }
 
 void 
-CompilerContext::addName()
+CompilerContext::setFieldsAttribute()
 {
-    if(inEnum_) {
 #ifdef DEBUG_VERBOSE
-        std::cout << "Got enum symbol " << text_ << '\n';
+    std::cerr << "Ready for record fields\n";
 #endif
-        stack_.back()->addName(text_);
-    }
+    stack_.back().setAttributeType(CompilerNode::FIELDS);
 }
 
 void 
-CompilerContext::addFieldName()
+CompilerContext::textContainsFieldName()
 {
 #ifdef DEBUG_VERBOSE
-    std::cout << "Got field name " << text_ << '\n';
+    std::cerr << "Setting field name to " << text_ << '\n';
 #endif
-    fieldName_ = text_;
+    stack_.back().fieldsNamesAttribute_.add(text_);
 }
 
 } // namespace avro

Modified: hadoop/avro/trunk/src/c++/impl/NodeImpl.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/NodeImpl.cc?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/NodeImpl.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/NodeImpl.cc Thu Sep  3 16:22:48 2009
@@ -42,18 +42,7 @@
 void 
 NodePrimitive::printJson(std::ostream &os, int depth) const
 {
-    // printing long form is optional
-    /*
-    if(depth == 0) {
-        os << "{\n";
-        os << indent(depth+1) << "\"type\": " << '"' << type() << '"';
-        os << indent(depth) << "\n}";
-    }
-    else {
-        os << type();
-    }
-    */
-    os << '"' << type() << '"';
+    os << '\"' << type() << '\"';
 }
 
 void 
@@ -79,9 +68,9 @@
             os << indent(depth) << "},\n";
         }
         os << indent(depth) << "{\n";
-        os << indent(++depth) << "\"name\": \"" << leafNamesAttributes_.at(i) << "\",\n";
+        os << indent(++depth) << "\"name\": \"" << leafNameAttributes_.get(i) << "\",\n";
         os << indent(depth) << "\"type\": ";
-        leafAttributes_.at(i)->printJson(os, depth);
+        leafAttributes_.get(i)->printJson(os, depth);
         os << '\n';
         --depth;
     }
@@ -100,13 +89,13 @@
     }
     os << indent(depth) << "\"symbols\": [\n";
 
-    int names = leafNamesAttributes_.size();
+    int names = leafNameAttributes_.size();
     ++depth;
     for(int i = 0; i < names; ++i) {
         if(i > 0) {
             os << ",\n";
         }
-        os << indent(depth) << '\"' << leafNamesAttributes_.at(i) << '\"';
+        os << indent(depth) << '\"' << leafNameAttributes_.get(i) << '\"';
     }
     os << '\n';
     os << indent(--depth) << "]\n";
@@ -119,7 +108,7 @@
     os << "{\n";
     os << indent(depth+1) << "\"type\": \"array\",\n";
     os << indent(depth+1) <<  "\"items\": ";
-    leafAttributes_.at(0)->printJson(os, depth);
+    leafAttributes_.get()->printJson(os, depth);
     os << '\n';
     os << indent(depth) << '}';
 }
@@ -130,7 +119,7 @@
     os << "{\n";
     os << indent(depth+1) <<"\"type\": \"map\",\n";
     os << indent(depth+1) << "\"values\": ";
-    leafAttributes_.at(1)->printJson(os, depth);
+    leafAttributes_.get(1)->printJson(os, depth);
     os << '\n';
     os << indent(depth) << '}';
 }
@@ -146,7 +135,7 @@
             os << ",\n";
         }
         os << indent(depth);
-        leafAttributes_.at(i)->printJson(os, depth);
+        leafAttributes_.get(i)->printJson(os, depth);
     }
     os << '\n';
     os << indent(--depth) << ']';
@@ -162,4 +151,52 @@
     os << indent(--depth) << '}';
 }
 
+NodePtr
+nodeFromCompilerNode(CompilerNode &node)
+{
+    NodePtr ptr;
+
+    switch(node.type()) {
+
+      case AVRO_ARRAY:
+        ptr = ( new NodeArray(node));
+        break;
+    
+      case AVRO_ENUM:
+        ptr = ( new NodeEnum(node));
+        break;
+
+      case AVRO_FIXED:
+        ptr = ( new NodeFixed(node));
+        break;
+    
+      case AVRO_MAP:
+        ptr = ( new NodeMap(node));
+        break;
+
+      case AVRO_RECORD:
+        ptr = ( new NodeRecord(node));
+        break;
+    
+      case AVRO_UNION:
+        ptr = ( new NodeUnion(node));
+        break;
+    
+      case AVRO_SYMBOLIC:
+        ptr = ( new NodeSymbolic(node));
+        break;
+    
+      default:
+        if(isPrimitive(node.type())) {
+            ptr = ( new NodePrimitive(node.type()));        
+        }
+        else {
+            throw Exception("Unknown type in nodeFromCompilerNode");
+        }
+        break;
+    }
+
+    return ptr;
+}
+
 } // namespace avro

Modified: hadoop/avro/trunk/src/c++/impl/ValidSchema.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/impl/ValidSchema.cc?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/impl/ValidSchema.cc (original)
+++ hadoop/avro/trunk/src/c++/impl/ValidSchema.cc Thu Sep  3 16:22:48 2009
@@ -50,7 +50,7 @@
     }
 
     if(!node->isValid()) {
-        throw Exception("Schema is invalid");
+        throw Exception( boost::format("Schema is invalid, due to bad node of type %1%") % node->type());
     }
     if(node->hasName()) {
         if(node->type() == AVRO_SYMBOLIC) {

Modified: hadoop/avro/trunk/src/c++/jsonschemas/array
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/array?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/array (original)
+++ hadoop/avro/trunk/src/c++/jsonschemas/array Thu Sep  3 16:22:48 2009
@@ -1 +1 @@
-{ "type" : "array", "items" : "int" }
+{ "type" : "array", "items" : "int" , "name":"test" }

Modified: hadoop/avro/trunk/src/c++/jsonschemas/enum
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/enum?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/enum (original)
+++ hadoop/avro/trunk/src/c++/jsonschemas/enum Thu Sep  3 16:22:48 2009
@@ -1,10 +1,10 @@
         {
             "type": "enum",
-            "name": "myenum",
             "symbols": [
                 "zero",
                 "int",
                 "two",
                 "three"
-            ]
+            ],
+            "name": "myenum"
         }

Added: hadoop/avro/trunk/src/c++/jsonschemas/int
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/int?rev=811015&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/int (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/int Thu Sep  3 16:22:48 2009
@@ -0,0 +1 @@
+"int"

Added: hadoop/avro/trunk/src/c++/jsonschemas/map
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/map?rev=811015&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/map (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/map Thu Sep  3 16:22:48 2009
@@ -0,0 +1,5 @@
+{
+    "type": "map",
+    "values": {"type":"int"},
+    "name": "noname"
+}

Modified: hadoop/avro/trunk/src/c++/jsonschemas/record
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/record?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/record (original)
+++ hadoop/avro/trunk/src/c++/jsonschemas/record Thu Sep  3 16:22:48 2009
@@ -3,7 +3,7 @@
   "name": "LongList",
   "fields" : [
     {"name": "value", "type": "long"},           
-    {"name": "next", "type": "int" }
+    {"type": "int", "name": "next", "metadata" : "two"}
   ]
 }
 

Modified: hadoop/avro/trunk/src/c++/jsonschemas/record2
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/record2?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/record2 (original)
+++ hadoop/avro/trunk/src/c++/jsonschemas/record2 Thu Sep  3 16:22:48 2009
@@ -3,7 +3,7 @@
   "name": "LongList",
   "fields" : [
     {"name": "value", "type": "long"},           
-    {"name": "next", "type": ["int"] },
+    {"name": "next", "type": ["int", "float"] },
     {"name": "hello", "type": {"type" : "array" , "items" :"float"}}
   ]
 }

Added: hadoop/avro/trunk/src/c++/jsonschemas/verboseint
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/jsonschemas/verboseint?rev=811015&view=auto
==============================================================================
--- hadoop/avro/trunk/src/c++/jsonschemas/verboseint (added)
+++ hadoop/avro/trunk/src/c++/jsonschemas/verboseint Thu Sep  3 16:22:48 2009
@@ -0,0 +1 @@
+{ "type": "int", "metadata1" : "ju\"nk", metadata2: 123, metadata3 : {"ju{nk"}, "name":"hello", metadata4: ["he]ll}o"]}

Modified: hadoop/avro/trunk/src/c++/parser/avro.l
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/parser/avro.l?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/parser/avro.l (original)
+++ hadoop/avro/trunk/src/c++/parser/avro.l Thu Sep  3 16:22:48 2009
@@ -40,7 +40,7 @@
 {
     avro::CompilerContext *c = static_cast<avro::CompilerContext *>(ctx);
     int ret = c->lexer().yylex();
-    if(ret == AVRO_LEX_NAME || ret == AVRO_LEX_SYMBOL || ret == AVRO_LEX_SIZE) {
+    if( ret > AVRO_LEX_OUTPUT_TEXT_BEGIN && ret < AVRO_LEX_OUTPUT_TEXT_END ) { 
         c->setText( c->lexer().YYText()) ;
     }
     return ret;
@@ -50,79 +50,124 @@
 
 %x READTYPE
 %x STARTTYPE
+%x STARTSCHEMA
 %x READNAME
+%x READFIELD
 %x READFIELDS
+%x READFIELDNAME
 %x READSYMBOLS
-%s READSIZE
+%x READSYMBOL
+%x READSIZE
 %x INUNION
 %x INOBJECT
+%x READMETADATA
+%x SKIPJSONSTRING
+%x SKIPJSONARRAY
+%x SKIPJSONOBJECT
 
-ws [ \t\n]
+ws [ \t\r\n]
+nonws [^ \t\r\n]
 delim {ws}*:{ws}*
 avrotext [a-zA-Z_][a-zA-Z0-9_]*
 startunion \[
 startobject \{
 integer [0-9]+
+anytext .*
 
 %%
-<READTYPE>int                  return AVRO_LEX_INT;
-<READTYPE>long                 return AVRO_LEX_LONG;
-<READTYPE>null                 return AVRO_LEX_NULL;
-<READTYPE>boolean              return AVRO_LEX_BOOL;
-<READTYPE>float                return AVRO_LEX_FLOAT;
-<READTYPE>double               return AVRO_LEX_DOUBLE;
-<READTYPE>string               return AVRO_LEX_STRING;
-<READTYPE>bytes                return AVRO_LEX_BYTES;
-<READTYPE>record               return AVRO_LEX_RECORD;
-<READTYPE>enum                 return AVRO_LEX_ENUM;
-<READTYPE>map                  return AVRO_LEX_MAP;
-<READTYPE>array                return AVRO_LEX_ARRAY;
-<READTYPE>fixed                return AVRO_LEX_FIXED;
-<READTYPE>{avrotext}           return AVRO_LEX_SYMBOL;
-<READTYPE>\"                   yy_pop_state(); 
-
-
-<READNAME>{avrotext}           return AVRO_LEX_NAME;
-<READNAME>\"                   yy_pop_state();
-
-<READFIELDS>\"type\"{delim}    yy_push_state(STARTTYPE); 
-<READFIELDS>\"name\"{delim}\"  yy_push_state( READNAME); 
-<READFIELDS>\]                 yy_pop_state(); 
-<READFIELDS>[,\{\}]            return yytext[0];
-<READFIELDS>{ws}               ;
+<READTYPE>int                   return AVRO_LEX_INT;
+<READTYPE>long                  return AVRO_LEX_LONG;
+<READTYPE>null                  return AVRO_LEX_NULL;
+<READTYPE>boolean               return AVRO_LEX_BOOL;
+<READTYPE>float                 return AVRO_LEX_FLOAT;
+<READTYPE>double                return AVRO_LEX_DOUBLE;
+<READTYPE>string                return AVRO_LEX_STRING;
+<READTYPE>bytes                 return AVRO_LEX_BYTES;
+<READTYPE>record                return AVRO_LEX_RECORD;
+<READTYPE>enum                  return AVRO_LEX_ENUM;
+<READTYPE>map                   return AVRO_LEX_MAP;
+<READTYPE>array                 return AVRO_LEX_ARRAY;
+<READTYPE>fixed                 return AVRO_LEX_FIXED;
+<READTYPE>{avrotext}            return AVRO_LEX_NAMED_TYPE;
+<READTYPE>\"                    yy_pop_state(); 
+
+<READNAME>{avrotext}            return AVRO_LEX_NAME;
+<READNAME>\"                    yy_pop_state();
+
+<READSYMBOL>{avrotext}          return AVRO_LEX_SYMBOL;
+<READSYMBOL>\"                  yy_pop_state();
+
+<READFIELDNAME>{avrotext}       return AVRO_LEX_FIELD_NAME;
+<READFIELDNAME>\"               yy_pop_state();
+
+<READFIELD>\"type\"{delim}      yy_push_state(STARTSCHEMA); 
+<READFIELD>\"name\"{delim}\"    yy_push_state(READFIELDNAME); 
+<READFIELD>\}                   yy_pop_state(); return AVRO_LEX_FIELD_END;
+<READFIELD>,                    return yytext[0];
+<READFIELD>{nonws}+{delim}      yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
+<READFIELD>{ws}                 ;
+
+<READFIELDS>\{                  yy_push_state(READFIELD); return AVRO_LEX_FIELD;
+<READFIELDS>\]                  yy_pop_state(); return AVRO_LEX_FIELDS_END;
+<READFIELDS>,                   return yytext[0];
+<READFIELDS>{ws}                ;
 
-<READSYMBOLS>\"                 yy_push_state(READNAME); 
+<READSYMBOLS>\"                 yy_push_state(READSYMBOL); 
 <READSYMBOLS>,                  return yytext[0];
-<READSYMBOLS>\]                 yy_pop_state();
+<READSYMBOLS>\]                 yy_pop_state(); return AVRO_LEX_SYMBOLS_END;
 <READSYMBOLS>{ws}               ;
 
 <READSIZE>{integer}             yy_pop_state(); return AVRO_LEX_SIZE;
 
-<INUNION>\"                 yy_push_state(READTYPE); 
-<INUNION>{startobject}      yy_push_state( INOBJECT); return yytext[0];
-<INUNION>\]                 yy_pop_state(); return yytext[0];
-<INUNION>,                  return yytext[0];
-<INUNION>{ws}               ;
-
-<INOBJECT>\"type\"{delim}      yy_push_state(STARTTYPE); return AVRO_LEX_TYPE;
-<INOBJECT>\"name\"{delim}\"    yy_push_state(READNAME); 
-<INOBJECT>\"size\"{delim}      yy_push_state(READSIZE);
-<INOBJECT>\"items\"{delim}     yy_push_state(STARTTYPE); return AVRO_LEX_ITEMS;
-<INOBJECT>\"values\"{delim}    yy_push_state(STARTTYPE); return AVRO_LEX_VALUES;
-<INOBJECT>\"fields\"{delim}\[  yy_push_state( READFIELDS); return AVRO_LEX_FIELDS; 
-<INOBJECT>\"symbols\"{delim}\[ yy_push_state( READSYMBOLS); 
-<INOBJECT>,                    return yytext[0];
-<INOBJECT>\}                   yy_pop_state(); return yytext[0];
-<INOBJECT>{ws}                 ;
-
-<STARTTYPE>\"                  yy_pop_state(); yy_push_state(READTYPE); 
-<STARTTYPE>{startunion}        yy_pop_state(); yy_push_state(INUNION); return yytext[0];
-<STARTTYPE>{startobject}       yy_pop_state(); yy_push_state(INOBJECT); return yytext[0];
-
-{startobject}                  yy_push_state( INOBJECT); return yytext[0];
-{startunion}                   yy_push_state( INUNION); return yytext[0];
-\"                             yy_push_state( READTYPE);
-{ws}                           ;
+<INUNION>\"                     yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE;
+<INUNION>{startobject}          yy_push_state(INOBJECT); return yytext[0];
+<INUNION>\]                     yy_pop_state(); return yytext[0];
+<INUNION>,                      return yytext[0];
+<INUNION>{ws}                   ;
+
+<SKIPJSONSTRING>\"              yy_pop_state();
+<SKIPJSONSTRING>\\.             ;
+<SKIPJSONSTRING>[^\"\\]+        ;
+
+<SKIPJSONOBJECT>\}              yy_pop_state();
+<SKIPJSONOBJECT>\{              yy_push_state(SKIPJSONOBJECT);
+<SKIPJSONOBJECT>\"              yy_push_state(SKIPJSONSTRING);
+<SKIPJSONOBJECT>[^\{\}\"]+      ;
+
+<SKIPJSONARRAY>\]               yy_pop_state();
+<SKIPJSONARRAY>\[               yy_push_state(SKIPJSONARRAY);
+<SKIPJSONARRAY>\"               yy_push_state(SKIPJSONSTRING);
+<SKIPJSONARRAY>[^\[\]\"]+       ;  
+
+<READMETADATA>\"                yy_pop_state(); yy_push_state(SKIPJSONSTRING);
+<READMETADATA>\{                yy_pop_state(); yy_push_state(SKIPJSONOBJECT);
+<READMETADATA>\[                yy_pop_state(); yy_push_state(SKIPJSONARRAY);
+<READMETADATA>[^\"\{\[,\}]+     yy_pop_state();
+
+<INOBJECT>\"type\"{delim}       yy_push_state(STARTTYPE); return AVRO_LEX_TYPE;
+<INOBJECT>\"name\"{delim}\"     yy_push_state(READNAME); 
+<INOBJECT>\"size\"{delim}       yy_push_state(READSIZE);
+<INOBJECT>\"items\"{delim}      yy_push_state(STARTSCHEMA); return AVRO_LEX_ITEMS;
+<INOBJECT>\"values\"{delim}     yy_push_state(STARTSCHEMA); return AVRO_LEX_VALUES;
+<INOBJECT>\"fields\"{delim}\[   yy_push_state(READFIELDS); return AVRO_LEX_FIELDS; 
+<INOBJECT>\"symbols\"{delim}\[  yy_push_state(READSYMBOLS); return AVRO_LEX_SYMBOLS;
+<INOBJECT>,                     return yytext[0];
+<INOBJECT>\}                    yy_pop_state(); return yytext[0];
+<INOBJECT>{nonws}+{delim}       yy_push_state(READMETADATA); return AVRO_LEX_METADATA;
+<INOBJECT>{ws}                  ;
+
+<STARTTYPE>\"                   yy_pop_state(); yy_push_state(READTYPE); 
+<STARTTYPE>{startunion}         yy_pop_state(); yy_push_state(INUNION); return yytext[0];
+<STARTTYPE>{startobject}        yy_pop_state(); yy_push_state(INOBJECT); return yytext[0];
+
+<STARTSCHEMA>\"                 yy_pop_state(); yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE;
+<STARTSCHEMA>{startunion}       yy_pop_state(); yy_push_state(INUNION); return yytext[0];
+<STARTSCHEMA>{startobject}      yy_pop_state(); yy_push_state(INOBJECT); return yytext[0];
+
+{startobject}                   yy_push_state(INOBJECT); return yytext[0];
+{startunion}                    yy_push_state(INUNION); return yytext[0];
+\"                              yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE;
+{ws}                            ;
 
 %%
 

Modified: hadoop/avro/trunk/src/c++/parser/avro.y
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/parser/avro.y?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/parser/avro.y (original)
+++ hadoop/avro/trunk/src/c++/parser/avro.y Thu Sep  3 16:22:48 2009
@@ -42,125 +42,160 @@
 %pure-parser
 %error-verbose
 
-%token AVRO_LEX_INT AVRO_LEX_LONG AVRO_LEX_FLOAT AVRO_LEX_DOUBLE
-%token AVRO_LEX_BOOL AVRO_LEX_NULL AVRO_LEX_BYTES AVRO_LEX_STRING
+%token AVRO_LEX_INT AVRO_LEX_LONG 
+%token AVRO_LEX_FLOAT AVRO_LEX_DOUBLE
+%token AVRO_LEX_BOOL AVRO_LEX_NULL 
+%token AVRO_LEX_BYTES AVRO_LEX_STRING 
 %token AVRO_LEX_RECORD AVRO_LEX_ENUM AVRO_LEX_ARRAY AVRO_LEX_MAP AVRO_LEX_UNION AVRO_LEX_FIXED
-%token AVRO_LEX_SYMBOL AVRO_LEX_SIZE
-%token AVRO_LEX_TYPE AVRO_LEX_ITEMS AVRO_LEX_NAME AVRO_LEX_VALUES AVRO_LEX_FIELDS 
 
-%%
+%token AVRO_LEX_METADATA
 
-avroschema: 
-        primitive | avroobject | union_t
-        ;
+%token AVRO_LEX_SYMBOLS AVRO_LEX_SYMBOLS_END
+%token AVRO_LEX_FIELDS AVRO_LEX_FIELDS_END AVRO_LEX_FIELD AVRO_LEX_FIELD_END
 
-avroobject:
-        primitiveobject | record_t | array_t | map_t | enum_t | fixed_t
-        ;
+%token AVRO_LEX_TYPE AVRO_LEX_ITEMS AVRO_LEX_VALUES 
 
-primitiveobject:
-        '{' AVRO_LEX_TYPE primitive '}'
-        ;
+// Tokens that output text:
+%token AVRO_LEX_OUTPUT_TEXT_BEGIN
+%token AVRO_LEX_NAME
+%token AVRO_LEX_NAMED_TYPE
+%token AVRO_LEX_FIELD_NAME
+%token AVRO_LEX_SYMBOL
+%token AVRO_LEX_SIZE
+%token AVRO_LEX_OUTPUT_TEXT_END
 
+%token AVRO_LEX_SIMPLE_TYPE
+
+%%
+
+avroschema: 
+        simpleprimitive | object | union_t
+        ;
+ 
 primitive:
-        AVRO_LEX_INT    { context(ctx).addPrimitive(avro::AVRO_INT); }
+        AVRO_LEX_INT    { context(ctx).addType(avro::AVRO_INT); }
         |
-        AVRO_LEX_LONG   { context(ctx).addPrimitive(avro::AVRO_LONG); }
+        AVRO_LEX_LONG   { context(ctx).addType(avro::AVRO_LONG); }
         |
-        AVRO_LEX_FLOAT  { context(ctx).addPrimitive(avro::AVRO_FLOAT); }
+        AVRO_LEX_FLOAT  { context(ctx).addType(avro::AVRO_FLOAT); }
         |
-        AVRO_LEX_DOUBLE { context(ctx).addPrimitive(avro::AVRO_DOUBLE); }
+        AVRO_LEX_DOUBLE { context(ctx).addType(avro::AVRO_DOUBLE); }
         |
-        AVRO_LEX_BOOL   { context(ctx).addPrimitive(avro::AVRO_BOOL); }
+        AVRO_LEX_BOOL   { context(ctx).addType(avro::AVRO_BOOL); }
         |
-        AVRO_LEX_NULL   { context(ctx).addPrimitive(avro::AVRO_NULL); }
+        AVRO_LEX_NULL   { context(ctx).addType(avro::AVRO_NULL); }
         |
-        AVRO_LEX_BYTES  { context(ctx).addPrimitive(avro::AVRO_BYTES); }
+        AVRO_LEX_BYTES  { context(ctx).addType(avro::AVRO_BYTES); }
         |
-        AVRO_LEX_STRING { context(ctx).addPrimitive(avro::AVRO_STRING); }
+        AVRO_LEX_STRING { context(ctx).addType(avro::AVRO_STRING); }
         |
-        AVRO_LEX_SYMBOL { context(ctx).addSymbol(); }
+        AVRO_LEX_NAMED_TYPE { context(ctx).addNamedType(); }
         ;
 
-recordtag: 
-        AVRO_LEX_TYPE AVRO_LEX_RECORD 
+simpleprimitive:
+        AVRO_LEX_SIMPLE_TYPE { context(ctx).startType(); } primitive { context(ctx).stopType(); }
         ;
 
-enumtag: 
-        AVRO_LEX_TYPE AVRO_LEX_ENUM 
+primitive_t:
+        AVRO_LEX_TYPE primitive
         ;
 
-arraytag:
-        AVRO_LEX_TYPE AVRO_LEX_ARRAY
-        { context(ctx).addArray(); }
+array_t:
+        AVRO_LEX_TYPE AVRO_LEX_ARRAY { context(ctx).addType(avro::AVRO_ARRAY); }
         ;
 
-maptag:
-        AVRO_LEX_TYPE AVRO_LEX_MAP
-        { context(ctx).addMap(); }
+enum_t: 
+        AVRO_LEX_TYPE AVRO_LEX_ENUM { context(ctx).addType(avro::AVRO_ENUM); }
         ;
 
-fixedtag:
-        AVRO_LEX_TYPE AVRO_LEX_FIXED
+fixed_t:
+        AVRO_LEX_TYPE AVRO_LEX_FIXED { context(ctx).addType(avro::AVRO_FIXED); }
         ;
 
-record_t:
-        '{' recordtag ',' name { context(ctx).addRecord() } ',' AVRO_LEX_FIELDS fieldlist '}'
-        { context(ctx).endCompound(avro::AVRO_RECORD); }
+map_t: 
+        AVRO_LEX_TYPE AVRO_LEX_MAP { context(ctx).addType(avro::AVRO_MAP); }
         ;
 
-enum_t:
-       '{'  enumtag ',' name { context(ctx).addEnum() } ',' namelist '}'
-        { context(ctx).endCompound(avro::AVRO_ENUM); }
+record_t: 
+        AVRO_LEX_TYPE AVRO_LEX_RECORD { context(ctx).addType(avro::AVRO_RECORD); }
         ;
 
-array_t: 
-       '{'  arraytag ',' AVRO_LEX_ITEMS avroschema '}'
-        { context(ctx).endCompound(avro::AVRO_ARRAY); }
+type_attribute:
+        array_t | enum_t | fixed_t | map_t | record_t | primitive_t
         ;
 
-map_t: 
-        '{' maptag ',' AVRO_LEX_VALUES avroschema '}'
-        { context(ctx).endCompound(avro::AVRO_MAP); }
+union_t:
+        '[' { context(ctx).startType(); context(ctx).addType(avro::AVRO_UNION); context(ctx).setTypesAttribute(); } 
+        unionlist
+        ']' { context(ctx).stopType(); }
         ;
 
-union_t:
-        '[' { context(ctx).addUnion(); } unionlist ']'
-        { context(ctx).endCompound(avro::AVRO_UNION); }
+object: 
+        '{' { context(ctx).startType(); } 
+         attributelist
+        '}' { context(ctx).stopType(); }
+        ;
+        
+name_attribute:
+        AVRO_LEX_NAME { context(ctx).setNameAttribute(); }
         ;
 
-fixed_t:
-        '{' fixedtag ',' size ',' name '}'
-        { context(ctx).addFixed(); }
+size_attribute:
+        AVRO_LEX_SIZE { context(ctx).setSizeAttribute(); }
         ;
 
-name:
-        AVRO_LEX_NAME 
-        { context(ctx).addName(); }
+values_attribute:
+        AVRO_LEX_VALUES { context(ctx).setValuesAttribute(); } avroschema 
         ;
 
-size:
-        AVRO_LEX_SIZE 
-        { context(ctx).addSize(); }
+fields_attribute:
+        AVRO_LEX_FIELDS { context(ctx).setFieldsAttribute(); } fieldslist AVRO_LEX_FIELDS_END
         ;
 
-namelist:
-        name | namelist ',' name
+items_attribute:
+        AVRO_LEX_ITEMS { context(ctx).setItemsAttribute(); } avroschema
         ;
 
-field:
-        '{' fieldname ',' avroschema '}'
+symbols_attribute:
+        AVRO_LEX_SYMBOLS symbollist AVRO_LEX_SYMBOLS_END
+        ;
+
+attribute:
+        type_attribute | name_attribute | fields_attribute | items_attribute | size_attribute | values_attribute | symbols_attribute | AVRO_LEX_METADATA
+        ;
+
+attributelist: 
+        attribute | attributelist ',' attribute
+        ;
+
+symbol:
+        AVRO_LEX_SYMBOL { context(ctx).setSymbolsAttribute(); }
+        ;
+
+symbollist:
+        symbol | symbollist ',' symbol
+        ;
+
+fieldsetting:
+        fieldname | avroschema | AVRO_LEX_METADATA
+        ;
+
+fieldsettinglist:
+        fieldsetting | fieldsettinglist ',' fieldsetting 
+        ;
+
+fields:
+        AVRO_LEX_FIELD fieldsettinglist AVRO_LEX_FIELD_END
         ;   
 
 fieldname:
-        AVRO_LEX_NAME 
-        { context(ctx).addFieldName(); }
+        AVRO_LEX_FIELD_NAME { context(ctx).textContainsFieldName(); }
         ;
 
-fieldlist:
-        field | fieldlist ',' field
+fieldslist:
+        fields | fieldslist ',' fields
         ;
 
 unionlist: 
         avroschema | unionlist ',' avroschema
-
+        ;

Modified: hadoop/avro/trunk/src/c++/test/precompile.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/precompile.cc?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/test/precompile.cc (original)
+++ hadoop/avro/trunk/src/c++/test/precompile.cc Thu Sep  3 16:22:48 2009
@@ -24,7 +24,7 @@
 
 int main()
 {
-
+    int ret = 0;
     try {
         avro::ValidSchema schema;
         avro::compileJsonSchema(std::cin, schema);
@@ -32,8 +32,9 @@
         schema.toFlatList(std::cout);
     }
     catch (std::exception &e) {
-        std::cout << "Failed to parse or compile schema: " << e.what() << std::endl;
+        std::cerr << "Failed to parse or compile schema: " << e.what() << std::endl;
+        ret = 1;
     }
 
-    return 0;
+    return ret;
 }

Modified: hadoop/avro/trunk/src/c++/test/testparser.cc
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/c%2B%2B/test/testparser.cc?rev=811015&r1=811014&r2=811015&view=diff
==============================================================================
--- hadoop/avro/trunk/src/c++/test/testparser.cc (original)
+++ hadoop/avro/trunk/src/c++/test/testparser.cc Thu Sep  3 16:22:48 2009
@@ -24,7 +24,7 @@
 
 int main()
 {
-
+    int ret = 0;
     try {
         avro::ValidSchema schema;
         avro::compileJsonSchema(std::cin, schema);
@@ -32,8 +32,9 @@
         schema.toJson(std::cout);
     }
     catch (std::exception &e) {
-        std::cout << "Failed to parse or compile schema: " << e.what() << std::endl;
+        std::cerr << "Failed to parse or compile schema: " << e.what() << std::endl;
+        ret = 1;
     }
 
-    return 0;
+    return ret;
 }



Mime
View raw message