openoffice-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject svn commit: r1601582 [1/3] - in /openoffice/trunk/main/ooxml/source/framework: JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ SchemaParser/src/org/apache/openoffice/ooxml/schema/ SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/ S...
Date Tue, 10 Jun 2014 09:35:41 GMT
Author: af
Date: Tue Jun 10 09:35:39 2014
New Revision: 1601582

URL: http://svn.apache.org/r1601582
Log:
125035: Can now create a validating parser.

Added:
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/HopcroftMinimizer.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/NonValidatingCreator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/SkipData.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/State.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContainer.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContext.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateSet.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/Transition.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/ValidatingCreator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/ValidatingCreatorVisitor.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/PermutationIterator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/SchemaBase.java
Removed:
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/
Modified:
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
    openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/LogGenerator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/ParserTablesGenerator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/AttributeIterator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/DereferencingNodeIterator.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeGroupReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/INodeReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/Node.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/QualifiedName.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/All.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Any.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexContent.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexTypeReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Element.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ElementReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Extension.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/GroupReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/ProcessTypeVisitor.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/RequestVisitor.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/SchemaOptimizer.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/NamespaceMap.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/Schema.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleContent.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleTypeReference.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/parser/SchemaParser.java
    openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/parser/XmlNamespace.java

Added: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,41 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/** List of all accepting states.
+ * 
+ *  The accepting status of states is important when a closing tag is seen.
+ *  It denotes the end of the input stream for the state machine of the currently
+ *  processed element.  It is an error when the current state is not accepting
+ *  when a closing tag is processed.  
+ */
+public class AcceptingStateTable
+{
+    public AcceptingStateTable (final Iterable<String[]> aData)
+    {
+        maAcceptingStates = new HashSet<>();
+        
+        for (final String[] aLine : aData)
+        {
+            // Parse the state id from the second column and record it as accepting.
+            final int nStateId = Integer.parseInt(aLine[1]);
+
+            maAcceptingStates.add(nStateId);
+        }
+        Log.Std.printf("read %d accepting states\n",  maAcceptingStates.size());
+    }
+
+    
+    
+    
+    public boolean Contains (final int nStateId)
+    {
+        return maAcceptingStates.contains(nStateId);
+    }
+    
+    
+    
+    
+    private final Set<Integer> maAcceptingStates;
+}

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java Tue Jun 10 09:35:39 2014
@@ -21,14 +21,14 @@
 
 package org.apache.openoffice.ooxml.parser;
 
-import java.io.File;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Vector;
 
 public class AttributeManager
 {
-    public AttributeManager (final File aDataLocation)
+    public AttributeManager (final Vector<String[]> aData)
     {
         maStateIdToAttributesMap = new HashMap<>();        
     }

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java Tue Jun 10 09:35:39 2014
@@ -21,46 +21,28 @@
 
 package org.apache.openoffice.ooxml.parser;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Vector;
 
 public class NameMap
 {
-    NameMap (final File aDataLocation)
+    NameMap (final Vector<String[]> aData)
     {
         maNameToIdMap = new HashMap<>();
         maIdToNameMap = new Vector<>();
 
-        try
+        for (final String[] aLine : aData)
         {
-            final BufferedReader aReader = new BufferedReader(
-                new FileReader(
-                    new File(aDataLocation, "names.lst")));
+            final int nId = Integer.parseInt(aLine[1]);
             
-            while (true)
-            {
-                final String sLine = aReader.readLine();
-                if (sLine == null)
-                    break;
-                final String aParts[] = sLine.split("\\s+");
-                final int nId = Integer.parseInt(aParts[0]);
-                maNameToIdMap.put(aParts[1], nId);
-                if (maIdToNameMap.size() <= nId)
-                    maIdToNameMap.setSize(nId+1);
-                maIdToNameMap.set(nId, aParts[1]);
-            }
+            maNameToIdMap.put(aLine[2], nId);
             
-            aReader.close();
-        } 
-        catch (final Exception aException)
-        {
-            throw new RuntimeException(aException);
+            if (maIdToNameMap.size() <= nId)
+                maIdToNameMap.setSize(nId+1);
+            maIdToNameMap.set(nId, aLine[2]);
         }
-        
+            
         if (Log.Dbg != null)
             Log.Dbg.printf("initialized name map with %d definitions\n", maNameToIdMap.size());
     }
@@ -89,7 +71,10 @@ public class NameMap
     
     public String GetNameForId (final int nId)
     {
-        return maIdToNameMap.get(nId);
+        if (nId == -1)
+            return "<none>";
+        else
+            return maIdToNameMap.get(nId);
     }
     
     

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java Tue Jun 10 09:35:39 2014
@@ -21,40 +21,21 @@
 
 package org.apache.openoffice.ooxml.parser;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Vector;
 
 public class NamespaceMap
 {
-    NamespaceMap (final File aDataLocation)
+    NamespaceMap (final Vector<String[]> aData)
     {
         maUriToPrefixMap = new HashMap<>();
         
-        try
+        for (final String[] aLine : aData)
         {
-            final BufferedReader aReader = new BufferedReader(
-                new FileReader(
-                    new File(aDataLocation, "namespaces.lst")));
-            
-            while (true)
-            {
-                final String sLine = aReader.readLine();
-                if (sLine == null)
-                    break;
-                final String aParts[] = sLine.split("\\s+");
-                maUriToPrefixMap.put(aParts[0], aParts[1]);
-            }
-            
-            aReader.close();
-        } 
-        catch (final Exception aException)
-        {
-            throw new RuntimeException(aException);
+            maUriToPrefixMap.put(aLine[2], aLine[1]);
         }
-        
+            
         if (Log.Dbg != null)
             Log.Dbg.printf("initialized namespace map with %d definitions\n", maUriToPrefixMap.size());
     }

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java Tue Jun 10 09:35:39 2014
@@ -76,8 +76,6 @@ public class OOXMLParser
         }
     }
     
- 
-    
     
     private static InputStream GetInputStream (final String sInputName)
     {
@@ -165,13 +163,17 @@ public class OOXMLParser
                     case XMLStreamReader.START_ELEMENT:
                         ++nElementCount;
                         if (aMachine.IsInSkipState())
+                        {
+                            Log.Dbg.printf("is skip state -> starting to skip\n");
                             nElementCount += Skip(aReader);
+                        }
                         else if ( ! aMachine.ProcessStartElement(
                             aReader.getNamespaceURI(),
                             aReader.getLocalName(),
                             aReader.getLocation(),
                             aAttributeProvider))
                         {
+                            Log.Dbg.printf("starting to skip to recover from error\n");
                             nElementCount += Skip(aReader);
                         }
                         break;
@@ -185,7 +187,7 @@ public class OOXMLParser
                         
                     case XMLStreamReader.CHARACTERS:
                         final String sText = aReader.getText();
-                        Log.Dbg.printf("text [%s]\n", sText);
+                        Log.Dbg.printf("text [%s]\n", sText.replace("\n", "\\n"));
                         aMachine.ProcessCharacters(sText);
                         break;
                         
@@ -251,7 +253,7 @@ public class OOXMLParser
                         throw new RuntimeException("saw end of document while skipping elements\n");
                         
                     case XMLStreamReader.CHARACTERS:
-                        Log.Dbg.printf("skipping text [%s]\n", aReader.getText());
+                        SkipText(aReader.getText());
                         break;
 
                     default:
@@ -265,4 +267,11 @@ public class OOXMLParser
         }
         return nElementCount;
     }
+    
+    
+    
+    private static void SkipText (final String sText)
+    {
+        Log.Dbg.printf("skipping text [%s]\n", sText.replace("\n", "\\n"));
+    }
 }

Added: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,64 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+
+/** A simple reader for the parse table data that allows simple filtering on the
+ *  first word in each line.
+ *  
+ *  Lines that start with '#' are treated as comments and are ignored.
+ *
+ */
+public class ParseTableReader
+{
+    public ParseTableReader (final File aFile)
+    {
+        maSections = new HashMap<>();
+        
+        try
+        {
+            final BufferedReader aReader = new BufferedReader(new FileReader(aFile));
+            
+            while (true)
+            {
+                final String sLine = aReader.readLine();
+                if (sLine == null)
+                    break;
+                if (sLine.startsWith("#"))
+                    continue;
+                final String aParts[] = sLine.split("\\s+");
+                
+                GetSection(aParts[0]).add(aParts);
+            }
+            
+            aReader.close();
+        } 
+        catch (final Exception aException)
+        {
+            throw new RuntimeException(aException);
+        }
+    }
+
+
+    
+    
+    public Vector<String[]> GetSection (final String sSectionName)
+    {
+        Vector<String[]> aSection = maSections.get(sSectionName);
+        if (aSection == null)
+        {
+            aSection = new Vector<>();
+            maSections.put(sSectionName, aSection);
+        }
+        return aSection;
+    }
+
+    
+    
+
+    private final Map<String,Vector<String[]>> maSections;
+}

Added: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,42 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/** Table of all skip states.
+ * 
+ *  A skip state corresponds to the 'any' element in the schemas.
+ *  It means that the content of the element is specified by an extension of the
+ *  schema which may or may not be known at parse time.
+ *  At the moment the whole element is skipped, i.e. ignored.
+ * 
+ */
+public class SkipStateTable
+{
+    public SkipStateTable (final Iterable<String[]> aData)
+    {
+        maSkipStates = new HashSet<>();
+        
+        for (final String[] aLine : aData)
+        {
+            // Parse the state id from the second column and record it as a skip state.
+            final int nStateId = Integer.parseInt(aLine[1]);
+
+            maSkipStates.add(nStateId);
+        }
+        Log.Std.printf("read %d skip states\n",  maSkipStates.size());
+    }
+
+    
+    
+    
+    public boolean Contains (final int nStateId)
+    {
+        return maSkipStates.contains(nStateId);
+    }
+    
+    
+    
+    
+    private final Set<Integer> maSkipStates;
+}

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java Tue Jun 10 09:35:39 2014
@@ -22,7 +22,6 @@
 package org.apache.openoffice.ooxml.parser;
 
 import java.io.File;
-import java.util.Set;
 import java.util.Stack;
 
 import javax.xml.stream.Location;
@@ -32,16 +31,19 @@ import javax.xml.stream.Location;
  */
 public class StateMachine
 {
-    public StateMachine (final File aDataLocation)
+    public StateMachine (final File aParseTableFile)
     {
-        maNamespaceMap = new NamespaceMap(aDataLocation);
-        maNameMap = new NameMap(aDataLocation);
-        maTransitions = new TransitionTable(aDataLocation);
-        maSkipStates = maTransitions.GetSkipStates();
-        maAttributeManager = new AttributeManager(aDataLocation);
+        final ParseTableReader aReader = new ParseTableReader(aParseTableFile);
+        maNamespaceMap = new NamespaceMap(aReader.GetSection("namespace"));
+        maElementNameMap = new NameMap(aReader.GetSection("element-name"));
+        maStateNameMap = new NameMap(aReader.GetSection("state-name"));
+        maTransitions = new TransitionTable(aReader.GetSection("transition"));
+        maSkipStates = new SkipStateTable(aReader.GetSection("skip"));
+        maAcceptingStates = new AcceptingStateTable(aReader.GetSection("accepting-state"));
+        maAttributeManager = new AttributeManager(aReader.GetSection("attribute"));
         
-        mnStartStateId = maNameMap.GetIdForName(null, "_start_");
-        mnEndStateId = maNameMap.GetIdForName(null, "_end_");
+        mnStartStateId = Integer.parseInt(aReader.GetSection("start-state").firstElement()[1]);
+        mnEndStateId = Integer.parseInt(aReader.GetSection("end-state").firstElement()[1]);
         mnCurrentStateId = mnStartStateId;
         maStateStack = new Stack<>();
         Log.Dbg.printf("starting in state _start_ (%d)\n", mnCurrentStateId);
@@ -56,12 +58,13 @@ public class StateMachine
         final Location aLocation,
         final AttributeProvider aAttributes)
     {
-        final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
         boolean bResult = false;
+
         try
         {
-            final int nElementId = maNameMap.GetIdForName(sPrefix, sElementName);
-            Log.Dbg.printf("%s:%s(%d, aArgumentList) L%dC%d\n",
+            final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
+            final int nElementId = maElementNameMap.GetIdForName(sPrefix, sElementName);
+            Log.Dbg.printf("%s:%s(%d) L%dC%d\n",
                 sPrefix,
                 sElementName,
                 nElementId,
@@ -73,21 +76,25 @@ public class StateMachine
                 nElementId);
             if (aTransition == null)
             {
-                Log.Err.printf(
-                    "can not find transition for state %s and element %s at L%dC%d\n",
-                    maNameMap.GetNameForId(mnCurrentStateId),
-                    maNameMap.GetNameForId(nElementId),
+                final String sText = String.format(
+                    "can not find transition for state %s(%d) and element %s(%d) at L%dC%d\n",
+                    maStateNameMap.GetNameForId(mnCurrentStateId),
+                    mnCurrentStateId,
+                    maElementNameMap.GetNameForId(nElementId),
+                    nElementId,
                     aLocation.getLineNumber(),
                     aLocation.getColumnNumber());
+                Log.Err.printf(sText);
+                Log.Dbg.printf(sText);
             }
             else
             {
                 Log.Dbg.printf(" %s(%d) -> %s(%d) via %s(%d)",
-                    maNameMap.GetNameForId(mnCurrentStateId),
+                    maStateNameMap.GetNameForId(mnCurrentStateId),
                     mnCurrentStateId,
-                    maNameMap.GetNameForId(aTransition.GetEndStateId()),
+                    maStateNameMap.GetNameForId(aTransition.GetEndStateId()),
                     aTransition.GetEndStateId(),
-                    maNameMap.GetNameForId(aTransition.GetActionId()),
+                    maStateNameMap.GetNameForId(aTransition.GetActionId()),
                     aTransition.GetActionId());
                 Log.Dbg.printf("\n");
                 
@@ -101,7 +108,10 @@ public class StateMachine
         }
         catch (RuntimeException aException)
         {
-            aException.printStackTrace();
+            System.err.printf("error at line %d and column %d\n",
+                aLocation.getLineNumber(),
+                aLocation.getColumnNumber());
+            throw aException;
         }
         return bResult;
     }
@@ -114,6 +124,15 @@ public class StateMachine
         final String sElementName,
         final Location aLocation)
     {
+        if ( ! maAcceptingStates.Contains(mnCurrentStateId)
+            && mnCurrentStateId!=-1)
+        {
+            Log.Dbg.printf("current state %s(%d) is not an accepting state\n",
+                maStateNameMap.GetNameForId(mnCurrentStateId),
+                mnCurrentStateId);
+            throw new RuntimeException("not expecting end element "+sElementName);
+        }
+
         final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
         
         final int nOldStateId = mnCurrentStateId;
@@ -126,9 +145,9 @@ public class StateMachine
             aLocation.getLineNumber(),
             aLocation.getColumnNumber());
         Log.Dbg.printf(" %s(%d) <- %s(%d)\n",
-            maNameMap.GetNameForId(nOldStateId),
+            maStateNameMap.GetNameForId(nOldStateId),
             nOldStateId,
-            maNameMap.GetNameForId(mnCurrentStateId),
+            maStateNameMap.GetNameForId(mnCurrentStateId),
             mnCurrentStateId);
     }
     
@@ -145,7 +164,7 @@ public class StateMachine
     
     public boolean IsInSkipState ()
     {
-        return maSkipStates.contains(mnCurrentStateId);
+        return maSkipStates.Contains(mnCurrentStateId);
     }
     
     
@@ -171,32 +190,25 @@ public class StateMachine
         final int nOldState,
         final int nNewState)
     {
-        switch(aTransition.GetAction())
-        {
-            case 'p' :
-                // Parse action.
-                maStateStack.push(mnCurrentStateId);
-                Log.Dbg.IncreaseIndentation();
-                final int nActionId = aTransition.GetActionId(); 
-                SetCurrentState(nActionId);
-                maAttributeManager.ParseAttributes(nActionId, aAttributes);
-                break;
-                
-            default:
-                throw new RuntimeException();
-        }
+        maStateStack.push(mnCurrentStateId);
+        Log.Dbg.IncreaseIndentation();
+        final int nActionId = aTransition.GetActionId(); 
+        SetCurrentState(nActionId);
+        maAttributeManager.ParseAttributes(nActionId, aAttributes);
     }
     
     
     
     
     private final NamespaceMap maNamespaceMap;
-    private final NameMap maNameMap;
+    private final NameMap maElementNameMap;
+    private final NameMap maStateNameMap;
     private final TransitionTable maTransitions;
     private final AttributeManager maAttributeManager;
     private int mnCurrentStateId;
     private Stack<Integer> maStateStack;
     private final int mnStartStateId;
     private final int mnEndStateId;
-    private static Set<Integer> maSkipStates;
+    private SkipStateTable maSkipStates;
+    private AcceptingStateTable maAcceptingStates;
 }

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java Tue Jun 10 09:35:39 2014
@@ -27,13 +27,11 @@ class Transition
         final int nStartStateId,
         final int nEndStateId,
         final int nElementId,
-        final String sAction,
         final int nActionStateId)
     {
         mnStartStateId = nStartStateId;
         mnEndStateId = nEndStateId;
         mnElementId = nElementId;
-        mcAction = sAction.charAt(0);
         mnActionStateId = nActionStateId;
     }
 
@@ -64,14 +62,6 @@ class Transition
     
     
     
-    public char GetAction ()
-    {
-        return mcAction;
-    }
-    
-    
-    
-    
     public int GetActionId ()
     {
         return mnActionStateId;
@@ -83,6 +73,5 @@ class Transition
     private final int mnStartStateId;
     private final int mnEndStateId;
     private final int mnElementId;
-    private final char mcAction;
     private final int mnActionStateId;
 }

Modified: openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java Tue Jun 10 09:35:39 2014
@@ -21,66 +21,38 @@
 
 package org.apache.openoffice.ooxml.parser;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Map;
-import java.util.Set;
+import java.util.Vector;
 
 public class TransitionTable
 {
-    public TransitionTable (final File aDataLocation)
+    public TransitionTable (final Vector<String[]> aData)
     {
         maTransitions = new HashMap<>();
-        maSkipStates = new HashSet<>();
         int nTransitionCount = 0;
         
-        try
+        for (final String[] aLine : aData)
         {
-            final BufferedReader aReader = new BufferedReader(new FileReader(new File(aDataLocation, "transitions.lst")));
-            while(true)
+            // Create new transition.
+            final int nStartStateId = Integer.parseInt(aLine[1]);
+            final int nEndStateId = Integer.parseInt(aLine[2]);
+            final int nElementId = Integer.parseInt(aLine[3]);
+            final int nElementStateId = Integer.parseInt(aLine[4]);
+            final Transition aTransition = new Transition(
+                nStartStateId,
+                nEndStateId,
+                nElementId,
+                nElementStateId);
+            ++nTransitionCount;
+
+            Map<Integer,Transition> aPerElementTransitions = maTransitions.get(aTransition.GetStartStateId());
+            if (aPerElementTransitions == null)
             {
-                // Read line, ignore comments, split into parts at whitespace.
-                final String sLine = aReader.readLine();
-                if (sLine == null)
-                    break;
-                if (sLine.startsWith("#"))
-                    continue;
-                final String[] aParts = sLine.split("\\s+");
-                
-                // Create new transition.
-                final int nStartStateId = Integer.parseInt(aParts[0]);
-                final int nEndStateId = Integer.parseInt(aParts[1]);
-                final int nElementId = Integer.parseInt(aParts[2]);
-                final int nActionStateId = Integer.parseInt(aParts[4]);
-                if (nElementId==-1 && nActionStateId==-1)
-                    maSkipStates.add(nStartStateId);
-                else
-                {
-                    final Transition aTransition = new Transition(
-                        nStartStateId,
-                        nEndStateId,
-                        nElementId,
-                        aParts[3],
-                        nActionStateId);
-                    ++nTransitionCount;
-
-                    Map<Integer,Transition> aPerElementTransitions = maTransitions.get(aTransition.GetStartStateId());
-                    if (aPerElementTransitions == null)
-                    {
-                        aPerElementTransitions = new HashMap<>();
-                        maTransitions.put(aTransition.GetStartStateId(), aPerElementTransitions);
-                    }
-                    aPerElementTransitions.put(aTransition.GetElementId(), aTransition);
-                }
+                aPerElementTransitions = new HashMap<>();
+                maTransitions.put(aTransition.GetStartStateId(), aPerElementTransitions);
             }
-            aReader.close();
-        }
-        catch (final Exception aException)
-        {
-            aException.printStackTrace();
+            aPerElementTransitions.put(aTransition.GetElementId(), aTransition);
         }
         Log.Std.printf("read %d transitions\n",  nTransitionCount);
     }
@@ -102,14 +74,5 @@ public class TransitionTable
     
     
     
-    public Set<Integer> GetSkipStates ()
-    {
-        return maSkipStates;
-    }
-    
-    
-    
-    
     private final Map<Integer,Map<Integer,Transition>> maTransitions;
-    private final Set<Integer> maSkipStates;
 }

Modified: openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java?rev=1601582&r1=1601581&r2=1601582&view=diff
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java (original)
+++ openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java Tue Jun 10 09:35:39 2014
@@ -24,23 +24,26 @@ package org.apache.openoffice.ooxml.sche
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Queue;
 import java.util.Set;
 import java.util.Vector;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import javax.xml.stream.XMLStreamException;
 
+import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
+import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
+import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomaton;
+import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
 import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
 import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
-import org.apache.openoffice.ooxml.schema.generator.automaton.NonValidatingCreator;
-import org.apache.openoffice.ooxml.schema.generator.automaton.StackAutomaton;
 import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
 import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
-import org.apache.openoffice.ooxml.schema.parser.XmlNamespace;
 
 public class SchemaReader
 {
@@ -49,6 +52,15 @@ public class SchemaReader
         if (aArgumentList.length != 1)
         {
             System.err.printf("usage: SchemaParser <driver-file>\n");
+            System.err.printf(" driver file can contain these lines:\n");
+            System.err.printf("# Comments\n");
+            System.err.printf("    are ignored\n");
+            System.err.printf("schema <mark> <file-name>\n");
+            System.err.printf("    specifies a top-level schema file to read\n");
+            System.err.printf("output-schema <file-name>\n");
+            System.err.printf("    write schema information to file\n");
+            System.err.printf("output-optimized-schema <file-name>\n");
+            System.err.printf("    write information about optimized schema to file\n");
             System.exit(1);
         }
     
@@ -61,15 +73,15 @@ public class SchemaReader
     
     private SchemaReader (final File aDriverFile)
     {
-        maSchema = new Schema();
+        maSchemaBase = new SchemaBase();
+        maTopLevelSchemas = new HashMap<>();
         maMainSchemaFiles = new Vector<>();
         maSchemaFiles = new HashSet<>();
-        maTodo = new LinkedList<String>();
+        maWorkList = new LinkedList<>();
+        maOutputOperations = new Vector<>();
         mnTotalLineCount = 0;
         mnTotalByteCount = 0;
 
-        XmlNamespace.Apply(maSchema);
-        
         ParseDriverFile(aDriverFile);
     }
     
@@ -87,43 +99,80 @@ public class SchemaReader
             System.exit(1);
         }
         
-        final Pattern aSchemaPattern = Pattern.compile("^\\s*schema\\s+(.*)\\s+(.*)");
-        final Pattern aOutputPattern = Pattern.compile("^\\s*output-directory\\s+(.*)");
         try
         {
             final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile));
             while(true)
             {
-                final String sLine = aIn.readLine();
+                String sLine = aIn.readLine();
                 if (sLine == null)
                     break;
                 // Lines starting with # are comment lines and are ignored.
-                if (sLine.matches("^\\s*#"))
+                if (sLine.matches("^\\s*#.*"))
                     continue;
                 // Lines containing only whitespace are also ignored.
                 else if (sLine.matches("^\\s*$"))
                     continue;
                 
-                Matcher aMatcher = aSchemaPattern.matcher(sLine);
-                if (aMatcher.matches())
-                {
-                    maMainSchemaFiles.add(new String[]{aMatcher.group(1), aMatcher.group(2)});
-                }
-                else
+                // Handle line continuation.
+                while (sLine.endsWith("\\"))
+                    sLine = sLine.substring(0, sLine.length()-1) + aIn.readLine();
+                
+                final Vector<String> aParts = SplitLine(sLine);
+                switch (aParts.get(0))
                 {
-                    aMatcher = aOutputPattern.matcher(sLine);
-                    if (aMatcher.matches())
-                    {
-                        maOutputDirectory = new File(aMatcher.group(1));
-                        if (maOutputDirectory.exists() && ! maOutputDirectory.canWrite())
+                	case "schema":
+                		maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
+                		break;
+
+                	case "output-schema":
+                        maOutputOperations.add(new Runnable()
                         {
-                            System.err.printf("can not write output file '%s' \n", maOutputDirectory.toString());
-                            System.exit(1);
-                        }
-                    }
-                }
+                            final File maFile = CreateCheckedOutputFile(aParts.get(1));
+                            @Override public void run()
+                            {
+                                WriteSchema(maFile);
+                            }
+                        }); 
+                        break;
 
+                    case "output-optimized-schema":
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File maFile = CreateCheckedOutputFile(aParts.get(1));
+                            @Override public void run()
+                            {
+                                WriteOptimizedSchema(maFile);
+                            }
+                        }); 
+                        break;
 
+                    case "output-nonvalidating-parse-tables":
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File maAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+                            final File maParseTableFile = CreateCheckedOutputFile(aParts.get(2));
+                            @Override public void run() {WriteNonValidatingParseTables(
+                                maAutomatonLogFile,
+                                maParseTableFile);}
+                        }); 
+                        break;
+                        
+                    case "output-validating-parse-tables":
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File maAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+                            final File maParseTableFile = CreateCheckedOutputFile(aParts.get(2));
+                            @Override public void run() {WriteValidatingParseTables(
+                                maAutomatonLogFile,
+                                maParseTableFile);}
+                        }); 
+                        break;
+                        
+                    default:
+                        System.err.printf("unknown command '%s' in driver file", aParts.get(0));
+                        System.exit(1);
+                }
             }
             aIn.close();
         } 
@@ -146,20 +195,19 @@ public class SchemaReader
         {
             aException.printStackTrace();
         }
-        final Schema aOptimizedSchema = maSchema.GetOptimizedSchema();
         
-        System.out.printf("    used are %d complex types, %d simple types, %d groups and %d top level elements\n",
-            aOptimizedSchema.ComplexTypes.GetCount(),
-            aOptimizedSchema.SimpleTypes.GetCount(),
-            aOptimizedSchema.Groups.GetCount(),
-            aOptimizedSchema.TopLevelElements.GetCount());
-
-        LogGenerator.Write(maSchema, new File(maOutputDirectory, "original-schema.txt"));
-        LogGenerator.Write(aOptimizedSchema, new File(maOutputDirectory, "bla.txt"));
-        
-        final StackAutomaton aAutomaton = CreateStackAutomaton(aOptimizedSchema);
+        maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
+        for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
+            aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));
+
+        System.out.printf("    optimization left %d complex types and %d simple types\n",
+            maOptimizedSchemaBase.ComplexTypes.GetCount(),
+            maOptimizedSchemaBase.SimpleTypes.GetCount());
         
-        new ParserTablesGenerator(aAutomaton).Generate(new File("/tmp/ooxml-parser"));
+        for (final Runnable aOperation : maOutputOperations)
+        {
+            aOperation.run();
+        }
     }
 
     
@@ -169,10 +217,10 @@ public class SchemaReader
         throws XMLStreamException
     {
         System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());
-        
+
         for (final String[] aEntry : maMainSchemaFiles)
         {
-            final String sShortName = aEntry[0];
+            final String sMainSchemaShortname = aEntry[0];
             final String sMainSchemaFile = aEntry[1];
             final File aMainSchemaFile = new File(sMainSchemaFile);
             if ( ! aMainSchemaFile.exists())
@@ -186,41 +234,58 @@ public class SchemaReader
                 System.exit(1);
             }
 
-            AddSchemaReference(sMainSchemaFile);
+            final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
+            ParseSchemaFile(sMainSchemaFile, aSchema);
+            maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
         }
          
         long nStartTime = System.currentTimeMillis();
-    
-        while ( ! maTodo.isEmpty())
+        while ( ! maWorkList.isEmpty())
         {
-            final String sSchemaName = maTodo.poll();
-            System.out.printf("parsing %s\n", sSchemaName);
-            maSchemaFiles.add(sSchemaName);
-            
-            final SchemaParser aParser = new SchemaParser(new File(sSchemaName), maSchema);
-            aParser.Parse();
-            
-            mnTotalLineCount += aParser.GetLineCount();
-            mnTotalByteCount += aParser.GetByteCount();
-            for (final File aFile : aParser.GetImportedSchemaFilenames())
-                AddSchemaReference(aFile.getAbsolutePath());
+            ParseSchemaFile(maWorkList.poll(), null);
         }
         long nEndTime = System.currentTimeMillis();
+
         System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
             maSchemaFiles.size(),
             mnTotalLineCount,
             mnTotalByteCount,
             (nEndTime-nStartTime)/1000.0);
-        System.out.printf("    found %d complex types, %d simple types, %d groups and %d top level elements\n",
-            maSchema.ComplexTypes.GetCount(),
-            maSchema.SimpleTypes.GetCount(),
-            maSchema.Groups.GetCount(),
-            maSchema.TopLevelElements.GetCount());
+        System.out.printf("    found %d complex types and %d simple types\n",
+            maSchemaBase.ComplexTypes.GetCount(),
+            maSchemaBase.SimpleTypes.GetCount());
+        
+        int nTopLevelElementCount = 0;
+        for (final Schema aSchema : maTopLevelSchemas.values())
+        	nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
+        System.out.printf("    the %d top level schemas have %d elements\n",
+        		maTopLevelSchemas.size(),
+        		nTopLevelElementCount);
     }
     
     
     
     
+    private void ParseSchemaFile (
+    		final String sSchemaFilename,
+    		final Schema aSchema)
+    				throws XMLStreamException
+    {
+        System.out.printf("parsing %s\n", sSchemaFilename);
+        maSchemaFiles.add(sSchemaFilename);
+        
+        final SchemaParser aParser = new SchemaParser(new File(sSchemaFilename), aSchema, maSchemaBase);
+        aParser.Parse();
+        
+        mnTotalLineCount += aParser.GetLineCount();
+        mnTotalByteCount += aParser.GetByteCount();
+        for (final File aFile : aParser.GetImportedSchemaFilenames())
+            AddSchemaReference(aFile.getAbsolutePath());
+    }
+
+    
+    
+    
     private void AddSchemaReference (final String sSchemaFilename)
     {
         if ( ! maSchemaFiles.contains(sSchemaFilename))
@@ -230,45 +295,148 @@ public class SchemaReader
 
             // We don't know yet the file name of the schema, so just store null to mark the schema name as 'known'. 
             maSchemaFiles.add(sSchemaFilename);
-            maTodo.add(sSchemaFilename);
+            maWorkList.add(sSchemaFilename);
         }
     }
     
     
     
     
-    private static StackAutomaton CreateStackAutomaton (final Schema aSchema)
+    /** Split the given string at whitespace but not at whitespace inside double quotes.
+     *  
+     */
+    private Vector<String> SplitLine (final String sLine)
+    {
+    	final Vector<String> aParts = new Vector<>();
+    	
+    	boolean bIsInsideQuotes = false;
+    	for (final String sPart : sLine.split("\""))
+    	{
+    		if (bIsInsideQuotes)
+    			aParts.add(sPart);
+    		else
+    	    	for (final String sInnerPart : sPart.split("\\s+"))
+    	    	{
+    	    		if (sInnerPart == null)
+    	    			throw new RuntimeException();
+    	    		else if ( ! sInnerPart.isEmpty())
+    	    			aParts.add(sInnerPart);
+    	    	}
+
+    		bIsInsideQuotes = ! bIsInsideQuotes;
+    	}
+    	
+    	return aParts;
+    }
+
+    
+    
+
+    /** Create a File object for a given file name.
+     *  Check that the file is writable, i.e. its directory exists and that if
+     *  the file already exists it can be replaced.
+     *  Throws a RuntimeException when a check fails.
+     */
+    private File CreateCheckedOutputFile (final String sFilename)
+    {
+        final File aFile = new File(sFilename);
+        if ( ! aFile.getParentFile().exists())
+            throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
+        if (aFile.exists() && ! aFile.canWrite())
+            throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
+        return aFile;
+    }
+
+    
+    
+
+    private void WriteSchema (final File aOutputFile)
+    {
+        LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values());
+    }
+    
+    
+    
+    
+    private void WriteOptimizedSchema (final File aOutputFile)
+    {
+        LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values());
+    }
+    
+    
+    
+    
+    private void WriteNonValidatingParseTables (
+        final File aAutomatonLogFile,
+        final File aParseTableFile)
     {
         long nStartTime = System.currentTimeMillis();
-        StackAutomaton aAutomaton = new NonValidatingCreator(aSchema).Create(new File("/tmp/schema.log"));
+        final NonValidatingCreator aCreator = new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
+        FiniteAutomatonContainer aAutomatons = aCreator.Create(maTopLevelSchemas.values());
         long nEndTime = System.currentTimeMillis();
         System.out.printf(
-            "created stack automaton in %fs, it has %d states and %d transitions\n",
-            (nEndTime-nStartTime)/1000.0,
-            aAutomaton.GetStateCount(),
-            aAutomaton.GetTransitionCount());
-        
-        /*
+            "created %d non-validating automatons with %d states and %d transitions in %fs\n",
+            aAutomatons.GetAutomatonCount(),
+            aAutomatons.GetStateCount(),
+            aAutomatons.GetTransitionCount(),
+            (nEndTime-nStartTime)/1000.0);
+
+        new ParserTablesGenerator(aAutomatons, maOptimizedSchemaBase.Namespaces)
+            .Generate(aParseTableFile);
+    }
+
+    
+    
+    
+    private void WriteValidatingParseTables (
+        final File aAutomatonLogFile,
+        final File aParseTableFile)
+    {
+        long nStartTime = System.currentTimeMillis();
+        final ValidatingCreator aCreator = new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
+        FiniteAutomatonContainer aAutomatons = aCreator.Create();
+        long nEndTime = System.currentTimeMillis();
+        System.out.printf(
+            "created %d validating stack automatons with %d states and %d transitions in %fs\n",
+            aAutomatons.GetAutomatonCount(),
+            aAutomatons.GetStateCount(),
+            aAutomatons.GetTransitionCount(),
+            (nEndTime-nStartTime)/1000.0);
+
+
         nStartTime = System.currentTimeMillis();
-        aAutomaton = aAutomaton.Optimize();
+        aAutomatons = aAutomatons.CreateDFAs();
         nEndTime = System.currentTimeMillis();
         System.out.printf(
-            "optimized stack automaton in %fs, it now has %d states and %d transitions\n",
+            "created %d deterministic automatons with %d states and %d transitions in %fs\n",
+            aAutomatons.GetAutomatonCount(),
+            aAutomatons.GetStateCount(),
+            aAutomatons.GetTransitionCount(),
+            (nEndTime-nStartTime)/1000.0);
+
+        nStartTime = System.currentTimeMillis();
+        aAutomatons = aAutomatons.MinimizeDFAs();
+        nEndTime = System.currentTimeMillis();
+        System.out.printf(
+            "minimized automaton in %fs, there are now %d states and %d transitions\n",
             (nEndTime-nStartTime)/1000.0,
-            aAutomaton.GetStateCount(),
-            aAutomaton.GetTransitionCount());
-        */
-        return aAutomaton;
-    }
+            aAutomatons.GetStateCount(),
+            aAutomatons.GetTransitionCount());
 
+        new ParserTablesGenerator(aAutomatons, maOptimizedSchemaBase.Namespaces)
+            .Generate(aParseTableFile);
+    }
+    
     
     
     
-    private final Schema maSchema;
+    private final SchemaBase maSchemaBase;
+    private SchemaBase maOptimizedSchemaBase;
+    private final Map<String,Schema> maTopLevelSchemas;
     private final Vector<String[]> maMainSchemaFiles;
-    private File maOutputDirectory;
+    private final Queue<String> maWorkList;
+    private final Vector<Runnable> maOutputOperations;
     private final Set<String> maSchemaFiles;
-    private final Queue<String> maTodo;
     private int mnTotalLineCount;
     private int mnTotalByteCount;
 }

Added: openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,58 @@
+package org.apache.openoffice.ooxml.schema;
+
+import org.apache.openoffice.ooxml.schema.automaton.HopcroftMinimizer;
+import org.apache.openoffice.ooxml.schema.automaton.State;
+import org.apache.openoffice.ooxml.schema.automaton.StateContainer;
+import org.apache.openoffice.ooxml.schema.automaton.StateContext;
+import org.apache.openoffice.ooxml.schema.automaton.Transition;
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** A simple test of the minimization algorithm for DFAs.
+ *
+ *  May lead to the use of a testing framework in the future.
+ */
+public class Test
+{
+    public static void main (final String ... aArgumentList)
+    {
+        new Test("S", new String[]{"E"}, new String[][]{
+            {"S", "A", "a"},
+            {"A", "B", "b"},
+            {"A", "C", "b"},
+            {"B", "E", "c"},
+            {"C", "E", "c"},
+        });
+    }
+    private Test (
+        final String sStartState,
+        final String[] aAcceptingStates,
+        final String[][] aTransitions)
+    {
+        final StateContainer aOriginalStateContainer = new StateContainer();
+        final StateContext aStates = new StateContext(
+            aOriginalStateContainer,
+            sStartState);
+        for (final String sAcceptingState : aAcceptingStates)
+        {
+            final State s = aStates.CreateState(sAcceptingState);
+            s.SetIsAccepting();
+        }
+        for (final String[] aTransition : aTransitions)
+        {
+            final State start = aStates.GetOrCreateState(
+                new QualifiedName(aTransition[0]),
+                null);
+            final State end = aStates.GetOrCreateState(
+                new QualifiedName(aTransition[1]),
+                null);
+            final QualifiedName element = new QualifiedName(aTransition[2]);
+            final String type = "T_"+aTransition[2];
+            
+            start.AddTransition(new Transition(start, end, element, type));
+        }            
+        HopcroftMinimizer.MinimizeDFA (
+            new StateContainer(),
+            aStates,
+            System.out);
+    }
+}

Added: openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,258 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** Convert an NFA into a DFA via the powerset construction (also called subset
+ *  construction).
+ */
+public class DFACreator
+{
+    /** For a given non-deterministic finite automaton create an equivalent
+     *  deterministic finite automaton.
+     */
+    public static FiniteAutomaton CreateDFAforNFA (
+        final StateContainer aDFAStateContainer,
+        final StateContext aNFAStateContext,
+        final QualifiedName aTypeName)
+    {
+        final DFACreator aCreator = new DFACreator(aDFAStateContainer, aNFAStateContext, aTypeName);
+        aCreator.CreateDFAforNFA();
+        return new FiniteAutomaton(aCreator.maDFAStateContext);
+    }
+
+    
+    
+    
+    private DFACreator (
+        final StateContainer aDFAStateContainer,
+        final StateContext aNFAStateContext,
+        final QualifiedName aTypeName)
+    {
+        maNFAStateContext = aNFAStateContext;
+        
+        // Create the set of state sets where each element corresponds to a
+        // state in the DFA.
+        maNFASetToDFAStateMap = new TreeMap<>();
+        maDFAStateContext = new StateContext(
+            aDFAStateContainer,
+            aTypeName == null
+                ? "<TOP-LEVEL>"
+                : aTypeName.GetStateName());
+        
+        maDFATransitions = new HashSet<>();
+        maAcceptingDFAStates = new Vector<>();
+    }
+    
+    
+    
+    
+    private void CreateDFAforNFA ()
+    {
+        final State aNFAStartState = maNFAStateContext.GetStartState();
+
+        // Initialize the creation process by adding the epsilon closure of the
+        // original start state to the work list.
+        final StateSet aStartSet = GetEpsilonClosure(new StateSet(aNFAStartState));
+        maNFASetToDFAStateMap.put(aStartSet, maDFAStateContext.GetStartState());
+
+        PropagateStateFlags(aStartSet, maDFAStateContext.GetStartState());
+
+        final Queue<StateSet> aWorklist = new LinkedList<>();
+        aWorklist.add(aStartSet);
+        
+        while ( ! aWorklist.isEmpty())
+        {
+            final Collection<StateSet> aAdditionalWorkList = ProcessTransitionFront(
+                aWorklist.poll());
+
+            aWorklist.addAll(aAdditionalWorkList);
+        }
+    }
+
+    
+    
+
+    private Collection<StateSet> ProcessTransitionFront (
+        final StateSet aSet)
+    {
+        final Set<StateSet> aLocalWorklist = new TreeSet<>();
+        
+        // Find all regular transitions that start from any state in the set.
+        final Map<String,Vector<Transition>> aTransitions = GetTransitionFront(aSet);
+        
+        // Create new state sets for states that are reachable via the same element and
+        // the following epsilon transitions.
+        for (final Entry<String,Vector<Transition>> aEntry : aTransitions.entrySet())
+        {
+            // Create new state sets for both the end state of the transition.
+            final StateSet aEpsilonClosure = GetEpsilonClosure(GetEndStateSet(aEntry.getValue()));
+    
+            // When these are new state sets then add them to the worklist
+            // and the set of sets.
+            State aDFAState = maNFASetToDFAStateMap.get(aEpsilonClosure);
+            if (aDFAState == null)
+            {
+                aLocalWorklist.add(aEpsilonClosure);
+                aDFAState = aEpsilonClosure.CreateStateForStateSet(maDFAStateContext);
+                PropagateStateFlags(aEpsilonClosure, aDFAState);
+                maNFASetToDFAStateMap.put(aEpsilonClosure, aDFAState);
+                if (aDFAState.IsAccepting())
+                    maAcceptingDFAStates.add(aDFAState);
+            }
+            
+            final State aStartState = maNFASetToDFAStateMap.get(aSet);
+            final QualifiedName aElementName = GetElementName(aEntry.getValue());
+            final String sElementTypeName = GetElementTypeName(aEntry.getValue());
+            assert(aElementName != null);
+            final Transition aTransition = new Transition(
+                aStartState,
+                aDFAState,
+                aElementName,
+                sElementTypeName);
+            aStartState.AddTransition(aTransition);
+            maDFATransitions.add(aTransition);
+        }
+        
+        return aLocalWorklist;
+    }
+    
+    
+    
+    
+    private QualifiedName GetElementName (final Vector<Transition> aTransitions)
+    {
+        for (final Transition aTransition : aTransitions)
+            return aTransition.GetElementName();
+        return null;
+    }
+
+
+
+
+    private String GetElementTypeName (final Vector<Transition> aTransitions)
+    {
+        for (final Transition aTransition : aTransitions)
+            return aTransition.GetElementTypeName();
+        return null;
+    }
+
+
+
+
+    /** Return the epsilon closure of the given set of states.
+     *  The result is the set of all states that are reachable via zero, one or
+     *  more epsilon transitions from at least one state in the given set of
+     *  states.
+     */
+    private StateSet GetEpsilonClosure ( final StateSet aSet)
+    {
+        final StateSet aClosure = new StateSet(aSet);
+        
+        final Queue<State> aWorkList = new LinkedList<>();
+        for (final State aState : aSet.GetStates())
+            aWorkList.add(aState);
+        
+        while( ! aWorkList.isEmpty())
+        {
+            final State aState = aWorkList.poll();
+            for (final EpsilonTransition aTransition : aState.GetEpsilonTransitions())
+            {
+                final State aEndState = aTransition.GetEndState();
+                if ( ! aClosure.ContainsState(aEndState))
+                {
+                    aClosure.AddState(aEndState);
+                    aWorkList.add(aEndState);
+                }
+            }
+        }
+        
+        return aClosure;
+    }
+
+    
+    
+    
+    /** Return the list of regular transitions (i.e. not epsilon transitions)
+     *  that start from any of the states in the given set.
+     *  The returned map is a partition of the transitions according to their
+     *  triggering XML element.
+     */
+    private Map<String, Vector<Transition>> GetTransitionFront (final StateSet aSet)
+    {
+        final Map<String, Vector<Transition>> aTransitions = new HashMap<>();
+
+        for (final State aState : aSet.GetStates())
+            for (final Transition aTransition : aState.GetTransitions())
+            {
+                final String sElementName;
+                final QualifiedName aElementName = aTransition.GetElementName();
+                if (aElementName != null)
+                    sElementName = aElementName.GetDisplayName();
+                else
+                    sElementName = null; // For skip transitions.
+                    
+                Vector<Transition> aElementTransitions = aTransitions.get(sElementName);
+                if (aElementTransitions == null)
+                {
+                    aElementTransitions = new Vector<>();
+                    aTransitions.put(sElementName, aElementTransitions);
+                }
+                aElementTransitions.add(aTransition);
+            }        
+        return aTransitions;
+    }
+    
+
+    
+    
+    /** Return a state set that contains all end states of all the given transitions.
+     */
+    private StateSet GetEndStateSet (final Iterable<Transition> aTransitions)
+    {
+        final StateSet aStateSet = new StateSet();
+        for (final Transition aTransition : aTransitions)
+            aStateSet.AddState(aTransition.GetEndState());
+        return aStateSet;
+    }
+    
+    
+    
+    
+    /** Propagate accepting state flag and skip data.
+     */
+    private void PropagateStateFlags (
+        final StateSet aNFAStateSet,
+        final State aDFAState)
+    {
+        for (final State aNFAState : aNFAStateSet.GetStates())
+        {
+            if (aNFAState.IsAccepting())
+                aDFAState.SetIsAccepting();
+            
+            for (final SkipData aSkipData : aNFAState.GetSkipData())
+                aDFAState.AddSkipData(aSkipData.Clone(aDFAState));
+        }
+    }
+    
+    
+    
+    
+    private final StateContext maNFAStateContext;
+
+    private final Map<StateSet,State> maNFASetToDFAStateMap;
+    private final StateContext maDFAStateContext;
+    private final Set<Transition> maDFATransitions;
+    private final Vector<State> maAcceptingDFAStates;
+}

Added: openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,49 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+/** A transition that leads from one state to another without consuming an
+ *  input token.
+ *
+ *  Used in the process of creating a validating parser.
+ */
+public class EpsilonTransition
+{
+    private final State maStartState;
+    private final State maEndState;
+
+
+
+
+    /** Create a transition between the two given states.
+     */
+    EpsilonTransition (
+        final State aStartState,
+        final State aEndState)
+    {
+        this.maStartState = aStartState;
+        this.maEndState = aEndState;
+    }
+
+
+
+
+    /** Return the state that the transition starts from.
+     */
+    public State GetStartState ()
+    {
+        return this.maStartState;
+    }
+
+
+
+
+    /** Return the state that the transition leads to.
+     */
+    public State GetEndState ()
+    {
+        return this.maEndState;
+    }
+
+
+
+
+    /** Return the full names of start and end state, separated by an arrow.
+     */
+    @Override
+    public String toString ()
+    {
+        final String sStartName = this.maStartState.GetFullname();
+        final String sEndName = this.maEndState.GetFullname();
+        return String.format("%s -> %s", sStartName, sEndName);
+    }
+}

Added: openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,141 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+
+/** A finite automaton, either deterministic (DFA) or non-deterministic (NFA).
+ *  There is one automaton for each complex type and one for the top level elements.
+ *  Transitions correspond to 'element' elements in the schema or a start tag in
+ *  the input file.  During parsing the current automaton is pushed on a stack
+ *  and the automaton that represents the complex type associated with the
+ *  starting element is made the current automaton.  An end tag pops an automaton
+ *  from the stack and replaces the current automaton with it.
+ *
+ *  The class is a thin view on a StateContext to which most methods delegate.
+ */
+public class FiniteAutomaton
+{
+    private final StateContext maStateContext;
+
+
+
+
+    /** Create an automaton for the states of the given context.
+     */
+    FiniteAutomaton (
+        final StateContext aContext)
+    {
+        this.maStateContext = aContext;
+    }
+
+
+
+
+    /** Return the number of states as reported by the state context.
+     */
+    public int GetStateCount ()
+    {
+        return this.maStateContext.GetStateCount();
+    }
+
+
+
+
+    /** Return the states as provided by the state context.
+     */
+    public Iterable<State> GetStates()
+    {
+        return this.maStateContext.GetStates();
+    }
+
+
+
+
+    /** Return the result of GetStatesSorted() of the state context.
+     */
+    public Iterable<State> GetStatesSorted ()
+    {
+        return this.maStateContext.GetStatesSorted();
+    }
+
+
+
+
+    /** Return the start state of the state context.
+     */
+    public State GetStartState ()
+    {
+        return this.maStateContext.GetStartState();
+    }
+
+
+
+
+    /** Return the accepting states as provided by the state context.
+     */
+    public Iterable<State> GetAcceptingStates ()
+    {
+        return this.maStateContext.GetAcceptingStates();
+    }
+
+
+
+
+    /** Treat this automaton as NFA and create a DFA for it.
+     *  The arguments are forwarded to DFACreator.CreateDFAforNFA().
+     */
+    public FiniteAutomaton CreateDFA (
+        final StateContainer aDFAContainer,
+        final QualifiedName aTypeName)
+    {
+        final FiniteAutomaton aDFA = DFACreator.CreateDFAforNFA(
+            aDFAContainer,
+            this.maStateContext,
+            aTypeName);
+        return aDFA;
+    }
+
+
+
+
+    /** Return the state context that was given to the constructor.
+     */
+    public StateContext GetStateContext()
+    {
+        return this.maStateContext;
+    }
+
+
+
+
+    /** Collect the transitions of all states into one new list.
+     */
+    public Iterable<Transition> GetTransitions ()
+    {
+        final Vector<Transition> aResult = new Vector<>();
+        for (final State aOwner : this.maStateContext.GetStates())
+        {
+            for (final Transition aCandidate : aOwner.GetTransitions())
+            {
+                aResult.add(aCandidate);
+            }
+        }
+        return aResult;
+    }
+
+
+
+
+    /** Return the sum of the transition counts of all states.
+     */
+    public int GetTransitionCount()
+    {
+        int nSum = 0;
+        for (final State aOwner : this.maStateContext.GetStates())
+        {
+            nSum += aOwner.GetTransitionCount();
+        }
+        return nSum;
+    }
+
+
+
+
+    /** Return the full name of the start state.
+     */
+    public String GetTypeName ()
+    {
+        final State aStartState = this.maStateContext.GetStartState();
+        return aStartState.GetFullname();
+    }
+}

Added: openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java
URL: http://svn.apache.org/viewvc/openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java?rev=1601582&view=auto
==============================================================================
--- openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java (added)
+++ openoffice/trunk/main/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java Tue Jun 10 09:35:39 2014
@@ -0,0 +1,155 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** As there is one FA for each complex type and one for the top level elements,
+ *  this container represents the whole set of schemas.
+ *
+ *  Automatons are stored under the name of their complex type.  The automaton
+ *  for the top level elements is looked up under the null key.
+ */
+public class FiniteAutomatonContainer
+{
+    /** Create an empty container.
+     *
+     *  @param aStateContainer
+     *      Not used by this class at the moment.
+     */
+    FiniteAutomatonContainer (final StateContainer aStateContainer)
+    {
+        maComplexTypeNameToAutomatonMap = new HashMap<>();
+    }
+
+
+
+
+    /** Add the given automaton under the given name.  An automaton that was
+     *  previously added under the same name is replaced.
+     */
+    public void AddAutomaton (
+        final QualifiedName aElementName,
+        final FiniteAutomaton aAutomaton)
+    {
+        maComplexTypeNameToAutomatonMap.put(aElementName, aAutomaton);
+    }
+
+
+
+
+    /** Return all automatons of the container.
+     */
+    public Iterable<FiniteAutomaton> GetAutomatons()
+    {
+        return maComplexTypeNameToAutomatonMap.values();
+    }
+
+
+
+
+    /** Return the number of automatons in the container.
+     */
+    public int GetAutomatonCount ()
+    {
+        return maComplexTypeNameToAutomatonMap.size();
+    }
+
+
+
+
+    /** Collect the states of all automatons into one new list.
+     */
+    public Iterable<State> GetStates()
+    {
+        final Vector<State> aStates = new Vector<>();
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            for (final State aState : aAutomaton.GetStates())
+                aStates.add(aState);
+        return aStates;
+    }
+
+
+
+
+    /** Return the sum of the state counts of all automatons.
+     */
+    public int GetStateCount()
+    {
+        int nStateCount = 0;
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            nStateCount += aAutomaton.GetStateCount();
+        return nStateCount;
+    }
+
+
+
+
+    /** Collect the transitions of all automatons into one new list.
+     */
+    public Iterable<Transition> GetTransitions ()
+    {
+        final Vector<Transition> aTransitions = new Vector<>();
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            for (final Transition aTransition : aAutomaton.GetTransitions())
+                aTransitions.add(aTransition);
+        return aTransitions;
+    }
+
+
+
+
+    /** Return the sum of the transition counts of all automatons.
+     */
+    public int GetTransitionCount ()
+    {
+        int nTransitionCount = 0;
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            nTransitionCount += aAutomaton.GetTransitionCount();
+        return nTransitionCount;
+    }
+
+
+
+
+    /** Treat all automatons as NFAs and create a new container that holds a
+     *  DFA for each of them, stored under the same name.  All DFAs share one
+     *  new state container.
+     */
+    public FiniteAutomatonContainer CreateDFAs ()
+    {
+        final StateContainer aDFAStateContainer = new StateContainer();
+        final FiniteAutomatonContainer aDFAs = new FiniteAutomatonContainer(aDFAStateContainer);
+        for (final Entry<QualifiedName, FiniteAutomaton> aEntry : maComplexTypeNameToAutomatonMap.entrySet())
+        {
+            aDFAs.AddAutomaton(
+                aEntry.getKey(),
+                aEntry.getValue().CreateDFA(
+                    aDFAStateContainer,
+                    aEntry.getKey()));
+        }
+        return aDFAs;
+    }
+
+
+
+
+    /** Run the Hopcroft minimizer on all automatons and return a new container
+     *  that holds the results, stored under the same names.  All minimized
+     *  automatons share one new state container.
+     *  The minimizer writes its log to the file /tmp/minimization.log.
+     *
+     *  @return
+     *      The container of minimized automatons or null when the log file
+     *      can not be opened.
+     */
+    public FiniteAutomatonContainer MinimizeDFAs ()
+    {
+        final PrintStream aLog;
+        try
+        {
+            aLog = new PrintStream(new FileOutputStream(new File("/tmp/minimization.log")));
+        }
+        catch(Exception e)
+        {
+            e.printStackTrace();
+            return null;
+        }
+
+        try
+        {
+            final StateContainer aNewStateContainer = new StateContainer();
+            final FiniteAutomatonContainer aDFAs = new FiniteAutomatonContainer(aNewStateContainer);
+            for (final Entry<QualifiedName, FiniteAutomaton> aEntry : maComplexTypeNameToAutomatonMap.entrySet())
+            {
+                aDFAs.AddAutomaton(
+                    aEntry.getKey(),
+                    HopcroftMinimizer.MinimizeDFA(
+                        aNewStateContainer,
+                        aEntry.getValue().GetStateContext(),
+                        aLog));
+            }
+            return aDFAs;
+        }
+        finally
+        {
+            // Release the file handle and flush pending output, also when the
+            // minimization fails with an exception.
+            aLog.close();
+        }
+    }
+
+
+
+
+    /** Return the automaton that was added under the null name or null when
+     *  there is none.
+     */
+    public FiniteAutomaton GetTopLevelAutomaton ()
+    {
+        return maComplexTypeNameToAutomatonMap.get(null);
+    }
+
+
+
+
+    private final Map<QualifiedName, FiniteAutomaton> maComplexTypeNameToAutomatonMap;
+}



Mime
View raw message