lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [19/33] lucenenet git commit: Lucene.Net.Benchmark: Added Sax and TagSoup to the Support folder.
Date Sun, 06 Aug 2017 17:59:17 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLSchema.tt
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLSchema.tt b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLSchema.tt
new file mode 100644
index 0000000..5f4a839
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/HTMLSchema.tt
@@ -0,0 +1,72 @@
+<#@ template debug="true" hostspecific="true" language="C#" #>
+<#@ assembly name="System.Xml" #>
+<#@ import namespace="System.IO" #>
+<#@ import namespace="System.Xml.Xsl" #>
+<#@ output extension=".Generated.cs" #>
+<# /*
+# -----------------------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the ""License""); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# -----------------------------------------------------------------------------------
+*/ #>
+//------------------------------------------------------------------------------
+// <auto-generated>
+//     This code was generated by a tool.
+//
+//     Changes to this file may cause incorrect behavior and will be lost if
+//     the code is regenerated.
+// </auto-generated>
+//------------------------------------------------------------------------------
+namespace TagSoup 
+{
+	/// <summary>
+	/// This class provides a Schema that has been preinitialized with HTML
+	/// elements, attributes, and character entity declarations.  All the declarations
+	/// normally provided with HTML 4.01 are given, plus some that are IE-specific
+	/// and NS4-specific.  Attribute declarations of type CDATA with no default
+	/// value are not included.
+	/// </summary>
+	public class HTMLSchema : Schema 
+	{
+		// HTMLModels begin
+		<#
+			XslCompiledTransform transform = new XslCompiledTransform(true); // true = enableDebug
+			transform.Load(this.Host.ResolvePath("tssl/tssl-models.xslt")); // first stylesheet; the path is resolved by the T4 host
+			using(StringWriter writer = new StringWriter()) 
+			{
+				transform.Transform(this.Host.ResolvePath("definitions/html.tssl"), null, writer); // input: definitions/html.tssl, no XSLT arguments, result captured in 'writer'
+				Write(writer.ToString()); // append the transform result to the generated output
+			}
+		#>  // HTMLModels end
+
+		/// <summary>
+		/// Returns a newly constructed HTMLSchema object independent of
+		/// any existing ones.
+		/// </summary>
+		public HTMLSchema() 
+		{
+			<#
+			  transform.Load(this.Host.ResolvePath("tssl/tssl.xslt")); // reloads the 'transform' declared in the first control block with the second stylesheet
+			  using(StringWriter writer = new StringWriter()) 
+			  {
+					transform.Transform(this.Host.ResolvePath("definitions/html.tssl"), null, writer); // same input document as the first pass, different stylesheet
+					Write(writer.ToString());
+			  }
+			#>
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs
new file mode 100644
index 0000000..711d46a
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXScanner.cs
@@ -0,0 +1,138 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// This file is part of TagSoup.
+// 
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.  You may also distribute
+// and/or modify it under version 2.1 of the Academic Free License.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+// 
+// 
+// PYX Scanner
+
+using System.IO;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// A <see cref="IScanner"/> that accepts PYX format instead of HTML.
+    /// Useful primarily for debugging.
+    /// </summary>
+    public class PYXScanner : IScanner
+    {
+        /// <summary>No-op: this scanner does not track a document locator.</summary>
+        public virtual void ResetDocumentLocator(string publicid, string systemid)
+        {
+            // Need this method for interface compatibility, but note
+            // that PYXScanner does not implement Locator.
+        }
+
+        /// <summary>
+        /// Reads PYX records, one per line, from <paramref name="br"/> and reports each
+        /// to <paramref name="h"/>; the first character of a line selects the event.
+        /// Blank lines and unknown record types are skipped; EOF is reported at the end.
+        /// </summary>
+        public virtual void Scan(TextReader br, IScanHandler h)
+        {
+            string s;
+            char[] buff = new char[0]; // so EOF never receives a null buffer on empty input
+            bool instag = false; // true while a start tag is open and may still get attributes
+            while ((s = br.ReadLine()) != null)
+            {
+                int size = s.Length;
+                if (size == 0)
+                {
+                    continue; // blank line: there is no record-type character to dispatch on
+                }
+                buff = s.ToCharArray();
+                switch (buff[0])
+                {
+                    case '(':
+                        if (instag)
+                        {
+                            h.STagC(buff, 0, 0);
+                            instag = false;
+                        }
+                        h.GI(buff, 1, size - 1);
+                        instag = true;
+                        break;
+                    case ')':
+                        if (instag)
+                        {
+                            h.STagC(buff, 0, 0);
+                            instag = false;
+                        }
+                        h.ETag(buff, 1, size - 1);
+                        break;
+                    case '?':
+                        if (instag)
+                        {
+                            h.STagC(buff, 0, 0);
+                            instag = false;
+                        }
+                        h.PI(buff, 1, size - 1);
+                        break;
+                    case 'A':
+                        int sp = s.IndexOf(' ');
+                        int nameEnd = sp < 0 ? size : sp; // no separator: the rest of the line is the name
+                        int valStart = sp < 0 ? size : sp + 1; // ...and the value is then empty
+                        h.Aname(buff, 1, nameEnd - 1);
+                        h.Aval(buff, valStart, size - valStart);
+                        break;
+                    case '-':
+                        if (instag)
+                        {
+                            h.STagC(buff, 0, 0);
+                            instag = false;
+                        }
+                        if (s.Equals("-\\n"))
+                        {
+                            buff[0] = '\n';
+                            h.PCDATA(buff, 0, 1);
+                        }
+                        else
+                        {
+                            // FIXME:
+                            // Does not decode \t and \\ in input
+                            h.PCDATA(buff, 1, size - 1);
+                        }
+                        break;
+                    case 'E':
+                        if (instag)
+                        {
+                            h.STagC(buff, 0, 0);
+                            instag = false;
+                        }
+                        h.Entity(buff, 1, size - 1);
+                        break;
+                    default:
+                        // Unrecognized record type: the line is ignored.
+                        break;
+                }
+            }
+            h.EOF(buff, 0, 0);
+        }
+
+        /// <summary>Does nothing in this scanner.</summary>
+        public void StartCDATA()
+        {
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs
new file mode 100644
index 0000000..ff47d0d
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/PYXWriter.cs
@@ -0,0 +1,286 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// PYX Writer
+// FIXME: does not do escapes in attribute values
+// FIXME: outputs entities as bare '&' character
+
+using Sax;
+using Sax.Ext;
+using System.IO;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// A <see cref="IContentHandler"/> that generates PYX format instead of XML.
+    /// Primarily useful for debugging.
+    /// </summary>
+    public class PYXWriter : IScanHandler, IContentHandler, ILexicalHandler
+    {
+        private readonly TextWriter theWriter; // destination of all PYX output
+        private string attrName; // name saved by Aname until Aval or Adup consumes it
+
+        // ScanHandler implementation
+
+        public void Adup(char[] buff, int offset, int length)
+        {
+            theWriter.WriteLine(attrName); // the saved attribute name is written as its value
+            attrName = null;
+        }
+
+        public void Aname(char[] buff, int offset, int length)
+        {
+            theWriter.Write('A');
+            theWriter.Write(buff, offset, length);
+            theWriter.Write(' ');
+            attrName = new string(buff, offset, length);
+        }
+
+        public void Aval(char[] buff, int offset, int length)
+        {
+            theWriter.Write(buff, offset, length); // completes the "A<name> " line begun by Aname
+            theWriter.WriteLine();
+            attrName = null;
+        }
+
+        public void Cmnt(char[] buff, int offset, int length)
+        {
+            // Comments are not written to the PYX output.
+        }
+
+        public void Entity(char[] buff, int offset, int length)
+        {
+        }
+
+        public int GetEntity()
+        {
+            return 0;
+        }
+
+        public void EOF(char[] buff, int offset, int length)
+        {
+            theWriter.Close(); // end of input: the output writer is closed as well
+        }
+
+        public void ETag(char[] buff, int offset, int length)
+        {
+            theWriter.Write(')');
+            theWriter.Write(buff, offset, length);
+            theWriter.WriteLine();
+        }
+
+        public void Decl(char[] buff, int offset, int length)
+        {
+        }
+
+        public void GI(char[] buff, int offset, int length)
+        {
+            theWriter.Write('(');
+            theWriter.Write(buff, offset, length);
+            theWriter.WriteLine();
+        }
+
+        public void CDSect(char[] buff, int offset, int length)
+        {
+            PCDATA(buff, offset, length); // CDATA sections are written like ordinary text
+        }
+
+        public void PCDATA(char[] buff, int offset, int length)
+        {
+            if (length == 0)
+            {
+                return; // nothing to do
+            }
+            bool inProgress = false; // true while a "-..." text line is open
+            length += offset; // from here on 'length' is the exclusive end index
+            for (int i = offset; i < length; i++)
+            {
+                if (buff[i] == '\n')
+                {
+                    if (inProgress)
+                    {
+                        theWriter.WriteLine();
+                    }
+                    theWriter.WriteLine("-\\n"); // a newline gets a record line of its own
+                    inProgress = false;
+                }
+                else
+                {
+                    if (!inProgress)
+                    {
+                        theWriter.Write('-');
+                    }
+                    switch (buff[i])
+                    {
+                        case '\t':
+                            theWriter.Write("\\t");
+                            break;
+                        case '\\':
+                            theWriter.Write("\\\\");
+                            break;
+                        default:
+                            theWriter.Write(buff[i]);
+                            break;
+                    }
+                    inProgress = true;
+                }
+            }
+            if (inProgress)
+            {
+                theWriter.WriteLine();
+            }
+        }
+
+        public void PITarget(char[] buff, int offset, int length)
+        {
+            theWriter.Write('?');
+            theWriter.Write(buff, offset, length);
+            theWriter.Write(' ');
+        }
+
+        public void PI(char[] buff, int offset, int length)
+        {
+            theWriter.Write(buff, offset, length);
+            theWriter.WriteLine();
+        }
+
+        public void STagC(char[] buff, int offset, int length)
+        {
+            // Writes nothing; the "!" marker line used by STagE is disabled here (FIXME).
+        }
+
+        public void STagE(char[] buff, int offset, int length)
+        {
+            theWriter.WriteLine("!"); // FIXME
+        }
+
+        // SAX ContentHandler implementation
+
+        public void Characters(char[] buff, int offset, int length)
+        {
+            PCDATA(buff, offset, length);
+        }
+
+        public void EndDocument()
+        {
+            theWriter.Close();
+        }
+
+        public void EndElement(string uri, string localname, string qname)
+        {
+            if (qname.Length == 0)
+            {
+                qname = localname; // fall back to the local name when no qualified name is given
+            }
+            theWriter.Write(')');
+            theWriter.WriteLine(qname);
+        }
+
+        public void EndPrefixMapping(string prefix)
+        {
+        }
+
+        public void IgnorableWhitespace(char[] buff, int offset, int length)
+        {
+            Characters(buff, offset, length);
+        }
+
+        public void ProcessingInstruction(string target, string data)
+        {
+            theWriter.Write('?');
+            theWriter.Write(target);
+            theWriter.Write(' ');
+            theWriter.WriteLine(data);
+        }
+
+        public void SetDocumentLocator(ILocator locator)
+        {
+        }
+
+        public void SkippedEntity(string name)
+        {
+        }
+
+        public void StartDocument()
+        {
+        }
+
+        public void StartElement(string uri, string localname, string qname, IAttributes atts)
+        {
+            if (qname.Length == 0)
+            {
+                qname = localname;
+            }
+            theWriter.Write('(');
+            theWriter.WriteLine(qname);
+            int length = atts.Length;
+            for (int i = 0; i < length; i++)
+            {
+                qname = atts.GetQName(i); // 'qname' is reused for each attribute's name
+                if (qname.Length == 0)
+                {
+                    qname = atts.GetLocalName(i);
+                }
+                theWriter.Write('A');
+                theWriter.Write(qname);
+                theWriter.Write(' ');
+                theWriter.WriteLine(atts.GetValue(i));
+            }
+        }
+
+        public void StartPrefixMapping(string prefix, string uri)
+        {
+        }
+
+        public void Comment(char[] ch, int start, int length)
+        {
+            Cmnt(ch, start, length);
+        }
+
+        public void EndCDATA()
+        {
+        }
+
+        public void EndDTD()
+        {
+        }
+
+        public void EndEntity(string name)
+        {
+        }
+
+        public void StartCDATA()
+        {
+        }
+
+        public void StartDTD(string name, string publicId, string systemId)
+        {
+        }
+
+        public void StartEntity(string name)
+        {
+        }
+
+        // Constructor
+        /// <summary>Creates a writer that sends its PYX output to <paramref name="w"/>.</summary>
+        public PYXWriter(TextWriter w)
+        {
+            if (w == null)
+            {
+                throw new System.ArgumentNullException("w"); // fail here rather than on the first write
+            }
+            theWriter = w;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs
new file mode 100644
index 0000000..a0a5463
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs
@@ -0,0 +1,1484 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// The TagSoup parser
+
+using Lucene.Net.Support;
+using Sax;
+using Sax.Ext;
+using Sax.Helpers;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace TagSoup
+{
+    /// <summary>
+    ///   The SAX parser class.
+    /// </summary>
+    public class Parser : DefaultHandler, IScanHandler, IXMLReader, ILexicalHandler
+    {
+        // XMLReader implementation
+
+        private IContentHandler theContentHandler;
+        private ILexicalHandler theLexicalHandler;
+        private IDTDHandler theDTDHandler;
+        private IErrorHandler theErrorHandler;
+        private IEntityResolver theEntityResolver;
+        private Schema theSchema;
+        private IScanner theScanner;
+        private IAutoDetector theAutoDetector;
+
+        // Default values for feature flags
+
+        private const bool DEFAULT_NAMESPACES = true;
+        private const bool DEFAULT_IGNORE_BOGONS = false;
+        private const bool DEFAULT_BOGONS_EMPTY = false;
+        private const bool DEFAULT_ROOT_BOGONS = true;
+        private const bool DEFAULT_DEFAULT_ATTRIBUTES = true;
+        private const bool DEFAULT_TRANSLATE_COLONS = false;
+        private const bool DEFAULT_RESTART_ELEMENTS = true;
+        private const bool DEFAULT_IGNORABLE_WHITESPACE = false;
+        private const bool DEFAULT_CDATA_ELEMENTS = true;
+
+        // Feature flags.  
+
+        private bool namespaces = DEFAULT_NAMESPACES;
+        private bool ignoreBogons = DEFAULT_IGNORE_BOGONS;
+        private bool bogonsEmpty = DEFAULT_BOGONS_EMPTY;
+        private bool rootBogons = DEFAULT_ROOT_BOGONS;
+        private bool defaultAttributes = DEFAULT_DEFAULT_ATTRIBUTES;
+        private bool translateColons = DEFAULT_TRANSLATE_COLONS;
+        private bool restartElements = DEFAULT_RESTART_ELEMENTS;
+        private bool ignorableWhitespace = DEFAULT_IGNORABLE_WHITESPACE;
+        private bool cDataElements = DEFAULT_CDATA_ELEMENTS;
+
+        /// <summary>
+        ///   A value of "true" indicates namespace URIs and unprefixed local
+        ///   names for element and attribute names will be available.
+        /// </summary>
+        public const string NAMESPACES_FEATURE = "http://xml.org/sax/features/namespaces";
+
+        /// <summary>
+        ///   A value of "true" indicates that XML qualified names (with prefixes)
+        ///   and attributes (including xmlns* attributes) will be available.
+        ///   We don't support this value.
+        /// </summary>
+        public const string NAMESPACE_PREFIXES_FEATURE = "http://xml.org/sax/features/namespace-prefixes";
+
+        /// <summary>
+        ///   Reports whether this parser processes external general entities
+        ///   (it doesn't).
+        /// </summary>
+        public const string EXTERNAL_GENERAL_ENTITIES_FEATURE = "http://xml.org/sax/features/external-general-entities";
+
+        /// <summary>
+        ///   Reports whether this parser processes external parameter entities
+        ///   (it doesn't).
+        /// </summary>
+        public const string EXTERNAL_PARAMETER_ENTITIES_FEATURE = "http://xml.org/sax/features/external-parameter-entities";
+
+        /// <summary>
+        ///   May be examined only during a parse, after the startDocument()
+        ///   callback has been completed; read-only. The value is true if
+        ///   the document specified standalone="yes" in its XML declaration,
+        ///   and otherwise is false.  (It's always false.)
+        /// </summary>
+        public const string IS_STANDALONE_FEATURE = "http://xml.org/sax/features/is-standalone";
+
+        /// <summary>
+        ///   A value of "true" indicates that the LexicalHandler will report
+        ///   the beginning and end of parameter entities (it won't).
+        /// </summary>
+        public const string LEXICAL_HANDLER_PARAMETER_ENTITIES_FEATURE =
+            "http://xml.org/sax/features/lexical-handler/parameter-entities";
+
+        /// <summary>
+        ///   A value of "true" indicates that system IDs in declarations will
+        ///   be absolutized (relative to their base URIs) before reporting.
+        ///   (This returns true but doesn't actually do anything.)
+        /// </summary>
+        public const string RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris";
+
+        /// <summary>
+        /// Has a value of "true" if all XML names (for elements,
+        /// prefixes, attributes, entities, notations, and local
+        /// names), as well as Namespace URIs, will have been interned
+        /// using <see cref="string.Intern" />. This supports fast testing of
+        /// equality/inequality against string constants, rather than forcing
+        /// slower calls to <see cref="string.Equals(object)" />.  (We always intern.)
+        /// </summary>
+        public const string STRING_INTERNING_FEATURE = "http://xml.org/sax/features/string-interning";
+
+        /// <summary>
+        /// Returns "true" if the Attributes objects passed by this
+        /// parser in <see cref="IContentHandler.StartElement" /> implement the
+        /// <see cref="Sax.Net.Ext.IAttributes2" /> interface.	(They don't.)
+        /// </summary>
+        public const string USE_ATTRIBUTES2_FEATURE = "http://xml.org/sax/features/use-attributes2";
+
+        /// <summary>
+        ///   Returns "true" if the Locator objects passed by this parser
+        ///   in <see cref="IContentHandler.SetDocumentLocator" /> implement the
+        ///   <see cref="Sax.Net.Ext.ILocator2" /> interface.  (They don't.)
+        /// </summary>
+        public const string USE_LOCATOR2_FEATURE = "http://xml.org/sax/features/use-locator2";
+        /// <summary>
+        ///   Returns "true" if, when setEntityResolver is given an object
+        ///   implementing the  <see cref="Sax.Net.Ext.IEntityResolver2" /> interface,
+        ///   those new methods will be used.  (They won't be.)
+        /// </summary>
+        public const string USE_ENTITY_RESOLVER2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2";
+
+        /// <summary>
+        ///   Controls whether the parser is reporting all validity errors
+        ///   (We don't report any validity errors.)
+        /// </summary>
+        public const string VALIDATION_FEATURE = "http://xml.org/sax/features/validation";
+
+        /// <summary>
+        ///   Controls whether the parser reports Unicode normalization
+        ///   errors as described in section 2.13 and Appendix B of the XML
+        ///   1.1 Recommendation.  (We don't normalize.)
+        /// </summary>
+        public const string UNICODE_NORMALIZATION_CHECKING_FEATURE =
+            "http://xml.org/sax/features/unicode-normalization-checking";
+
+        /// <summary>
+        ///   Controls whether, when the namespace-prefixes feature is set,
+        ///   the parser treats namespace declaration attributes as being in
+        ///   the http://www.w3.org/2000/xmlns/ namespace.  (It doesn't.)
+        /// </summary>
+        public const string XMLNS_URIS_FEATURE = "http://xml.org/sax/features/xmlns-uris";
+
+        /// <summary>
+        ///   Returns <c>true</c> if the parser supports both XML 1.1 and XML 1.0.
+        ///   (Always <c>false</c>.)
+        /// </summary>
+        public const string XML11_FEATURE = "http://xml.org/sax/features/xml-1.1";
+
+        /// <summary>
+        ///   A value of <c>true</c> indicates that the parser will ignore
+        ///   unknown elements.
+        /// </summary>
+        public const string IGNORE_BOGONS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons";
+
+        /// <summary>
+        ///   A value of <c>true</c> indicates that the parser will give unknown
+        ///   elements a content model of EMPTY; a value of <c>false</c>, a
+        ///   content model of ANY.
+        /// </summary>
+        public const string BOGONS_EMPTY_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/bogons-empty";
+
+        /// <summary>
+        ///   A value of <c>true</c> indicates that the parser will allow unknown
+        ///   elements to be the root element.
+        /// </summary>
+        public const string ROOT_BOGONS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/root-bogons";
+
+        /// <summary>
+        ///   A value of <c>true</c> indicates that the parser will return default
+        ///   attribute values for missing attributes that have default values.
+        /// </summary>
+        public const string DEFAULT_ATTRIBUTES_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/default-attributes";
+
+        /// <summary>
+        ///   A value of <c>true</c> indicates that the parser will
+        ///   translate colons into underscores in names.
+        /// </summary>
+        public const string TRANSLATE_COLONS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/translate-colons";
+
+        /// <summary>
+        ///   A value of <c>true</c> indicates that the parser will
+        ///   attempt to restart the restartable elements.
+        /// </summary>
+        public const string RESTART_ELEMENTS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/restart-elements";
+
+        /// <summary>
+        ///   A value of "true" indicates that the parser will
+        ///   transmit whitespace in element-only content via the SAX
+        ///   ignorableWhitespace callback.  Normally this is not done,
+        ///   because HTML is an SGML application and SGML suppresses
+        ///   such whitespace.
+        /// </summary>
+        public const string IGNORABLE_WHITESPACE_FEATURE =
+            "http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace";
+
+        /// <summary>
+        ///   A value of "true" indicates that the parser will treat CDATA
+        ///   elements specially.  Normally true, since the input is by
+        ///   default HTML.
+        /// </summary>
+        public const string CDATA_ELEMENTS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/cdata-elements";
+
+        /// <summary>
+        ///   Used to see some syntax events that are essential in some
+        ///   applications: comments, CDATA delimiters, selected general
+        ///   entity inclusions, and the start and end of the DTD (and
+        ///   declaration of document element name). The Object must implement
+        ///   <see cref="ILexicalHandler" />
+        /// </summary>
+        public const string LEXICAL_HANDLER_PROPERTY = "http://xml.org/sax/properties/lexical-handler";
+
+        /// <summary>
+        ///   Specifies the Scanner object this Parser uses.
+        /// </summary>
+        public const string SCANNER_PROPERTY = "http://www.ccil.org/~cowan/tagsoup/properties/scanner";
+
+        /// <summary>
+        ///   Specifies the Schema object this Parser uses.
+        /// </summary>
+        public const string SCHEMA_PROPERTY = "http://www.ccil.org/~cowan/tagsoup/properties/schema";
+
+        /// <summary>
+        ///   Specifies the AutoDetector (for encoding detection) this Parser uses.
+        /// </summary>
+        public const string AUTO_DETECTOR_PROPERTY = "http://www.ccil.org/~cowan/tagsoup/properties/auto-detector";
+
+
+        // Due to sucky Java order of initialization issues, these
+        // entries are maintained separately from the initial values of
+        // the corresponding instance variables, but care must be taken
+        // to keep them in sync.
+
+        private readonly Hashtable features = new Hashtable {
+            { NAMESPACES_FEATURE, DEFAULT_NAMESPACES },
+            { NAMESPACE_PREFIXES_FEATURE, false },
+            { EXTERNAL_GENERAL_ENTITIES_FEATURE, false },
+            { EXTERNAL_PARAMETER_ENTITIES_FEATURE, false },
+            { IS_STANDALONE_FEATURE, false },
+            { LEXICAL_HANDLER_PARAMETER_ENTITIES_FEATURE, false },
+            { RESOLVE_DTD_URIS_FEATURE, true },
+            { STRING_INTERNING_FEATURE, true },
+            { USE_ATTRIBUTES2_FEATURE, false },
+            { USE_LOCATOR2_FEATURE, false },
+            { USE_ENTITY_RESOLVER2_FEATURE, false },
+            { VALIDATION_FEATURE, false },
+            { XMLNS_URIS_FEATURE, false },
+            { XML11_FEATURE, false },
+            { IGNORE_BOGONS_FEATURE, DEFAULT_IGNORE_BOGONS },
+            { BOGONS_EMPTY_FEATURE, DEFAULT_BOGONS_EMPTY },
+            { ROOT_BOGONS_FEATURE, DEFAULT_ROOT_BOGONS },
+            { DEFAULT_ATTRIBUTES_FEATURE, DEFAULT_DEFAULT_ATTRIBUTES },
+            { TRANSLATE_COLONS_FEATURE, DEFAULT_TRANSLATE_COLONS },
+            { RESTART_ELEMENTS_FEATURE, DEFAULT_RESTART_ELEMENTS },
+            { IGNORABLE_WHITESPACE_FEATURE, DEFAULT_IGNORABLE_WHITESPACE },
+            { CDATA_ELEMENTS_FEATURE, DEFAULT_CDATA_ELEMENTS },
+        };
+
+        public virtual bool GetFeature(string name)
+        {
+            if (!features.ContainsKey(name)) // reject names missing from the feature table
+            {
+                throw new SAXNotRecognizedException("Unknown feature " + name);
+            }
+            return (bool)features[name]; // the table only ever stores boxed bools
+        }
+
+        /// <summary>
+        /// Sets the named feature flag and, for the flags this parser caches in a
+        /// field, updates that field as well.
+        /// </summary>
+        /// <exception cref="SAXNotRecognizedException">The name is not a known feature.</exception>
+        public virtual void SetFeature(string name, bool value)
+        {
+            if (!features.ContainsKey(name))
+            {
+                throw new SAXNotRecognizedException("Unknown feature " + name);
+            }
+            features[name] = value;
+
+            // Keep the cached field in step with the table; other features have no field.
+            switch (name)
+            {
+                case NAMESPACES_FEATURE:
+                    namespaces = value;
+                    break;
+                case IGNORE_BOGONS_FEATURE:
+                    ignoreBogons = value;
+                    break;
+                case BOGONS_EMPTY_FEATURE:
+                    bogonsEmpty = value;
+                    break;
+                case ROOT_BOGONS_FEATURE:
+                    rootBogons = value;
+                    break;
+                case DEFAULT_ATTRIBUTES_FEATURE:
+                    defaultAttributes = value;
+                    break;
+                case TRANSLATE_COLONS_FEATURE:
+                    translateColons = value;
+                    break;
+                case RESTART_ELEMENTS_FEATURE:
+                    restartElements = value;
+                    break;
+                case IGNORABLE_WHITESPACE_FEATURE:
+                    ignorableWhitespace = value;
+                    break;
+                case CDATA_ELEMENTS_FEATURE:
+                    cDataElements = value;
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Returns the object currently registered for a property.
+        /// </summary>
+        /// <param name="name">The property name.</param>
+        /// <returns>
+        /// The lexical handler (or <c>null</c> while the parser itself is acting
+        /// as the lexical handler), the scanner, the schema or the auto-detector,
+        /// depending on <paramref name="name"/>.
+        /// </returns>
+        /// <exception cref="SAXNotRecognizedException">
+        /// <paramref name="name"/> is none of the four supported property names.
+        /// </exception>
+        public virtual object GetProperty(string name)
+        {
+            object result;
+            if (name.Equals(LEXICAL_HANDLER_PROPERTY))
+            {
+                // The parser standing in as its own handler is reported as "none".
+                if (theLexicalHandler == this)
+                {
+                    result = null;
+                }
+                else
+                {
+                    result = theLexicalHandler;
+                }
+            }
+            else if (name.Equals(SCANNER_PROPERTY))
+            {
+                result = theScanner;
+            }
+            else if (name.Equals(SCHEMA_PROPERTY))
+            {
+                result = theSchema;
+            }
+            else if (name.Equals(AUTO_DETECTOR_PROPERTY))
+            {
+                result = theAutoDetector;
+            }
+            else
+            {
+                throw new SAXNotRecognizedException("Unknown property " + name);
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Registers the object to use for a property.
+        /// </summary>
+        /// <param name="name">The property name.</param>
+        /// <param name="value">
+        /// The new value. For the lexical handler property <c>null</c> makes the
+        /// parser act as its own handler; for every other property the value
+        /// must be an instance of the expected type.
+        /// </param>
+        /// <exception cref="SAXNotSupportedException">
+        /// <paramref name="value"/> is not of the type the property requires.
+        /// </exception>
+        /// <exception cref="SAXNotRecognizedException">
+        /// <paramref name="name"/> is none of the four supported property names.
+        /// </exception>
+        public virtual void SetProperty(string name, object value)
+        {
+            if (name.Equals(LEXICAL_HANDLER_PROPERTY))
+            {
+                if (value == null)
+                {
+                    theLexicalHandler = this;
+                    return;
+                }
+                if (!(value is ILexicalHandler))
+                {
+                    throw new SAXNotSupportedException("Your lexical handler is not a ILexicalHandler");
+                }
+                theLexicalHandler = (ILexicalHandler)value;
+                return;
+            }
+            if (name.Equals(SCANNER_PROPERTY))
+            {
+                if (!(value is IScanner))
+                {
+                    throw new SAXNotSupportedException("Your scanner is not a IScanner");
+                }
+                theScanner = (IScanner)value;
+                return;
+            }
+            if (name.Equals(SCHEMA_PROPERTY))
+            {
+                if (!(value is Schema))
+                {
+                    throw new SAXNotSupportedException("Your schema is not a Schema");
+                }
+                theSchema = (Schema)value;
+                return;
+            }
+            if (name.Equals(AUTO_DETECTOR_PROPERTY))
+            {
+                if (!(value is IAutoDetector))
+                {
+                    throw new SAXNotSupportedException("Your auto-detector is not an IAutoDetector");
+                }
+                theAutoDetector = (IAutoDetector)value;
+                return;
+            }
+            throw new SAXNotRecognizedException("Unknown property " + name);
+        }
+
+        /// <summary>
+        /// Gets or sets the entity resolver. Reads as <c>null</c> while the parser
+        /// itself is the resolver; assigning <c>null</c> makes the parser the resolver.
+        /// </summary>
+        public virtual IEntityResolver EntityResolver
+        {
+            get
+            {
+                if (theEntityResolver == this)
+                {
+                    return null;
+                }
+                return theEntityResolver;
+            }
+            set
+            {
+                if (value == null)
+                {
+                    theEntityResolver = this;
+                }
+                else
+                {
+                    theEntityResolver = value;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the DTD handler. Reads as <c>null</c> while the parser
+        /// itself is the handler; assigning <c>null</c> makes the parser the handler.
+        /// </summary>
+        public virtual IDTDHandler DTDHandler
+        {
+            get
+            {
+                if (theDTDHandler == this)
+                {
+                    return null;
+                }
+                return theDTDHandler;
+            }
+            set
+            {
+                if (value == null)
+                {
+                    theDTDHandler = this;
+                }
+                else
+                {
+                    theDTDHandler = value;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the content handler. Reads as <c>null</c> while the parser
+        /// itself is the handler; assigning <c>null</c> makes the parser the handler.
+        /// </summary>
+        public virtual IContentHandler ContentHandler
+        {
+            get
+            {
+                if (theContentHandler == this)
+                {
+                    return null;
+                }
+                return theContentHandler;
+            }
+            set
+            {
+                if (value == null)
+                {
+                    theContentHandler = this;
+                }
+                else
+                {
+                    theContentHandler = value;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the error handler. Reads as <c>null</c> while the parser
+        /// itself is the handler; assigning <c>null</c> makes the parser the handler.
+        /// </summary>
+        public virtual IErrorHandler ErrorHandler
+        {
+            get
+            {
+                if (theErrorHandler == this)
+                {
+                    return null;
+                }
+                return theErrorHandler;
+            }
+            set
+            {
+                if (value == null)
+                {
+                    theErrorHandler = this;
+                }
+                else
+                {
+                    theErrorHandler = value;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Parses the document described by <paramref name="input"/>, reporting it
+        /// to the registered handlers.
+        /// </summary>
+        /// <remarks>
+        /// When the input source supplies neither a reader nor a stream, the parser
+        /// opens the file named by the system id itself. In that case the reader is
+        /// disposed once scanning finishes (or fails) so the file handle is not
+        /// leaked. Readers and streams supplied by the caller are left open.
+        /// </remarks>
+        /// <param name="input">The source of the document to parse.</param>
+        public virtual void Parse(InputSource input)
+        {
+            Setup();
+            // Decide ownership before GetReader runs: only a stream the parser had
+            // to open on its own is the parser's responsibility to close.
+            bool ownsReader = input.TextReader == null && input.Stream == null;
+            TextReader r = GetReader(input);
+            try
+            {
+                theContentHandler.StartDocument();
+                theScanner.ResetDocumentLocator(input.PublicId, input.SystemId);
+                var locator = theScanner as ILocator;
+                if (locator != null)
+                {
+                    theContentHandler.SetDocumentLocator(locator);
+                }
+                if (!(theSchema.Uri.Equals("")))
+                {
+                    theContentHandler.StartPrefixMapping(theSchema.Prefix, theSchema.Uri);
+                }
+                theScanner.Scan(r, this);
+            }
+            finally
+            {
+                if (ownsReader && r != null)
+                {
+                    r.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Parses the document identified by a system id by wrapping it in an
+        /// <see cref="InputSource"/> and delegating to the main overload.
+        /// </summary>
+        /// <param name="systemid">The system id of the document.</param>
+        public virtual void Parse(string systemid)
+        {
+            var source = new InputSource(systemid);
+            Parse(source);
+        }
+
+        /// <summary>
+        /// Prepares the parser for a new document: supplies defaults for the
+        /// schema, scanner and auto-detector if none were set through
+        /// <c>SetProperty</c>, and resets all per-document scan state.
+        /// </summary>
+        private void Setup()
+        {
+            if (theSchema == null)
+            {
+                theSchema = new HTMLSchema();
+            }
+            if (theScanner == null)
+            {
+                theScanner = new HTMLScanner();
+            }
+            if (theAutoDetector == null)
+            {
+                theAutoDetector = new AutoDetectorDelegate(stream => new StreamReader(stream));
+            }
+            theStack = new Element(theSchema.GetElementType("<root>"), defaultAttributes);
+            thePCDATA = new Element(theSchema.GetElementType("<pcdata>"), defaultAttributes);
+            theNewElement = null;
+            theAttributeName = null;
+            thePITarget = null;
+            theSaved = null;
+            theEntity = 0;
+            virginStack = true;
+            // The "one doctype only" latch is per-document state; without this
+            // reset a reused parser would ignore the DOCTYPE of every document
+            // after the first one that declared it.
+            theDoctypeIsPresent = false;
+            theDoctypeName = theDoctypePublicId = theDoctypeSystemId = null;
+        }
+
+        /// <summary>
+        /// Obtains the character reader for an <see cref="InputSource"/>.
+        /// A reader supplied by the source is returned as is; otherwise the
+        /// source's stream (or, when it has none, the stream opened from its
+        /// public/system id) is wrapped in a <see cref="BufferedStream"/> and decoded.
+        /// </summary>
+        /// <param name="s">The input source describing the document.</param>
+        /// <returns>The reader to scan.</returns>
+        private TextReader GetReader(InputSource s)
+        {
+            TextReader suppliedReader = s.TextReader;
+            Stream bytes = s.Stream;
+            Encoding declaredEncoding = s.Encoding;
+            string publicid = s.PublicId;
+            string systemid = s.SystemId;
+            if (suppliedReader != null)
+            {
+                return suppliedReader;
+            }
+
+            if (bytes == null)
+            {
+                bytes = GetInputStream(publicid, systemid);
+            }
+            if (!(bytes is BufferedStream))
+            {
+                bytes = new BufferedStream(bytes);
+            }
+
+            TextReader decoded;
+            if (declaredEncoding != null)
+            {
+                // TODO (carried over from the port): the Java original fell back to
+                // the default encoding when the declared one was unsupported.
+                decoded = new StreamReader(bytes, declaredEncoding);
+            }
+            else
+            {
+                // No declared encoding: let the auto-detector build the reader.
+                decoded = theAutoDetector.AutoDetectingReader(bytes);
+            }
+            return decoded;
+        }
+
+        /// <summary>
+        /// Opens a read-only file stream for a system id, resolving it against
+        /// the current directory. The public id is not used.
+        /// </summary>
+        /// <param name="publicid">Ignored.</param>
+        /// <param name="systemid">The system id, resolved relative to the current directory.</param>
+        /// <returns>A stream over the file the resolved URI points to.</returns>
+        private Stream GetInputStream(string publicid, string systemid)
+        {
+            string currentDirectory = Environment.CurrentDirectory + Path.DirectorySeparatorChar;
+            var baseUri = new Uri("file://" + currentDirectory);
+            var resolved = new Uri(baseUri, systemid);
+            string path = resolved.LocalPath;
+            return new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
+        }
+
+        // ScanHandler implementation
+        //
+        // State shared by the scan callbacks below; Setup() reinitializes most of
+        // these fields at the start of every parse.
+
+        // Element created by GI and being given attributes by Aname/Aval/Adup.
+        private Element theNewElement;
+        // Attribute name recorded by Aname; cleared once Aval or Adup stores it.
+        private string theAttributeName;
+        // Set by Decl when a DOCTYPE has been seen; later DOCTYPEs are ignored.
+        private bool theDoctypeIsPresent;
+        // Public id, system id and name recorded by Decl from the DOCTYPE.
+        private string theDoctypePublicId;
+        private string theDoctypeSystemId;
+        private string theDoctypeName;
+        // Processing-instruction target recorded by PITarget.
+        private string thePITarget;
+        // Top of the stack of open elements, linked through Element.Next.
+        private Element theStack;
+        // Head of the list of elements saved by RestartablyPop for Restart.
+        private Element theSaved;
+        // Element of the schema's "<pcdata>" type, created in Setup.
+        private Element thePCDATA;
+        // Value produced by the last Entity callback.
+        private int theEntity; // needs to support chars past U+FFFF
+
+
+        /// <summary>
+        /// Stores the pending attribute on the element under construction, using
+        /// the attribute's own name as its value, then clears the pending name.
+        /// Does nothing when there is no element under construction or no pending name.
+        /// </summary>
+        /// <param name="buff">Unused.</param>
+        /// <param name="offset">Unused.</param>
+        /// <param name="length">Unused.</param>
+        public virtual void Adup(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null && theAttributeName != null)
+            {
+                theNewElement.SetAttribute(theAttributeName, null, theAttributeName);
+                theAttributeName = null;
+            }
+        }
+
+        /// <summary>
+        /// Records the name of the attribute that is about to receive a value.
+        /// Ignored when no element is under construction.
+        /// </summary>
+        /// <param name="buff">Buffer holding the attribute name.</param>
+        /// <param name="offset">Index of the first character of the name.</param>
+        /// <param name="length">Number of characters in the name.</param>
+        public virtual void Aname(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null)
+            {
+                // The schema is not relied upon to canonicalize attribute names,
+                // so the name is lower-cased here.
+                string madeName = MakeName(buff, offset, length);
+                theAttributeName = madeName.ToLowerInvariant();
+            }
+        }
+
+        /// <summary>
+        /// Stores the pending attribute on the element under construction with the
+        /// given value (after entity expansion), then clears the pending name.
+        /// Does nothing when there is no element under construction or no pending name.
+        /// </summary>
+        /// <param name="buff">Buffer holding the raw attribute value.</param>
+        /// <param name="offset">Index of the first character of the value.</param>
+        /// <param name="length">Number of characters in the value.</param>
+        public virtual void Aval(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null && theAttributeName != null)
+            {
+                string raw = new string(buff, offset, length);
+                string expanded = ExpandEntities(raw);
+                theNewElement.SetAttribute(theAttributeName, null, expanded);
+                theAttributeName = null;
+            }
+        }
+
+        /// <summary>
+        ///   Expands entity references inside an attribute value.
+        ///   Only references that are properly terminated with a semicolon and
+        ///   that resolve to a non-zero value are replaced; everything else is
+        ///   copied through unchanged.
+        /// </summary>
+        /// <param name="src">The raw attribute value.</param>
+        /// <returns>The value with the recognized references replaced.</returns>
+        private string ExpandEntities(string src)
+        {
+            const int NotInRef = -1;
+            var output = new char[src.Length];
+            int written = 0;
+            // Index in output of the first character after '&', or NotInRef.
+            int nameStart = NotInRef;
+            foreach (char ch in src)
+            {
+                output[written++] = ch;
+                if (nameStart == NotInRef)
+                {
+                    if (ch == '&')
+                    {
+                        nameStart = written;
+                    }
+                    continue;
+                }
+                if (char.IsLetter(ch) || char.IsDigit(ch) || ch == '#')
+                {
+                    // still inside the reference name
+                    continue;
+                }
+                if (ch == ';')
+                {
+                    // Properly terminated: the name excludes both '&' and ';'.
+                    int codePoint = LookupEntity(output, nameStart, written - nameStart - 1);
+                    if (codePoint > 0xFFFF)
+                    {
+                        // Outside the BMP: overwrite the reference with a surrogate pair.
+                        codePoint -= 0x10000;
+                        output[nameStart - 1] = (char)((codePoint >> 10) + 0xD800);
+                        output[nameStart] = (char)((codePoint & 0x3FF) + 0xDC00);
+                        written = nameStart + 1;
+                    }
+                    else if (codePoint != 0)
+                    {
+                        output[nameStart - 1] = (char)codePoint;
+                        written = nameStart;
+                    }
+                }
+                // Either the reference was handled or it was improperly terminated.
+                nameStart = NotInRef;
+            }
+            return new string(output, 0, written);
+        }
+
+        /// <summary>
+        /// Resolves an entity reference and remembers the resulting value.
+        /// </summary>
+        /// <param name="buff">Buffer holding the entity text.</param>
+        /// <param name="offset">Index of the first character of the entity text.</param>
+        /// <param name="length">Number of characters in the entity text.</param>
+        public virtual void Entity(char[] buff, int offset, int length)
+        {
+            int resolved = LookupEntity(buff, offset, length);
+            theEntity = resolved;
+        }
+
+        /// <summary>
+        ///   Process numeric character references,
+        ///   deferring to the schema for named ones.
+        /// </summary>
+        /// <remarks>
+        ///   A numeric reference that cannot be parsed (no digits, non-digit
+        ///   characters, or a value that does not fit in an <see cref="int"/>)
+        ///   yields 0 instead of throwing, so malformed input such as
+        ///   <c>&amp;#;</c>, <c>&amp;#x;</c> or <c>&amp;#99999999999;</c> cannot
+        ///   abort the parse.
+        /// </remarks>
+        /// <param name="buff">Buffer holding the reference text, without the leading '&amp;' and trailing ';'.</param>
+        /// <param name="offset">Index of the first character of the reference text.</param>
+        /// <param name="length">Number of characters in the reference text.</param>
+        /// <returns>The referenced value, or 0 when the text is empty or an unparsable numeric reference.</returns>
+        private int LookupEntity(char[] buff, int offset, int length)
+        {
+            if (length < 1)
+            {
+                return 0;
+            }
+            if (buff[offset] != '#')
+            {
+                // Named reference: the schema owns the name table.
+                return theSchema.GetEntity(new string(buff, offset, length));
+            }
+
+            bool isHex = length > 1 && (buff[offset + 1] == 'x' || buff[offset + 1] == 'X');
+            string digits;
+            System.Globalization.NumberStyles style;
+            if (isHex)
+            {
+                digits = new string(buff, offset + 2, length - 2);
+                style = System.Globalization.NumberStyles.AllowHexSpecifier;
+            }
+            else
+            {
+                digits = new string(buff, offset + 1, length - 1);
+                style = System.Globalization.NumberStyles.AllowLeadingSign;
+            }
+
+            // TryParse reports every failure mode (empty, malformed, overflow)
+            // through its return value, unlike Convert.ToInt32 which throws
+            // several different exception types.
+            int value;
+            if (int.TryParse(digits, style, System.Globalization.CultureInfo.InvariantCulture, out value))
+            {
+                return value;
+            }
+            return 0;
+        }
+
+        /// <summary>
+        /// Finishes the document: rectifies a PCDATA element if nothing has been
+        /// pushed yet, pops every element above the bottom of the stack, ends
+        /// the schema's prefix mapping when the schema has a URI, and reports
+        /// the end of the document.
+        /// </summary>
+        /// <param name="buff">Unused.</param>
+        /// <param name="offset">Unused.</param>
+        /// <param name="length">Unused.</param>
+        public virtual void EOF(char[] buff, int offset, int length)
+        {
+            if (virginStack)
+            {
+                Rectify(thePCDATA);
+            }
+            for (; theStack.Next != null; )
+            {
+                Pop();
+            }
+            bool schemaHasNoUri = theSchema.Uri.Equals("");
+            if (!schemaHasNoUri)
+            {
+                theContentHandler.EndPrefixMapping(theSchema.Prefix);
+            }
+            theContentHandler.EndDocument();
+        }
+
+        /// <summary>
+        /// Handles an end tag: first offers it to the CDATA-element handling and,
+        /// if that did not consume it, processes it as an ordinary end tag.
+        /// </summary>
+        /// <param name="buff">Buffer holding the tag name.</param>
+        /// <param name="offset">Index of the first character of the name.</param>
+        /// <param name="length">Number of characters in the name.</param>
+        public virtual void ETag(char[] buff, int offset, int length)
+        {
+            bool consumedAsCharacters = ETagCdata(buff, offset, length);
+            if (!consumedAsCharacters)
+            {
+                ETagBasic(buff, offset, length);
+            }
+        }
+
+        // The characters "</" and ">" re-emitted around an end tag that turns
+        // out to be character data.
+        private static readonly char[] etagchars = { '<', '/', '>' };
+
+        /// <summary>
+        /// Handles an end tag seen while the current element is a CDATA element.
+        /// If the tag does not match the current element, or isn't properly
+        /// formed (junk after the name), it is reported as characters and CDATA
+        /// mode is restarted.
+        /// </summary>
+        /// <remarks>
+        /// Names are compared case-insensitively using the invariant culture, so
+        /// the result does not depend on the current thread culture (for example
+        /// <c>&lt;/SCRIPT&gt;</c> still closes <c>script</c> under a Turkish culture).
+        /// </remarks>
+        /// <param name="buff">Buffer holding the tag name.</param>
+        /// <param name="offset">Index of the first character of the name.</param>
+        /// <param name="length">Number of characters in the name.</param>
+        /// <returns><c>true</c> when the tag was emitted as characters; otherwise <c>false</c>.</returns>
+        public virtual bool ETagCdata(char[] buff, int offset, int length)
+        {
+            string currentName = theStack.Name;
+            if (cDataElements && (theStack.Flags & Schema.F_CDATA) != 0)
+            {
+                bool realTag = (length == currentName.Length);
+                if (realTag)
+                {
+                    for (int i = 0; i < length; i++)
+                    {
+                        if (char.ToLowerInvariant(buff[offset + i]) != char.ToLowerInvariant(currentName[i]))
+                        {
+                            realTag = false;
+                            break;
+                        }
+                    }
+                }
+                if (!realTag)
+                {
+                    theContentHandler.Characters(etagchars, 0, 2);
+                    theContentHandler.Characters(buff, offset, length);
+                    theContentHandler.Characters(etagchars, 2, 1);
+                    theScanner.StartCDATA();
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Processes an ordinary end tag: finds the matching open element on the
+        /// stack and either precloses it or pops down to and including it, then
+        /// pops any preclosed elements left on top and restarts saved elements.
+        /// </summary>
+        /// <param name="buff">Buffer holding the tag name.</param>
+        /// <param name="offset">Index of the first character of the name.</param>
+        /// <param name="length">Number of characters in the name; 0 means "close the current element".</param>
+        public virtual void ETagBasic(char[] buff, int offset, int length)
+        {
+            theNewElement = null;
+            string name;
+            if (length == 0)
+            {
+                name = theStack.Name;
+            }
+            else
+            {
+                // Canonicalize the name through the schema.
+                ElementType type = theSchema.GetElementType(MakeName(buff, offset, length));
+                if (type == null)
+                {
+                    return; // end tag for an element the schema doesn't know
+                }
+                name = type.Name;
+            }
+
+            // Walk down the stack to the matching element, noting whether any
+            // element passed on the way carries F_NOFORCE.
+            Element match = theStack;
+            bool inNoforce = false;
+            while (match != null && !match.Name.Equals(name))
+            {
+                if ((match.Flags & Schema.F_NOFORCE) != 0)
+                {
+                    inNoforce = true;
+                }
+                match = match.Next;
+            }
+
+            if (match == null)
+            {
+                return; // no such element is open
+            }
+            if (match.Next == null || match.Next.Next == null)
+            {
+                return; // the two bottom-most stack entries are never closed here
+            }
+
+            if (inNoforce)
+            {
+                // Inside an F_NOFORCE element: only mark the match as preclosed.
+                match.Preclose();
+            }
+            else
+            {
+                // Restartably pop everything above the match, then the match itself.
+                while (theStack != match)
+                {
+                    RestartablyPop();
+                }
+                Pop();
+            }
+
+            // Pop any preclosed elements now at the top.
+            while (theStack.IsPreclosed)
+            {
+                Pop();
+            }
+            Restart(null);
+        }
+
+        /// <summary>
+        ///   Pushes saved (restartable) elements back onto the stack for as long
+        ///   as the stack top can contain the next saved element and, when
+        ///   <paramref name="e"/> is given, that saved element can contain it.
+        /// </summary>
+        /// <param name="e">The next element to be started, or <c>null</c> if unknown.</param>
+        private void Restart(Element e)
+        {
+            while (true)
+            {
+                if (theSaved == null || !theStack.CanContain(theSaved))
+                {
+                    break;
+                }
+                if (e != null && !theSaved.CanContain(e))
+                {
+                    break;
+                }
+                // Remember the link first: Push rewrites the element's Next.
+                Element remaining = theSaved.Next;
+                Push(theSaved);
+                theSaved = remaining;
+            }
+        }
+
+        /// <summary>
+        ///   Pops the top element off the stack irrevocably, reporting the end of
+        ///   the element and the end of any foreign prefix mappings for the
+        ///   element and its attributes.
+        /// </summary>
+        private void Pop()
+        {
+            Element top = theStack;
+            if (top == null)
+            {
+                return; // nothing to pop
+            }
+            string qName = top.Name;
+            string localName = top.LocalName;
+            string ns = top.Namespace;
+            string elementPrefix = PrefixOf(qName);
+
+            if (!namespaces)
+            {
+                // Namespace processing is off: report empty namespace and local name.
+                localName = "";
+                ns = "";
+            }
+            theContentHandler.EndElement(ns, localName, qName);
+            if (Foreign(elementPrefix, ns))
+            {
+                theContentHandler.EndPrefixMapping(elementPrefix);
+            }
+
+            // Attributes are visited from the last one to the first.
+            Attributes atts = theStack.Attributes;
+            int index = atts.Length;
+            while (--index >= 0)
+            {
+                string attNamespace = atts.GetURI(index);
+                string attPrefix = PrefixOf(atts.GetQName(index));
+                if (Foreign(attPrefix, attNamespace))
+                {
+                    theContentHandler.EndPrefixMapping(attPrefix);
+                }
+            }
+            theStack = theStack.Next;
+        }
+
+        /// <summary>
+        ///   Pops the top element and, when restarting is enabled and the element
+        ///   is flagged F_RESTART, anonymizes it and puts it at the head of the
+        ///   saved list so it can be pushed again later.
+        /// </summary>
+        private void RestartablyPop()
+        {
+            Element top = theStack;
+            Pop();
+            if (!restartElements)
+            {
+                return;
+            }
+            if ((top.Flags & Schema.F_RESTART) == 0)
+            {
+                return;
+            }
+            top.Anonymize();
+            top.Next = theSaved;
+            theSaved = top;
+        }
+
+        // True until the first element of the document has been pushed.
+        private bool virginStack = true;
+
+        /// <summary>
+        /// Pushes an element onto the stack, reporting any foreign prefix
+        /// mappings for it and its attributes followed by the start of the
+        /// element, and switches the scanner to CDATA mode for CDATA elements.
+        /// </summary>
+        /// <param name="e">The element to push.</param>
+        private void Push(Element e)
+        {
+            string qName = e.Name;
+            string localName = e.LocalName;
+            string ns = e.Namespace;
+            string elementPrefix = PrefixOf(qName);
+
+            e.Clean();
+            if (!namespaces)
+            {
+                // Namespace processing is off: report empty namespace and local name.
+                localName = "";
+                ns = "";
+            }
+
+            bool isDoctypeRoot = virginStack
+                && localName.Equals(theDoctypeName, StringComparison.OrdinalIgnoreCase);
+            if (isDoctypeRoot)
+            {
+                try
+                {
+                    theEntityResolver.ResolveEntity(theDoctypePublicId, theDoctypeSystemId);
+                }
+                catch (IOException)
+                {
+                    // Deliberately ignored; the original notes this is not
+                    // believed to be thrown for the root.
+                }
+            }
+
+            if (Foreign(elementPrefix, ns))
+            {
+                theContentHandler.StartPrefixMapping(elementPrefix, ns);
+            }
+            Attributes atts = e.Attributes;
+            int count = atts.Length;
+            int index = 0;
+            while (index < count)
+            {
+                string attNamespace = atts.GetURI(index);
+                string attPrefix = PrefixOf(atts.GetQName(index));
+                if (Foreign(attPrefix, attNamespace))
+                {
+                    theContentHandler.StartPrefixMapping(attPrefix, attNamespace);
+                }
+                index++;
+            }
+            theContentHandler.StartElement(ns, localName, qName, e.Attributes);
+
+            e.Next = theStack;
+            theStack = e;
+            virginStack = false;
+            if (cDataElements && (theStack.Flags & Schema.F_CDATA) != 0)
+            {
+                theScanner.StartCDATA();
+            }
+        }
+
+        /// <summary>
+        ///   Extracts the prefix of a qualified name: the text before the first
+        ///   colon, or the empty string when the name contains no colon.
+        /// </summary>
+        /// <param name="name">The qualified name.</param>
+        /// <returns>The prefix, possibly empty.</returns>
+        private static string PrefixOf(string name)
+        {
+            int colon = name.IndexOf(':');
+            return colon == -1 ? "" : name.Substring(0, colon);
+        }
+
+        /// <summary>
+        ///   Tells whether a name is foreign: it has a non-empty prefix and a
+        ///   non-empty namespace that differs from the schema's URI.
+        /// </summary>
+        /// <param name="prefix">The prefix of the name.</param>
+        /// <param name="ns">The namespace of the name.</param>
+        /// <returns><c>true</c> if the name is foreign; otherwise <c>false</c>.</returns>
+        private bool Foreign(string prefix, string ns)
+        {
+            if (prefix.Equals(""))
+            {
+                return false;
+            }
+            if (ns.Equals(""))
+            {
+                return false;
+            }
+            return !ns.Equals(theSchema.Uri);
+        }
+
+        /// <summary>
+        ///   Handles a markup declaration. Only DOCTYPE declarations are acted
+        ///   upon, and only the first one per parser state; from it the name and
+        ///   the optional SYSTEM/PUBLIC identifiers are extracted.
+        ///   Parsing the complete XML Document Type Definition is way too complex,
+        ///   but for many simple cases we can extract something useful from it.
+        ///   doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
+        ///   DeclSep ::= PEReference | S
+        ///   intSubset ::= (markupdecl | DeclSep)*
+        ///   markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
+        ///   ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
+        /// </summary>
+        /// <param name="buff">Buffer holding the declaration text.</param>
+        /// <param name="offset">Index of the first character of the declaration.</param>
+        /// <param name="length">Number of characters in the declaration.</param>
+        public virtual void Decl(char[] buff, int offset, int length)
+        {
+            string[] words = Split(new string(buff, offset, length));
+            if (words.Length == 0 || !"DOCTYPE".Equals(words[0], StringComparison.OrdinalIgnoreCase))
+            {
+                return; // not a DOCTYPE declaration
+            }
+            if (theDoctypeIsPresent)
+            {
+                return; // one doctype only!
+            }
+            theDoctypeIsPresent = true;
+            if (words.Length <= 1)
+            {
+                return; // DOCTYPE without a name: nothing to report
+            }
+
+            string name = words[1];
+            string publicid = null;
+            string systemid = null;
+            if (words.Length > 3)
+            {
+                if ("SYSTEM".Equals(words[2]))
+                {
+                    systemid = words[3];
+                }
+                else if ("PUBLIC".Equals(words[2]))
+                {
+                    publicid = words[3];
+                    systemid = words.Length > 4 ? words[4] : "";
+                }
+            }
+
+            publicid = CleanPublicId(TrimQuotes(publicid));
+            systemid = TrimQuotes(systemid);
+            theLexicalHandler.StartDTD(name, publicid, systemid);
+            theLexicalHandler.EndDTD();
+            theDoctypeName = name;
+            theDoctypePublicId = publicid;
+
+            var locator = theScanner as ILocator;
+            if (locator == null)
+            {
+                return;
+            }
+            // Resolve the system id against the document's own system id.
+            theDoctypeSystemId = locator.SystemId;
+            try
+            {
+                if (Uri.IsWellFormedUriString(theDoctypeSystemId, UriKind.Absolute))
+                {
+                    var documentUri = new Uri(theDoctypeSystemId);
+                    theDoctypeSystemId = new Uri(documentUri, systemid).ToString();
+                }
+            }
+            catch (Exception)
+            {
+                // Best effort: on failure the document's system id is kept as is.
+            }
+        }
+
+        /// <summary>
+        /// Removes one pair of matching surrounding quotes (single or double)
+        /// from a string.
+        /// </summary>
+        /// <param name="value">The string to unquote; may be <c>null</c>.</param>
+        /// <returns>
+        /// <c>null</c> for <c>null</c> input; the text between the quotes when
+        /// <paramref name="value"/> starts and ends with the same quote
+        /// character; otherwise <paramref name="value"/> unchanged. A string
+        /// shorter than two characters is never considered quoted.
+        /// </returns>
+        private static string TrimQuotes(string value)
+        {
+            if (value == null)
+            {
+                return null;
+            }
+            int length = value.Length;
+            if (length < 2)
+            {
+                // A lone quote character is not a quoted string.
+                return value;
+            }
+            char first = value[0];
+            char last = value[length - 1];
+            if (first == last && (first == '\'' || first == '"'))
+            {
+                // Substring takes (start, count): drop both the opening and the
+                // closing quote, hence length - 2.
+                return value.Substring(1, length - 2);
+            }
+            return value;
+        }
+
+        /// <summary>
+        ///   Splits a string into words separated by whitespace. A phrase wrapped
+        ///   in single or double quotes is kept together (quotes included); a
+        ///   quote preceded by a backslash does not open or close a phrase.
+        /// </summary>
+        /// <param name="val">The string to split.</param>
+        /// <returns>The words in order; an empty array when the input is blank.</returns>
+        private static string[] Split(string val)
+        {
+            string text = val.Trim();
+            if (text.Length == 0)
+            {
+                return new string[0];
+            }
+
+            var words = new List<string>();
+            int wordStart = 0;       // index where the current word began, -1 between words
+            bool inSingle = false;   // inside '...'
+            bool inDouble = false;   // inside "..."
+            char previous = (char)0;
+            int pos;
+            for (pos = 0; pos < text.Length; pos++)
+            {
+                char current = text[pos];
+                bool escaped = previous == '\\';
+                if (!inDouble && current == '\'' && !escaped)
+                {
+                    inSingle = !inSingle;
+                    if (wordStart < 0)
+                    {
+                        wordStart = pos;
+                    }
+                }
+                else if (!inSingle && current == '\"' && !escaped)
+                {
+                    inDouble = !inDouble;
+                    if (wordStart < 0)
+                    {
+                        wordStart = pos;
+                    }
+                }
+                else if (!inSingle && !inDouble)
+                {
+                    if (char.IsWhiteSpace(current))
+                    {
+                        // Whitespace outside quotes ends the current word, if any.
+                        if (wordStart >= 0)
+                        {
+                            words.Add(text.Substring(wordStart, pos - wordStart));
+                        }
+                        wordStart = -1;
+                    }
+                    else if (wordStart < 0)
+                    {
+                        wordStart = pos;
+                    }
+                }
+                previous = current;
+            }
+            // The trimmed text ends in a non-whitespace character, so a word is
+            // always still open here.
+            words.Add(text.Substring(wordStart, pos - wordStart));
+            return words.ToArray();
+        }
+
+        // Characters that are kept as is in a public id; anything else is
+        // treated as whitespace/junk by CleanPublicId.
+        private const string LEGAL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%";
+
+        /// <summary>
+        ///   Normalizes a public id: every run of characters outside
+        ///   <c>LEGAL</c> is collapsed to a single space, and leading and
+        ///   trailing whitespace is removed.
+        /// </summary>
+        /// <param name="src">The public id; may be <c>null</c>.</param>
+        /// <returns>The cleaned public id, or <c>null</c> for <c>null</c> input.</returns>
+        private string CleanPublicId(string src)
+        {
+            if (src == null)
+            {
+                return null;
+            }
+            var cleaned = new StringBuilder(src.Length);
+            // Starts true so junk at the very beginning produces no space.
+            bool lastWasSpace = true;
+            foreach (char ch in src)
+            {
+                bool isLegal = LEGAL.IndexOf(ch) != -1;
+                if (isLegal)
+                {
+                    cleaned.Append(ch);
+                    lastWasSpace = false;
+                }
+                else if (!lastWasSpace)
+                {
+                    // First junk character after legal text becomes one space.
+                    cleaned.Append(' ');
+                    lastWasSpace = true;
+                }
+            }
+            return cleaned.ToString().Trim(); // trim any final junk whitespace
+        }
+
+        /// <summary>
+        /// Handles the name of a start tag by creating the element under
+        /// construction. A name unknown to the schema is either dropped (when
+        /// bogons are ignored) or registered with the schema on the fly.
+        /// Ignored when an element is already under construction.
+        /// </summary>
+        /// <param name="buff">Buffer holding the element name.</param>
+        /// <param name="offset">Index of the first character of the name.</param>
+        /// <param name="length">Number of characters in the name.</param>
+        public virtual void GI(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null)
+            {
+                return;
+            }
+            string name = MakeName(buff, offset, length);
+            if (name == null)
+            {
+                return;
+            }
+
+            ElementType type = theSchema.GetElementType(name);
+            if (type == null)
+            {
+                if (ignoreBogons)
+                {
+                    return; // unknown elements are suppressed
+                }
+
+                // Register the unknown element with the schema.
+                int contentModel;
+                if (bogonsEmpty)
+                {
+                    contentModel = Schema.M_EMPTY;
+                }
+                else
+                {
+                    contentModel = Schema.M_ANY;
+                }
+                int memberOf = Schema.M_ANY;
+                if (!rootBogons)
+                {
+                    memberOf &= ~Schema.M_ROOT;
+                }
+                theSchema.ElementType(name, contentModel, memberOf, 0);
+                if (!rootBogons)
+                {
+                    theSchema.Parent(name, theSchema.RootElementType.Name);
+                }
+                type = theSchema.GetElementType(name);
+            }
+
+            theNewElement = new Element(type, defaultAttributes);
+        }
+
+        /// <summary>
+        ///   Reports the content of a CDATA section: the content is processed by
+        ///   <see cref="PCDATA"/>, bracketed by <c>StartCDATA</c> and <c>EndCDATA</c>
+        ///   calls on the lexical handler.
+        /// </summary>
+        /// <param name="buff">Buffer holding the section content.</param>
+        /// <param name="offset">Index in <paramref name="buff"/> of the first character.</param>
+        /// <param name="length">Number of characters.</param>
+        public virtual void CDSect(char[] buff, int offset, int length)
+        {
+            theLexicalHandler.StartCDATA();
+            PCDATA(buff, offset, length);
+            theLexicalHandler.EndCDATA();
+        }
+
+        /// <summary>
+        ///   Reports character content.
+        /// </summary>
+        /// <remarks>
+        ///   Empty content is ignored. Content consisting only of whitespace that the
+        ///   element stack cannot contain is reported as ignorable whitespace when
+        ///   <c>ignorableWhitespace</c> is set, and silently dropped otherwise. Any
+        ///   other content is reported as characters after the stack has been
+        ///   rectified for PCDATA.
+        /// </remarks>
+        /// <param name="buff">Buffer holding the characters.</param>
+        /// <param name="offset">Index in <paramref name="buff"/> of the first character.</param>
+        /// <param name="length">Number of characters.</param>
+        public virtual void PCDATA(char[] buff, int offset, int length)
+        {
+            if (length == 0)
+            {
+                return;
+            }
+            bool allWhite = true;
+            for (int i = 0; i < length; i++)
+            {
+                if (!char.IsWhiteSpace(buff[offset + i]))
+                {
+                    allWhite = false;
+                    break; // a single non-whitespace character settles the question
+                }
+            }
+            if (allWhite && !theStack.CanContain(thePCDATA))
+            {
+                if (ignorableWhitespace)
+                {
+                    theContentHandler.IgnorableWhitespace(buff, offset, length);
+                }
+            }
+            else
+            {
+                Rectify(thePCDATA);
+                theContentHandler.Characters(buff, offset, length);
+            }
+        }
+
+        /// <summary>
+        ///   Stores the target of a processing instruction, with every colon replaced
+        ///   by an underscore. Does nothing while an element is pending.
+        /// </summary>
+        /// <param name="buff">Buffer holding the target name.</param>
+        /// <param name="offset">Index in <paramref name="buff"/> of the first character.</param>
+        /// <param name="length">Number of characters.</param>
+        public virtual void PITarget(char[] buff, int offset, int length)
+        {
+            if (theNewElement == null)
+            {
+                string target = MakeName(buff, offset, length);
+                thePITarget = target.Replace(':', '_');
+            }
+        }
+
+        /// <summary>
+        ///   Reports the data part of a processing instruction to the content handler,
+        ///   using the target stored earlier in <c>thePITarget</c>.
+        /// </summary>
+        /// <remarks>
+        ///   Nothing is reported while an element is pending, when no target has been
+        ///   stored, or when the target is "xml" (compared ignoring case). A single
+        ///   trailing '?' is removed from the data. The stored target is cleared after
+        ///   the instruction has been reported.
+        /// </remarks>
+        /// <param name="buff">Buffer holding the instruction data.</param>
+        /// <param name="offset">Index in <paramref name="buff"/> of the first character of the data.</param>
+        /// <param name="length">Number of characters of data.</param>
+        public virtual void PI(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null || thePITarget == null)
+            {
+                return;
+            }
+            if ("xml".Equals(thePITarget, StringComparison.OrdinalIgnoreCase))
+            {
+                return;
+            }
+            // The data occupies buff[offset .. offset + length - 1], so its last
+            // character must be located relative to offset, not to the buffer start.
+            if (length > 0 && buff[offset + length - 1] == '?')
+            {
+                length--; // remove trailing ?
+            }
+            theContentHandler.ProcessingInstruction(thePITarget, new string(buff, offset, length));
+            thePITarget = null;
+        }
+
+        /// <summary>
+        ///   Reports the close of a start-tag: the pending element, if there is one,
+        ///   is rectified onto the stack, and an end-tag is forced right away when
+        ///   the stack's model is <see cref="Schema.M_EMPTY"/>.
+        /// </summary>
+        /// <param name="buff">Buffer passed on to the forced end-tag handling.</param>
+        /// <param name="offset">Offset passed on to the forced end-tag handling.</param>
+        /// <param name="length">Length passed on to the forced end-tag handling.</param>
+        public virtual void STagC(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null)
+            {
+                Rectify(theNewElement);
+                bool isEmptyModel = theStack.Model == Schema.M_EMPTY;
+                if (isEmptyModel)
+                {
+                    // an element with empty content gets its end tag immediately
+                    ETagBasic(buff, offset, length);
+                }
+            }
+        }
+
+        /// <summary>
+        ///   Reports the close of an empty-tag: the pending element, if there is one,
+        ///   is rectified onto the stack and then ended immediately.
+        /// </summary>
+        /// <param name="buff">Buffer passed on to the forced end-tag handling.</param>
+        /// <param name="offset">Offset passed on to the forced end-tag handling.</param>
+        /// <param name="length">Length passed on to the forced end-tag handling.</param>
+        public virtual void STagE(char[] buff, int offset, int length)
+        {
+            if (theNewElement != null)
+            {
+                Rectify(theNewElement);
+                // an empty-tag always closes itself
+                ETagBasic(buff, offset, length);
+            }
+        }
+
+        private char[] theCommentBuffer = new char[2000];
+        /// <summary>
+        ///   Reports a comment by forwarding it unchanged to the lexical handler.
+        /// </summary>
+        /// <param name="buff">Buffer holding the comment text.</param>
+        /// <param name="offset">Index in <paramref name="buff"/> of the first character.</param>
+        /// <param name="length">Number of characters.</param>
+        public virtual void Cmnt(char[] buff, int offset, int length)
+        {
+            theLexicalHandler.Comment(buff, offset, length);
+        }
+
+        /// <summary>
+        ///   Rectify the stack, pushing and popping as needed
+        ///   so that the argument can be safely pushed
+        /// </summary>
+        /// <param name="e">
+        ///   The element to be pushed. While searching for a place for it, it may be
+        ///   wrapped in a chain of newly created elements of its parent types.
+        /// </param>
+        private void Rectify(Element e)
+        {
+            Element sp;
+            // Phase 1: look for an element on the stack that can contain e. When none
+            // can, wrap e in a new element of its parent type (linked through Next)
+            // and search again for that parent instead.
+            while (true)
+            {
+                for (sp = theStack; sp != null; sp = sp.Next)
+                {
+                    if (sp.CanContain(e))
+                    {
+                        break;
+                    }
+                }
+                if (sp != null)
+                {
+                    break;
+                }
+                ElementType parentType = e.Parent;
+                if (parentType == null)
+                {
+                    // no container found and no further parent to try; sp stays null
+                    break;
+                }
+                var parent = new Element(parentType, defaultAttributes);
+                //			System.err.println("%% Ascending from " + e.name() + " to " + parent.name());
+                parent.Next = e;
+                e = parent;
+            }
+            if (sp == null)
+            {
+                return; // don't know what to do
+            }
+            // Phase 2: pop until the container found above is on top of the stack,
+            // but stop early once fewer than three elements remain on it.
+            while (theStack != sp)
+            {
+                if (theStack == null || theStack.Next == null || theStack.Next.Next == null)
+                {
+                    break;
+                }
+                RestartablyPop();
+            }
+            // Phase 3: walk the chain built in phase 1 and push every element except
+            // the one named "<pcdata>"; Restart is called with the following element
+            // of the chain (null after the last one).
+            while (e != null)
+            {
+                Element nexte = e.Next;
+                if (!e.Name.Equals("<pcdata>"))
+                {
+                    Push(e);
+                }
+                e = nexte;
+                Restart(e);
+            }
+            // the pending element has now been dealt with
+            theNewElement = null;
+        }
+
+        /// <summary>
+        ///   Returns the value currently stored in <c>theEntity</c>.
+        /// </summary>
+        /// <returns>The current value of <c>theEntity</c>.</returns>
+        public virtual int GetEntity()
+        {
+            return theEntity;
+        }
+
+        /// <summary>
+        ///   Builds a valid XML name from the given characters.
+        ///   Case is left untouched: canonicalizing case is the job of Schema.
+        /// </summary>
+        /// <remarks>
+        ///   Letters and underscores are copied. Digits, '-' and '.' are copied too,
+        ///   preceded by an underscore when they would start the name or follow the
+        ///   colon directly. The first colon is kept (or written as an underscore
+        ///   when <c>translateColons</c> is set); all other characters are dropped.
+        ///   An underscore is appended if the result would otherwise be empty or end
+        ///   in a colon.
+        /// </remarks>
+        /// <param name="buff">Buffer holding the raw name.</param>
+        /// <param name="offset">Index in <paramref name="buff"/> of the first character.</param>
+        /// <param name="length">Number of characters.</param>
+        /// <returns>The interned name.</returns>
+        private string MakeName(char[] buff, int offset, int length)
+        {
+            var name = new StringBuilder(length + 2);
+            bool colonSeen = false;
+            // True at the beginning of the name and right after the first colon.
+            bool atStart = true;
+            for (int i = 0; i < length; i++)
+            {
+                char c = buff[offset + i];
+                if (char.IsLetter(c) || c == '_')
+                {
+                    name.Append(c);
+                    atStart = false;
+                    continue;
+                }
+
+                bool isNameChar = char.IsDigit(c) || c == '-' || c == '.';
+                bool isFirstColon = c == ':' && !colonSeen;
+                if (!isNameChar && !isFirstColon)
+                {
+                    continue; // not usable in a name: dropped
+                }
+
+                // Neither kind of character may open a name part; pad with '_'.
+                if (atStart)
+                {
+                    name.Append('_');
+                }
+                if (isNameChar)
+                {
+                    name.Append(c);
+                    atStart = false;
+                }
+                else
+                {
+                    colonSeen = true;
+                    name.Append(translateColons ? '_' : c);
+                    atStart = true;
+                }
+            }
+            if (name.Length == 0 || name[name.Length - 1] == ':')
+            {
+                name.Append('_');
+            }
+            return name.ToString().Intern();
+        }
+
+        /// <summary>
+        ///   Adapts a function that turns a <see cref="Stream"/> into a
+        ///   <see cref="StreamReader"/> to the <see cref="IAutoDetector"/> interface.
+        /// </summary>
+        private class AutoDetectorDelegate : IAutoDetector
+        {
+            private readonly Func<Stream, StreamReader> _createReader;
+
+            /// <summary>
+            ///   Wraps the given function.
+            /// </summary>
+            /// <param name="delegate">Function invoked for every stream to be read.</param>
+            public AutoDetectorDelegate(Func<Stream, StreamReader> @delegate)
+            {
+                _createReader = @delegate;
+            }
+
+            /// <summary>
+            ///   Returns the reader the wrapped function produces for <paramref name="stream"/>.
+            /// </summary>
+            /// <param name="stream">The stream to read from.</param>
+            /// <returns>The result of the wrapped function.</returns>
+            public TextReader AutoDetectingReader(Stream stream)
+            {
+                TextReader reader = _createReader(stream);
+                return reader;
+            }
+        }
+
+        // Default LexicalHandler implementation
+        // Every method below has an empty body; all are virtual and can be overridden.
+
+        /// <summary>
+        ///   Default comment handler; does nothing.
+        /// </summary>
+        public virtual void Comment(char[] ch, int start, int length)
+        {
+        }
+
+        /// <summary>
+        ///   Default end-of-CDATA handler; does nothing.
+        /// </summary>
+        public virtual void EndCDATA()
+        {
+        }
+
+        /// <summary>
+        ///   Default end-of-DTD handler; does nothing.
+        /// </summary>
+        public virtual void EndDTD()
+        {
+        }
+
+        /// <summary>
+        ///   Default end-of-entity handler; does nothing.
+        /// </summary>
+        public virtual void EndEntity(string name)
+        {
+        }
+
+        /// <summary>
+        ///   Default start-of-CDATA handler; does nothing.
+        /// </summary>
+        public virtual void StartCDATA()
+        {
+        }
+
+        /// <summary>
+        ///   Default start-of-DTD handler; does nothing.
+        /// </summary>
+        public virtual void StartDTD(string name, string publicid, string systemid)
+        {
+        }
+
+        /// <summary>
+        ///   Default start-of-entity handler; does nothing.
+        /// </summary>
+        public virtual void StartEntity(string name)
+        {
+        }
+
+        /// <summary>
+        ///  Creates a new instance of <see cref="Parser" />.
+        ///  The new parser has no pending element and initially acts as its own
+        ///  content handler, lexical handler, DTD handler, error handler and
+        ///  entity resolver.
+        /// </summary>
+        public Parser()
+        {
+            theNewElement = null;
+            theContentHandler = this;
+            theLexicalHandler = this;
+            theDTDHandler = this;
+            theErrorHandler = this;
+            theEntityResolver = this;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs
new file mode 100644
index 0000000..3901ada
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/ScanHandler.cs
@@ -0,0 +1,105 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// Scanner handler
+
+namespace TagSoup
+{
+    /// <summary>
+    /// An interface that Scanners use to report events in the input stream.
+    /// </summary>
+    /// <remarks>
+    /// Every reporting method is passed a character buffer together with an offset
+    /// and a length, which delimit the text (if any) associated with the event.
+    /// </remarks>
+    public interface IScanHandler
+    {
+        /// <summary>
+        /// Reports an attribute name without a value.
+        /// </summary>
+        void Adup(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports an attribute name; a value will follow.
+        /// </summary>
+        void Aname(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports an attribute value.
+        /// </summary>
+        void Aval(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports the content of a CDATA section (not a CDATA element).
+        /// </summary>
+        void CDSect(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports a &lt;!....&gt; declaration - typically a DOCTYPE.
+        /// </summary>
+        void Decl(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports an entity reference or character reference.
+        /// </summary>
+        void Entity(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports EOF.
+        /// </summary>
+        void EOF(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports an end-tag.
+        /// </summary>
+        void ETag(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports the general identifier (element type name) of a start-tag.
+        /// </summary>
+        void GI(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports character content.
+        /// </summary>
+        void PCDATA(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports the data part of a processing instruction.
+        /// </summary>
+        void PI(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports the target part of a processing instruction.
+        /// </summary>
+        void PITarget(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports the close of a start-tag.
+        /// </summary>
+        void STagC(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports the close of an empty-tag.
+        /// </summary>
+        void STagE(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Reports a comment.
+        /// </summary>
+        void Cmnt(char[] buff, int offset, int length);
+
+        /// <summary>
+        /// Returns the value of the last entity or character reference reported.
+        /// </summary>
+        /// <returns>The value of the last entity or character reference reported.</returns>
+        int GetEntity();
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs
new file mode 100644
index 0000000..5e4d406
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Scanner.cs
@@ -0,0 +1,53 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// Scanner
+
+using System.IO;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// An interface allowing <see cref="Parser"/> to invoke scanners.
+    /// </summary>
+    public interface IScanner
+    {
+        /// <summary>
+        /// Invoke a scanner.
+        /// </summary>
+        /// <param name="br">
+        /// A source of characters to scan.
+        /// </param>
+        /// <param name="handler">
+        /// An <see cref="IScanHandler"/> to report events to.
+        /// </param>
+        void Scan(TextReader br, IScanHandler handler);
+
+        /// <summary>
+        /// Reset the embedded locator.
+        /// </summary>
+        /// <param name="publicid">
+        /// The publicid of the source.
+        /// </param>
+        /// <param name="systemid">
+        /// The systemid of the source.
+        /// </param>
+        void ResetDocumentLocator(string publicid, string systemid);
+
+        /// <summary>
+        /// Signal to the scanner to start CDATA content mode.
+        /// </summary>
+        void StartCDATA();
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/198e5868/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs b/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs
new file mode 100644
index 0000000..76a86f9
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/TagSoup/Schema.cs
@@ -0,0 +1,159 @@
+// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
+//
+// TagSoup is licensed under the Apache License,
+// Version 2.0.  You may obtain a copy of this license at
+// http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
+// additional legal rights not granted by this license.
+//
+// TagSoup is distributed in the hope that it will be useful, but
+// unless required by applicable law or agreed to in writing, TagSoup
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+// OF ANY KIND, either express or implied; not even the implied warranty
+// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// 
+// 
+// Model of document
+
+using System;
+using System.Collections;
+
+namespace TagSoup
+{
+    /// <summary>
+    /// Abstract class representing a TSSL schema.
+    /// Actual TSSL schemas are compiled into concrete subclasses of this class.
+    /// </summary>
+    /// <remarks>
+    /// Element type names are stored and looked up case-insensitively, using
+    /// culture-invariant lower-casing so that the result does not depend on the
+    /// current culture of the running thread.
+    /// </remarks>
+    public abstract class Schema
+    {
+        // Bit masks used for the model and memberOf arguments of ElementType().
+        public const int M_ANY = -1;//0xFFFFFFFF;
+        public const int M_EMPTY = 0;
+        public const int M_PCDATA = 1 << 30;
+        public const int M_ROOT = 1 << 31;
+
+        // Flag values; presumably passed as the flags argument of ElementType().
+        public const int F_RESTART = 1;
+        public const int F_CDATA = 2;
+        public const int F_NOFORCE = 4;
+
+        private readonly Hashtable theEntities = new Hashtable(); // string -> Character
+        private readonly Hashtable theElementTypes = new Hashtable(); // string -> ElementType
+
+        private string theURI = "";
+        private string thePrefix = "";
+        private ElementType theRoot;
+        
+        /// <summary>
+        /// Add or replace an element type for this schema.
+        /// The element type becomes the root of the schema when
+        /// <paramref name="memberOf"/> is exactly <see cref="M_ROOT"/>.
+        /// </summary>
+        /// <param name="name"> Name (Qname) of the element</param>
+        /// <param name="model">Models of the element's content as a vector of bits</param>
+        /// <param name="memberOf">Models the element is a member of as a vector of bits</param>
+        /// <param name="flags">Flags for the element</param>
+        public virtual void ElementType(string name, int model, int memberOf, int flags)
+        {
+            var e = new ElementType(name, model, memberOf, flags, this);
+            // Invariant lower-casing: culture-sensitive ToLower() maps 'I' to a
+            // dotless i under e.g. Turkish cultures, which would break lookups.
+            theElementTypes[name.ToLowerInvariant()] = e;
+            if (memberOf == M_ROOT)
+            {
+                theRoot = e;
+            }
+        }
+
+        /// <summary>
+        /// Gets the root element of this schema
+        /// </summary>
+        public virtual ElementType RootElementType
+        {
+            get { return theRoot; }
+        }
+
+        /// <summary>
+        /// Add or replace a default attribute for an element type in this schema.
+        /// </summary>
+        /// <param name="elemName">Name (Qname) of the element type</param>
+        /// <param name="attrName">Name (Qname) of the attribute</param>
+        /// <param name="type">Type of the attribute</param>
+        /// <param name="value">Default value of the attribute; null if no default</param>
+        /// <exception cref="ArgumentException">
+        /// No element type named <paramref name="elemName"/> exists in this schema.
+        /// </exception>
+        public virtual void Attribute(string elemName, string attrName, string type, string value)
+        {
+            ElementType e = GetElementType(elemName);
+            if (e == null)
+            {
+                throw new ArgumentException("Attribute " + attrName + " specified for unknown element type " + elemName);
+            }
+            e.SetAttribute(attrName, type, value);
+        }
+
+        /// <summary>
+        /// Specify natural parent of an element in this schema.
+        /// </summary>
+        /// <param name="name">Name of the child element</param>
+        /// <param name="parentName">Name of the parent element</param>
+        /// <exception cref="ArgumentException">
+        /// Either <paramref name="name"/> or <paramref name="parentName"/> does not
+        /// name an element type of this schema.
+        /// </exception>
+        public virtual void Parent(string name, string parentName)
+        {
+            ElementType child = GetElementType(name);
+            ElementType parent = GetElementType(parentName);
+            if (child == null)
+            {
+                throw new ArgumentException("No child " + name + " for parent " + parentName);
+            }
+            if (parent == null)
+            {
+                throw new ArgumentException("No parent " + parentName + " for child " + name);
+            }
+            child.Parent = parent;
+        }
+
+        /// <summary>
+        /// Add to or replace a character entity in this schema.
+        /// </summary>
+        /// <param name="name">Name of the entity</param>
+        /// <param name="value">Value of the entity</param>
+        public virtual void Entity(string name, int value)
+        {
+            theEntities[name] = value;
+        }
+
+        /// <summary>
+        /// Get an <see cref="TagSoup.ElementType"/> by name.
+        /// The lookup ignores the case of <paramref name="name"/>.
+        /// </summary>
+        /// <param name="name">Name (Qname) of the element type</param>
+        /// <returns>
+        /// The corresponding <see cref="TagSoup.ElementType"/>, or <c>null</c> if
+        /// this schema has no element type of that name
+        /// </returns>
+        public virtual ElementType GetElementType(string name)
+        {
+            // Must lower-case exactly as ElementType() does when storing.
+            return (ElementType)(theElementTypes[name.ToLowerInvariant()]);
+        }
+
+        /// <summary>
+        /// Get an entity value by name.
+        /// </summary>
+        /// <param name="name">Name of the entity</param>
+        /// <returns>The corresponding character, or 0 if none</returns>
+        public virtual int GetEntity(string name)
+        {
+            // Single lookup: the Hashtable indexer yields null for a missing key.
+            object value = theEntities[name];
+            if (value != null)
+            {
+                return (int)value;
+            }
+            return 0;
+        }
+
+        /// <summary>
+        /// Gets or sets the URI (namespace name) of this schema.
+        /// </summary>
+        public virtual string Uri
+        {
+            get { return theURI; }
+            set { theURI = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the prefix of this schema.
+        /// </summary>
+        public virtual string Prefix
+        {
+            get { return thePrefix; }
+            set { thePrefix = value; }
+        }
+    }
+}


Mime
View raw message