lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [3/4] lucenenet git commit: Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems
Date Wed, 14 Dec 2016 20:00:49 GMT
Renamed hyphenation to Hyphenation to fix build and run on case sensitive file systems


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/7ecb7529
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/7ecb7529
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/7ecb7529

Branch: refs/heads/master
Commit: 7ecb7529066c1d95dce83b175dac3322fc4068ab
Parents: 96d38ef
Author: Yaroslav <slashmk@gmail.com>
Authored: Mon Nov 28 21:23:25 2016 +0200
Committer: Yaroslav <slashmk@gmail.com>
Committed: Mon Nov 28 21:23:25 2016 +0200

----------------------------------------------------------------------
 .../Analysis/Compound/Hyphenation/ByteVector.cs | 156 ++++
 .../Analysis/Compound/Hyphenation/CharVector.cs | 171 ++++
 .../Analysis/Compound/Hyphenation/Hyphen.cs     |  72 ++
 .../Compound/Hyphenation/Hyphenation.cs         |  53 ++
 .../Compound/Hyphenation/HyphenationTree.cs     | 581 +++++++++++++
 .../Compound/Hyphenation/PatternConsumer.cs     |  54 ++
 .../Compound/Hyphenation/PatternParser.cs       | 483 +++++++++++
 .../Compound/Hyphenation/TernaryTree.cs         | 816 +++++++++++++++++++
 .../Compound/Hyphenation/hyphenation.dtd        |  68 ++
 .../Analysis/Compound/hyphenation/ByteVector.cs | 156 ----
 .../Analysis/Compound/hyphenation/CharVector.cs | 171 ----
 .../Analysis/Compound/hyphenation/Hyphen.cs     |  72 --
 .../Compound/hyphenation/Hyphenation.cs         |  53 --
 .../Compound/hyphenation/HyphenationTree.cs     | 581 -------------
 .../Compound/hyphenation/PatternConsumer.cs     |  54 --
 .../Compound/hyphenation/PatternParser.cs       | 483 -----------
 .../Compound/hyphenation/TernaryTree.cs         | 816 -------------------
 .../Compound/hyphenation/hyphenation.dtd        |  68 --
 18 files changed, 2454 insertions(+), 2454 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
new file mode 100644
index 0000000..6442d11
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/ByteVector.cs
@@ -0,0 +1,156 @@
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class implements a simple byte vector with access to the underlying
+    /// array.
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class ByteVector
+    {
+
+        /// <summary>
+        /// Capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        private int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private sbyte[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        public ByteVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        public ByteVector(int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = new sbyte[blockSize];
+            n = 0;
+        }
+
+        public ByteVector(sbyte[] a)
+        {
+            blockSize = DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = 0;
+        }
+
+        public ByteVector(sbyte[] a, int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = a;
+            n = 0;
+        }
+
+        public virtual sbyte[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET
+        /// </summary>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public virtual sbyte this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length
+        {
+            get { return n; }
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        //public virtual void Put(int index, sbyte val)
+        //{
+        //    array[index] = val;
+        //}
+
+        //public virtual sbyte Get(int index)
+        //{
+        //    return array[index];
+        //}
+
+        /// <summary>
+        /// This is to implement memory allocation in the array. Like malloc().
+        /// </summary>
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                sbyte[] aux = new sbyte[len + blockSize];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                sbyte[] aux = new sbyte[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
new file mode 100644
index 0000000..26fcea5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/CharVector.cs
@@ -0,0 +1,171 @@
+using System;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class implements a simple char vector with access to the underlying
+    /// array.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class CharVector : ICloneable
+    {
+
+        /// <summary>
+        /// Capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        private int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private char[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        public CharVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        public CharVector(int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = new char[blockSize];
+            n = 0;
+        }
+
+        public CharVector(char[] a)
+        {
+            blockSize = DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = a.Length;
+        }
+
+        public CharVector(char[] a, int capacity)
+        {
+            if (capacity > 0)
+            {
+                blockSize = capacity;
+            }
+            else
+            {
+                blockSize = DEFAULT_BLOCK_SIZE;
+            }
+            array = a;
+            n = a.Length;
+        }
+
+        /// <summary>
+        /// Reset Vector but don't resize or clear elements
+        /// </summary>
+        public virtual void Clear()
+        {
+            n = 0;
+        }
+
+        public virtual object Clone()
+        {
+            CharVector cv = new CharVector(array, blockSize);
+            cv.n = this.n;
+            return cv;
+        }
+
+        public virtual char[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET
+        /// </summary>
+        /// <param name="index"></param>
+        /// <returns></returns>
+        public virtual char this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length()
+        {
+            return n;
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        //public virtual void Put(int index, char val)
+        //{
+        //    array[index] = val;
+        //}
+
+        //public virtual char get(int index)
+        //{
+        //    return array[index];
+        //}
+
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                char[] aux = new char[len + blockSize];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                char[] aux = new char[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
new file mode 100644
index 0000000..91009b1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphen.cs
@@ -0,0 +1,72 @@
+using System.Text;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
+    /// pre-break text, post-break text and no-break. If no line-break is generated
+    /// at this position, the no-break text is used, otherwise, pre-break and
+    /// post-break are used. Typically, pre-break is equal to the hyphen character
+    /// and the others are empty. However, this general scheme allows support for
+    /// cases in some languages where words change spelling if they're split across
+    /// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
+    /// from TeX.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class Hyphen
+    {
+        public string preBreak;
+
+        public string noBreak;
+
+        public string postBreak;
+
+        internal Hyphen(string pre, string no, string post)
+        {
+            preBreak = pre;
+            noBreak = no;
+            postBreak = post;
+        }
+
+        internal Hyphen(string pre)
+        {
+            preBreak = pre;
+            noBreak = null;
+            postBreak = null;
+        }
+
+        public override string ToString()
+        {
+            if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
+            {
+                return "-";
+            }
+            StringBuilder res = new StringBuilder("{");
+            res.Append(preBreak);
+            res.Append("}{");
+            res.Append(postBreak);
+            res.Append("}{");
+            res.Append(noBreak);
+            res.Append('}');
+            return res.ToString();
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
new file mode 100644
index 0000000..fdbac29
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/Hyphenation.cs
@@ -0,0 +1,53 @@
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class represents a hyphenated word.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
+    /// </summary>
+    public class Hyphenation
+    {
+
+        private readonly int[] hyphenPoints;
+
+        /// <summary>
+        /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
+        /// </summary>
+        internal Hyphenation(int[] points)
+        {
+            hyphenPoints = points;
+        }
+
+        /// <returns> the number of hyphenation points in the word </returns>
+        public virtual int Length
+        {
+            get { return hyphenPoints.Length; }
+        }
+
+        /// <returns> the hyphenation points </returns>
+        public virtual int[] HyphenationPoints
+        {
+            get
+            {
+                return hyphenPoints;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
new file mode 100644
index 0000000..287f6f3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -0,0 +1,581 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Xml;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+	/// This tree structure stores the hyphenation patterns in an efficient way for
+	/// fast lookup. It provides the provides the method to hyphenate a word.
+	/// 
+	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+	/// </summary>
+	public class HyphenationTree : TernaryTree, IPatternConsumer
+    {
+
+        /// <summary>
+        /// value space: stores the interletter values
+        /// </summary>
+        protected internal ByteVector vspace;
+
+        /// <summary>
+        /// This map stores hyphenation exceptions
+        /// </summary>
+        protected internal IDictionary<string, IList<object>> stoplist;
+
+        /// <summary>
+        /// This map stores the character classes
+        /// </summary>
+        protected internal TernaryTree classmap;
+
+        /// <summary>
+        /// Temporary map to store interletter values on pattern loading.
+        /// </summary>
+        [NonSerialized]
+        private TernaryTree ivalues;
+
+        public HyphenationTree()
+        {
+            stoplist = new HashMap<string, IList<object>>(23); // usually a small table
+            classmap = new TernaryTree();
+            vspace = new ByteVector();
+            vspace.Alloc(1); // this reserves index 0, which we don't use
+        }
+
+        /// <summary>
+        /// Packs the values by storing them in 4 bits, two values into a byte Values
+        /// range is from 0 to 9. We use zero as terminator, so we'll add 1 to the
+        /// value.
+        /// </summary>
+        /// <param name="values"> a string of digits from '0' to '9' representing the
+        ///        interletter values. </param>
+        /// <returns> the index into the vspace array where the packed values are stored. </returns>
+        protected internal virtual int PackValues(string values)
+        {
+            int i, n = values.Length;
+            int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1;
+            int offset = vspace.Alloc(m);
+            sbyte[] va = vspace.Array;
+            for (i = 0; i < n; i++)
+            {
+                int j = i >> 1;
+                sbyte v = (sbyte)((values[i] - '0' + 1) & 0x0f);
+                if ((i & 1) == 1)
+                {
+                    va[j + offset] = (sbyte)(va[j + offset] | v);
+                }
+                else
+                {
+                    va[j + offset] = (sbyte)(v << 4); // big endian
+                }
+            }
+            va[m - 1 + offset] = 0; // terminator
+            return offset;
+        }
+
+        protected internal virtual string UnpackValues(int k)
+        {
+            StringBuilder buf = new StringBuilder();
+            sbyte v = vspace[k++];
+            while (v != 0)
+            {
+                char c = (char)(((int)((uint)v >> 4)) - 1 + '0');
+                buf.Append(c);
+                c = (char)(v & 0x0f);
+                if (c == 0)
+                {
+                    break;
+                }
+                c = (char)(c - 1 + '0');
+                buf.Append(c);
+                v = vspace[k++];
+            }
+            return buf.ToString();
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(string filename)
+        {
+            LoadPatterns(filename, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(string filename, Encoding encoding)
+        {
+            var src = new FileStream(filename, FileMode.Open, FileAccess.Read);
+            LoadPatterns(src, encoding);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(FileInfo f)
+        {
+            LoadPatterns(f, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="f"> the filename </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(FileInfo f, Encoding encoding)
+        {
+            var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
+            LoadPatterns(src, encoding);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="source"> the InputSource for the file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(Stream source)
+        {
+            LoadPatterns(source, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Read hyphenation patterns from an XML file.
+        /// </summary>
+        /// <param name="source"> the InputSource for the file </param>
+        /// <exception cref="IOException"> In case the parsing fails </exception>
+        public virtual void LoadPatterns(Stream source, Encoding encoding)
+        {
+            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+            using (var reader = XmlReader.Create(new StreamReader(source, encoding), new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new PatternParser.DtdResolver()
+            }))
+            {
+                LoadPatterns(reader);
+            }
+        }
+
+        public virtual void LoadPatterns(XmlReader source)
+        {
+            PatternParser pp = new PatternParser(this);
+            ivalues = new TernaryTree();
+
+            pp.Parse(source);
+
+            // patterns/values should be now in the tree
+            // let's optimize a bit
+            TrimToSize();
+            vspace.TrimToSize();
+            classmap.TrimToSize();
+
+            // get rid of the auxiliary map
+            ivalues = null;
+        }
+
+        public virtual string FindPattern(string pat)
+        {
+            int k = base.Find(pat);
+            if (k >= 0)
+            {
+                return UnpackValues(k);
+            }
+            return "";
+        }
+
+        /// <summary>
+        /// String compare, returns 0 if equal or t is a substring of s
+        /// </summary>
+        protected internal virtual int HStrCmp(char[] s, int si, char[] t, int ti)
+        {
+            for (; s[si] == t[ti]; si++, ti++)
+            {
+                if (s[si] == 0)
+                {
+                    return 0;
+                }
+            }
+            if (t[ti] == 0)
+            {
+                return 0;
+            }
+            return s[si] - t[ti];
+        }
+
+        protected internal virtual sbyte[] GetValues(int k)
+        {
+            StringBuilder buf = new StringBuilder();
+            sbyte v = vspace[k++];
+            while (v != 0)
+            {
+                char c = (char)((((int)((uint)v >> 4))) - 1);
+                buf.Append(c);
+                c = (char)(v & 0x0f);
+                if (c == 0)
+                {
+                    break;
+                }
+                c = (char)(c - 1);
+                buf.Append(c);
+                v = vspace[k++];
+            }
+            sbyte[] res = new sbyte[buf.Length];
+            for (int i = 0; i < res.Length; i++)
+            {
+                res[i] = (sbyte)buf[i];
+            }
+            return res;
+        }
+
+        /// <summary>
+        /// <para>
+        /// Search for all possible partial matches of word starting at index an update
+        /// interletter values. In other words, it does something like:
+        /// </para>
+        /// <code>
+        /// for(i=0; i&lt;patterns.length; i++) {
+        /// if ( word.substring(index).startsWidth(patterns[i]) )
+        /// update_interletter_values(patterns[i]);
+        /// }
+        /// </code>
+        /// <para>
+        /// But it is done in an efficient way since the patterns are stored in a
+        /// ternary tree. In fact, this is the whole purpose of having the tree: doing
+        /// this search without having to test every single pattern. The number of
+        /// patterns for languages such as English range from 4000 to 10000. Thus,
+        /// doing thousands of string comparisons for each word to hyphenate would be
+        /// really slow without the tree. The tradeoff is memory, but using a ternary
+        /// tree instead of a trie, almost halves the the memory used by Lout or TeX.
+        /// It's also faster than using a hash table
+        /// </para>
+        /// </summary>
+        /// <param name="word"> null terminated word to match </param>
+        /// <param name="index"> start index from word </param>
+        /// <param name="il"> interletter values array to update </param>
+        protected internal virtual void SearchPatterns(char[] word, int index, sbyte[] il)
+        {
+            sbyte[] values;
+            int i = index;
+            char p, q;
+            char sp = word[i];
+            p = root;
+
+            while (p > 0 && p < sc.Length)
+            {
+                if (sc[p] == 0xFFFF)
+                {
+                    if (HStrCmp(word, i, kv.Array, lo[p]) == 0)
+                    {
+                        values = GetValues(eq[p]); // data pointer is in eq[]
+                        int j = index;
+                        for (int k = 0; k < values.Length; k++)
+                        {
+                            if (j < il.Length && values[k] > il[j])
+                            {
+                                il[j] = values[k];
+                            }
+                            j++;
+                        }
+                    }
+                    return;
+                }
+                int d = sp - sc[p];
+                if (d == 0)
+                {
+                    if (sp == 0)
+                    {
+                        break;
+                    }
+                    sp = word[++i];
+                    p = eq[p];
+                    q = p;
+
+                    // look for a pattern ending at this position by searching for
+                    // the null char ( splitchar == 0 )
+                    while (q > 0 && q < sc.Length)
+                    {
+                        if (sc[q] == 0xFFFF) // stop at compressed branch
+                        {
+                            break;
+                        }
+                        if (sc[q] == 0)
+                        {
+                            values = GetValues(eq[q]);
+                            int j = index;
+                            for (int k = 0; k < values.Length; k++)
+                            {
+                                if (j < il.Length && values[k] > il[j])
+                                {
+                                    il[j] = values[k];
+                                }
+                                j++;
+                            }
+                            break;
+                        }
+                        else
+                        {
+                            q = lo[q];
+
+                            /// <summary>
+                            /// actually the code should be: q = sc[q] < 0 ? hi[q] : lo[q]; but
+                            /// java chars are unsigned
+                            /// </summary>
+                        }
+                    }
+                }
+                else
+                {
+                    p = d < 0 ? lo[p] : hi[p];
+                }
+            }
+        }
+
+        /// <summary>
+        /// Hyphenate word and return a Hyphenation object.
+        /// </summary>
+        /// <param name="word"> the word to be hyphenated </param>
+        /// <param name="remainCharCount"> Minimum number of characters allowed before the
+        ///        hyphenation point. </param>
+        /// <param name="pushCharCount"> Minimum number of characters allowed after the
+        ///        hyphenation point. </param>
+        /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+        ///         hyphenated word or null if word is not hyphenated. </returns>
+        public virtual Hyphenation Hyphenate(string word, int remainCharCount, int pushCharCount)
+        {
+            char[] w = word.ToCharArray();
+            return Hyphenate(w, 0, w.Length, remainCharCount, pushCharCount);
+        }
+
+        /// <summary>
+        /// w = "****nnllllllnnn*****", where n is a non-letter, l is a letter, all n
+        /// may be absent, the first n is at offset, the first l is at offset +
+        /// iIgnoreAtBeginning; word = ".llllll.'\0'***", where all l in w are copied
+        /// into word. In the first part of the routine len = w.length, in the second
+        /// part of the routine len = word.length. Three indices are used: index(w),
+        /// the index in w, index(word), the index in word, letterindex(word), the
+        /// index in the letter part of word. The following relations exist: index(w) =
+        /// offset + i - 1 index(word) = i - iIgnoreAtBeginning letterindex(word) =
+        /// index(word) - 1 (see first loop). It follows that: index(w) - index(word) =
+        /// offset - 1 + iIgnoreAtBeginning index(w) = letterindex(word) + offset +
+        /// iIgnoreAtBeginning
+        /// </summary>
+
+        /// <summary>
+        /// Hyphenate word and return an array of hyphenation points.
+        /// </summary>
+        /// <param name="w"> char array that contains the word </param>
+        /// <param name="offset"> Offset to first character in word </param>
+        /// <param name="len"> Length of word </param>
+        /// <param name="remainCharCount"> Minimum number of characters allowed before the
+        ///        hyphenation point. </param>
+        /// <param name="pushCharCount"> Minimum number of characters allowed after the
+        ///        hyphenation point. </param>
+        /// <returns> a <seealso cref="Hyphenation Hyphenation"/> object representing the
+        ///         hyphenated word or null if word is not hyphenated. </returns>
+        public virtual Hyphenation Hyphenate(char[] w, int offset, int len, int remainCharCount, int pushCharCount)
+        {
+            int i;
+            char[] word = new char[len + 3];
+
+            // normalize word
+            char[] c = new char[2];
+            int iIgnoreAtBeginning = 0;
+            int iLength = len;
+            bool bEndOfLetters = false;
+            for (i = 1; i <= len; i++)
+            {
+                c[0] = w[offset + i - 1];
+                int nc = classmap.Find(c, 0);
+                if (nc < 0) // found a non-letter character ...
+                {
+                    if (i == (1 + iIgnoreAtBeginning))
+                    {
+                        // ... before any letter character
+                        iIgnoreAtBeginning++;
+                    }
+                    else
+                    {
+                        // ... after a letter character
+                        bEndOfLetters = true;
+                    }
+                    iLength--;
+                }
+                else
+                {
+                    if (!bEndOfLetters)
+                    {
+                        word[i - iIgnoreAtBeginning] = (char)nc;
+                    }
+                    else
+                    {
+                        return null;
+                    }
+                }
+            }
+            len = iLength;
+            if (len < (remainCharCount + pushCharCount))
+            {
+                // word is too short to be hyphenated
+                return null;
+            }
+            int[] result = new int[len + 1];
+            int k = 0;
+
+            // check exception list first
+            string sw = new string(word, 1, len);
+            if (stoplist.ContainsKey(sw))
+            {
+                // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no =
+                // null)
+                IList<object> hw = stoplist[sw];
+                int j = 0;
+                for (i = 0; i < hw.Count; i++)
+                {
+                    object o = hw[i];
+                    // j = index(sw) = letterindex(word)?
+                    // result[k] = corresponding index(w)
+                    if (o is string)
+                    {
+                        j += ((string)o).Length;
+                        if (j >= remainCharCount && j < (len - pushCharCount))
+                        {
+                            result[k++] = j + iIgnoreAtBeginning;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                // use algorithm to get hyphenation points
+                word[0] = '.'; // word start marker
+                word[len + 1] = '.'; // word end marker
+                word[len + 2] = (char)0; // null terminated
+                sbyte[] il = new sbyte[len + 3]; // initialized to zero
+                for (i = 0; i < len + 1; i++)
+                {
+                    SearchPatterns(word, i, il);
+                }
+
+                // hyphenation points are located where interletter value is odd
+                // i is letterindex(word),
+                // i + 1 is index(word),
+                // result[k] = corresponding index(w)
+                for (i = 0; i < len; i++)
+                {
+                    if (((il[i + 1] & 1) == 1) && i >= remainCharCount && i <= (len - pushCharCount))
+                    {
+                        result[k++] = i + iIgnoreAtBeginning;
+                    }
+                }
+            }
+
+            if (k > 0)
+            {
+                // trim result array
+                int[] res = new int[k + 2];
+                Array.Copy(result, 0, res, 1, k);
+                // We add the synthetical hyphenation points
+                // at the beginning and end of the word
+                res[0] = 0;
+                res[k + 1] = len;
+                return new Hyphenation(res);
+            }
+            else
+            {
+                return null;
+            }
+        }
+
+        /// <summary>
+        /// Add a character class to the tree. It is used by
+        /// <seealso cref="PatternParser PatternParser"/> as callback to add character classes.
+        /// Character classes define the valid word characters for hyphenation. If a
+        /// word contains a character not defined in any of the classes, it is not
+        /// hyphenated. It also defines a way to normalize the characters in order to
+        /// compare them with the stored patterns. Usually pattern files use only lower
+        /// case characters, in this case a class for letter 'a', for example, should
+        /// be defined as "aA", the first character being the normalization char.
+        /// </summary>
+        public virtual void AddClass(string chargroup)
+        {
+            if (chargroup.Length > 0)
+            {
+                char equivChar = chargroup[0];
+                char[] key = new char[2];
+                key[1] = (char)0;
+                for (int i = 0; i < chargroup.Length; i++)
+                {
+                    key[0] = chargroup[i];
+                    classmap.Insert(key, 0, equivChar);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Add an exception to the tree. It is used by
+        /// <seealso cref="PatternParser PatternParser"/> class as callback to store the
+        /// hyphenation exceptions.
+        /// </summary>
+        /// <param name="word"> normalized word </param>
+        /// <param name="hyphenatedword"> a vector of alternating strings and
+        ///        <seealso cref="Hyphen hyphen"/> objects. </param>
+        public virtual void AddException(string word, List<object> hyphenatedword)
+        {
+            stoplist[word] = hyphenatedword;
+        }
+
+        /// <summary>
+        /// Add a pattern to the tree. Mainly, to be used by
+        /// <seealso cref="PatternParser PatternParser"/> class as callback to add a pattern to
+        /// the tree.
+        /// </summary>
+        /// <param name="pattern"> the hyphenation pattern </param>
+        /// <param name="ivalue"> interletter weight values indicating the desirability and
+        ///        priority of hyphenating at a given point within the pattern. It
+        ///        should contain only digit characters. (i.e. '0' to '9'). </param>
+        public virtual void AddPattern(string pattern, string ivalue)
+        {
+            int k = ivalues.Find(ivalue);
+            if (k <= 0)
+            {
+                k = PackValues(ivalue);
+                ivalues.Insert(ivalue, (char)k);
+            }
+            Insert(pattern, (char)k);
+        }
+
+        // public override void printStats(PrintStream @out)
+        // {
+        //@out.println("Value space size = " + Convert.ToString(vspace.length()));
+        //base.printStats(@out);
+
+        // }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
new file mode 100644
index 0000000..069badd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternConsumer.cs
@@ -0,0 +1,54 @@
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This interface is used to connect the XML pattern file parser to the
+    /// hyphenation tree.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
+    /// </summary>
+    public interface IPatternConsumer
+	{
+
+	  /// <summary>
+	  /// Add a character class. A character class defines characters that are
+	  /// considered equivalent for the purpose of hyphenation (e.g. "aA"). It
+	  /// usually means to ignore case.
+	  /// </summary>
+	  /// <param name="chargroup"> character group </param>
+	  void AddClass(string chargroup);
+
+	  /// <summary>
+	  /// Add a hyphenation exception. An exception replaces the result obtained by
+	  /// the algorithm for cases for which this fails or the user wants to provide
+	  /// his own hyphenation. A hyphenatedword is a vector of alternating String's
+	  /// and <seealso cref="Hyphen"/> instances
+	  /// </summary>
+	  void AddException(string word, List<object> hyphenatedword);
+
+	  /// <summary>
+	  /// Add hyphenation patterns.
+	  /// </summary>
+	  /// <param name="pattern"> the pattern </param>
+	  /// <param name="values"> interletter values expressed as a string of digit characters. </param>
+	  void AddPattern(string pattern, string values);
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
new file mode 100644
index 0000000..8c00d19
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -0,0 +1,483 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Xml;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A XMLReader document handler to read and parse hyphenation patterns from a XML
+    /// file.
+    /// 
+    /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
+    /// than a SAX parser.
+    /// </summary>
+    public class PatternParser
+    {
+        internal int currElement;
+
+        internal IPatternConsumer consumer;
+
+        internal StringBuilder token;
+
+        internal List<object> exception;
+
+        internal char hyphenChar;
+
+        internal string errMsg;
+
+        internal const int ELEM_CLASSES = 1;
+
+        internal const int ELEM_EXCEPTIONS = 2;
+
+        internal const int ELEM_PATTERNS = 3;
+
+        internal const int ELEM_HYPHEN = 4;
+
+        public PatternParser()
+        {
+            token = new StringBuilder();
+            hyphenChar = '-'; // default
+        }
+
+        public PatternParser(IPatternConsumer consumer) : this()
+        {
+            this.consumer = consumer;
+        }
+
+        public virtual IPatternConsumer Consumer
+        {
+            set
+            {
+                this.consumer = value;
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="filename"> the filename </param>
+        /// <exception cref="IOException"> In case of an exception while parsing </exception>
+        public virtual void Parse(string filename)
+        {
+            // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
+            using (var src = XmlReader.Create(filename, new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(FileInfo file)
+        {
+            Parse(file, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(FileInfo file, Encoding encoding)
+        {
+            using (var src = XmlReader.Create(new StreamReader(file.FullName, encoding), new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="file"> the pattern file </param>
+        public virtual void Parse(Stream xmlStream)
+        {
+            using (var src = XmlReader.Create(xmlStream, new XmlReaderSettings
+            {
+                DtdProcessing = DtdProcessing.Parse,
+                XmlResolver = new DtdResolver()
+            }))
+            {
+                Parse(src);
+            }
+        }
+
+        /// <summary>
+        /// Parses a hyphenation pattern file.
+        /// </summary>
+        /// <param name="source"> the InputSource for the file </param>
+        /// <exception cref="IOException"> In case of an exception while parsing </exception>
+        public virtual void Parse(XmlReader source)
+        {
+            source.MoveToContent();
+            while (source.Read())
+            {
+                ParseNode(source);
+            }
+        }
+
+        private void ParseNode(XmlReader node)
+        {
+            string uri, name, raw;
+            switch (node.NodeType)
+            {
+                case XmlNodeType.Element:
+
+                    // Element start
+                    uri = node.NamespaceURI;
+                    name = node.Name;
+                    bool isEmptyElement = node.IsEmptyElement;
+                    var attributes = GetAttributes(node);
+                    raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer
+
+                    this.StartElement(uri, name, raw, attributes);
+                    if (isEmptyElement)
+                    {
+                        this.EndElement(uri, name, raw);
+                    }
+                    break;
+
+                case XmlNodeType.Text:
+
+                    this.Characters(node.Value.ToCharArray(), 0, node.Value.Length);
+                    break;
+
+                case XmlNodeType.EndElement:
+                    uri = node.NamespaceURI;
+                    name = node.Name;
+                    raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer
+
+                    // Element end
+                    this.EndElement(uri, name, raw);
+                    break;
+            }
+        }
+
+        private IDictionary<string, string> GetAttributes(XmlReader node)
+        {
+            var result = new Dictionary<string, string>();
+            if (node.HasAttributes)
+            {
+                for (int i = 0; i < node.AttributeCount; i++)
+                {
+                    node.MoveToAttribute(i);
+                    result.Add(node.Name, node.Value);
+                }
+            }
+
+            return result;
+        }
+
+        protected internal virtual string ReadToken(StringBuilder chars)
+        {
+            string word;
+            bool space = false;
+            int i;
+            for (i = 0; i < chars.Length; i++)
+            {
+                if (char.IsWhiteSpace(chars[i]))
+                {
+                    space = true;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            if (space)
+            {
+                // chars.delete(0,i);
+                for (int countr = i; countr < chars.Length; countr++)
+                {
+                    chars[countr - i] = chars[countr];
+                }
+                chars.Length = chars.Length - i;
+                if (token.Length > 0)
+                {
+                    word = token.ToString();
+                    token.Length = 0;
+                    return word;
+                }
+            }
+            space = false;
+            for (i = 0; i < chars.Length; i++)
+            {
+                if (char.IsWhiteSpace(chars[i]))
+                {
+                    space = true;
+                    break;
+                }
+            }
+            token.Append(chars.ToString(0, i - 0));
+            // chars.delete(0,i);
+            for (int countr = i; countr < chars.Length; countr++)
+            {
+                chars[countr - i] = chars[countr];
+            }
+            chars.Length = chars.Length - i;
+            if (space)
+            {
+                word = token.ToString();
+                token.Length = 0;
+                return word;
+            }
+            token.Append(chars.ToString());
+            return null;
+        }
+
+        protected internal static string GetPattern(string word)
+        {
+            StringBuilder pat = new StringBuilder();
+            int len = word.Length;
+            for (int i = 0; i < len; i++)
+            {
+                if (!char.IsDigit(word[i]))
+                {
+                    pat.Append(word[i]);
+                }
+            }
+            return pat.ToString();
+        }
+
+        protected internal virtual List<object> NormalizeException<T1>(List<T1> ex)
+        {
+            List<object> res = new List<object>();
+            for (int i = 0; i < ex.Count; i++)
+            {
+                object item = ex[i];
+                if (item is string)
+                {
+                    string str = (string)item;
+                    StringBuilder buf = new StringBuilder();
+                    for (int j = 0; j < str.Length; j++)
+                    {
+                        char c = str[j];
+                        if (c != hyphenChar)
+                        {
+                            buf.Append(c);
+                        }
+                        else
+                        {
+                            res.Add(buf.ToString());
+                            buf.Length = 0;
+                            char[] h = new char[1];
+                            h[0] = hyphenChar;
+                            // we use here hyphenChar which is not necessarily
+                            // the one to be printed
+                            res.Add(new Hyphen(new string(h), null, null));
+                        }
+                    }
+                    if (buf.Length > 0)
+                    {
+                        res.Add(buf.ToString());
+                    }
+                }
+                else
+                {
+                    res.Add(item);
+                }
+            }
+            return res;
+        }
+
+        protected internal virtual string GetExceptionWord<T1>(List<T1> ex)
+        {
+            StringBuilder res = new StringBuilder();
+            for (int i = 0; i < ex.Count; i++)
+            {
+                object item = ex[i];
+                if (item is string)
+                {
+                    res.Append((string)item);
+                }
+                else
+                {
+                    if (((Hyphen)item).noBreak != null)
+                    {
+                        res.Append(((Hyphen)item).noBreak);
+                    }
+                }
+            }
+            return res.ToString();
+        }
+
+        protected internal static string GetInterletterValues(string pat)
+        {
+            StringBuilder il = new StringBuilder();
+            string word = pat + "a"; // add dummy letter to serve as sentinel
+            int len = word.Length;
+            for (int i = 0; i < len; i++)
+            {
+                char c = word[i];
+                if (char.IsDigit(c))
+                {
+                    il.Append(c);
+                    i++;
+                }
+                else
+                {
+                    il.Append('0');
+                }
+            }
+            return il.ToString();
+        }
+
+        /// <summary>
+        /// LUCENENET specific helper class to force the DTD file to be read from the embedded resource
+        /// rather than from the file system.
+        /// </summary>
+        internal class DtdResolver : XmlUrlResolver
+        {
+            public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
+            {
+                string dtdFilename = "hyphenation.dtd";
+                if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.OrdinalIgnoreCase))
+                {
+                    var qualifedDtdFilename = string.Concat(GetType().Namespace, ".", dtdFilename);
+                    return GetType().Assembly.GetManifestResourceStream(qualifedDtdFilename);
+                }
+
+                return base.GetEntity(absoluteUri, role, ofObjectToReturn);
+            }
+        }
+
+        //
+        // ContentHandler methods
+        //
+
+        /// <seealso cref= org.xml.sax.ContentHandler#startElement(java.lang.String,
+        ///      java.lang.String, java.lang.String, org.xml.sax.Attributes) </seealso>
+        public void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
+        {
+            if (local.Equals("hyphen-char"))
+            {
+                string h = attrs.ContainsKey("value") ? attrs["value"] : null;
+                if (h != null && h.Length == 1)
+                {
+                    hyphenChar = h[0];
+                }
+            }
+            else if (local.Equals("classes"))
+            {
+                currElement = ELEM_CLASSES;
+            }
+            else if (local.Equals("patterns"))
+            {
+                currElement = ELEM_PATTERNS;
+            }
+            else if (local.Equals("exceptions"))
+            {
+                currElement = ELEM_EXCEPTIONS;
+                exception = new List<object>();
+            }
+            else if (local.Equals("hyphen"))
+            {
+                if (token.Length > 0)
+                {
+                    exception.Add(token.ToString());
+                }
+                exception.Add(new Hyphen(attrs["pre"], attrs["no"], attrs["post"]));
+                currElement = ELEM_HYPHEN;
+            }
+            token.Length = 0;
+        }
+
+        /// <seealso cref= org.xml.sax.ContentHandler#endElement(java.lang.String,
+        ///      java.lang.String, java.lang.String) </seealso>
+        public void EndElement(string uri, string local, string raw)
+        {
+            if (token.Length > 0)
+            {
+                string word = token.ToString();
+                switch (currElement)
+                {
+                    case ELEM_CLASSES:
+                        consumer.AddClass(word);
+                        break;
+                    case ELEM_EXCEPTIONS:
+                        exception.Add(word);
+                        exception = NormalizeException(exception);
+                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
+                        break;
+                    case ELEM_PATTERNS:
+                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
+                        break;
+                    case ELEM_HYPHEN:
+                        // nothing to do
+                        break;
+                }
+                if (currElement != ELEM_HYPHEN)
+                {
+                    token.Length = 0;
+                }
+            }
+            if (currElement == ELEM_HYPHEN)
+            {
+                currElement = ELEM_EXCEPTIONS;
+            }
+            else
+            {
+                currElement = 0;
+            }
+        }
+
+        /// <seealso cref= org.xml.sax.ContentHandler#characters(char[], int, int) </seealso>
+        public void Characters(char[] ch, int start, int length)
+        {
+            StringBuilder chars = new StringBuilder(length);
+            chars.Append(ch, start, length);
+            string word = ReadToken(chars);
+            while (word != null)
+            {
+                // System.out.println("\"" + word + "\"");
+                switch (currElement)
+                {
+                    case ELEM_CLASSES:
+                        consumer.AddClass(word);
+                        break;
+                    case ELEM_EXCEPTIONS:
+                        exception.Add(word);
+                        exception = NormalizeException(exception);
+                        consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
+                        exception.Clear();
+                        break;
+                    case ELEM_PATTERNS:
+                        consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
+                        break;
+                }
+                word = ReadToken(chars);
+            }
+
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/7ecb7529/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
new file mode 100644
index 0000000..88cfd01
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
@@ -0,0 +1,816 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Compound.Hyphenation
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <h2>Ternary Search Tree.</h2>
+    /// 
+    /// <para>
+    /// A ternary search tree is a hybrid between a binary tree and a digital search
+    /// tree (trie). Keys are limited to strings. A data value of type char is stored
+    /// in each leaf node. It can be used as an index (or pointer) to the data.
+    /// Branches that only contain one key are compressed to one node by storing a
+    /// pointer to the trailer substring of the key. This class is intended to serve
+    /// as base class or helper class to implement Dictionary collections or the
+    /// like. Ternary trees have some nice properties as the following: the tree can
+    /// be traversed in sorted order, partial matches (wildcard) can be implemented,
+    /// retrieval of all keys within a given distance from the target, etc. The
+    /// storage requirements are higher than a binary tree but a lot less than a
+    /// trie. Performance is comparable with a hash table, sometimes it outperforms a
+    /// hash function (most of the time can determine a miss faster than a hash).
+    /// </para>
+    /// 
+    /// <para>
+    /// The main purpose of this java port is to serve as a base for implementing
+    /// TeX's hyphenation algorithm (see The TeXBook, appendix H). Each language
+    /// requires from 5000 to 15000 hyphenation patterns which will be keys in this
+    /// tree. The strings patterns are usually small (from 2 to 5 characters), but
+    /// each char in the tree is stored in a node. Thus memory usage is the main
+    /// concern. We will sacrifice 'elegance' to keep memory requirements to the
+    /// minimum. Using java's char type as pointer (yes, I know pointer it is a
+    /// forbidden word in java) we can keep the size of the node to be just 8 bytes
+    /// (3 pointers and the data char). This gives room for about 65000 nodes. In my
+    /// tests the english patterns took 7694 nodes and the german patterns 10055
+    /// nodes, so I think we are safe.
+    /// </para>
+    /// 
+    /// <para>
+    /// All said, this is a map with strings as keys and char as value. Pretty
+    /// limited!. It can be extended to a general map by using the string
+    /// representation of an object and using the char value as an index to an array
+    /// that contains the object values.
+    /// </para>
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+
+    public class TernaryTree : ICloneable
+    {
+        /// <summary>
+        /// We use 4 arrays to represent a node. I guess I should have created a proper
+        /// node class, but somehow Knuth's pascal code made me forget we now have a
+        /// portable language with virtual memory management and automatic garbage
+        /// collection! And now is kind of late, furthermore, if it ain't broken, don't
+        /// fix it.
+        /// </summary>
+
+        /// <summary>
+        /// Pointer to low branch and to rest of the key when it is stored directly in
+        /// this node, we don't have unions in java!
+        /// </summary>
+        protected internal char[] lo;
+
+        /// <summary>
+        /// Pointer to high branch.
+        /// </summary>
+        protected internal char[] hi;
+
+        /// <summary>
+        /// Pointer to equal branch and to data when this node is a string terminator.
+        /// </summary>
+        protected internal char[] eq;
+
+        /// <summary>
+        /// <P>
+        /// The character stored in this node: splitchar. Two special values are
+        /// reserved:
+        /// </P>
+        /// <ul>
+        /// <li>0x0000 as string terminator</li>
+        /// <li>0xFFFF to indicate that the branch starting at this node is compressed</li>
+        /// </ul>
+        /// <para>
+        /// This shouldn't be a problem if we give the usual semantics to strings since
+        /// 0xFFFF is guaranteed not to be an Unicode character.
+        /// </para>
+        /// </summary>
+        protected internal char[] sc;
+
+        /// <summary>
+        /// This vector holds the trailing of the keys when the branch is compressed.
+        /// </summary>
+        protected internal CharVector kv;
+
+        protected internal char root;
+
+        protected internal char freenode;
+
+        protected internal int length; // number of items in tree
+
+        protected internal const int BLOCK_SIZE = 2048; // allocation size for arrays
+
+        internal TernaryTree()
+        {
+            Init();
+        }
+
+        protected internal virtual void Init()
+        {
+            root = (char)0;
+            freenode = (char)1;
+            length = 0;
+            lo = new char[BLOCK_SIZE];
+            hi = new char[BLOCK_SIZE];
+            eq = new char[BLOCK_SIZE];
+            sc = new char[BLOCK_SIZE];
+            kv = new CharVector();
+        }
+
+        /// <summary>
+        /// Branches are initially compressed, needing one node per key plus the size
+        /// of the string key. They are decompressed as needed when another key with
+        /// same prefix is inserted. This saves a lot of space, specially for long
+        /// keys.
+        /// </summary>
+        public virtual void Insert(string key, char val)
+        {
+            // make sure we have enough room in the arrays
+            int len = key.Length + 1; // maximum number of nodes that may be generated
+            if (freenode + len > eq.Length)
+            {
+                RedimNodeArrays(eq.Length + BLOCK_SIZE);
+            }
+            char[] strkey = new char[len--];
+            key.CopyTo(0, strkey, 0, len - 0);
+            strkey[len] = (char)0;
+            root = Insert(root, strkey, 0, val);
+        }
+
+        public virtual void Insert(char[] key, int start, char val)
+        {
+            int len = StrLen(key) + 1;
+            if (freenode + len > eq.Length)
+            {
+                RedimNodeArrays(eq.Length + BLOCK_SIZE);
+            }
+            root = Insert(root, key, start, val);
+        }
+
+        /// <summary>
+        /// The actual insertion function, recursive version.
+        /// </summary>
+        private char Insert(char p, char[] key, int start, char val)
+        {
+            int len = StrLen(key, start);
+            if (p == 0)
+            {
+                // this means there is no branch, this node will start a new branch.
+                // Instead of doing that, we store the key somewhere else and create
+                // only one node with a pointer to the key
+                p = freenode++;
+                eq[p] = val; // holds data
+                length++;
+                hi[p] = (char)0;
+                if (len > 0)
+                {
+                    sc[p] = (char)0xFFFF; // indicates branch is compressed
+                    lo[p] = (char)kv.Alloc(len + 1); // use 'lo' to hold pointer to key
+                    StrCpy(kv.Array, lo[p], key, start);
+                }
+                else
+                {
+                    sc[p] = (char)0;
+                    lo[p] = (char)0;
+                }
+                return p;
+            }
+
+            if (sc[p] == 0xFFFF)
+            {
+                // branch is compressed: need to decompress
+                // this will generate garbage in the external key array
+                // but we can do some garbage collection later
+                char pp = freenode++;
+                lo[pp] = lo[p]; // previous pointer to key
+                eq[pp] = eq[p]; // previous pointer to data
+                lo[p] = (char)0;
+                if (len > 0)
+                {
+                    sc[p] = kv[lo[pp]];
+                    eq[p] = pp;
+                    lo[pp]++;
+                    if (kv[lo[pp]] == 0)
+                    {
+                        // key completly decompressed leaving garbage in key array
+                        lo[pp] = (char)0;
+                        sc[pp] = (char)0;
+                        hi[pp] = (char)0;
+                    }
+                    else
+                    {
+                        // we only got first char of key, rest is still there
+                        sc[pp] = (char)0xFFFF;
+                    }
+                }
+                else
+                {
+                    // In this case we can save a node by swapping the new node
+                    // with the compressed node
+                    sc[pp] = (char)0xFFFF;
+                    hi[p] = pp;
+                    sc[p] = (char)0;
+                    eq[p] = val;
+                    length++;
+                    return p;
+                }
+            }
+            char s = key[start];
+            if (s < sc[p])
+            {
+                lo[p] = Insert(lo[p], key, start, val);
+            }
+            else if (s == sc[p])
+            {
+                if (s != 0)
+                {
+                    eq[p] = Insert(eq[p], key, start + 1, val);
+                }
+                else
+                {
+                    // key already in tree, overwrite data
+                    eq[p] = val;
+                }
+            }
+            else
+            {
+                hi[p] = Insert(hi[p], key, start, val);
+            }
+            return p;
+        }
+
+        /// <summary>
+        /// Compares 2 null terminated char arrays
+        /// </summary>
+        public static int StrCmp(char[] a, int startA, char[] b, int startB)
+        {
+            for (; a[startA] == b[startB]; startA++, startB++)
+            {
+                if (a[startA] == 0)
+                {
+                    return 0;
+                }
+            }
+            return a[startA] - b[startB];
+        }
+
+        /// <summary>
+        /// Compares a string with null terminated char array
+        /// </summary>
+        public static int StrCmp(string str, char[] a, int start)
+        {
+            int i, d, len = str.Length;
+            for (i = 0; i < len; i++)
+            {
+                d = (int)str[i] - a[start + i];
+                if (d != 0)
+                {
+                    return d;
+                }
+                if (a[start + i] == 0)
+                {
+                    return d;
+                }
+            }
+            if (a[start + i] != 0)
+            {
+                return -a[start + i];
+            }
+            return 0;
+
+        }
+
+        public static void StrCpy(char[] dst, int di, char[] src, int si)
+        {
+            while (src[si] != 0)
+            {
+                dst[di++] = src[si++];
+            }
+            dst[di] = (char)0;
+        }
+
+        public static int StrLen(char[] a, int start)
+        {
+            int len = 0;
+            for (int i = start; i < a.Length && a[i] != 0; i++)
+            {
+                len++;
+            }
+            return len;
+        }
+
+        public static int StrLen(char[] a)
+        {
+            return StrLen(a, 0);
+        }
+
+        public virtual int Find(string key)
+        {
+            int len = key.Length;
+            char[] strkey = new char[len + 1];
+            key.CopyTo(0, strkey, 0, len - 0);
+            strkey[len] = (char)0;
+
+            return Find(strkey, 0);
+        }
+
+        public virtual int Find(char[] key, int start)
+        {
+            int d;
+            char p = root;
+            int i = start;
+            char c;
+
+            while (p != 0)
+            {
+                if (sc[p] == 0xFFFF)
+                {
+                    if (StrCmp(key, i, kv.Array, lo[p]) == 0)
+                    {
+                        return eq[p];
+                    }
+                    else
+                    {
+                        return -1;
+                    }
+                }
+                c = key[i];
+                d = c - sc[p];
+                if (d == 0)
+                {
+                    if (c == 0)
+                    {
+                        return eq[p];
+                    }
+                    i++;
+                    p = eq[p];
+                }
+                else if (d < 0)
+                {
+                    p = lo[p];
+                }
+                else
+                {
+                    p = hi[p];
+                }
+            }
+            return -1;
+        }
+
+        public virtual bool Knows(string key)
+        {
+            return (Find(key) >= 0);
+        }
+
+        // redimension the arrays
+        private void RedimNodeArrays(int newsize)
+        {
+            int len = newsize < lo.Length ? newsize : lo.Length;
+            char[] na = new char[newsize];
+            Array.Copy(lo, 0, na, 0, len);
+            lo = na;
+            na = new char[newsize];
+            Array.Copy(hi, 0, na, 0, len);
+            hi = na;
+            na = new char[newsize];
+            Array.Copy(eq, 0, na, 0, len);
+            eq = na;
+            na = new char[newsize];
+            Array.Copy(sc, 0, na, 0, len);
+            sc = na;
+        }
+
+        public virtual int Length
+        {
+            get { return length; }
+        }
+
+        public object Clone()
+        {
+            TernaryTree t = new TernaryTree();
+            t.lo = (char[])this.lo.Clone();
+            t.hi = (char[])this.hi.Clone();
+            t.eq = (char[])this.eq.Clone();
+            t.sc = (char[])this.sc.Clone();
+            t.kv = (CharVector)this.kv.Clone();
+            t.root = this.root;
+            t.freenode = this.freenode;
+            t.length = this.length;
+
+            return t;
+        }
+
+        /// <summary>
+        /// Recursively insert the median first and then the median of the lower and
+        /// upper halves, and so on in order to get a balanced tree. The array of keys
+        /// is assumed to be sorted in ascending order.
+        /// </summary>
+        protected internal virtual void InsertBalanced(string[] k, char[] v, int offset, int n)
+        {
+            int m;
+            if (n < 1)
+            {
+                return;
+            }
+            m = n >> 1;
+
+            Insert(k[m + offset], v[m + offset]);
+            InsertBalanced(k, v, offset, m);
+
+            InsertBalanced(k, v, offset + m + 1, n - m - 1);
+        }
+
+        /// <summary>
+        /// Balance the tree for best search performance
+        /// </summary>
+        public virtual void Balance()
+        {
+            // System.out.print("Before root splitchar = ");
+            // System.out.println(sc[root]);
+
+            int i = 0, n = length;
+            string[] k = new string[n];
+            char[] v = new char[n];
+            Iterator iter = new Iterator(this);
+            while (iter.MoveNext())
+            {
+                v[i] = iter.Value;
+                k[i++] = iter.Current;
+            }
+            Init();
+            InsertBalanced(k, v, 0, n);
+
+            // With uniform letter distribution sc[root] should be around 'm'
+            // System.out.print("After root splitchar = ");
+            // System.out.println(sc[root]);
+        }
+
+        /// <summary>
+        /// Each node stores a character (splitchar) which is part of some key(s). In a
+        /// compressed branch (one that only contain a single string key) the trailer
+        /// of the key which is not already in nodes is stored externally in the kv
+        /// array. As items are inserted, key substrings decrease. Some substrings may
+        /// completely disappear when the whole branch is totally decompressed. The
+        /// tree is traversed to find the key substrings actually used. In addition,
+        /// duplicate substrings are removed using a map (implemented with a
+        /// TernaryTree!).
+        /// 
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            // first balance the tree for best performance
+            Balance();
+
+            // redimension the node arrays
+            RedimNodeArrays(freenode);
+
+            // ok, compact kv array
+            CharVector kx = new CharVector();
+            kx.Alloc(1);
+            TernaryTree map = new TernaryTree();
+            Compact(kx, map, root);
+            kv = kx;
+            kv.TrimToSize();
+        }
+
+        private void Compact(CharVector kx, TernaryTree map, char p)
+        {
+            int k;
+            if (p == 0)
+            {
+                return;
+            }
+            if (sc[p] == 0xFFFF)
+            {
+                k = map.Find(kv.Array, lo[p]);
+                if (k < 0)
+                {
+                    k = kx.Alloc(StrLen(kv.Array, lo[p]) + 1);
+                    StrCpy(kx.Array, k, kv.Array, lo[p]);
+                    map.Insert(kx.Array, k, (char)k);
+                }
+                lo[p] = (char)k;
+            }
+            else
+            {
+                Compact(kx, map, lo[p]);
+                if (sc[p] != 0)
+                {
+                    Compact(kx, map, eq[p]);
+                }
+                Compact(kx, map, hi[p]);
+            }
+        }
+
+        public virtual IEnumerator<string> Keys()
+        {
+            return new Iterator(this);
+        }
+
+        /// <summary>
+        /// Enumerator for TernaryTree
+        /// 
+        /// LUCENENET NOTE: This differs a bit from its Java counterpart to adhere to
+        /// .NET IEnumerator semantics. In Java, when the <see cref="Iterator"/> is
+        /// instantiated, it is already positioned at the first element. However,
+        /// to act like a .NET IEnumerator, the initial state is undefined and considered
+        /// to be before the first element until <see cref="MoveNext"/> is called, and
+        /// if a move took place it will return <c>true</c>;
+        /// </summary>
+        public class Iterator : IEnumerator<string>
+        {
+            private readonly TernaryTree outerInstance;
+
+
+            /// <summary>
+            /// current node index
+            /// </summary>
+            private int cur;
+
+            /// <summary>
+            /// current key
+            /// </summary>
+            private string curkey;
+
+            internal class Item : ICloneable
+            {
+                internal char parent;
+                internal char child;
+
+                public Item()
+                {
+                    parent = (char)0;
+                    child = (char)0;
+                }
+
+                public Item(char p, char c)
+                {
+                    parent = p;
+                    child = c;
+                }
+
+                public object Clone()
+                {
+                    return new Item(parent, child);
+                }
+
+            }
+
+            /// <summary>
+            /// Node stack
+            /// </summary>
+            internal Stack<Item> ns;
+
+            /// <summary>
+            /// key stack implemented with a StringBuilder
+            /// </summary>
+            internal StringBuilder ks;
+
+            private bool isInitialized = false;
+
+            public Iterator(TernaryTree outerInstance)
+            {
+                this.outerInstance = outerInstance;
+                cur = -1;
+                ns = new Stack<Item>();
+                ks = new StringBuilder();
+                isInitialized = false;
+            }
+
+            public virtual void Rewind()
+            {
+                ns.Clear();
+                ks.Length = 0;
+                cur = outerInstance.root;
+                Run();
+            }
+
+            public virtual char Value
+            {
+                get
+                {
+                    if (cur >= 0)
+                    {
+                        return outerInstance.eq[cur];
+                    }
+                    return (char)0;
+                }
+            }
+
+            /// <summary>
+            /// traverse upwards
+            /// </summary>
+            internal virtual int Up()
+            {
+                Item i = new Item();
+                int res = 0;
+
+                if (ns.Count == 0)
+                {
+                    return -1;
+                }
+
+                if (cur != 0 && outerInstance.sc[cur] == 0)
+                {
+                    return outerInstance.lo[cur];
+                }
+
+                bool climb = true;
+
+                while (climb)
+                {
+                    i = ns.Pop();
+                    i.child++;
+                    switch ((int)i.child)
+                    {
+                        case 1:
+                            if (outerInstance.sc[i.parent] != 0)
+                            {
+                                res = outerInstance.eq[i.parent];
+                                ns.Push((Item)i.Clone());
+                                ks.Append(outerInstance.sc[i.parent]);
+                            }
+                            else
+                            {
+                                i.child++;
+                                ns.Push((Item)i.Clone());
+                                res = outerInstance.hi[i.parent];
+                            }
+                            climb = false;
+                            break;
+
+                        case 2:
+                            res = outerInstance.hi[i.parent];
+                            ns.Push((Item)i.Clone());
+                            if (ks.Length > 0)
+                            {
+                                ks.Length = ks.Length - 1; // pop
+                            }
+                            climb = false;
+                            break;
+
+                        default:
+                            if (ns.Count == 0)
+                            {
+                                return -1;
+                            }
+                            climb = true;
+                            break;
+                    }
+                }
+                return res;
+            }
+
+            /// <summary>
+            /// traverse the tree to find next key
+            /// </summary>
+            internal virtual int Run()
+            {
+                if (cur == -1)
+                {
+                    return -1;
+                }
+
+                bool leaf = false;
+                while (true)
+                {
+                    // first go down on low branch until leaf or compressed branch
+                    while (cur != 0)
+                    {
+                        if (outerInstance.sc[cur] == 0xFFFF)
+                        {
+                            leaf = true;
+                            break;
+                        }
+                        ns.Push(new Item((char)cur, '\u0000'));
+                        if (outerInstance.sc[cur] == 0)
+                        {
+                            leaf = true;
+                            break;
+                        }
+                        cur = outerInstance.lo[cur];
+                    }
+                    if (leaf)
+                    {
+                        break;
+                    }
+                    // nothing found, go up one node and try again
+                    cur = Up();
+                    if (cur == -1)
+                    {
+                        return -1;
+                    }
+                }
+                // The current node should be a data node and
+                // the key should be in the key stack (at least partially)
+                StringBuilder buf = new StringBuilder(ks.ToString());
+                if (outerInstance.sc[cur] == 0xFFFF)
+                {
+                    int p = outerInstance.lo[cur];
+                    while (outerInstance.kv[p] != 0)
+                    {
+                        buf.Append(outerInstance.kv[p++]);
+                    }
+                }
+                curkey = buf.ToString();
+                return 0;
+            }
+
+            #region Added for better .NET support
+            public string Current
+            {
+                get
+                {
+                    return curkey;
+                }
+            }
+
+            object IEnumerator.Current
+            {
+                get
+                {
+                    return Current;
+                }
+            }
+
+            public void Dispose()
+            {
+                // nothing to do
+            }
+
+            public bool MoveNext()
+            {
+                if (!isInitialized)
+                {
+                    Rewind();
+                    isInitialized = true;
+                    return cur != -1;
+                }
+                if (cur == -1)
+                {
+                    return false;
+                }
+                cur = Up();
+                Run();
+                return cur != -1;
+            }
+
+            public void Reset()
+            {
+                throw new NotSupportedException();
+            }
+
+            #endregion
+        }
+
+        public virtual void PrintStats(TextWriter @out)
+        {
+            @out.WriteLine("Number of keys = " + Convert.ToString(length));
+            @out.WriteLine("Node count = " + Convert.ToString(freenode));
+            // System.out.println("Array length = " + Integer.toString(eq.length));
+            @out.WriteLine("Key Array length = " + Convert.ToString(kv.Length()));
+
+            /*
+             * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
+             * System.out.print(kv.get(i)); else System.out.println("");
+             * System.out.println("Keys:"); for(Enumeration enum = keys();
+             * enum.hasMoreElements(); ) System.out.println(enum.nextElement());
+             */
+        }
+        /*
+          public static void main(String[] args) {
+            TernaryTree tt = new TernaryTree();
+            tt.insert("Carlos", 'C');
+            tt.insert("Car", 'r');
+            tt.insert("palos", 'l');
+            tt.insert("pa", 'p');
+            tt.trimToSize();
+            System.out.println((char) tt.find("Car"));
+            System.out.println((char) tt.find("Carlos"));
+            System.out.println((char) tt.find("alto"));
+            tt.printStats(System.out);
+          }
+          */
+
+    }
+}
\ No newline at end of file


Mime
View raw message