lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [7/8] Porting Lucene.Net.Suggest (still not compiling)
Date Mon, 15 Sep 2014 22:24:54 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs b/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs
new file mode 100644
index 0000000..181d24e
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/DirectSpellChecker.cs
@@ -0,0 +1,575 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Simple automaton-based spellchecker.
+    /// <para>
+    /// Candidates are presented directly from the term dictionary, based on
+    /// Levenshtein distance. This is an alternative to <seealso cref="SpellChecker"/>
+    /// if you are using an edit-distance-like metric such as Levenshtein
+    /// or <seealso cref="JaroWinklerDistance"/>.
+    /// </para>
+    /// <para>
+    /// A practical benefit of this spellchecker is that it requires no additional
+    /// datastructures (neither in RAM nor on disk) to do its work.
+    /// 
+    /// </para>
+    /// </summary>
+    /// <seealso cref="LevenshteinAutomata"/>
+    /// <seealso cref="FuzzyTermsEnum"/>
+    /// <remarks>
+    /// @lucene.experimental
+    /// </remarks>
+    public class DirectSpellChecker
+    {
+        /// <summary>
+        /// The default <see cref="StringDistance"/>: Damerau-Levenshtein distance, implemented
+        /// internally via <see cref="LevenshteinAutomata"/>.
+        /// <para>
+        /// Note: this is the fastest distance metric, because Damerau-Levenshtein is already used
+        /// to draw candidates from the term dictionary, so this just re-uses that scoring.
+        /// </para>
+        /// </summary>
+        public static readonly StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance();
+
+        /// <summary>
+        /// Maximum edit distance for candidate terms. </summary>
+        private int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
+        /// <summary>
+        /// Minimum prefix length for candidate terms. </summary>
+        private int minPrefix = 1;
+        /// <summary>
+        /// Maximum number of top-N inspections per suggestion; the number of candidates
+        /// inspected is the requested number of suggestions multiplied by this value. </summary>
+        private int maxInspections = 5;
+        /// <summary>
+        /// Minimum accuracy (similarity score) for a term to match. </summary>
+        private float accuracy = SpellChecker.DEFAULT_ACCURACY;
+        /// <summary>
+        /// Value in [0..1] (or absolute number >=1) representing the minimum
+        /// number of documents (of the total) where a term should appear.
+        /// </summary>
+        private float thresholdFrequency = 0f;
+        /// <summary>
+        /// Minimum length (in code points) of a query word for suggestions to be returned. </summary>
+        private int minQueryLength = 4;
+        /// <summary>
+        /// Value in [0..1] (or absolute number >=1) representing the maximum
+        /// number of documents (of the total) a query term can appear in to
+        /// be corrected.
+        /// </summary>
+        private float maxQueryFrequency = 0.01f;
+        /// <summary>
+        /// <c>true</c> if the spellchecker should lowercase the query term before looking it up. </summary>
+        private bool lowerCaseTerms = true;
+        /// <summary>
+        /// The comparator used to order the returned suggestions. </summary>
+        private IComparer<SuggestWord> comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
+        /// <summary>
+        /// The string distance used to score candidates. </summary>
+        private StringDistance distance = INTERNAL_LEVENSHTEIN;
+
+        /// <summary>
+        /// Creates a DirectSpellChecker with the default configuration values
+        /// given by the field initializers. </summary>
+        public DirectSpellChecker()
+        {
+        }
+
+        /// <summary>
+        /// Gets or sets the maximum Levenshtein edit distance from which
+        /// candidate terms are drawn. Assigned values must lie in
+        /// [1, <see cref="LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE"/>].
+        /// </summary>
+        /// <exception cref="NotSupportedException"> if the assigned value is out of range </exception>
+        public virtual int MaxEdits
+        {
+            get { return maxEdits; }
+            set
+            {
+                bool inRange = value >= 1 && value <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
+                if (!inRange)
+                {
+                    throw new NotSupportedException("Invalid maxEdits");
+                }
+                maxEdits = value;
+            }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the minimal number of characters that must match exactly.
+        /// </summary>
+        public virtual int MinPrefix
+        {
+            get { return minPrefix; }
+            set { minPrefix = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the maximum number of top-N inspections per suggestion.
+        /// </summary>
+        public virtual int MaxInspections
+        {
+            get { return maxInspections; }
+            set { maxInspections = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the minimal accuracy the StringDistance must report for a match.
+        /// </summary>
+        public virtual float Accuracy
+        {
+            get { return accuracy; }
+            set { accuracy = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the minimal threshold of documents a term must appear in for a match.
+        /// Values below 1 are stored as given; values of 1 or more must be whole numbers.
+        /// </summary>
+        /// <exception cref="ArgumentException"> if the assigned value is >= 1 and has a fractional part </exception>
+        public virtual float ThresholdFrequency
+        {
+            get { return thresholdFrequency; }
+            set
+            {
+                bool isAbsoluteCount = value >= 1f;
+                if (isAbsoluteCount && value != (int)value)
+                {
+                    throw new ArgumentException("Fractional absolute document frequencies are not allowed");
+                }
+                thresholdFrequency = value;
+            }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the minimum length a query term needs for suggestions to be returned. </summary>
+        public virtual int MinQueryLength
+        {
+            get { return minQueryLength; }
+            set { minQueryLength = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the maximum threshold of documents a query term can appear in
+        /// for suggestions still to be provided. Values below 1 are stored as given;
+        /// values of 1 or more must be whole numbers.
+        /// </summary>
+        /// <exception cref="ArgumentException"> if the assigned value is >= 1 and has a fractional part </exception>
+        public virtual float MaxQueryFrequency
+        {
+            get { return maxQueryFrequency; }
+            set
+            {
+                bool isAbsoluteCount = value >= 1f;
+                if (isAbsoluteCount && value != (int)value)
+                {
+                    throw new ArgumentException("Fractional absolute document frequencies are not allowed");
+                }
+                maxQueryFrequency = value;
+            }
+        }
+
+
+        /// <summary>
+        /// Gets or sets whether the spellchecker should lowercase terms. </summary>
+        public virtual bool LowerCaseTerms
+        {
+            get { return lowerCaseTerms; }
+            set { lowerCaseTerms = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the comparator currently in use.
+        /// </summary>
+        public virtual IComparer<SuggestWord> Comparator
+        {
+            get { return comparator; }
+            set { comparator = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the string distance metric in use.
+        /// </summary>
+        public virtual StringDistance Distance
+        {
+            get { return distance; }
+            set { distance = value; }
+        }
+
+
+        /// <summary>
+        /// Convenience overload: forwards to the four-argument
+        /// <c>SuggestSimilar(term, numSug, ir, suggestMode)</c> with
+        /// <c>SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX</c> as the suggest mode.
+        /// </summary>
+        public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir)
+        {
+            SuggestMode defaultMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
+            return SuggestSimilar(term, numSug, ir, defaultMode);
+        }
+
+        /// <summary>
+        /// Convenience overload: forwards to the five-argument
+        /// <c>SuggestSimilar(term, numSug, ir, suggestMode, accuracy)</c>, passing
+        /// this instance's configured accuracy.
+        /// </summary>
+        public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir, SuggestMode suggestMode)
+        {
+            float configuredAccuracy = accuracy;
+            return SuggestSimilar(term, numSug, ir, suggestMode, configuredAccuracy);
+        }
+
+        /// <summary>
+        /// Suggest similar words.
+        ///
+        /// <para>Unlike <see cref="SpellChecker"/>, the similarity used to fetch the most
+        /// relevant terms is an edit distance, therefore typically a low value
+        /// for numSug will work very well.
+        /// </para>
+        /// <para>
+        /// An empty array is returned when the query term is shorter than the configured
+        /// minimum query length, when it is already in the index and
+        /// <paramref name="suggestMode"/> is <c>SUGGEST_WHEN_NOT_IN_INDEX</c>, or when its
+        /// document frequency exceeds the configured maximum query frequency.
+        /// </para>
+        /// </summary>
+        /// <param name="term"> Term you want to spell check on </param>
+        /// <param name="numSug"> the maximum number of suggested words </param>
+        /// <param name="ir"> IndexReader to find terms from </param>
+        /// <param name="suggestMode"> specifies when to return suggested words </param>
+        /// <param name="accuracy"> return only suggested words that match with this similarity </param>
+        /// <returns> sorted list of the suggested words according to the comparator </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public virtual SuggestWord[] SuggestSimilar(Term term, int numSug, IndexReader ir, SuggestMode suggestMode, float accuracy)
+        {
+            CharsRef spare = new CharsRef();
+            string text = term.Text();
+            if (minQueryLength > 0 && text.CodePointCount(0, text.Length) < minQueryLength)
+            {
+                return new SuggestWord[0];
+            }
+
+            if (lowerCaseTerms)
+            {
+                // Culture-invariant lowercasing; .NET has no Locale.ROOT, ToLowerInvariant is its counterpart.
+                term = new Term(term.Field(), text.ToLowerInvariant());
+            }
+
+            int docfreq = ir.DocFreq(term);
+
+            if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0)
+            {
+                return new SuggestWord[0];
+            }
+
+            int maxDoc = ir.MaxDoc();
+
+            // maxQueryFrequency >= 1 is an absolute document count, otherwise a fraction of maxDoc.
+            if (maxQueryFrequency >= 1f && docfreq > maxQueryFrequency)
+            {
+                return new SuggestWord[0];
+            }
+            else if (docfreq > (int)Math.Ceiling(maxQueryFrequency * (float)maxDoc))
+            {
+                return new SuggestWord[0];
+            }
+
+            // From here on docfreq is the frequency a candidate has to exceed (strictly).
+            // Only SUGGEST_MORE_POPULAR keeps the query term's own frequency as that floor.
+            if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR)
+            {
+                docfreq = 0;
+            }
+
+            if (thresholdFrequency >= 1f)
+            {
+                docfreq = Math.Max(docfreq, (int)thresholdFrequency);
+            }
+            else if (thresholdFrequency > 0f)
+            {
+                docfreq = Math.Max(docfreq, (int)(thresholdFrequency * (float)maxDoc) - 1);
+            }
+
+            ICollection<ScoreTerm> terms = null;
+            int inspections = numSug * maxInspections;
+
+            // try ed=1 first, in case we get lucky
+            terms = suggestSimilar(term, inspections, ir, docfreq, 1, accuracy, spare);
+            if (maxEdits > 1 && terms.Count < inspections)
+            {
+                // Not enough candidates at distance 1: merge in those found at the full edit
+                // distance. The set de-duplicates via ScoreTerm.Equals/GetHashCode.
+                var moreTerms = new HashSet<ScoreTerm>();
+                moreTerms.UnionWith(terms);
+                moreTerms.UnionWith(suggestSimilar(term, inspections, ir, docfreq, maxEdits, accuracy, spare));
+                terms = moreTerms;
+            }
+
+            // create the suggestword response, sort it, and trim it to size.
+
+            var suggestions = new SuggestWord[terms.Count];
+            // Filled from the last slot backwards; the final order is set by the sort below.
+            int index = suggestions.Length - 1;
+            foreach (ScoreTerm s in terms)
+            {
+                SuggestWord suggestion = new SuggestWord();
+                if (s.termAsString == null)
+                {
+                    // String creation was deferred for the internal distance; decode it now.
+                    UnicodeUtil.UTF8toUTF16(s.term, spare);
+                    s.termAsString = spare.ToString();
+                }
+                suggestion.@string = s.termAsString;
+                suggestion.score = s.score;
+                suggestion.freq = s.docfreq;
+                suggestions[index--] = suggestion;
+            }
+
+            ArrayUtil.TimSort(suggestions, Collections.ReverseOrder(comparator));
+            if (numSug < suggestions.Length)
+            {
+                SuggestWord[] trimmed = new SuggestWord[numSug];
+                Array.Copy(suggestions, 0, trimmed, 0, numSug);
+                suggestions = trimmed;
+            }
+            return suggestions;
+        }
+
+        /// <summary>
+        /// Provide spelling corrections based on several parameters.
+        /// <para>
+        /// Candidates are enumerated with a <see cref="FuzzyTermsEnum"/> over the term's field and
+        /// kept in a priority queue holding at most <paramref name="numSug"/> entries. The query
+        /// term itself, candidates whose document frequency does not exceed
+        /// <paramref name="docfreq"/>, and candidates scoring below <paramref name="accuracy"/>
+        /// are skipped.
+        /// </para>
+        /// </summary>
+        /// <param name="term"> The term to suggest spelling corrections for </param>
+        /// <param name="numSug"> The maximum number of spelling corrections </param>
+        /// <param name="ir"> The index reader to fetch the candidate spelling corrections from </param>
+        /// <param name="docfreq"> The document frequency a potential suggestion has to exceed in order to be included </param>
+        /// <param name="editDistance"> The maximum edit distance candidates are allowed to have </param>
+        /// <param name="accuracy"> The minimum accuracy a suggested spelling correction needs to have in order to be included </param>
+        /// <param name="spare"> a chars scratch </param>
+        /// <returns> a collection of spelling corrections sorted by <code>ScoreTerm</code>'s natural order;
+        /// an empty collection if the field has no terms. </returns>
+        /// <exception cref="IOException"> If I/O related errors occur </exception>
+        protected internal virtual ICollection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance, float accuracy, CharsRef spare)
+        {
+
+            var atts = new AttributeSource();
+            MaxNonCompetitiveBoostAttribute maxBoostAtt = atts.AddAttribute<MaxNonCompetitiveBoostAttribute>();
+            Terms terms = MultiFields.GetTerms(ir, term.Field());
+            if (terms == null)
+            {
+                // No terms for this field: return an empty collection of the declared element type.
+                return new List<ScoreTerm>();
+            }
+            FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.Max(minPrefix, editDistance - 1), true);
+
+            var stQueue = new PriorityQueue<ScoreTerm>();
+
+            BytesRef queryTerm = new BytesRef(term.Text());
+            BytesRef candidateTerm;
+            // Scratch entry that is filled in and offered to the queue; see the recycling below.
+            ScoreTerm st = new ScoreTerm();
+            BoostAttribute boostAtt = e.Attributes().AddAttribute<BoostAttribute>();
+            while ((candidateTerm = e.Next()) != null)
+            {
+                float boost = boostAtt.Boost;
+                // ignore uncompetitive hits
+                if (stQueue.Size() >= numSug && boost <= stQueue.Peek().boost)
+                {
+                    continue;
+                }
+
+                // ignore exact match of the same term
+                if (queryTerm.BytesEquals(candidateTerm))
+                {
+                    continue;
+                }
+
+                int df = e.DocFreq();
+
+                // check docFreq if required
+                if (df <= docfreq)
+                {
+                    continue;
+                }
+
+                float score;
+                string termAsString;
+                if (distance == INTERNAL_LEVENSHTEIN)
+                {
+                    // delay creating strings until the end
+                    termAsString = null;
+                    // undo FuzzyTermsEnum's scale factor for a real scaled lev score
+                    score = boost / e.ScaleFactor + e.MinSimilarity;
+                }
+                else
+                {
+                    // A custom distance needs the candidate as a string, so decode it now.
+                    UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
+                    termAsString = spare.ToString();
+                    score = distance.GetDistance(term.Text(), termAsString);
+                }
+
+                if (score < accuracy)
+                {
+                    continue;
+                }
+
+                // add new entry in PQ
+                st.term = BytesRef.DeepCopyOf(candidateTerm);
+                st.boost = boost;
+                st.docfreq = df;
+                st.termAsString = termAsString;
+                st.score = score;
+                stQueue.Offer(st);
+                // possibly drop entries from queue; a dropped entry is recycled as the next scratch object
+                st = (stQueue.Size() > numSug) ? stQueue.Poll() : new ScoreTerm();
+                // Once the queue is full, publish its head's boost as the non-competitive bound.
+                maxBoostAtt.MaxNonCompetitiveBoost = (stQueue.Size() >= numSug) ? stQueue.Peek().boost : float.NegativeInfinity;
+            }
+
+            return stQueue;
+        }
+
+        /// <summary>
+        /// A candidate spelling correction, used internally by <see cref="DirectSpellChecker"/>.
+        /// Equality and hashing are based on <see cref="term"/> only.
+        /// </summary>
+        protected internal class ScoreTerm : IComparable<ScoreTerm>
+        {
+            /// <summary>
+            /// The actual spellcheck correction.
+            /// </summary>
+            public BytesRef term;
+
+            /// <summary>
+            /// The boost representing the similarity from the FuzzyTermsEnum (internal similarity score)
+            /// </summary>
+            public float boost;
+
+            /// <summary>
+            /// The df of the spellcheck correction.
+            /// </summary>
+            public int docfreq;
+
+            /// <summary>
+            /// The spellcheck correction represented as string, can be <code>null</code>.
+            /// </summary>
+            public string termAsString;
+
+            /// <summary>
+            /// The similarity score.
+            /// </summary>
+            public float score;
+
+            /// <summary>
+            /// Creates an empty instance; the fields are filled in by the caller.
+            /// </summary>
+            public ScoreTerm()
+            {
+            }
+
+            /// <summary>
+            /// Orders by <see cref="boost"/>; equal boosts are ordered by the reverse
+            /// term comparison. Entries with byte-equal terms always compare as 0.
+            /// </summary>
+            public virtual int CompareTo(ScoreTerm other)
+            {
+                if (term.BytesEquals(other.term))
+                {
+                    return 0; // consistent with equals
+                }
+                return this.boost == other.boost
+                    ? other.term.CompareTo(this.term)
+                    : this.boost.CompareTo(other.boost);
+            }
+
+            /// <summary>
+            /// Hash derived from <see cref="term"/> only (0 for a null term).
+            /// </summary>
+            public override int GetHashCode()
+            {
+                int termHash = (term == null) ? 0 : term.GetHashCode();
+                return 31 + termHash;
+            }
+
+            /// <summary>
+            /// Two instances are equal when they have the same runtime type and
+            /// byte-equal terms (or both terms are null).
+            /// </summary>
+            public override bool Equals(object obj)
+            {
+                if (this == obj)
+                {
+                    return true;
+                }
+                if (obj == null || this.GetType() != obj.GetType())
+                {
+                    return false;
+                }
+                ScoreTerm that = (ScoreTerm)obj;
+                bool termsDiffer = (term == null)
+                    ? that.term != null
+                    : !term.BytesEquals(that.term);
+                return !termsDiffer;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs b/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs
new file mode 100644
index 0000000..9a185cb
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/HighFrequencyDictionary.cs
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Search.Suggest;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Spell
+{
+    /// <summary>
+    /// A dictionary whose entries are the terms of one field of a Lucene index
+    /// that appear in a number of documents at or above a given threshold.
+    /// <para>
+    /// The threshold is a value in [0..1] representing the minimum
+    /// number of documents (of the total) where a term should appear.
+    /// </para>
+    /// <para>
+    /// Based on LuceneDictionary.
+    /// </para>
+    /// </summary>
+    public class HighFrequencyDictionary : Dictionary
+    {
+        private IndexReader reader;
+        private string field;
+        private float thresh;
+
+        /// <summary>
+        /// Creates a new Dictionary that pulls its source terms from
+        /// the given <code>field</code> of the given <code>reader</code>.
+        /// <para>
+        /// Terms whose document frequency is below <code>thresh</code> times the
+        /// reader's number of documents are excluded.
+        /// </para>
+        /// </summary>
+        public HighFrequencyDictionary(IndexReader reader, string field, float thresh)
+        {
+            this.reader = reader;
+            this.field = field;
+            this.thresh = thresh;
+        }
+
+        /// <summary>
+        /// Returns a fresh iterator over the sufficiently frequent terms.
+        /// </summary>
+        public InputIterator EntryIterator
+        {
+            get { return new HighFrequencyIterator(this); }
+        }
+
+        /// <summary>
+        /// Iterates the field's terms, yielding only those whose document frequency
+        /// reaches the minimum document count computed at construction.
+        /// </summary>
+        internal sealed class HighFrequencyIterator : InputIterator
+        {
+            private readonly HighFrequencyDictionary outerInstance;
+
+            internal readonly BytesRef spare = new BytesRef();
+            internal readonly TermsEnum termsEnum;
+            internal int minNumDocs;
+            internal long freq;
+
+            internal HighFrequencyIterator(HighFrequencyDictionary outerInstance)
+            {
+                this.outerInstance = outerInstance;
+                Terms fieldTerms = MultiFields.GetTerms(outerInstance.reader, outerInstance.field);
+                // termsEnum stays null when the field has no terms; Next() then yields nothing.
+                termsEnum = (fieldTerms != null) ? fieldTerms.Iterator(null) : null;
+                minNumDocs = (int)(outerInstance.thresh * (float)outerInstance.reader.NumDocs());
+            }
+
+            /// <summary>
+            /// True when <paramref name="freq"/> is at least the minimum document count.
+            /// </summary>
+            internal bool IsFrequent(int freq)
+            {
+                return minNumDocs <= freq;
+            }
+
+            /// <summary>
+            /// Document frequency of the term most recently returned by <see cref="Next"/>.
+            /// </summary>
+            public long Weight
+            {
+                get { return freq; }
+            }
+
+            /// <summary>
+            /// Advances to the next sufficiently frequent term and returns it, or
+            /// <c>null</c> when there are no more terms. The returned instance is a
+            /// shared scratch object that is overwritten by the following call.
+            /// </summary>
+            public BytesRef Next()
+            {
+                if (termsEnum == null)
+                {
+                    return null;
+                }
+                for (BytesRef candidate = termsEnum.Next(); candidate != null; candidate = termsEnum.Next())
+                {
+                    if (!IsFrequent(termsEnum.DocFreq()))
+                    {
+                        continue;
+                    }
+                    freq = termsEnum.DocFreq();
+                    spare.CopyBytes(candidate);
+                    return spare;
+                }
+                return null;
+            }
+
+            /// <summary>
+            /// The comparator of the underlying terms enum, or <c>null</c> if there is none.
+            /// </summary>
+            public IComparer<BytesRef> Comparator
+            {
+                get { return (termsEnum == null) ? null : termsEnum.Comparator; }
+            }
+
+            /// <summary>Always <c>null</c>: this iterator carries no payloads.</summary>
+            public BytesRef Payload
+            {
+                get { return null; }
+            }
+
+            /// <summary>Always <c>false</c>.</summary>
+            public bool HasPayloads
+            {
+                get { return false; }
+            }
+
+            /// <summary>Always <c>null</c>: this iterator carries no contexts.</summary>
+            public HashSet<BytesRef> Contexts
+            {
+                get { return null; }
+            }
+
+            /// <summary>Always <c>false</c>.</summary>
+            public bool HasContexts
+            {
+                get { return false; }
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs b/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs
new file mode 100644
index 0000000..56e4f4a
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/JaroWinklerDistance.cs
@@ -0,0 +1,173 @@
+using System;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Similarity measure for short strings such as person names.
+    /// </summary>
+    /// <remarks>
+    /// See http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
+    /// </remarks>
+    public class JaroWinklerDistance : StringDistance
+    {
+
+        private float threshold = 0.7f;
+
+        /// <summary>
+        /// Creates a new distance metric with the default threshold
+        /// for the Jaro Winkler bonus (0.7) </summary>
+        /// <seealso cref="Threshold"/>
+        public JaroWinklerDistance()
+        {
+        }
+
+        /// <summary>
+        /// Computes the raw matching statistics for the two strings.
+        /// </summary>
+        /// <returns>
+        /// A four-element array: the number of matched characters, half the number of
+        /// matched characters that are out of order (transpositions), the length of the
+        /// common prefix, and the length of the longer string.
+        /// </returns>
+        private int[] Matches(string s1, string s2)
+        {
+            string max, min;
+            if (s1.Length > s2.Length)
+            {
+                max = s1;
+                min = s2;
+            }
+            else
+            {
+                max = s2;
+                min = s1;
+            }
+            // Half-width of the window around position mi in which a match is searched.
+            int range = Math.Max(max.Length / 2 - 1, 0);
+            // For each char of min: index of its match in max, or -1 if unmatched.
+            int[] matchIndexes = new int[min.Length];
+            Arrays.Fill(matchIndexes, -1);
+            // Marks chars of max that are already matched so each is used at most once.
+            bool[] matchFlags = new bool[max.Length];
+            int matches = 0;
+            for (int mi = 0; mi < min.Length; mi++)
+            {
+                char c1 = min[mi];
+                for (int xi = Math.Max(mi - range, 0), xn = Math.Min(mi + range + 1, max.Length); xi < xn; xi++)
+                {
+                    if (!matchFlags[xi] && c1 == max[xi])
+                    {
+                        matchIndexes[mi] = xi;
+                        matchFlags[xi] = true;
+                        matches++;
+                        break;
+                    }
+                }
+            }
+            // Collect the matched characters of each string in their own order.
+            char[] ms1 = new char[matches];
+            char[] ms2 = new char[matches];
+            for (int i = 0, si = 0; i < min.Length; i++)
+            {
+                if (matchIndexes[i] != -1)
+                {
+                    ms1[si] = min[i];
+                    si++;
+                }
+            }
+            for (int i = 0, si = 0; i < max.Length; i++)
+            {
+                if (matchFlags[i])
+                {
+                    ms2[si] = max[i];
+                    si++;
+                }
+            }
+            // Positions at which the two matched sequences disagree.
+            int transpositions = 0;
+            for (int mi = 0; mi < ms1.Length; mi++)
+            {
+                if (ms1[mi] != ms2[mi])
+                {
+                    transpositions++;
+                }
+            }
+            // Length of the common prefix, bounded by the shorter string.
+            int prefix = 0;
+            for (int mi = 0; mi < min.Length; mi++)
+            {
+                if (s1[mi] == s2[mi])
+                {
+                    prefix++;
+                }
+                else
+                {
+                    break;
+                }
+            }
+            return new int[] { matches, transpositions / 2, prefix, max.Length };
+        }
+
+        /// <summary>
+        /// Returns the similarity of the two strings. The Jaro score is computed first;
+        /// if it is not below <see cref="Threshold"/>, a bonus proportional to the length
+        /// of the common prefix is added.
+        /// </summary>
+        /// <param name="s1"> the first string </param>
+        /// <param name="s2"> the second string </param>
+        /// <returns> 0 if no characters match, otherwise the (possibly boosted) Jaro score </returns>
+        public virtual float GetDistance(string s1, string s2)
+        {
+            int[] mtp = Matches(s1, s2);
+            float m = mtp[0];
+            if (m == 0)
+            {
+                // No matching characters; this also avoids dividing by zero below.
+                return 0f;
+            }
+            float j = ((m / s1.Length + m / s2.Length + (m - mtp[1]) / m)) / 3;
+            float jw = j < Threshold ? j : j + Math.Min(0.1f, 1f / mtp[3]) * mtp[2] * (1 - j);
+            return jw;
+        }
+
+        /// <summary>
+        /// Gets or sets the threshold used to determine when the Winkler bonus should be used.
+        /// Set to a negative value to get the Jaro distance. </summary>
+        /// <value> the threshold the Jaro score is compared against </value>
+        public virtual float Threshold
+        {
+            set
+            {
+                this.threshold = value;
+            }
+            get
+            {
+                return threshold;
+            }
+        }
+
+
+        /// <summary>
+        /// Hash code derived from the threshold's bit pattern and the runtime type.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            return 113 * Number.FloatToIntBits(threshold) * this.GetType().GetHashCode();
+        }
+
+        /// <summary>
+        /// Two instances are equal when they have the same runtime type and
+        /// thresholds with identical bit patterns.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (this == obj)
+            {
+                return true;
+            }
+            if (null == obj || this.GetType() != obj.GetType())
+            {
+                return false;
+            }
+
+            JaroWinklerDistance o = (JaroWinklerDistance)obj;
+            return (Number.FloatToIntBits(o.threshold) == Number.FloatToIntBits(this.threshold));
+        }
+
+        /// <summary>
+        /// Returns <c>jarowinkler(threshold)</c>, with the threshold formatted using the
+        /// invariant culture so the text does not depend on the current culture.
+        /// </summary>
+        public override string ToString()
+        {
+            return "jarowinkler(" + threshold.ToString(System.Globalization.CultureInfo.InvariantCulture) + ")";
+        }
+
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs
new file mode 100644
index 0000000..1ce93ba
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/LevensteinDistance.cs
@@ -0,0 +1,144 @@
+using System;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Levenshtein edit distance, reported as a similarity score.
+    /// </summary>
+    public sealed class LevensteinDistance : StringDistance
+    {
+
+        /// <summary>
+        /// Creates a new distance measure. This class declares no fields, so
+        /// instances carry no state.
+        /// </summary>
+        public LevensteinDistance()
+        {
+        }
+
+        /// <summary>
+        /// Scores the similarity of two strings from their Levenshtein edit distance;
+        /// see org.apache.commons.lang.StringUtils#getLevenshteinDistance(String, String).
+        /// </summary>
+        /// <param name="target"> the first string </param>
+        /// <param name="other"> the second string </param>
+        /// <returns>
+        /// 1 minus the edit distance divided by the length of the longer string.
+        /// When either string is empty: 1 if both are empty, otherwise 0.
+        /// </returns>
+        public float GetDistance(string target, string other)
+        {
+            int targetLength = target.Length;
+            int otherLength = other.Length;
+
+            if (targetLength == 0 || otherLength == 0)
+            {
+                return targetLength == otherLength ? 1 : 0;
+            }
+
+            // Only two rows of the cost matrix are kept: the row being filled and the
+            // one before it. They are swapped, not copied, after each character of
+            // 'other', so memory use is proportional to the length of 'target' only.
+            int[] previousRow = new int[targetLength + 1];
+            int[] currentRow = new int[targetLength + 1];
+
+            for (int col = 0; col <= targetLength; col++)
+            {
+                previousRow[col] = col;
+            }
+
+            for (int row = 1; row <= otherLength; row++)
+            {
+                char otherChar = other[row - 1];
+                currentRow[0] = row;
+
+                for (int col = 1; col <= targetLength; col++)
+                {
+                    int substitutionCost = target[col - 1] == otherChar ? 0 : 1;
+                    int fromLeft = currentRow[col - 1] + 1;
+                    int fromAbove = previousRow[col] + 1;
+                    int fromDiagonal = previousRow[col - 1] + substitutionCost;
+                    currentRow[col] = Math.Min(Math.Min(fromLeft, fromAbove), fromDiagonal);
+                }
+
+                // the row just filled becomes the 'previous' row of the next pass
+                int[] swap = previousRow;
+                previousRow = currentRow;
+                currentRow = swap;
+            }
+
+            // after the final swap the most recent costs are in previousRow
+            int editDistance = previousRow[targetLength];
+            return 1.0f - ((float)editDistance / Math.Max(otherLength, targetLength));
+        }
+
+        /// <summary>
+        /// Hash code derived from the runtime type only.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            int typeHash = this.GetType().GetHashCode();
+            return 163 * typeHash;
+        }
+
+        /// <summary>
+        /// An object is equal to this instance when it is the same reference or a
+        /// non-null object of the same runtime type.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            return ReferenceEquals(this, obj)
+                || (obj != null && this.GetType() == obj.GetType());
+        }
+
+        /// <summary>
+        /// Returns the fixed text <c>levenstein</c>.
+        /// </summary>
+        public override string ToString()
+        {
+            return "levenstein";
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/LuceneDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/LuceneDictionary.cs b/src/Lucene.Net.Suggest/Spell/LuceneDictionary.cs
new file mode 100644
index 0000000..e781152
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/LuceneDictionary.cs
@@ -0,0 +1,58 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search.Suggest;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Lucene Dictionary: terms taken from the given field
+    /// of a Lucene index.
+    /// </summary>
+    public class LuceneDictionary : Dictionary
+    {
+        private readonly IndexReader reader;
+        private readonly string field;
+
+        /// <summary>
+        /// Creates a new Dictionary, pulling source terms from
+        /// the specified <code>field</code> in the provided <code>reader</code>
+        /// </summary>
+        /// <param name="reader"> the index reader the terms are read from </param>
+        /// <param name="field"> the name of the field whose terms are used </param>
+        public LuceneDictionary(IndexReader reader, string field)
+        {
+            this.reader = reader;
+            this.field = field;
+        }
+
+        /// <summary>
+        /// Gets an iterator over the terms of the field, or the shared empty
+        /// iterator when <c>MultiFields.GetTerms</c> returns null for the field.
+        /// </summary>
+        public InputIterator EntryIterator
+        {
+            get
+            {
+                Terms terms = MultiFields.GetTerms(reader, field);
+                if (terms == null)
+                {
+                    return EmptyInputIterator.Instance;
+                }
+                return new InputIteratorWrapper(terms.Iterator(null));
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
new file mode 100644
index 0000000..ebf0738
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/LuceneLevenshteinDistance.cs
@@ -0,0 +1,136 @@
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    ///  Damerau-Levenshtein (optimal string alignment) implemented in a consistent 
+    ///  way as Lucene's FuzzyTermsEnum with the transpositions option enabled.
+    ///  
+    ///  Notes:
+    ///  <ul>
+    ///    <li> This metric treats full unicode codepoints as characters</li>
+    ///    <li> This metric scales raw edit distances into a floating point score
+    ///         based upon the shortest of the two terms</li>
+    ///    <li> Transpositions of two adjacent codepoints are treated as primitive 
+    ///         edits.</li>
+    ///    <li> Edits are applied in parallel: for example, "ab" and "bca" have 
+    ///         distance 3.</li>
+    ///  </ul>
+    ///  
+    ///  NOTE: this class is not particularly efficient. It is only intended
+    ///  for merging results from multiple DirectSpellCheckers.
+    /// </summary>
+    public sealed class LuceneLevenshteinDistance : StringDistance
+    {
+
+        /// <summary>
+        /// Creates a new comparator, mimicking the behavior of Lucene's internal
+        /// edit distance.
+        /// </summary>
+        public LuceneLevenshteinDistance()
+        {
+        }
+
+        /// <summary>
+        /// Scores the similarity of two strings, comparing them code point by code point.
+        /// </summary>
+        /// <param name="target"> the first string </param>
+        /// <param name="other"> the second string </param>
+        /// <returns>
+        /// 1 minus the edit distance (with adjacent transpositions) divided by the
+        /// code point count of the shorter string. When either string is empty the
+        /// result is 0 if both are empty, otherwise the code point count of the
+        /// non-empty string.
+        /// </returns>
+        public float GetDistance(string target, string other)
+        {
+            // NOTE: if we cared, we could use 3*m space instead of m*n space, similar to 
+            // what LevensteinDistance does, except cycling thru a ring of three 
+            // horizontal cost arrays... but this comparator is never actually used by 
+            // DirectSpellChecker, it's only used for merging results from multiple shards 
+            // in "distributed spellcheck", and it's inefficient in other ways too...
+
+            // cheaper to do this up front once
+            int[] targetPoints = ToCodePoints(target);
+            int[] otherPoints = ToCodePoints(other);
+            int n = targetPoints.Length;
+            int m = otherPoints.Length;
+
+            if (n == 0 || m == 0)
+            {
+                // NOTE(review): this branch returns a raw count rather than a score scaled
+                // like the general case below; kept exactly as in the code being ported.
+                if (n == m)
+                {
+                    return 0;
+                }
+                else
+                {
+                    return Math.Max(n, m);
+                }
+            }
+
+            // (n+1) x (m+1) cost array, allocated only once the early-out above has been passed
+            int[][] d = new int[n + 1][];
+            for (int row = 0; row <= n; row++)
+            {
+                d[row] = new int[m + 1];
+            }
+
+            // indexes into the two code point arrays
+            int i; // iterates through target
+            int j; // iterates through other
+
+            int t_j; // jth code point of other
+
+            int cost; // cost
+
+            for (i = 0; i <= n; i++)
+            {
+                d[i][0] = i;
+            }
+
+            for (j = 0; j <= m; j++)
+            {
+                d[0][j] = j;
+            }
+
+            for (j = 1; j <= m; j++)
+            {
+                t_j = otherPoints[j - 1];
+
+                for (i = 1; i <= n; i++)
+                {
+                    cost = targetPoints[i - 1] == t_j ? 0 : 1;
+                    // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
+                    d[i][j] = Math.Min(Math.Min(d[i - 1][j] + 1, d[i][j - 1] + 1), d[i - 1][j - 1] + cost);
+                    // transposition
+                    if (i > 1 && j > 1 && targetPoints[i - 1] == otherPoints[j - 2] && targetPoints[i - 2] == otherPoints[j - 1])
+                    {
+                        d[i][j] = Math.Min(d[i][j], d[i - 2][j - 2] + cost);
+                    }
+                }
+            }
+
+            return 1.0f - ((float)d[n][m] / Math.Min(m, n));
+        }
+
+        /// <summary>
+        /// Decodes a UTF-16 string into its sequence of Unicode code points.
+        /// A valid surrogate pair becomes one code point; any other char,
+        /// including an unpaired surrogate, is kept as its own value.
+        /// </summary>
+        private static int[] ToCodePoints(string s)
+        {
+            int[] points = new int[s.Length]; // worst case: no surrogate pairs
+            int count = 0;
+            int index = 0;
+            while (index < s.Length)
+            {
+                if (char.IsSurrogatePair(s, index))
+                {
+                    points[count++] = char.ConvertToUtf32(s, index);
+                    index += 2;
+                }
+                else
+                {
+                    points[count++] = s[index];
+                    index++;
+                }
+            }
+            // trim the unused tail so Length is the code point count
+            Array.Resize(ref points, count);
+            return points;
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/NGramDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/NGramDistance.cs b/src/Lucene.Net.Suggest/Spell/NGramDistance.cs
new file mode 100644
index 0000000..461ea1f
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/NGramDistance.cs
@@ -0,0 +1,195 @@
+using System;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// N-Gram version of edit distance based on paper by Grzegorz Kondrak, 
+    /// "N-gram similarity and distance". Proceedings of the Twelfth International 
+    /// Conference on String Processing and Information Retrieval (SPIRE 2005), pp. 115-126, 
+    /// Buenos Aires, Argentina, November 2005. 
+    /// http://www.cs.ualberta.ca/~kondrak/papers/spire05.pdf
+    /// 
+    /// This implementation uses the position-based optimization to compute partial
+    /// matches of n-gram sub-strings and adds a null-character prefix of size n-1 
+    /// so that the first character is contained in the same number of n-grams as 
+    /// a middle character.  Null-character prefix matches are discounted so that 
+    /// strings with no matching characters will return a distance of 0.
+    /// 
+    /// </summary>
+    public class NGramDistance : StringDistance
+    {
+
+        // size of the n-grams; assigned once in the constructor
+        private readonly int n;
+
+        /// <summary>
+        /// Creates an N-Gram distance measure using n-grams of the specified size. </summary>
+        /// <param name="size"> The size of the n-gram to be used to compute the string distance. </param>
+        public NGramDistance(int size)
+        {
+            this.n = size;
+        }
+
+        /// <summary>
+        /// Creates an N-Gram distance measure using n-grams of size 2.
+        /// </summary>
+        public NGramDistance()
+            : this(2)
+        {
+        }
+
+        /// <summary>
+        /// Scores the similarity of two strings using n-gram edit distance.
+        /// </summary>
+        /// <param name="source"> the first string </param>
+        /// <param name="target"> the second string </param>
+        /// <returns>
+        /// When either string is empty: 1 if both are empty, otherwise 0.
+        /// When either string is shorter than n: the number of positions holding the
+        /// same character divided by the longer length.
+        /// Otherwise: 1 minus the n-gram edit distance divided by the longer length.
+        /// </returns>
+        public virtual float GetDistance(string source, string target)
+        {
+            int sl = source.Length;
+            int tl = target.Length;
+
+            if (sl == 0 || tl == 0)
+            {
+                if (sl == tl)
+                {
+                    return 1;
+                }
+                else
+                {
+                    return 0;
+                }
+            }
+
+            int cost = 0;
+            if (sl < n || tl < n)
+            {
+                // The loop variables here and in the prefix loop below must not be named
+                // 'i': C# rejects a nested local that shares its name with a local
+                // declared later in the enclosing method scope (CS0136).
+                for (int pos = 0, limit = Math.Min(sl, tl); pos < limit; pos++)
+                {
+                    if (source[pos] == target[pos])
+                    {
+                        cost++;
+                    }
+                }
+                return (float)cost / Math.Max(sl, tl);
+            }
+
+            char[] sa = new char[sl + n - 1];
+            float[] p; //'previous' cost array, horizontally
+            float[] d; // cost array, horizontally
+            float[] _d; //placeholder to assist in swapping p and d
+
+            //construct sa with prefix
+            for (int fill = 0; fill < sa.Length; fill++)
+            {
+                if (fill < n - 1)
+                {
+                    sa[fill] = (char)0; //add prefix
+                }
+                else
+                {
+                    sa[fill] = source[fill - n + 1];
+                }
+            }
+            p = new float[sl + 1];
+            d = new float[sl + 1];
+
+            // indexes into strings s and t
+            int i; // iterates through source
+            int j; // iterates through target
+
+            char[] t_j = new char[n]; // jth n-gram of t
+
+            for (i = 0; i <= sl; i++)
+            {
+                p[i] = i;
+            }
+
+            for (j = 1; j <= tl; j++)
+            {
+                //construct t_j n-gram 
+                if (j < n)
+                {
+                    for (int ti = 0; ti < n - j; ti++)
+                    {
+                        t_j[ti] = (char)0; //add prefix
+                    }
+                    for (int ti = n - j; ti < n; ti++)
+                    {
+                        t_j[ti] = target[ti - (n - j)];
+                    }
+                }
+                else
+                {
+                    // characters [j - n, j) of target, copied into the existing buffer
+                    // instead of allocating a new substring and array on every pass
+                    target.CopyTo(j - n, t_j, 0, n);
+                }
+                d[0] = j;
+                for (i = 1; i <= sl; i++)
+                {
+                    cost = 0;
+                    int tn = n;
+                    //compare sa to t_j
+                    for (int ni = 0; ni < n; ni++)
+                    {
+                        if (sa[i - 1 + ni] != t_j[ni])
+                        {
+                            cost++;
+                        }
+                        else if (sa[i - 1 + ni] == 0) //discount matches on prefix
+                        {
+                            tn--;
+                        }
+                    }
+                    float ec = (float)cost / tn;
+                    // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
+                    d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1] + ec);
+                }
+                // copy current distance counts to 'previous row' distance counts
+                _d = p;
+                p = d;
+                d = _d;
+            }
+
+            // our last action in the above loop was to switch d and p, so p now
+            // actually has the most recent cost counts
+            return 1.0f - (p[sl] / Math.Max(tl, sl));
+        }
+
+        /// <summary>
+        /// Hash code derived from the n-gram size and the runtime type.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            return 1427 * n * this.GetType().GetHashCode();
+        }
+
+        /// <summary>
+        /// Two instances are equal when they have the same runtime type and n-gram size.
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (this == obj)
+            {
+                return true;
+            }
+            if (null == obj || this.GetType() != obj.GetType())
+            {
+                return false;
+            }
+
+            var o = (NGramDistance)obj;
+            return o.n == this.n;
+        }
+
+        /// <summary>
+        /// Returns the text <c>ngram(</c> followed by the n-gram size and a closing parenthesis.
+        /// </summary>
+        public override string ToString()
+        {
+            return "ngram(" + n + ")";
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs b/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs
new file mode 100644
index 0000000..312c410
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs
@@ -0,0 +1,134 @@
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Search.Suggest;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Dictionary represented by a text file.
+    /// 
+    /// <p/>Format allowed: 1 word per line:<br/>
+    /// word1<br/>
+    /// word2<br/>
+    /// word3<br/>
+    /// </summary>
+    public class PlainTextDictionary : Dictionary
+    {
+
+        // source of the dictionary lines; closed by the iterator once exhausted or on failure
+        private readonly TextReader @in;
+
+        /// <summary>
+        /// Creates a dictionary based on a File.
+        /// <para>
+        /// NOTE: content is treated as UTF-8
+        /// </para>
+        /// </summary>
+        /// <param name="file"> the file to read the words from </param>
+        public PlainTextDictionary(FileInfo file)
+        {
+            @in = new StreamReader(file.OpenRead(), System.Text.Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Creates a dictionary based on an inputstream.
+        /// <para>
+        /// NOTE: content is treated as UTF-8
+        /// </para>
+        /// </summary>
+        /// <param name="dictFile"> the stream to read the words from </param>
+        public PlainTextDictionary(Stream dictFile)
+        {
+            @in = new StreamReader(dictFile, System.Text.Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Creates a dictionary based on a reader.
+        /// </summary>
+        /// <param name="reader"> the reader to read the words from </param>
+        public PlainTextDictionary(TextReader reader)
+        {
+            @in = reader;
+        }
+
+        /// <summary>
+        /// Gets an iterator over the lines of the underlying reader. Every iterator
+        /// returned reads from the same reader instance.
+        /// </summary>
+        public virtual InputIterator EntryIterator
+        {
+            get
+            {
+                return new InputIteratorWrapper(new FileIterator(this));
+            }
+        }
+
+        /// <summary>
+        /// Iterates over the lines of the owning dictionary's reader, one term per line.
+        /// </summary>
+        internal sealed class FileIterator : BytesRefIterator
+        {
+            private readonly PlainTextDictionary outerInstance;
+
+            public FileIterator(PlainTextDictionary outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            internal bool done = false;
+            // reused for every term; the returned reference is overwritten by the next call
+            internal readonly BytesRef spare = new BytesRef();
+
+            /// <summary>
+            /// Returns the next line as a term, or null once the reader is exhausted.
+            /// The reader is closed when the end is reached, and also when reading fails.
+            /// </summary>
+            public BytesRef Next()
+            {
+                if (done)
+                {
+                    return null;
+                }
+                bool success = false;
+                BytesRef result;
+                try
+                {
+                    string line;
+                    if ((line = outerInstance.@in.ReadLine()) != null)
+                    {
+                        spare.CopyChars(line);
+                        result = spare;
+                    }
+                    else
+                    {
+                        done = true;
+                        IOUtils.Close(outerInstance.@in);
+                        result = null;
+                    }
+                    success = true;
+                }
+                finally
+                {
+                    // on failure, close the reader without masking the original exception
+                    if (!success)
+                    {
+                        IOUtils.CloseWhileHandlingException(outerInstance.@in);
+                    }
+                }
+                return result;
+            }
+
+            /// <summary>
+            /// Always null: this iterator reports no comparator for its terms.
+            /// </summary>
+            public IComparer<BytesRef> Comparator
+            {
+                get
+                {
+                    return null;
+                }
+            }
+        }
+    }
+}
\ No newline at end of file


Mime
View raw message