lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [6/8] Porting Lucene.Net.Suggest (still not compiling)
Date Mon, 15 Sep 2014 22:24:53 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/SpellChecker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/SpellChecker.cs b/src/Lucene.Net.Suggest/Spell/SpellChecker.cs
new file mode 100644
index 0000000..82f9810
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/SpellChecker.cs
@@ -0,0 +1,748 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Version = Lucene.Net.Util.Version;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <para>
+    ///   Spell Checker class  (Main class) <br/>
+    ///  (initially inspired by the David Spencer code).
+    /// </para>
+    /// 
+    /// <para>Example Usage:
+    /// 
+    /// <pre class="prettyprint">
+    ///  SpellChecker spellchecker = new SpellChecker(spellIndexDirectory);
+    ///  // To index a field of a user index:
+    ///  spellchecker.IndexDictionary(new LuceneDictionary(my_lucene_reader, a_field), config, fullMerge);
+    ///  // To index a file containing words:
+    ///  spellchecker.IndexDictionary(new PlainTextDictionary(new File("myfile.txt")), config, fullMerge);
+    ///  string[] suggestions = spellchecker.SuggestSimilar("misspelt", 5);
+    /// </pre>
+    /// 
+    /// 
+    /// </para>
+    /// </summary>
+    public class SpellChecker : IDisposable
+    {
+
+        /// <summary>
+        /// The default minimum score to use, if not specified by setting <see cref="Accuracy"/>.
+        /// </summary>
+        public const float DEFAULT_ACCURACY = 0.5f;
+
+        /// <summary>
+        /// Field name for each word in the ngram index.
+        /// </summary>
+        public const string F_WORD = "word";
+
+        /// <summary>
+        /// The directory holding the spell index.
+        /// </summary>
+        // Don't assign this field directly - it is replaced together with the
+        // searcher inside SwapSearcher().
+        // TODO: why is this package private?
+        internal Directory spellIndex;
+
+        /// <summary>
+        /// Boost given to the "start" gram clause of the suggestion query; the clause
+        /// is only added when this value is greater than zero.
+        /// </summary>
+        private float bStart = 2.0f;
+
+        /// <summary>
+        /// Boost given to the "end" gram clause of the suggestion query; the clause
+        /// is only added when this value is greater than zero.
+        /// </summary>
+        private float bEnd = 1.0f;
+
+        // Don't use this searcher directly - take it through ObtainSearcher() and give it
+        // back through ReleaseSearcher(); SwapSearcher() replaces it.
+        private IndexSearcher searcher;
+
+        /*
+         * this locks all modifications to the current searcher.
+         */
+        private readonly object searcherLock = new object();
+
+        /*
+         * this lock synchronizes all possible modifications to the
+         * current index directory. It should not be possible to try modifying
+         * the same index concurrently. Note: Do not acquire the searcher lock
+         * before acquiring this lock!
+         */
+        private readonly object modifyCurrentIndexLock = new object();
+
+        // Set to true by Dispose(); EnsureOpen() throws once it is set.
+        private volatile bool closed = false;
+
+        // minimum score for hits generated by the spell checker query
+        private float accuracy = DEFAULT_ACCURACY;
+
+        // measure used to score each candidate word against the input word
+        private StringDistance sd;
+
+        // ordering handed to the SuggestWordQueue that collects the suggestions
+        private IComparer<SuggestWord> comparator;
+
+        /// <summary>
+        /// Use the given directory as a spell checker index, scoring candidates with the
+        /// given <see cref="StringDistance"/> and ordering them with
+        /// <c>SuggestWordQueue.DEFAULT_COMPARATOR</c>. An index is created in the
+        /// directory if it doesn't contain one yet. </summary>
+        /// <param name="spellIndex"> the spell index directory </param>
+        /// <param name="sd"> the <see cref="StringDistance"/> measurement to use </param>
+        /// <exception cref="IOException"> if Spellchecker can not open the directory </exception>
+        public SpellChecker(Directory spellIndex, StringDistance sd)
+            : this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR)
+        {
+        }
+        /// <summary>
+        /// Use the given directory as a spell checker index with a new
+        /// <see cref="LevensteinDistance"/> as the <see cref="StringDistance"/>. An index
+        /// is created in the directory if it doesn't contain one yet.
+        /// </summary>
+        /// <param name="spellIndex">
+        ///          the spell index directory </param>
+        /// <exception cref="IOException">
+        ///           if spellchecker can not open the directory </exception>
+        public SpellChecker(Directory spellIndex)
+            : this(spellIndex, new LevensteinDistance())
+        {
+        }
+
+        /// <summary>
+        /// Use the given directory as a spell checker index with the given
+        /// <see cref="Lucene.Net.Search.Spell.StringDistance"/> measure and the given
+        /// <see cref="IComparer{T}"/> for sorting the results. </summary>
+        /// <param name="spellIndex"> The spelling index </param>
+        /// <param name="sd"> The distance </param>
+        /// <param name="comparator"> The comparator </param>
+        /// <exception cref="IOException"> if there is a problem opening the index </exception>
+        public SpellChecker(Directory spellIndex, StringDistance sd, IComparer<SuggestWord> comparator)
+        {
+            // NOTE(review): both assignments below go through virtual property setters,
+            // so an override in a subclass runs before that subclass's constructor body.
+            // The SpellIndex setter opens the index and installs the first searcher.
+            SpellIndex = spellIndex;
+            StringDistance = sd;
+            this.comparator = comparator;
+        }
+
+        /// <summary>
+        /// Switches the spell checker to another index directory, or re-opens the
+        /// current index when the same directory is assigned again. If the directory
+        /// does not contain an index yet, one is created first.
+        /// </summary>
+        /// <value> the spell directory to use </value>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        /// <exception cref="System.IO.IOException"> if spellchecker can not open the directory </exception>
+        // TODO: we should make this final as it is called in the constructor
+        public virtual Directory SpellIndex
+        {
+            set
+            {
+                // The assigned directory may be the one already in use, so every
+                // modification of the index is serialized on this lock.
+                lock (modifyCurrentIndexLock)
+                {
+                    EnsureOpen();
+
+                    bool indexPresent = DirectoryReader.IndexExists(value);
+                    if (!indexPresent)
+                    {
+                        // Open a writer and dispose it straight away without adding
+                        // documents, so there is an index for the searcher to open.
+                        IndexWriterConfig emptyIndexConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null);
+                        IndexWriter bootstrapWriter = new IndexWriter(value, emptyIndexConfig);
+                        bootstrapWriter.Dispose();
+                    }
+
+                    SwapSearcher(value);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets or sets the <see cref="IComparer{T}"/> handed to the
+        /// <see cref="SuggestWordQueue"/> that collects the suggestions.
+        /// </summary>
+        /// <value> the comparator </value>
+        public virtual IComparer<SuggestWord> Comparator
+        {
+            get { return comparator; }
+            set { comparator = value; }
+        }
+
+
+        /// <summary>
+        /// Gets or sets the <see cref="Lucene.Net.Search.Spell.StringDistance"/>
+        /// implementation this <see cref="SpellChecker"/> instance uses to score
+        /// candidate words.
+        /// </summary>
+        /// <value> the <see cref="Lucene.Net.Search.Spell.StringDistance"/> implementation </value>
+        public virtual StringDistance StringDistance
+        {
+            get { return sd; }
+            set { sd = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the minimum score a suggestion must reach; intended range is
+        /// 0 &lt; minScore &lt; 1, default <see cref="DEFAULT_ACCURACY"/>. The setter
+        /// stores the value as given and performs no range check.
+        /// </summary>
+        /// <value> The accuracy </value>
+        public virtual float Accuracy
+        {
+            get { return accuracy; }
+            set { accuracy = value; }
+        }
+
+
+        /// <summary>
+        /// Suggest similar words, without consulting a user index.
+        /// 
+        /// <para>The Lucene similarity used to fetch the most relevant n-grammed terms
+        /// differs from the edit distance strategy used to pick the best matching
+        /// spell-checked word among the hits, so one usually has to retrieve several
+        /// suggestions in order to get the true best match.
+        /// </para>
+        /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
+        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
+        /// </para>
+        /// </summary>
+        /// <param name="word"> the word you want a spell check done on </param>
+        /// <param name="numSug"> the number of suggested words </param>
+        /// <exception cref="IOException"> if the underlying index throws an <see cref="IOException"/> </exception>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        /// <returns> the suggested words </returns>
+        /// <seealso cref="SuggestSimilar(string, int, IndexReader, string, SuggestMode, float)"/>
+        public virtual string[] SuggestSimilar(string word, int numSug)
+        {
+            // No user index and no field are supplied by this overload.
+            IndexReader noReader = null;
+            string noField = null;
+            return SuggestSimilar(word, numSug, noReader, noField, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
+        }
+
+        /// <summary>
+        /// Suggest similar words with an explicit minimum score, without consulting
+        /// a user index.
+        /// 
+        /// <para>The Lucene similarity used to fetch the most relevant n-grammed terms
+        /// differs from the edit distance strategy used to pick the best matching
+        /// spell-checked word among the hits, so one usually has to retrieve several
+        /// suggestions in order to get the true best match.
+        /// </para>
+        /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
+        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
+        /// </para>
+        /// </summary>
+        /// <param name="word"> the word you want a spell check done on </param>
+        /// <param name="numSug"> the number of suggested words </param>
+        /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
+        /// <exception cref="IOException"> if the underlying index throws an <see cref="IOException"/> </exception>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        /// <returns> the suggested words </returns>
+        /// <seealso cref="SuggestSimilar(string, int, IndexReader, string, SuggestMode, float)"/>
+        public virtual string[] SuggestSimilar(string word, int numSug, float accuracy)
+        {
+            // No user index and no field are supplied by this overload.
+            IndexReader noReader = null;
+            string noField = null;
+            return SuggestSimilar(word, numSug, noReader, noField, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy);
+        }
+
+        /// <summary>
+        /// Calls <see cref="SuggestSimilar(string, int, IndexReader, string, SuggestMode, float)"/>
+        /// as <c>SuggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy)</c>,
+        /// i.e. with the minimum score currently configured on this instance.
+        /// </summary>
+        public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode)
+        {
+            float configuredAccuracy = accuracy;
+            return SuggestSimilar(word, numSug, ir, field, suggestMode, configuredAccuracy);
+        }
+
+        /// <summary>
+        /// Suggest similar words (optionally restricted to a field of an index).
+        /// 
+        /// <para>The Lucene similarity used to fetch the most relevant n-grammed terms
+        /// differs from the edit distance strategy used to pick the best matching
+        /// spell-checked word among the hits, so one usually has to retrieve several
+        /// suggestions in order to get the true best match.
+        /// </para>
+        /// <para>I.e. if numSug == 1, don't count on that suggestion being the best one.
+        /// Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
+        /// </para>
+        /// </summary>
+        /// <param name="word"> the word you want a spell check done on </param>
+        /// <param name="numSug"> the number of suggested words </param>
+        /// <param name="ir"> the indexReader of the user index (can be null see field param) </param>
+        /// <param name="field"> the field of the user index: if field is not null, the suggested
+        /// words are restricted to the words present in this field. </param>
+        /// <param name="suggestMode"> 
+        /// (NOTE: if indexReader==null and/or field==null, then this is overridden with SuggestMode.SUGGEST_ALWAYS) </param>
+        /// <param name="accuracy"> The minimum score a suggestion must have in order to qualify for inclusion in the results </param>
+        /// <exception cref="IOException"> if the underlying index throws an <see cref="IOException"/> </exception>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        /// <returns> the suggested words, ordered by the configured comparator. When the mode is
+        /// SUGGEST_WHEN_NOT_IN_INDEX and the word occurs in the given field, the word itself
+        /// is returned as the only element.
+        ///  </returns>
+        public virtual string[] SuggestSimilar(string word, int numSug, IndexReader ir, string field, SuggestMode suggestMode, float accuracy)
+        {
+            // ObtainSearcher calls EnsureOpen; the reference it takes is given back in the finally block
+            IndexSearcher indexSearcher = ObtainSearcher();
+            try
+            {
+                // Without both a reader and a field there is no user index to consult.
+                if (ir == null || field == null)
+                {
+                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
+                }
+                if (suggestMode == SuggestMode.SUGGEST_ALWAYS)
+                {
+                    ir = null;
+                    field = null;
+                }
+                // ir and field are not reassigned below, so this holds for the rest of the method
+                bool useUserIndex = ir != null && field != null;
+
+                int lengthWord = word.Length;
+
+                int freq = 0;
+                if (useUserIndex)
+                {
+                    freq = ir.DocFreq(new Term(field, word));
+                }
+                int goalFreq = 0;
+                if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
+                {
+                    goalFreq = freq;
+                }
+
+                // if the word exists in the real index and we don't care for word frequency, return the word itself
+                if (suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0)
+                {
+                    return new string[] { word };
+                }
+
+                // One SHOULD clause per n-gram of the word, for every gram size used for this word length
+                BooleanQuery query = new BooleanQuery();
+                int largestGram = GetMax(lengthWord);
+                for (int ng = GetMin(lengthWord); ng <= largestGram; ng++)
+                {
+                    string[] grams = FormGrams(word, ng); // duplicates are kept
+                    if (grams.Length == 0)
+                    {
+                        continue;
+                    }
+
+                    if (bStart > 0) // boost a match on the first gram of the word
+                    {
+                        Add(query, "start" + ng, grams[0], bStart);
+                    }
+                    if (bEnd > 0) // boost a match on the last gram of the word
+                    {
+                        Add(query, "end" + ng, grams[grams.Length - 1], bEnd);
+                    }
+
+                    string key = "gram" + ng;
+                    foreach (string gram in grams)
+                    {
+                        Add(query, key, gram);
+                    }
+                }
+
+                // fetch more hits than requested in case the distance filter rejects some
+                int maxHits = 10 * numSug;
+                ScoreDoc[] hits = indexSearcher.Search(query, null, maxHits).ScoreDocs;
+                SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
+
+                int stop = Math.Min(hits.Length, maxHits);
+                // The same SuggestWord instance is reused until it has been put in the queue.
+                SuggestWord sugWord = new SuggestWord();
+                for (int hit = 0; hit < stop; hit++)
+                {
+                    sugWord.@string = indexSearcher.Doc(hits[hit].Doc).Get(F_WORD); // get orig word
+
+                    // don't suggest a word for itself, that would be silly
+                    if (sugWord.@string.Equals(word))
+                    {
+                        continue;
+                    }
+
+                    // score against the input word; drop anything below the current threshold
+                    sugWord.score = sd.GetDistance(word, sugWord.@string);
+                    if (sugWord.score < accuracy)
+                    {
+                        continue;
+                    }
+
+                    if (useUserIndex)
+                    {
+                        sugWord.freq = ir.DocFreq(new Term(field, sugWord.@string)); // freq in the index
+                        bool lessPopular = suggestMode == SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq;
+                        bool absentFromField = sugWord.freq < 1;
+                        // don't suggest a word that is not present in the field
+                        if (lessPopular || absentFromField)
+                        {
+                            continue;
+                        }
+                    }
+
+                    sugQueue.InsertWithOverflow(sugWord);
+                    if (sugQueue.Size() == numSug)
+                    {
+                        // if queue full, maintain the minScore score
+                        accuracy = sugQueue.Top().score;
+                    }
+                    sugWord = new SuggestWord();
+                }
+
+                // Pop the queue into the array from the last slot to the first
+                string[] suggestions = new string[sugQueue.Size()];
+                int slot = suggestions.Length;
+                while (--slot >= 0)
+                {
+                    suggestions[slot] = sugQueue.Pop().@string;
+                }
+
+                return suggestions;
+            }
+            finally
+            {
+                ReleaseSearcher(indexSearcher);
+            }
+        }
+        /// <summary>
+        /// Adds an optional (SHOULD) term clause with the given boost to a boolean query.
+        /// </summary>
+        private static void Add(BooleanQuery q, string name, string value, float boost)
+        {
+            Term term = new Term(name, value);
+            Query boosted = new TermQuery(term);
+            boosted.Boost = boost;
+            BooleanClause clause = new BooleanClause(boosted, BooleanClause.Occur.SHOULD);
+            q.Add(clause);
+        }
+
+        /// <summary>
+        /// Adds an optional (SHOULD) term clause to a boolean query.
+        /// </summary>
+        private static void Add(BooleanQuery q, string name, string value)
+        {
+            Term term = new Term(name, value);
+            BooleanClause clause = new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD);
+            q.Add(clause);
+        }
+
+        /// <summary>
+        /// Form all ngrams for a given word. </summary>
+        /// <param name="text"> the word to parse </param>
+        /// <param name="ng"> the ngram length e.g. 3 </param>
+        /// <returns> an array of all ngrams in the word, in order of their start position;
+        /// duplicates are not removed. The array is empty when the word is shorter
+        /// than <paramref name="ng"/>. </returns>
+        private static string[] FormGrams(string text, int ng)
+        {
+            int count = text.Length - ng + 1;
+            if (count <= 0)
+            {
+                // The word is shorter than the gram size. A negative count would make
+                // the array allocation below throw (e.g. an empty word with ng == 2).
+                return new string[0];
+            }
+
+            string[] res = new string[count];
+            for (int i = 0; i < count; i++)
+            {
+                res[i] = text.Substring(i, ng);
+            }
+            return res;
+        }
+
+        /// <summary>
+        /// Removes all terms from the spell check index by re-creating the index in the
+        /// current directory, then re-opens the searcher on it. </summary>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        public virtual void ClearIndex()
+        {
+            lock (modifyCurrentIndexLock)
+            {
+                EnsureOpen();
+                var currentDir = spellIndex;
+
+                // A writer opened in CREATE mode and disposed without adding documents.
+                var recreateConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
+                    .SetOpenMode(IndexWriterConfig.OpenMode.CREATE);
+                var writer = new IndexWriter(currentDir, recreateConfig);
+                writer.Dispose();
+
+                SwapSearcher(currentDir);
+            }
+        }
+
+        /// <summary>
+        /// Check whether the word exists in the spell index. </summary>
+        /// <param name="word"> word to check </param>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        /// <returns> true if at least one document of the spell index contains the word
+        /// in its <see cref="F_WORD"/> field </returns>
+        public virtual bool Exist(string word)
+        {
+            // ObtainSearcher calls EnsureOpen
+            IndexSearcher indexSearcher = ObtainSearcher();
+            try
+            {
+                // TODO: we should use ReaderUtil+seekExact, we dont care about the docFreq
+                // this is just an existence check
+                Term wordTerm = new Term(F_WORD, word);
+                int docCount = indexSearcher.IndexReader.DocFreq(wordTerm);
+                return docCount > 0;
+            }
+            finally
+            {
+                ReleaseSearcher(indexSearcher);
+            }
+        }
+
+        /// <summary>
+        /// Indexes the data from the given <see cref="Dictionary"/>. Words shorter than
+        /// three characters and words already present in the spell index are skipped.
+        /// Afterwards the searcher is re-opened so that following suggestions see the
+        /// added words. </summary>
+        /// <param name="dict"> Dictionary to index </param>
+        /// <param name="config"> <see cref="IndexWriterConfig"/> to use </param>
+        /// <param name="fullMerge"> whether or not the spellcheck index should be fully merged </param>
+        /// <exception cref="AlreadyClosedException"> if the Spellchecker is already closed </exception>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public void IndexDictionary(Dictionary dict, IndexWriterConfig config, bool fullMerge)
+        {
+            lock (modifyCurrentIndexLock)
+            {
+                EnsureOpen();
+                Directory dir = this.spellIndex;
+                using (var writer = new IndexWriter(dir, config))
+                {
+                    IndexSearcher indexSearcher = ObtainSearcher();
+                    try
+                    {
+                        // Collect one terms enumerator per index segment that already has words.
+                        // Read through the searcher obtained above (whose reader reference we
+                        // hold), not through the shared field, which Dispose() may null out.
+                        IList<TermsEnum> termsEnums = new List<TermsEnum>();
+                        IndexReader reader = indexSearcher.IndexReader;
+                        if (reader.MaxDoc() > 0)
+                        {
+                            foreach (AtomicReaderContext ctx in reader.Leaves())
+                            {
+                                Terms terms = ctx.Reader().Terms(F_WORD);
+                                if (terms != null)
+                                {
+                                    termsEnums.Add(terms.Iterator(null));
+                                }
+                            }
+                        }
+
+                        BytesRefIterator iter = dict.EntryIterator;
+                        BytesRef currentTerm;
+
+                        while ((currentTerm = iter.Next()) != null)
+                        {
+                            string word = currentTerm.Utf8ToString();
+                            int len = word.Length;
+                            if (len < 3)
+                            {
+                                continue; // too short we bail but "too long" is fine...
+                            }
+
+                            // Skip words that some segment of the spell index already contains.
+                            bool alreadyIndexed = false;
+                            foreach (TermsEnum te in termsEnums)
+                            {
+                                if (te.SeekExact(currentTerm))
+                                {
+                                    alreadyIndexed = true;
+                                    break;
+                                }
+                            }
+                            if (alreadyIndexed)
+                            {
+                                continue;
+                            }
+
+                            // ok index the word
+                            var doc = CreateDocument(word, GetMin(len), GetMax(len));
+                            writer.AddDocument(doc);
+                        }
+                    }
+                    finally
+                    {
+                        ReleaseSearcher(indexSearcher);
+                    }
+                    if (fullMerge)
+                    {
+                        writer.ForceMerge(1);
+                    }
+                }
+                // TODO: this isn't that great, maybe in the future SpellChecker should take
+                // IWC in its ctor / keep its writer open?
+
+                // also re-open the spell index to see our own changes when the next suggestion
+                // is fetched:
+                SwapSearcher(dir);
+            }
+        }
+
+        // Smallest n-gram size used for a word of length l:
+        // 3 for more than five characters, 2 for exactly five, otherwise 1.
+        private static int GetMin(int l)
+        {
+            return l > 5 ? 3 : (l == 5 ? 2 : 1);
+        }
+
+        // Largest n-gram size used for a word of length l:
+        // 4 for more than five characters, 3 for exactly five, otherwise 2.
+        private static int GetMax(int l)
+        {
+            return l > 5 ? 4 : (l == 5 ? 3 : 2);
+        }
+
+        // Builds the index document for one word: the stored word itself plus its
+        // n-gram fields for every gram size from ng1 to ng2.
+        private static Document CreateDocument(string text, int ng1, int ng2)
+        {
+            Document wordDoc = new Document();
+
+            // the word field is never queried on... its indexed so it can be quickly
+            // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
+            wordDoc.Add(new StringField(F_WORD, text, Field.Store.YES)); // orig term
+
+            AddGram(text, wordDoc, ng1, ng2);
+            return wordDoc;
+        }
+
+        // Adds the n-gram fields of a word to its document. For every gram size ng from
+        // ng1 to ng2: one "gram<ng>" field per gram, a "start<ng>" field holding the first
+        // gram and an "end<ng>" field holding the last gram.
+        private static void AddGram(string text, Document doc, int ng1, int ng2)
+        {
+            int len = text.Length;
+
+            // spellchecker does not use positional queries, but we want freqs
+            // for scoring these multivalued n-gram fields. The field type is the same
+            // for every gram, so it is built once here rather than once per gram.
+            FieldType gramType = new FieldType(StringField.TYPE_NOT_STORED);
+            gramType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
+
+            for (int ng = ng1; ng <= ng2; ng++)
+            {
+                string key = "gram" + ng;
+                string end = null;
+                for (int i = 0; i < len - ng + 1; i++)
+                {
+                    string gram = text.Substring(i, ng);
+                    doc.Add(new Field(key, gram, gramType));
+                    if (i == 0)
+                    {
+                        // only one term possible in the startXXField, TF/pos and norms aren't needed.
+                        doc.Add(new StringField("start" + ng, gram, Field.Store.NO));
+                    }
+                    end = gram;
+                }
+                if (end != null) // stays null when the word is shorter than ng (no gram was formed)
+                {
+                    // only one term possible in the endXXField, TF/pos and norms aren't needed.
+                    doc.Add(new StringField("end" + ng, end, Field.Store.NO));
+                }
+            }
+        }
+
+        // Returns the current searcher after incrementing the reference count of its
+        // reader; every call must be paired with ReleaseSearcher(). Throws
+        // AlreadyClosedException (via EnsureOpen) once the spell checker is closed.
+        private IndexSearcher ObtainSearcher()
+        {
+            lock (searcherLock)
+            {
+                EnsureOpen();
+                IndexSearcher current = searcher;
+                current.IndexReader.IncRef();
+                return current;
+            }
+        }
+
+        // Gives back a searcher taken with ObtainSearcher() by decrementing the
+        // reference count of its reader.
+        private void ReleaseSearcher(IndexSearcher aSearcher)
+        {
+            // don't check if open - always decRef 
+            // don't decrement the private searcher - could have been swapped
+            var borrowedReader = aSearcher.IndexReader;
+            borrowedReader.DecRef();
+        }
+
+        // Throws AlreadyClosedException when the spell checker has been closed.
+        private void EnsureOpen()
+        {
+            if (!closed)
+            {
+                return;
+            }
+            throw new AlreadyClosedException("Spellchecker has been closed");
+        }
+
+        /// <summary>
+        /// Close the IndexSearcher used by this SpellChecker. Calling this method again
+        /// after the spell checker has been closed does nothing. </summary>
+        /// <exception cref="IOException"> if the close operation causes an <see cref="IOException"/> </exception>
+        public void Dispose()
+        {
+            lock (searcherLock)
+            {
+                // Dispose must be safe to call more than once: later calls are
+                // ignored instead of throwing AlreadyClosedException.
+                if (closed)
+                {
+                    return;
+                }
+                closed = true;
+                if (searcher != null)
+                {
+                    searcher.IndexReader.Dispose();
+                }
+                searcher = null;
+            }
+        }
+
+        // Opens a searcher on the given directory and makes it (and the directory) the
+        // current one, disposing the reader of the searcher it replaces. Throws
+        // AlreadyClosedException if the spell checker was closed in the meantime.
+        private void SwapSearcher(Directory dir)
+        {
+            /*
+             * opening a searcher is possibly very expensive.
+             * We rather close it again if the Spellchecker was closed during
+             * this operation than block access to the current searcher while opening.
+             */
+            IndexSearcher replacement = CreateSearcher(dir);
+            lock (searcherLock)
+            {
+                if (closed)
+                {
+                    replacement.IndexReader.Dispose();
+                    throw new AlreadyClosedException("Spellchecker has been closed");
+                }
+
+                IndexSearcher previous = searcher;
+                if (previous != null)
+                {
+                    previous.IndexReader.Dispose();
+                }
+
+                // set the spellindex in the sync block - ensure consistency.
+                searcher = replacement;
+                spellIndex = dir;
+            }
+        }
+
+        /// <summary>
+        /// Creates a new IndexSearcher over a reader opened on the given directory. </summary>
+        /// <param name="dir"> the directory used to open the searcher </param>
+        /// <returns> a new IndexSearcher </returns>
+        /// <exception cref="IOException"> if there is a low-level IO error </exception>
+        // for testing purposes
+        internal virtual IndexSearcher CreateSearcher(Directory dir)
+        {
+            var directoryReader = DirectoryReader.Open(dir);
+            return new IndexSearcher(directoryReader);
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> if and only if the <see cref="SpellChecker"/> is
+        /// closed, otherwise <c>false</c>.
+        /// </summary>
+        /// <value> <c>true</c> if and only if the <see cref="SpellChecker"/> is
+        ///         closed, otherwise <c>false</c>. </value>
+        internal virtual bool Closed
+        {
+            get
+            {
+                return closed;
+            }
+        }
+
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/StringDistance.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/StringDistance.cs b/src/Lucene.Net.Suggest/Spell/StringDistance.cs
new file mode 100644
index 0000000..d50a9b4
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/StringDistance.cs
@@ -0,0 +1,36 @@
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Interface for string distances.
+    /// </summary>
+    public interface StringDistance
+    {
+
+        /// <summary>
+        /// Returns a float between 0 and 1 based on how similar the specified strings are to one another.
+        /// Returning a value of 1 means the specified strings are identical and 0 means the
+        /// strings are maximally different. Note that, despite the method name, a larger
+        /// value therefore means the strings are closer to each other.
+        /// </summary>
+        /// <param name="s1"> The first string. </param>
+        /// <param name="s2"> The second string. </param>
+        /// <returns> a float between 0 and 1 based on how similar the specified strings are to one another. </returns>
+        float GetDistance(string s1, string s2);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/SuggestMode.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/SuggestMode.cs b/src/Lucene.Net.Suggest/Spell/SuggestMode.cs
new file mode 100644
index 0000000..f277323
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/SuggestMode.cs
@@ -0,0 +1,46 @@
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Set of strategies for suggesting related terms.
+    /// @lucene.experimental
+    /// </summary>
+    public enum SuggestMode
+    {
+        /// <summary>
+        /// Generate suggestions only for terms not in the index (default).
+        /// As the first member it is also the value of <c>default(SuggestMode)</c>.
+        /// </summary>
+        SUGGEST_WHEN_NOT_IN_INDEX,
+
+        /// <summary>
+        /// Return only suggested words that are as frequent or more frequent than the
+        /// searched word
+        /// </summary>
+        SUGGEST_MORE_POPULAR,
+
+        /// <summary>
+        /// Always attempt to offer suggestions (however, other parameters may limit
+        /// suggestions; for example, see the maximum query frequency setting of
+        /// <c>DirectSpellChecker</c>).
+        /// </summary>
+        SUGGEST_ALWAYS
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/SuggestWord.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWord.cs b/src/Lucene.Net.Suggest/Spell/SuggestWord.cs
new file mode 100644
index 0000000..35aea47
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/SuggestWord.cs
@@ -0,0 +1,53 @@
+namespace Lucene.Net.Search.Spell
+{
+
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// SuggestWord, used in the suggestSimilar method in the SpellChecker class.
+    /// <para/>
+    /// Default sort is first by score, then by frequency.
+    /// </summary>
+    public sealed class SuggestWord
+    {
+
+        /// <summary>
+        /// Creates a new empty suggestion with null text; <see cref="score"/> and
+        /// <see cref="freq"/> start at their default value of zero.
+        /// </summary>
+        public SuggestWord()
+        {
+        }
+
+        /// <summary>
+        /// The score of the word.
+        /// </summary>
+        public float score;
+
+        /// <summary>
+        /// The frequency of the word.
+        /// </summary>
+        public int freq;
+
+        /// <summary>
+        /// The suggested word.
+        /// </summary>
+        public string @string;
+
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs b/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs
new file mode 100644
index 0000000..3e7abd1
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/SuggestWordFrequencyComparator.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    /// <summary>
+    /// Compares <see cref="SuggestWord"/> instances by frequency first, then by score,
+    /// then by term text.
+    /// </summary>
+    public class SuggestWordFrequencyComparator : IComparer<SuggestWord>
+    {
+
+        /// <summary>
+        /// Creates a new comparator that will compare by <see cref="SuggestWord.freq"/>,
+        /// then by <see cref="SuggestWord.score"/>, then by the suggested text of the
+        /// <see cref="SuggestWord"/>.
+        /// </summary>
+        public SuggestWordFrequencyComparator()
+        {
+        }
+
+        /// <summary>
+        /// Compares two suggestions by frequency, then score, then term text.
+        /// </summary>
+        /// <param name="first"> the first suggestion; must not be null </param>
+        /// <param name="second"> the second suggestion; must not be null </param>
+        /// <returns>
+        /// a positive value if <paramref name="first"/> has the higher frequency (or, on
+        /// equal frequency, the higher score), a negative value in the opposite case;
+        /// when both are equal, the ordinal comparison of the text of
+        /// <paramref name="second"/> against the text of <paramref name="first"/>.
+        /// </returns>
+        public virtual int Compare(SuggestWord first, SuggestWord second)
+        {
+            // first criteria: the frequency
+            if (first.freq > second.freq)
+            {
+                return 1;
+            }
+            if (first.freq < second.freq)
+            {
+                return -1;
+            }
+
+            // second criteria (if first criteria is equal): the score
+            if (first.score > second.score)
+            {
+                return 1;
+            }
+            if (first.score < second.score)
+            {
+                return -1;
+            }
+
+            // third criteria: term text, deliberately compared second-against-first.
+            // An ordinal comparison is used so the ordering does not depend on the
+            // current culture (string.CompareTo is culture-sensitive); it also
+            // accepts null text instead of throwing.
+            return string.CompareOrdinal(second.@string, first.@string);
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs b/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs
new file mode 100644
index 0000000..d46a524
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/SuggestWordQueue.cs
@@ -0,0 +1,65 @@
+using System.Collections.Generic;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Spell
+{
+
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// Priority queue of <see cref="SuggestWord"/> instances whose ordering is
+    /// delegated to an <see cref="IComparer{T}"/>.
+    /// </summary>
+    /// <seealso cref="SuggestWordScoreComparator"/>
+    /// <seealso cref="SuggestWordFrequencyComparator"/>
+    public sealed class SuggestWordQueue : PriorityQueue<SuggestWord>
+    {
+        /// <summary>
+        /// Default comparator: score then frequency.
+        /// </summary>
+        /// <seealso cref="SuggestWordScoreComparator"/>
+        public static readonly IComparer<SuggestWord> DEFAULT_COMPARATOR = new SuggestWordScoreComparator();
+
+        // Comparator consulted by LessThan; fixed at construction time.
+        private readonly IComparer<SuggestWord> comparator;
+
+        /// <summary>
+        /// Creates a queue of the given size that orders its entries with
+        /// <see cref="DEFAULT_COMPARATOR"/>.
+        /// </summary>
+        /// <param name="size"> The size of the queue </param>
+        public SuggestWordQueue(int size)
+            : this(size, DEFAULT_COMPARATOR)
+        {
+        }
+
+        /// <summary>
+        /// Creates a queue of the given size that orders its entries with the
+        /// supplied comparator.
+        /// </summary>
+        /// <param name="size"> The size </param>
+        /// <param name="comparator"> The comparator. </param>
+        public SuggestWordQueue(int size, IComparer<SuggestWord> comparator)
+            : base(size)
+        {
+            this.comparator = comparator;
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> when the comparator orders <paramref name="wa"/>
+        /// strictly before <paramref name="wb"/>.
+        /// </summary>
+        public override bool LessThan(SuggestWord wa, SuggestWord wb)
+        {
+            return comparator.Compare(wa, wb) < 0;
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs b/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs
new file mode 100644
index 0000000..d626d91
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/SuggestWordScoreComparator.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace Lucene.Net.Search.Spell
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    /// <summary>
+    /// Compares <see cref="SuggestWord"/> instances by score first, then by frequency,
+    /// then by term text.
+    /// </summary>
+    public class SuggestWordScoreComparator : IComparer<SuggestWord>
+    {
+
+        /// <summary>
+        /// Creates a new comparator that will compare by <see cref="SuggestWord.score"/>,
+        /// then by <see cref="SuggestWord.freq"/>, then by the suggested text of the
+        /// <see cref="SuggestWord"/>.
+        /// </summary>
+        public SuggestWordScoreComparator()
+        {
+        }
+
+        /// <summary>
+        /// Compares two suggestions by score, then frequency, then term text.
+        /// </summary>
+        /// <param name="first"> the first suggestion; must not be null </param>
+        /// <param name="second"> the second suggestion; must not be null </param>
+        /// <returns>
+        /// a positive value if <paramref name="first"/> has the higher score (or, on
+        /// equal score, the higher frequency), a negative value in the opposite case;
+        /// when both are equal, the ordinal comparison of the text of
+        /// <paramref name="second"/> against the text of <paramref name="first"/>.
+        /// </returns>
+        public virtual int Compare(SuggestWord first, SuggestWord second)
+        {
+            // first criteria: the distance
+            if (first.score > second.score)
+            {
+                return 1;
+            }
+            if (first.score < second.score)
+            {
+                return -1;
+            }
+
+            // second criteria (if first criteria is equal): the popularity
+            if (first.freq > second.freq)
+            {
+                return 1;
+            }
+            if (first.freq < second.freq)
+            {
+                return -1;
+            }
+
+            // third criteria: term text, deliberately compared second-against-first.
+            // An ordinal comparison is used so the ordering does not depend on the
+            // current culture (string.CompareTo is culture-sensitive); it also
+            // accepts null text instead of throwing.
+            return string.CompareOrdinal(second.@string, first.@string);
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/TermFreqIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/TermFreqIterator.cs b/src/Lucene.Net.Suggest/Spell/TermFreqIterator.cs
new file mode 100644
index 0000000..5414173
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/TermFreqIterator.cs
@@ -0,0 +1,68 @@
+using System.Collections.Generic;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Interface for enumerating (term, weight) pairs: extends
+    /// <see cref="BytesRefIterator"/> with a weight for the term.
+    /// </summary>
+    public interface TermFreqIterator : BytesRefIterator
+    {
+
+        /// <summary>
+        /// Gets the term's weight, higher numbers mean better suggestions.
+        /// </summary>
+        long Weight { get; }
+    }
+
+    /// <summary>
+    /// Adapts a <see cref="BytesRefIterator"/> to the <see cref="TermFreqIterator"/>
+    /// interface, reporting a weight of <c>1</c> for every term.
+    /// </summary>
+    public class TermFreqIteratorWrapper : TermFreqIterator
+    {
+        // The iterator every call is forwarded to.
+        internal BytesRefIterator wrapped;
+
+        /// <summary>
+        /// Creates a new wrapper around the specified iterator; all terms get a
+        /// weight value of <c>1</c>.
+        /// </summary>
+        /// <param name="wrapped"> the iterator to delegate to </param>
+        public TermFreqIteratorWrapper(BytesRefIterator wrapped)
+        {
+            this.wrapped = wrapped;
+        }
+
+        /// <summary>
+        /// Always <c>1</c>.
+        /// </summary>
+        public virtual long Weight
+        {
+            get
+            {
+                return 1;
+            }
+        }
+
+        /// <summary>
+        /// Forwards to the wrapped iterator's <c>Next()</c>.
+        /// </summary>
+        public BytesRef Next()
+        {
+            BytesRef term = wrapped.Next();
+            return term;
+        }
+
+        /// <summary>
+        /// The comparator reported by the wrapped iterator.
+        /// </summary>
+        public IComparer<BytesRef> Comparator
+        {
+            get
+            {
+                return wrapped.Comparator;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs b/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs
new file mode 100644
index 0000000..03cb212
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/WordBreakSpellChecker.cs
@@ -0,0 +1,542 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// <para>
+    /// A spell checker whose sole function is to offer suggestions by combining
+    /// multiple terms into one word and/or breaking terms into multiple words.
+    /// </para>
+    /// </summary>
+    public class WordBreakSpellChecker
+    {
+        // Minimum document frequency a generated word must reach to be suggested.
+        private int minSuggestionFrequency = 1;
+        // Minimum length, in code points, of each piece when breaking a term
+        // (values below 1 are treated as 1 by GenerateBreakUpSuggestions).
+        private int minBreakWordLength = 1;
+        // Maximum length, in code points, of a word built by combining terms.
+        private int maxCombineWordLength = 20;
+        // Maximum number of breaks, or of terms appended to the left-most term,
+        // allowed for one suggestion.
+        private int maxChanges = 1;
+        // Evaluation budget checked by the suggestion loops.
+        private int maxEvaluations = 1000;
+
+        /// <summary>
+        /// Term (empty field, empty text) that can be used to prohibit adjacent terms
+        /// from being combined.
+        /// </summary>
+        public static readonly Term SEPARATOR_TERM = new Term("", "");
+
+        /// <summary>
+        /// Creates a new spellchecker with default configuration values: minimum
+        /// suggestion frequency 1, minimum break word length 1, maximum combine word
+        /// length 20, maximum changes 1 and maximum evaluations 1000.
+        /// </summary>
+        /// <seealso cref="MinSuggestionFrequency"/>
+        public WordBreakSpellChecker()
+        {
+        }
+
+        /// <summary>
+        /// <para>
+        /// Determines the order in which word break suggestions are listed.
+        /// </para>
+        /// </summary>
+        public enum BreakSuggestionSortMethod
+        {
+            /// <summary>
+            /// <para>
+            /// Sort by number of word breaks, then by the sum of all the component
+            /// terms' frequencies.
+            /// </para>
+            /// </summary>
+            NUM_CHANGES_THEN_SUMMED_FREQUENCY,
+            /// <summary>
+            /// <para>
+            /// Sort by number of word breaks, then by the maximum of all the component
+            /// terms' frequencies.
+            /// </para>
+            /// </summary>
+            NUM_CHANGES_THEN_MAX_FREQUENCY
+        }
+
+        /// <summary>
+        /// <para>
+        /// Generate suggestions by breaking the passed-in term into multiple words.
+        /// The scores returned are equal to the number of word breaks needed so a
+        /// lower score is generally preferred over a higher score.
+        /// </para>
+        /// </summary>
+        /// <param name="term"> the term to break up </param>
+        /// <param name="maxSuggestions"> maximum number of suggestions to keep; a value
+        ///          below 1 yields an empty result </param>
+        /// <param name="ir"> reader used to look up document frequencies </param>
+        /// <param name="suggestMode"> with
+        ///          <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/> nothing is suggested
+        ///          for a term that is already in the index; with
+        ///          <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/> the frequency of the
+        ///          original term (at least 1) is the minimum frequency of every piece </param>
+        /// <param name="sortMethod"> ordering of the suggestions;
+        ///          <see cref="BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY"/>
+        ///          selects the max-frequency comparator, any other value the
+        ///          summed-frequency comparator </param>
+        /// <returns> zero or more arrays of words formed by breaking up the original term </returns>
+        /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
+        public virtual SuggestWord[][] SuggestWordBreaks(Term term, int maxSuggestions, IndexReader ir, SuggestMode suggestMode, BreakSuggestionSortMethod sortMethod)
+        {
+            if (maxSuggestions < 1)
+            {
+                return new SuggestWord[0][];
+            }
+
+            // No null defaulting is needed: suggestMode and sortMethod are
+            // non-nullable enum values, so a comparison against null is never true.
+
+            int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
+
+            // The two branches create different comparator classes, so the choice is
+            // made with if/else and assigned to the common interface type; a
+            // conditional expression would need a conversion between its two branches.
+            IComparer<SuggestWordArrayWrapper> queueComparator;
+            if (sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY)
+            {
+                queueComparator = new LengthThenMaxFreqComparator(this);
+            }
+            else
+            {
+                queueComparator = new LengthThenSumFreqComparator(this);
+            }
+
+            // NOTE(review): the variable is declared as LinkedList but initialised with
+            // a PriorityQueue, while AddLast/RemoveFirst below are LinkedList members.
+            // The intended collection type needs to be settled - confirm.
+            LinkedList<SuggestWordArrayWrapper> suggestions = new PriorityQueue<SuggestWordArrayWrapper>(queueInitialCapacity, queueComparator);
+
+            int origFreq = ir.DocFreq(term);
+            if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX)
+            {
+                return new SuggestWord[0][];
+            }
+
+            int useMinSuggestionFrequency = minSuggestionFrequency;
+            if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
+            {
+                // every piece must be at least as frequent as the original term
+                useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq);
+            }
+
+            GenerateBreakUpSuggestions(term, ir, 1, maxSuggestions, useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0, sortMethod);
+
+            // Drain the collection from its head, filling the result back to front.
+            SuggestWord[][] suggestionArray = new SuggestWord[suggestions.Count][];
+            for (int i = suggestions.Count - 1; i >= 0; i--)
+            {
+                suggestionArray[i] = suggestions.RemoveFirst().SuggestWords;
+            }
+
+            return suggestionArray;
+        }
+
+        /// <summary>
+        /// <para>
+        /// Generate suggestions by combining one or more of the passed-in terms into
+        /// single words. The returned <see cref="CombineSuggestion"/> contains both a
+        /// <see cref="SuggestWord"/> and also an array detailing which passed-in terms were
+        /// involved in creating this combination. The scores returned are equal to the
+        /// number of word combinations needed, also one less than the length of the
+        /// array of original term indexes held by the <see cref="CombineSuggestion"/>.
+        /// Generally, a suggestion with a lower score is preferred over a higher score.
+        /// </para>
+        /// <para>
+        /// To prevent two adjacent terms from being combined (for instance, if one is
+        /// mandatory and the other is prohibited), separate the two terms with
+        /// <see cref="SEPARATOR_TERM"/>
+        /// </para>
+        /// <para>
+        /// When suggestMode equals <see cref="SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX"/>, each
+        /// suggestion will include at least one term not in the index.
+        /// </para>
+        /// <para>
+        /// When suggestMode equals <see cref="SuggestMode.SUGGEST_MORE_POPULAR"/>, each
+        /// suggestion will have the same, or better frequency than the most-popular
+        /// included term.
+        /// </para>
+        /// </summary>
+        /// <param name="terms"> the terms to combine, in order; the combined term is
+        ///          always created in the field of the first term </param>
+        /// <param name="maxSuggestions"> maximum number of suggestions to keep; a value
+        ///          below 1 yields an empty result </param>
+        /// <param name="ir"> reader used to look up document frequencies </param>
+        /// <param name="suggestMode"> controls which combinations are accepted, see above </param>
+        /// <returns> an array of words generated by combining original terms </returns>
+        /// <exception cref="System.IO.IOException"> If there is a low-level I/O error. </exception>
+        public virtual CombineSuggestion[] SuggestWordCombinations(Term[] terms, int maxSuggestions, IndexReader ir, SuggestMode suggestMode)
+        {
+            if (maxSuggestions < 1)
+            {
+                return new CombineSuggestion[0];
+            }
+
+            // Frequencies of the original terms are only needed when the mode
+            // filters on them; for SUGGEST_ALWAYS the array stays null.
+            int[] origFreqs = null;
+            if (suggestMode != SuggestMode.SUGGEST_ALWAYS)
+            {
+                origFreqs = new int[terms.Length];
+                for (int i = 0; i < terms.Length; i++)
+                {
+                    origFreqs[i] = ir.DocFreq(terms[i]);
+                }
+            }
+
+            int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
+            IComparer<CombineSuggestionWrapper> queueComparator = new CombinationsThenFreqComparator(this);
+            // NOTE(review): declared as LinkedList but initialised with a PriorityQueue,
+            // while AddLast/RemoveFirst below are LinkedList members - confirm the
+            // intended collection type.
+            LinkedList<CombineSuggestionWrapper> suggestions = new PriorityQueue<CombineSuggestionWrapper>(queueInitialCapacity, queueComparator);
+
+            int thisTimeEvaluations = 0;
+            // i is the left-most term of a candidate combination.
+            for (int i = 0; i < terms.Length - 1; i++)
+            {
+                if (terms[i].Equals(SEPARATOR_TERM))
+                {
+                    continue;
+                }
+                string leftTermText = terms[i].Text();
+                // lengths are measured in code points, not UTF-16 chars
+                int leftTermLength = leftTermText.CodePointCount(0, leftTermText.Length);
+                if (leftTermLength > maxCombineWordLength)
+                {
+                    continue;
+                }
+                // Without original frequencies (SUGGEST_ALWAYS) these keep their
+                // seed values of 0 and int.MaxValue.
+                int maxFreq = 0;
+                int minFreq = int.MaxValue;
+                if (origFreqs != null)
+                {
+                    maxFreq = origFreqs[i];
+                    minFreq = origFreqs[i];
+                }
+                string combinedTermText = leftTermText;
+                int combinedLength = leftTermLength;
+                // Append following terms one at a time, at most maxChanges of them.
+                for (int j = i + 1; j < terms.Length && j - i <= maxChanges; j++)
+                {
+                    // a separator stops any combination from reaching across it
+                    if (terms[j].Equals(SEPARATOR_TERM))
+                    {
+                        break;
+                    }
+                    string rightTermText = terms[j].Text();
+                    int rightTermLength = rightTermText.CodePointCount(0, rightTermText.Length);
+                    combinedTermText += rightTermText;
+                    combinedLength += rightTermLength;
+                    if (combinedLength > maxCombineWordLength)
+                    {
+                        break;
+                    }
+
+                    if (origFreqs != null)
+                    {
+                        maxFreq = Math.Max(maxFreq, origFreqs[j]);
+                        minFreq = Math.Min(minFreq, origFreqs[j]);
+                    }
+
+                    // the combined term is looked up in the field of terms[0]
+                    Term combinedTerm = new Term(terms[0].Field(), combinedTermText);
+                    int combinedTermFreq = ir.DocFreq(combinedTerm);
+
+                    // SUGGEST_MORE_POPULAR: at least as frequent as the most frequent original term
+                    if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR || combinedTermFreq >= maxFreq)
+                    {
+                        // SUGGEST_WHEN_NOT_IN_INDEX: at least one original term has frequency 0
+                        if (suggestMode != SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX || minFreq == 0)
+                        {
+                            if (combinedTermFreq >= minSuggestionFrequency)
+                            {
+                                // indexes i..j of the original terms that were joined
+                                int[] origIndexes = new int[j - i + 1];
+                                origIndexes[0] = i;
+                                for (int k = 1; k < origIndexes.Length; k++)
+                                {
+                                    origIndexes[k] = i + k;
+                                }
+                                SuggestWord word = new SuggestWord();
+                                word.freq = combinedTermFreq;
+                                // score = number of joins performed
+                                word.score = origIndexes.Length - 1;
+                                word.@string = combinedTerm.Text();
+                                CombineSuggestionWrapper suggestion = new CombineSuggestionWrapper(this, new CombineSuggestion(word, origIndexes), (origIndexes.Length - 1));
+                                suggestions.AddLast(suggestion);
+                                // keep at most maxSuggestions entries by dropping the head
+                                if (suggestions.Count > maxSuggestions)
+                                {
+                                    suggestions.RemoveFirst();
+                                }
+                            }
+                        }
+                    }
+                    thisTimeEvaluations++;
+                    // NOTE(review): this break only leaves the inner loop; the outer loop
+                    // goes on and the counter can then move past maxEvaluations, so the
+                    // equality test does not match again - confirm this is intended.
+                    if (thisTimeEvaluations == maxEvaluations)
+                    {
+                        break;
+                    }
+                }
+            }
+            // Drain the collection from its head, filling the result back to front.
+            CombineSuggestion[] combineSuggestions = new CombineSuggestion[suggestions.Count];
+            for (int i = suggestions.Count - 1; i >= 0; i--)
+            {
+                combineSuggestions[i] = suggestions.RemoveFirst().CombineSuggestion;
+            }
+            return combineSuggestions;
+        }
+
+        /// <summary>
+        /// Tries every split position of <paramref name="term"/> that leaves at least
+        /// the minimum break word length (in code points) on both sides, adds each
+        /// accepted split to <paramref name="suggestions"/> and, while the number of
+        /// breaks stays within <c>maxChanges</c>, recurses into the right-hand part.
+        /// </summary>
+        /// <param name="term"> the term (or remaining right-hand part) to split </param>
+        /// <param name="ir"> reader used to look up document frequencies </param>
+        /// <param name="numberBreaks"> number of breaks this level represents </param>
+        /// <param name="maxSuggestions"> maximum number of entries kept in <paramref name="suggestions"/> </param>
+        /// <param name="useMinSuggestionFrequency"> minimum frequency required of each piece </param>
+        /// <param name="prefix"> words already split off to the left of <paramref name="term"/> </param>
+        /// <param name="suggestions"> collection receiving the accepted suggestions </param>
+        /// <param name="totalEvaluations"> evaluations counted so far; a by-value copy that
+        ///          is only updated locally </param>
+        /// <param name="sortMethod"> not used here other than being passed on to the recursive call </param>
+        /// <returns> the number of split positions evaluated by this call itself
+        ///          (evaluations made by recursive calls are not included) </returns>
+        private int GenerateBreakUpSuggestions(Term term, IndexReader ir, int numberBreaks, int maxSuggestions, int useMinSuggestionFrequency, SuggestWord[] prefix, LinkedList<SuggestWordArrayWrapper> suggestions, int totalEvaluations, BreakSuggestionSortMethod sortMethod)
+        {
+            string termText = term.Text();
+            // length in code points, not UTF-16 chars
+            int termLength = termText.CodePointCount(0, termText.Length);
+            int useMinBreakWordLength = minBreakWordLength;
+            if (useMinBreakWordLength < 1)
+            {
+                useMinBreakWordLength = 1;
+            }
+            // too short to be split into two pieces of the minimum length
+            if (termLength < (useMinBreakWordLength * 2))
+            {
+                return 0;
+            }
+
+            int thisTimeEvaluations = 0;
+            // i is the number of code points given to the left piece
+            for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++)
+            {
+                // translate the code point count into a char offset before slicing
+                int end = termText.OffsetByCodePoints(0, i);
+                string leftText = termText.Substring(0, end);
+                string rightText = termText.Substring(end);
+                SuggestWord leftWord = GenerateSuggestWord(ir, term.Field(), leftText);
+
+                // the right piece is only looked up once the left piece qualifies
+                if (leftWord.freq >= useMinSuggestionFrequency)
+                {
+                    SuggestWord rightWord = GenerateSuggestWord(ir, term.Field(), rightText);
+                    if (rightWord.freq >= useMinSuggestionFrequency)
+                    {
+                        SuggestWordArrayWrapper suggestion = new SuggestWordArrayWrapper(this, NewSuggestion(prefix, leftWord, rightWord));
+                        suggestions.AddLast(suggestion);
+                        // keep at most maxSuggestions entries by dropping the head
+                        if (suggestions.Count > maxSuggestions)
+                        {
+                            suggestions.RemoveFirst();
+                        }
+                    }
+                    // Recurse into the right piece regardless of whether it qualified
+                    // on its own, as long as another break is allowed.
+                    int newNumberBreaks = numberBreaks + 1;
+                    if (newNumberBreaks <= maxChanges)
+                    {
+                        int evaluations = GenerateBreakUpSuggestions(new Term(term.Field(), rightWord.@string), ir, newNumberBreaks, maxSuggestions, useMinSuggestionFrequency, NewPrefix(prefix, leftWord), suggestions, totalEvaluations, sortMethod);
+                        totalEvaluations += evaluations;
+                    }
+                }
+
+                thisTimeEvaluations++;
+                totalEvaluations++;
+                // stop this level once the evaluation budget is used up
+                if (totalEvaluations >= maxEvaluations)
+                {
+                    break;
+                }
+            }
+            return thisTimeEvaluations;
+        }
+
+        /// <summary>
+        /// Creates a new array containing every element of <paramref name="oldPrefix"/>
+        /// followed by <paramref name="append"/> as the last element.
+        /// The input array is not modified.
+        /// </summary>
+        /// <param name="oldPrefix">Existing prefix words (copied by reference).</param>
+        /// <param name="append">Word to place after the existing prefix.</param>
+        /// <returns>A new array one element longer than <paramref name="oldPrefix"/>.</returns>
+        private static SuggestWord[] NewPrefix(SuggestWord[] oldPrefix, SuggestWord append)
+        {
+            int oldLength = oldPrefix.Length;
+            SuggestWord[] extended = new SuggestWord[oldLength + 1];
+            oldPrefix.CopyTo(extended, 0);
+            extended[oldLength] = append;
+            return extended;
+        }
+
+        /// <summary>
+        /// Builds a complete suggestion: fresh copies of every word in <paramref name="prefix"/>
+        /// followed by <paramref name="append1"/> and <paramref name="append2"/>.
+        /// Every word in the result gets the same score, <c>prefix.Length + 1</c>.
+        /// </summary>
+        /// <remarks>
+        /// The prefix words are copied, so the originals keep their scores; the two appended
+        /// words are NOT copied - their <c>score</c> is overwritten in place.
+        /// </remarks>
+        /// <param name="prefix">Words already chosen for the left part of the suggestion.</param>
+        /// <param name="append1">Second-to-last word of the suggestion (score is mutated).</param>
+        /// <param name="append2">Last word of the suggestion (score is mutated).</param>
+        /// <returns>A new array of length <c>prefix.Length + 2</c>.</returns>
+        private static SuggestWord[] NewSuggestion(SuggestWord[] prefix, SuggestWord append1, SuggestWord append2)
+        {
+            int prefixLength = prefix.Length;
+            int score = prefixLength + 1;
+            SuggestWord[] result = new SuggestWord[prefixLength + 2];
+
+            int slot = 0;
+            foreach (SuggestWord source in prefix)
+            {
+                result[slot++] = new SuggestWord
+                {
+                    @string = source.@string,
+                    freq = source.freq,
+                    score = score
+                };
+            }
+
+            append1.score = score;
+            append2.score = score;
+            result[prefixLength] = append1;
+            result[prefixLength + 1] = append2;
+            return result;
+        }
+
+        /// <summary>
+        /// Creates a <see cref="SuggestWord"/> for <paramref name="text"/> whose frequency is the
+        /// document frequency of the term (<paramref name="fieldname"/>, <paramref name="text"/>)
+        /// as reported by <paramref name="ir"/>. The score is initialised to 1.
+        /// </summary>
+        /// <param name="ir">Reader queried for the term's document frequency.</param>
+        /// <param name="fieldname">Field the term belongs to.</param>
+        /// <param name="text">Term text; also stored as the word's string.</param>
+        /// <returns>The newly created word.</returns>
+        private SuggestWord GenerateSuggestWord(IndexReader ir, string fieldname, string text)
+        {
+            int docFreq = ir.DocFreq(new Term(fieldname, text));
+            return new SuggestWord
+            {
+                freq = docFreq,
+                score = 1,
+                @string = text
+            };
+        }
+
+        /// <summary>
+        /// Gets or sets the minimum frequency a term must have
+        /// to be part of a suggestion.
+        /// </summary>
+        public virtual int MinSuggestionFrequency
+        {
+            get { return this.minSuggestionFrequency; }
+            set { minSuggestionFrequency = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the maximum length of a combined suggestion.
+        /// </summary>
+        public virtual int MaxCombineWordLength
+        {
+            get { return this.maxCombineWordLength; }
+            set { maxCombineWordLength = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the minimum size of a broken word.
+        /// </summary>
+        public virtual int MinBreakWordLength
+        {
+            get { return this.minBreakWordLength; }
+            set { minBreakWordLength = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the maximum number of changes to perform on the input.
+        /// </summary>
+        public virtual int MaxChanges
+        {
+            get { return this.maxChanges; }
+            set { maxChanges = value; }
+        }
+
+        /// <summary>
+        /// Gets or sets the maximum number of word combinations to evaluate.
+        /// </summary>
+        public virtual int MaxEvaluations
+        {
+            get { return this.maxEvaluations; }
+            set { maxEvaluations = value; }
+        }
+
+        /// <summary>
+        /// Compares two wrapped suggestions by number of words first, then by <c>freqMax</c>.
+        /// </summary>
+        private sealed class LengthThenMaxFreqComparator : IComparer<SuggestWordArrayWrapper>
+        {
+            // Enclosing checker; stored by the constructor but not read by Compare.
+            private readonly WordBreakSpellChecker outerInstance;
+
+            public LengthThenMaxFreqComparator(WordBreakSpellChecker outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            /// <summary>
+            /// Returns a negative value when <paramref name="o1"/> has more words than
+            /// <paramref name="o2"/>; when the word counts match, returns a negative value
+            /// when <paramref name="o1"/> has the smaller <c>freqMax</c>, and 0 when both match.
+            /// </summary>
+            public int Compare(SuggestWordArrayWrapper o1, SuggestWordArrayWrapper o2)
+            {
+                int lengthDelta = o2.suggestWords.Length - o1.suggestWords.Length;
+                if (lengthDelta != 0)
+                {
+                    return lengthDelta;
+                }
+                // Equal values subtract to 0, so no separate equality branch is needed.
+                return o1.freqMax - o2.freqMax;
+            }
+        }
+
+        /// <summary>
+        /// Compares two wrapped suggestions by number of words first, then by <c>freqSum</c>.
+        /// </summary>
+        private sealed class LengthThenSumFreqComparator : IComparer<SuggestWordArrayWrapper>
+        {
+            // Enclosing checker; stored by the constructor but not read by Compare.
+            private readonly WordBreakSpellChecker outerInstance;
+
+            public LengthThenSumFreqComparator(WordBreakSpellChecker outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            /// <summary>
+            /// Returns a negative value when <paramref name="o1"/> has more words than
+            /// <paramref name="o2"/>; when the word counts match, returns a negative value
+            /// when <paramref name="o1"/> has the smaller <c>freqSum</c>, and 0 when both match.
+            /// </summary>
+            public int Compare(SuggestWordArrayWrapper o1, SuggestWordArrayWrapper o2)
+            {
+                int lengthDelta = o2.suggestWords.Length - o1.suggestWords.Length;
+                if (lengthDelta != 0)
+                {
+                    return lengthDelta;
+                }
+                // Equal values subtract to 0, so no separate equality branch is needed.
+                return o1.freqSum - o2.freqSum;
+            }
+        }
+
+        /// <summary>
+        /// Compares two wrapped combine-suggestions by <c>numCombinations</c> first, then by
+        /// the frequency of the combined suggestion word.
+        /// </summary>
+        private sealed class CombinationsThenFreqComparator : IComparer<CombineSuggestionWrapper>
+        {
+            // Enclosing checker; stored by the constructor but not read by Compare.
+            private readonly WordBreakSpellChecker outerInstance;
+
+            public CombinationsThenFreqComparator(WordBreakSpellChecker outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            /// <summary>
+            /// Returns a negative value when <paramref name="o1"/> has more combinations than
+            /// <paramref name="o2"/>; when the counts match, returns a negative value when
+            /// <paramref name="o1"/>'s suggestion has the smaller frequency, and 0 when both match.
+            /// </summary>
+            public int Compare(CombineSuggestionWrapper o1, CombineSuggestionWrapper o2)
+            {
+                int combinationsDelta = o2.numCombinations - o1.numCombinations;
+                if (combinationsDelta != 0)
+                {
+                    return combinationsDelta;
+                }
+                // Equal values subtract to 0, so no separate equality branch is needed.
+                return o1.combineSuggestion.suggestion.freq - o2.combineSuggestion.suggestion.freq;
+            }
+        }
+
+        /// <summary>
+        /// Wraps an array of suggest words together with the sum and the maximum of their
+        /// frequencies, both computed once at construction.
+        /// </summary>
+        private class SuggestWordArrayWrapper
+        {
+            // Enclosing checker; stored by the constructor and not otherwise used in this class.
+            private readonly WordBreakSpellChecker outerInstance;
+
+            // The wrapped words (the array is stored as-is, not copied).
+            internal readonly SuggestWord[] suggestWords;
+            // Largest freq among the wrapped words; 0 when none exceeds 0.
+            internal readonly int freqMax;
+            // Sum of freq over all wrapped words.
+            internal readonly int freqSum;
+
+            /// <summary>
+            /// Stores <paramref name="suggestWords"/> and computes <c>freqSum</c> and <c>freqMax</c> from it.
+            /// </summary>
+            internal SuggestWordArrayWrapper(WordBreakSpellChecker outerInstance, SuggestWord[] suggestWords)
+            {
+                this.outerInstance = outerInstance;
+                this.suggestWords = suggestWords;
+
+                int total = 0;
+                int highest = 0;
+                for (int i = 0; i < suggestWords.Length; i++)
+                {
+                    int freq = suggestWords[i].freq;
+                    total += freq;
+                    if (freq > highest)
+                    {
+                        highest = freq;
+                    }
+                }
+
+                freqSum = total;
+                freqMax = highest;
+            }
+        }
+
+        /// <summary>
+        /// Pairs a <see cref="CombineSuggestion"/> with the number of combinations
+        /// supplied for it at construction.
+        /// </summary>
+        private class CombineSuggestionWrapper
+        {
+            // Enclosing checker; stored by the constructor and not otherwise used in this class.
+            private readonly WordBreakSpellChecker outerInstance;
+
+            // The wrapped suggestion.
+            internal readonly CombineSuggestion combineSuggestion;
+            // Combination count associated with the suggestion.
+            internal readonly int numCombinations;
+
+            /// <summary>
+            /// Stores the three arguments in the corresponding fields; no other work is done.
+            /// </summary>
+            internal CombineSuggestionWrapper(WordBreakSpellChecker outerInstance, CombineSuggestion combineSuggestion, int numCombinations)
+            {
+                this.numCombinations = numCombinations;
+                this.combineSuggestion = combineSuggestion;
+                this.outerInstance = outerInstance;
+            }
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/StringHelperClass.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/StringHelperClass.cs b/src/Lucene.Net.Suggest/StringHelperClass.cs
new file mode 100644
index 0000000..172a21e
--- /dev/null
+++ b/src/Lucene.Net.Suggest/StringHelperClass.cs
@@ -0,0 +1,90 @@
+//-------------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2014 Tangible Software Solutions Inc.
+//	This class can be used by anyone provided that the copyright notice remains intact.
+//
+//	This class is used to convert some aspects of the Java String class.
+//-------------------------------------------------------------------------------------------
+/// <summary>
+/// Helpers that emulate parts of the Java <c>String</c> API (substring, startsWith with
+/// offset, split, byte-array constructors and getBytes) for code converted from Java.
+/// </summary>
+internal static class StringHelperClass
+{
+	/// <summary>
+	/// Java-style substring: returns the characters of <paramref name="self"/> from
+	/// <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive).
+	/// Taking both indices as arguments ensures a computed <paramref name="start"/>
+	/// expression is evaluated just once by the caller.
+	/// </summary>
+	internal static string SubstringSpecial(this string self, int start, int end)
+	{
+		return self.Substring(start, end - start);
+	}
+
+	/// <summary>
+	/// Replacement for the 2-argument Java <c>String.startsWith(prefix, toffset)</c>:
+	/// tests whether the substring of <paramref name="self"/> beginning at
+	/// <paramref name="toffset"/> starts with <paramref name="prefix"/> (ordinal comparison).
+	/// </summary>
+	/// <remarks>
+	/// Returns <c>false</c> (rather than throwing) when <paramref name="toffset"/> is negative
+	/// or leaves fewer than <c>prefix.Length</c> characters, matching the Java contract.
+	/// NOTE: a call written with the literal <c>0</c> as the second argument binds to the
+	/// built-in <c>string.StartsWith(string, StringComparison)</c> instead of this extension,
+	/// because the constant 0 converts implicitly to any enum type.
+	/// </remarks>
+	/// <exception cref="System.ArgumentNullException"><paramref name="prefix"/> is null.</exception>
+	internal static bool StartsWith(this string self, string prefix, int toffset)
+	{
+		if (prefix == null)
+		{
+			throw new System.ArgumentNullException("prefix");
+		}
+
+		// Out-of-range offsets cannot match; IndexOf would have thrown here.
+		if (toffset < 0 || toffset > self.Length - prefix.Length)
+		{
+			return false;
+		}
+
+		// Compare only at the requested offset instead of scanning the rest of the string.
+		return string.CompareOrdinal(self, toffset, prefix, 0, prefix.Length) == 0;
+	}
+
+	/// <summary>
+	/// Replacement for most calls to Java <c>String.split</c>: splits <paramref name="self"/>
+	/// on the regular expression <paramref name="regexDelimiter"/>.
+	/// </summary>
+	/// <param name="self">String to split.</param>
+	/// <param name="regexDelimiter">Regular expression used as the delimiter.</param>
+	/// <param name="trimTrailingEmptyStrings">
+	/// When true and the split produced more than one element, trailing empty strings are
+	/// removed; if every element is empty the result is an empty array (as in Java).
+	/// </param>
+	internal static string[] Split(this string self, string regexDelimiter, bool trimTrailingEmptyStrings)
+	{
+		string[] splitArray = System.Text.RegularExpressions.Regex.Split(self, regexDelimiter);
+
+		// A single-element result means no delimiter matched; it is returned untouched.
+		if (!trimTrailingEmptyStrings || splitArray.Length <= 1)
+		{
+			return splitArray;
+		}
+
+		// Find the number of leading elements to keep (everything up to the last non-empty one).
+		int keep = splitArray.Length;
+		while (keep > 0 && splitArray[keep - 1].Length == 0)
+		{
+			keep--;
+		}
+
+		if (keep < splitArray.Length)
+		{
+			System.Array.Resize(ref splitArray, keep);
+		}
+
+		return splitArray;
+	}
+
+	//-----------------------------------------------------------------------------
+	//	These methods are used to replace calls to some Java String constructors.
+	//	The (byte[])(object) cast views the sbyte[] as a byte[] without copying.
+	//-----------------------------------------------------------------------------
+
+	/// <summary>Decodes the whole array as UTF-8.</summary>
+	internal static string NewString(sbyte[] bytes)
+	{
+		return NewString(bytes, 0, bytes.Length);
+	}
+
+	/// <summary>Decodes <paramref name="count"/> bytes starting at <paramref name="index"/> as UTF-8.</summary>
+	internal static string NewString(sbyte[] bytes, int index, int count)
+	{
+		return System.Text.Encoding.UTF8.GetString((byte[])(object)bytes, index, count);
+	}
+
+	/// <summary>Decodes the whole array using the encoding named <paramref name="encoding"/>.</summary>
+	internal static string NewString(sbyte[] bytes, string encoding)
+	{
+		return NewString(bytes, 0, bytes.Length, encoding);
+	}
+
+	/// <summary>
+	/// Decodes <paramref name="count"/> bytes starting at <paramref name="index"/> using the
+	/// encoding named <paramref name="encoding"/>.
+	/// </summary>
+	internal static string NewString(sbyte[] bytes, int index, int count, string encoding)
+	{
+		return System.Text.Encoding.GetEncoding(encoding).GetString((byte[])(object)bytes, index, count);
+	}
+
+	//--------------------------------------------------------------------------------
+	//	These methods are used to replace calls to the Java String.getBytes methods.
+	//--------------------------------------------------------------------------------
+
+	/// <summary>Encodes <paramref name="self"/> as UTF-8 and returns the bytes as <c>sbyte[]</c>.</summary>
+	internal static sbyte[] GetBytes(this string self)
+	{
+		return GetSBytesForEncoding(System.Text.Encoding.UTF8, self);
+	}
+
+	/// <summary>
+	/// Encodes <paramref name="self"/> with the encoding named <paramref name="encoding"/>
+	/// and returns the bytes as <c>sbyte[]</c>.
+	/// </summary>
+	internal static sbyte[] GetBytes(this string self, string encoding)
+	{
+		return GetSBytesForEncoding(System.Text.Encoding.GetEncoding(encoding), self);
+	}
+
+	// Allocates an sbyte[] of exactly the encoded size and lets the encoder write straight into it.
+	private static sbyte[] GetSBytesForEncoding(System.Text.Encoding encoding, string s)
+	{
+		sbyte[] sbytes = new sbyte[encoding.GetByteCount(s)];
+		encoding.GetBytes(s, 0, s.Length, (byte[])(object)sbytes, 0);
+		return sbytes;
+	}
+}
\ No newline at end of file


Mime
View raw message