lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [26/62] [abbrv] lucenenet git commit: Deleted obsolete Contrib folder
Date Sat, 01 Apr 2017 01:09:19 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Nl/DutchAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Nl/DutchAnalyzer.cs b/src/contrib/Analyzers/Nl/DutchAnalyzer.cs
deleted file mode 100644
index b7878dd..0000000
--- a/src/contrib/Analyzers/Nl/DutchAnalyzer.cs
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Collections;
-using Lucene.Net.Analysis.Standard;
-using Lucene.Net.Support;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.Nl
-{
-    /*
-     * {@link Analyzer} for the Dutch language.
-     * <p>
-     * Supports a list of stopwords (words that will not be indexed at all),
-     * a list of exclusions (words that will be indexed but not stemmed) and a
-     * dictionary of word-stem pairs that overrules the stemming algorithm
-     * (dictionary stemming).
-     * A default set of stopwords is used unless an alternative list is specified;
-     * the exclusion list is empty by default.
-     * </p>
-     *
-     * <p><b>NOTE</b>: This class uses the same {@link Version}
-     * dependent settings as {@link StandardAnalyzer}.</p>
-     */
-    public class DutchAnalyzer : Analyzer
-    {
-        /*
-         * List of typical Dutch stopwords.
-         * @deprecated use {@link #getDefaultStopSet()} instead
-         */
-        public static readonly String[] DUTCH_STOP_WORDS =
-      {
-        "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
-        "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
-        "er", "maar", "om", "hem", "dan", "zou", "of", "wat", "mijn", "men", "dit", "zo",
-        "door", "over", "ze", "zich", "bij", "ook", "tot", "je", "mij", "uit", "der", "daar",
-        "haar", "naar", "heb", "hoe", "heeft", "hebben", "deze", "u", "want", "nog", "zal",
-        "me", "zij", "nu", "ge", "geen", "omdat", "iets", "worden", "toch", "al", "waren",
-        "veel", "meer", "doen", "toen", "moet", "ben", "zonder", "kan", "hun", "dus",
-        "alles", "onder", "ja", "eens", "hier", "wie", "werd", "altijd", "doch", "wordt",
-        "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
-        "uw", "iemand", "geweest", "andere"
-      };
-
-        /*
-         * Returns the shared default stop-word set built from {@link #DUTCH_STOP_WORDS}.
-         * @return an unmodifiable instance of the default stop-words set.
-         */
-        public static ISet<string> getDefaultStopSet()
-        {
-            return DefaultSetHolder.DEFAULT_STOP_SET;
-        }
-
-        /*
-         * Holds the default stop set, wrapped by CharArraySet.UnmodifiableSet.
-         * NOTE(review): the 'false' constructor argument is presumably "ignoreCase";
-         * confirm against CharArraySet.
-         */
-        static class DefaultSetHolder
-        {
-            internal static readonly ISet<string> DEFAULT_STOP_SET = CharArraySet
-                .UnmodifiableSet(new CharArraySet((IEnumerable<string>)DUTCH_STOP_WORDS, false));
-        }
-
-
-        /*
-         * Contains the stopwords used with the StopFilter.
-         */
-        private readonly ISet<string> stoptable;
-
-        /*
-         * Contains words that should be indexed but not stemmed.
-         */
-        private ISet<string> excltable = Support.Compatibility.SetFactory.CreateHashSet<string>();
-
-        /*
-         * Word-stem pairs handed to the DutchStemFilter; they overrule the algorithm.
-         */
-        private IDictionary<String, String> stemdict = new HashMap<String, String>();
-        private readonly Version matchVersion;
-
-        /*
-         * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS})
-         * and four default entries in the stem dictionary (not the exclusion table):
-         * fiets, bromfiets, ei and kind.
-         *
-         * @param matchVersion version passed on to the tokenizer and stop filter
-         */
-        public DutchAnalyzer(Version matchVersion)
-            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
-        {
-            stemdict.Add("fiets", "fiets"); //otherwise fiet
-            stemdict.Add("bromfiets", "bromfiets"); //otherwise bromfiet
-            stemdict.Add("ei", "eier");
-            stemdict.Add("kind", "kinder");
-        }
-
-        /*
-         * Builds an analyzer with the given stop words and an empty stem exclusion set.
-         *
-         * @param matchVersion version passed on to the tokenizer and stop filter
-         * @param stopwords words that are removed from the token stream
-         */
-        public DutchAnalyzer(Version matchVersion, ISet<string> stopwords)
-            : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the given stop words and stem exclusions.
-         * Both sets are copied and wrapped as unmodifiable, so later changes to the
-         * caller's sets do not affect this analyzer.
-         *
-         * @param matchVersion version passed on to the tokenizer and stop filter
-         * @param stopwords words that are removed from the token stream
-         * @param stemExclusionTable words that are indexed but never stemmed
-         */
-        public DutchAnalyzer(Version matchVersion, ISet<string> stopwords, ISet<string> stemExclusionTable)
-        {
-            stoptable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords));
-            excltable = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stemExclusionTable));
-            this.matchVersion = matchVersion;
-            SetOverridesTokenStreamMethod<DutchAnalyzer>();
-        }
-
-        /*
-         * Builds an analyzer with the given stop words.
-         *
-         * @param matchVersion version passed on to the tokenizer and stop filter
-         * @param stopwords stop words, converted with StopFilter.MakeStopSet
-         * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
-         */
-        public DutchAnalyzer(Version matchVersion, params string[] stopwords)
-            : this(matchVersion, StopFilter.MakeStopSet(stopwords))
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the given stop words.
-         *
-         * @param matchVersion version passed on to the tokenizer and stop filter
-         * @param stopwords stop words; forwarded to the ISet overload
-         * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
-         */
-        public DutchAnalyzer(Version matchVersion, HashSet<string> stopwords)
-            : this(matchVersion, (ISet<string>)stopwords)
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the stop words read from the given file.
-         *
-         * @param matchVersion version passed on to the tokenizer and stop filter
-         * @param stopwords file read with WordlistLoader.GetWordSet
-         * @throws IOException if the stop word file cannot be read
-         * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
-         */
-        public DutchAnalyzer(Version matchVersion, FileInfo stopwords)
-        {
-            // NOTE(review): the original port marked this constructor "completely broken"
-            // without saying why. Unlike the set-based constructors it stores the loaded
-            // set directly, without the Copy/UnmodifiableSet wrapping.
-            SetOverridesTokenStreamMethod<DutchAnalyzer>();
-            try
-            {
-                stoptable = WordlistLoader.GetWordSet(stopwords);
-            }
-            catch (IOException e)
-            {
-                // Rethrow as IOException with a message naming the file; the inner
-                // exception keeps the original failure.
-                throw new IOException("Unable to load Dutch stop words from '" + stopwords.FullName + "'.", e);
-            }
-            this.matchVersion = matchVersion;
-        }
-
-        /*
-         * Builds an exclusionlist from an array of Strings.
-         *
-         * @param exclusionlist words that are indexed but never stemmed
-         * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
-         */
-        public void SetStemExclusionTable(params string[] exclusionlist)
-        {
-            excltable = StopFilter.MakeStopSet(exclusionlist);
-            PreviousTokenStream = null; // force a new stemmer to be created
-        }
-
-        /*
-         * Uses the given set as the exclusionlist. The set is stored as passed, not copied.
-         *
-         * @param exclusionlist words that are indexed but never stemmed
-         * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
-         */
-        public void SetStemExclusionTable(ISet<string> exclusionlist)
-        {
-            excltable = exclusionlist;
-            PreviousTokenStream = null; // force a new stemmer to be created
-        }
-
-        /*
-         * Builds an exclusionlist from the words contained in the given file.
-         *
-         * @param exclusionlist file read with WordlistLoader.GetWordSet
-         * @throws IOException if the exclusion file cannot be read
-         * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
-         */
-        public void SetStemExclusionTable(FileInfo exclusionlist)
-        {
-            try
-            {
-                excltable = WordlistLoader.GetWordSet(exclusionlist);
-                PreviousTokenStream = null; // force a new stemmer to be created
-            }
-            catch (IOException e)
-            {
-                // Rethrow as IOException with a message naming the file; the inner
-                // exception keeps the original failure.
-                throw new IOException("Unable to load stem exclusions from '" + exclusionlist.FullName + "'.", e);
-            }
-        }
-
-        /*
-         * Reads a stem dictionary file that overrules the stemming algorithm.
-         * This is a text file that contains per line
-         * <tt>word<b>\t</b>stem</tt>, i.e. two tab separated words.
-         *
-         * @param stemdictFile file read with WordlistLoader.GetStemDict
-         * @throws IOException if the dictionary file cannot be read
-         */
-        public void SetStemDictionary(FileInfo stemdictFile)
-        {
-            try
-            {
-                stemdict = WordlistLoader.GetStemDict(stemdictFile);
-                PreviousTokenStream = null; // force a new stemmer to be created
-            }
-            catch (IOException e)
-            {
-                // Rethrow as IOException with a message naming the file; the inner
-                // exception keeps the original failure.
-                throw new IOException("Unable to load stem dictionary from '" + stemdictFile.FullName + "'.", e);
-            }
-        }
-
-        /*
-         * Creates a {@link TokenStream} which tokenizes all the text in the
-         * provided {@link Reader}.
-         *
-         * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-         *   filtered with {@link StandardFilter}, {@link StopFilter},
-         *   and {@link DutchStemFilter}
-         */
-        public override TokenStream TokenStream(String fieldName, TextReader reader)
-        {
-            TokenStream result = new StandardTokenizer(matchVersion, reader);
-            result = new StandardFilter(result);
-            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                    result, stoptable);
-            result = new DutchStemFilter(result, excltable, stemdict);
-            return result;
-        }
-
-        /*
-         * Tokenizer/filter-chain pair kept in PreviousTokenStream so that
-         * ReusableTokenStream can reset the tokenizer instead of rebuilding the chain.
-         */
-        class SavedStreams
-        {
-            protected internal Tokenizer source;
-            protected internal TokenStream result;
-        };
-
-        /*
-         * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
-         * text in the provided {@link Reader}.
-         *
-         * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-         *   filtered with {@link StandardFilter}, {@link StopFilter},
-         *   and {@link DutchStemFilter}
-         */
-        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
-        {
-            if (overridesTokenStreamMethod)
-            {
-                // LUCENE-1678: force fallback to tokenStream() if we
-                // have been subclassed and that subclass overrides
-                // tokenStream but not reusableTokenStream
-                return TokenStream(fieldName, reader);
-            }
-
-            SavedStreams streams = (SavedStreams)PreviousTokenStream;
-            if (streams == null)
-            {
-                // First use, or a setter cleared the cache: build the same chain as TokenStream().
-                streams = new SavedStreams();
-                streams.source = new StandardTokenizer(matchVersion, reader);
-                streams.result = new StandardFilter(streams.source);
-                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.result, stoptable);
-                streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
-                PreviousTokenStream = streams;
-            }
-            else
-            {
-                // Reuse the cached chain; only the tokenizer is pointed at the new reader.
-                streams.source.Reset(reader);
-            }
-            return streams.result;
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Nl/DutchStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Nl/DutchStemFilter.cs b/src/contrib/Analyzers/Nl/DutchStemFilter.cs
deleted file mode 100644
index a3a4c42..0000000
--- a/src/contrib/Analyzers/Nl/DutchStemFilter.cs
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Collections;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Support;
-
-namespace Lucene.Net.Analysis.Nl
-{
-    /*
-     * A {@link TokenFilter} that stems Dutch words.
-     * <p>
-     * It supports a table of words that should not be stemmed at all. The stemmer
-     * can be replaced after the filter has been created (as long as the
-     * replacement is a {@link DutchStemmer}).
-     * </p>
-     * NOTE: This stemmer does not implement the Snowball algorithm correctly,
-     * specifically doubled consonants. It is recommended that you consider using
-     * the "Dutch" stemmer in the snowball package instead. This stemmer will likely
-     * be deprecated in a future release.
-     */
-    public sealed class DutchStemFilter : TokenFilter
-    {
-        // Term attribute of the stream; read and rewritten in IncrementToken.
-        private ITermAttribute termAtt;
-
-        // Words that are passed through unstemmed; null means no exclusions.
-        private ISet<string> exclusions = null;
-
-        // Stemmer applied to every term that is not excluded.
-        private DutchStemmer stemmer = null;
-
-        /*
-         * Builds a DutchStemFilter with a fresh DutchStemmer and no exclusion table.
-         */
-        public DutchStemFilter(TokenStream _in)
-            : base(_in)
-        {
-            termAtt = AddAttribute<ITermAttribute>();
-            stemmer = new DutchStemmer();
-        }
-
-        /*
-         * Builds a DutchStemFilter that uses an exclusion table.
-         */
-        public DutchStemFilter(TokenStream _in, ISet<string> exclusiontable)
-            : this(_in)
-        {
-            this.exclusions = exclusiontable;
-        }
-
-        /*
-         * Builds a DutchStemFilter that uses an exclusion table and a stem dictionary.
-         *
-         * @param stemdictionary Dictionary of word stem pairs, that overrule the algorithm
-         */
-        public DutchStemFilter(TokenStream _in, ISet<string> exclusiontable, IDictionary<string, string> stemdictionary)
-            : this(_in, exclusiontable)
-        {
-            // The stemmer was created by the chained constructor, so this always reaches it.
-            SetStemDictionary(stemdictionary);
-        }
-
-        /*
-         * Advances the input stream by one token and stems its term unless the
-         * term is in the exclusion table.
-         *
-         * @return true if a token was produced, false once the input is exhausted
-         */
-        public override bool IncrementToken()
-        {
-            if (!input.IncrementToken())
-            {
-                return false;
-            }
-
-            String current = termAtt.Term;
-            bool excluded = exclusions != null && exclusions.Contains(current);
-            if (!excluded)
-            {
-                String stemmed = stemmer.Stem(current);
-                // Only touch the term buffer when stemming actually changed the term.
-                if (stemmed != null && !stemmed.Equals(current))
-                {
-                    termAtt.SetTermBuffer(stemmed);
-                }
-            }
-            return true;
-        }
-
-        /*
-         * Set an alternative/custom {@link DutchStemmer} for this filter.
-         * A null argument is ignored and the current stemmer is kept.
-         */
-        public void SetStemmer(DutchStemmer stemmer)
-        {
-            if (stemmer == null)
-            {
-                return;
-            }
-            this.stemmer = stemmer;
-        }
-
-        /*
-         * Set an alternative exclusion list for this filter.
-         */
-        public void SetExclusionTable(ISet<string> exclusiontable)
-        {
-            this.exclusions = exclusiontable;
-        }
-
-        /*
-         * Set dictionary for stemming, this dictionary overrules the algorithm,
-         * so you can correct for a particular unwanted word-stem pair.
-         * The dictionary is handed to the current stemmer.
-         */
-        public void SetStemDictionary(IDictionary<string, string> dict)
-        {
-            if (stemmer == null)
-            {
-                return;
-            }
-            stemmer.SetStemDictionary(dict);
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Nl/DutchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Nl/DutchStemmer.cs b/src/contrib/Analyzers/Nl/DutchStemmer.cs
deleted file mode 100644
index b1036a8..0000000
--- a/src/contrib/Analyzers/Nl/DutchStemmer.cs
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.IO;
-using System.Text;
-using System.Collections;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis.Nl
-{
-    /*
-     * A stemmer for Dutch words.
-     * <p>
-     * The algorithm is an implementation of
-     * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
-     * algorithm in Martin Porter's snowball project.
-     * </p>
-     * An instance keeps its working buffer and region markers in fields that
-     * Stem() overwrites on every call, so one instance must not be used by
-     * several threads at once.
-     */
-
-    public class DutchStemmer
-    {
-        /*
-         * Buffer for the terms while stemming them.
-         */
-        private StringBuilder sb = new StringBuilder();
-        // Set by step2 when it removed a trailing "e"; read by step3b for the "bar" suffix.
-        private bool _removedE;
-        // Optional word -> stem overrides; consulted before the algorithm runs.
-        private IDictionary<string, string> _stemDict;
-
-        // Start indexes of the regions R1 and R2 in the buffer.
-        private int _R1;
-        private int _R2;
-
-        //TODO convert to internal
-        /*
-         * Stems the given term to an unique <tt>discriminator</tt>.
-         * A null or empty term is returned unchanged. The term is lower-cased first;
-         * a term containing a non-letter is returned lower-cased but unstemmed.
-         * If the stem dictionary contains the lower-cased term, its value is returned
-         * as is (including null).
-         *
-         * @param term The term that should be stemmed.
-         * @return Discriminator for <tt>term</tt>
-         */
-        public String Stem(String term)
-        {
-            // Nothing to stem; this also protects storeYandI, which reads sb[0].
-            if (string.IsNullOrEmpty(term))
-                return term;
-
-            // Invariant lower-casing keeps the result independent of the current
-            // thread culture (e.g. the Turkish dotless i).
-            term = term.ToLowerInvariant();
-            if (!isStemmable(term))
-                return term;
-
-            // A dictionary entry overrules the algorithm; single lookup.
-            String dictStem;
-            if (_stemDict != null && _stemDict.TryGetValue(term, out dictStem))
-                return dictStem;
-
-            // Reset the StringBuilder.
-            sb.Length = 0;
-            sb.Insert(0, term);
-            // Stemming starts here...
-            substitute(sb);
-            storeYandI(sb);
-            _R1 = getRIndex(sb, 0);
-            _R1 = Math.Max(3, _R1); // R1 never starts before index 3
-            step1(sb);
-            step2(sb);
-            _R2 = getRIndex(sb, _R1); // R2 is computed on the buffer as left by steps 1 and 2
-            step3a(sb);
-            step3b(sb);
-            step4(sb);
-            reStoreYandI(sb);
-            return sb.ToString();
-        }
-
-        /*
-         * Removes a trailing "ene" or "en" when it starts at or after R1 and the
-         * character before it passes isValidEnEnding, then undoubles the new ending.
-         *
-         * @param sb String being stemmed
-         * @return true if a suffix was removed
-         */
-        private bool enEnding(StringBuilder sb)
-        {
-            String[] enend = new String[] { "ene", "en" };
-            // The buffer is only modified right before returning, so one snapshot serves both suffixes.
-            String s = sb.ToString();
-            for (int i = 0; i < enend.Length; i++)
-            {
-                String end = enend[i];
-                int index = s.Length - end.Length;
-                if (s.EndsWith(end) &&
-                    index >= _R1 &&
-                    isValidEnEnding(sb, index - 1)
-                )
-                {
-                    sb.Remove(index, end.Length);
-                    unDouble(sb, index);
-                    return true;
-                }
-            }
-            return false;
-        }
-
-
-        /*
-         * Step 1: handles the endings "heden", "ene"/"en", "se" and "s".
-         * Does nothing when R1 starts at or beyond the end of the buffer.
-         * At most one of the endings is processed.
-         *
-         * @param sb String being stemmed
-         */
-        private void step1(StringBuilder sb)
-        {
-            if (_R1 >= sb.Length)
-                return;
-
-            String s = sb.ToString();
-            int LengthR1 = sb.Length - _R1;
-            int index;
-
-            if (s.EndsWith("heden"))
-            {
-                // Replaces "heden" by "heid" inside the R1 region only.
-                var toReplace = sb.ToString(_R1, LengthR1).Replace("heden", "heid");
-                sb.Remove(_R1, LengthR1);
-                sb.Insert(_R1, toReplace);
-                return;
-            }
-
-            if (enEnding(sb))
-                return;
-
-            // enEnding returned false, so the buffer still equals the snapshot s.
-            if (s.EndsWith("se") &&
-                (index = s.Length - 2) >= _R1 &&
-                isValidSEnding(sb, index - 1)
-            )
-            {
-                sb.Remove(index, 2);
-                return;
-            }
-            if (s.EndsWith("s") &&
-                (index = s.Length - 1) >= _R1 &&
-                isValidSEnding(sb, index - 1))
-            {
-                sb.Remove(index, 1);
-            }
-        }
-
-        /*
-         * Remove suffix e if in R1 and
-         * preceded by a non-vowel, and then undouble the ending.
-         * Records in _removedE whether the "e" was removed.
-         *
-         * @param sb String being stemmed
-         */
-        private void step2(StringBuilder sb)
-        {
-            _removedE = false;
-            if (_R1 >= sb.Length)
-                return;
-            String s = sb.ToString();
-            int index = s.Length - 1;
-            if (index >= _R1 &&
-                s.EndsWith("e") &&
-                !isVowel(sb[index - 1]))
-            {
-                sb.Remove(index, 1);
-                unDouble(sb);
-                _removedE = true;
-            }
-        }
-
-        /*
-         * Remove "heid" if it is in R2 and not preceded by "c", then try
-         * to remove an "en"/"ene" ending.
-         *
-         * @param sb String being stemmed
-         */
-        private void step3a(StringBuilder sb)
-        {
-            if (_R2 >= sb.Length)
-                return;
-            String s = sb.ToString();
-            int index = s.Length - 4;
-            if (s.EndsWith("heid") && index >= _R2 && sb[index - 1] != 'c')
-            {
-                sb.Remove(index, 4); //remove heid
-                enEnding(sb);
-            }
-        }
-
-        /*
-         * <p>A d-suffix, or derivational suffix, enables a new word,
-         * often with a different grammatical category, or with a different
-         * sense, to be built from another word. Whether a d-suffix can be
-         * attached is discovered not from the rules of grammar, but by
-         * referring to a dictionary. So in English, ness can be added to
-         * certain adjectives to form corresponding nouns (littleness,
-         * kindness, foolishness ...) but not to all adjectives
-         * (not for example, to big, cruel, wise ...) d-suffixes can be
-         * used to change meaning, often in rather exotic ways.</p>
-         * Remove "ing", "end", "ig", "lijk", "baar" and "bar" when they are in R2.
-         * Only the first matching suffix is processed.
-         *
-         * @param sb String being stemmed
-         */
-        private void step3b(StringBuilder sb)
-        {
-            if (_R2 >= sb.Length)
-                return;
-            String s = sb.ToString();
-            int index = 0;
-
-            if ((s.EndsWith("end") || s.EndsWith("ing")) &&
-                (index = s.Length - 3) >= _R2)
-            {
-                sb.Remove(index, 3);
-                if (sb[index - 2] == 'i' &&
-                    sb[index - 1] == 'g')
-                {
-                    // A preceding "ig" is removed too when it is in R2 and not preceded by "e".
-                    if (sb[index - 3] != 'e' && index - 2 >= _R2)
-                    {
-                        index -= 2;
-                        sb.Remove(index, 2);
-                    }
-                }
-                else
-                {
-                    unDouble(sb, index);
-                }
-                return;
-            }
-            if (s.EndsWith("ig") &&
-                (index = s.Length - 2) >= _R2
-            )
-            {
-                if (sb[index - 1] != 'e')
-                    sb.Remove(index, 2);
-                return;
-            }
-            if (s.EndsWith("lijk") &&
-                (index = s.Length - 4) >= _R2
-            )
-            {
-                sb.Remove(index, 4);
-                step2(sb); // repeat step 2 on the shortened word
-                return;
-            }
-            if (s.EndsWith("baar") &&
-                (index = s.Length - 4) >= _R2
-            )
-            {
-                sb.Remove(index, 4);
-                return;
-            }
-            if (s.EndsWith("bar") &&
-                (index = s.Length - 3) >= _R2
-            )
-            {
-                // "bar" is only removed when the latest step2 run removed an "e".
-                if (_removedE)
-                    sb.Remove(index, 3);
-                return;
-            }
-        }
-
-        /*
-         * undouble vowel
-         * If the words ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
-         *
-         * @param sb String being stemmed
-         */
-        private void step4(StringBuilder sb)
-        {
-            if (sb.Length < 4)
-                return;
-            String end = sb.ToString(sb.Length - 4, 4);
-            char c = end[0];
-            char v1 = end[1];
-            char v2 = end[2];
-            char d = end[3];
-            if (v1 == v2 &&
-                d != 'I' &&
-                v1 != 'i' &&
-                isVowel(v1) &&
-                !isVowel(d) &&
-                !isVowel(c))
-            {
-                sb.Remove(sb.Length - 2, 1);
-            }
-        }
-
-        /*
-         * Checks if a term could be stemmed.
-         *
-         * @return true if, and only if, every character of the term is a letter
-         *         (so also for an empty term).
-         */
-        private bool isStemmable(String term)
-        {
-            for (int c = 0; c < term.Length; c++)
-            {
-                if (!char.IsLetter(term[c])) return false;
-            }
-            return true;
-        }
-
-        /*
-         * Substitute ä, ë, ï, ö, ü, á, é, í, ó, ú by their unaccented vowel.
-         */
-        private void substitute(StringBuilder buffer)
-        {
-            for (int i = 0; i < buffer.Length; i++)
-            {
-                switch (buffer[i])
-                {
-                    case 'ä':
-                    case 'á':
-                        {
-                            buffer[i] = 'a';
-                            break;
-                        }
-                    case 'ë':
-                    case 'é':
-                        {
-                            buffer[i] = 'e';
-                            break;
-                        }
-                    case 'ü':
-                    case 'ú':
-                        {
-                            buffer[i] = 'u';
-                            break;
-                        }
-                    case 'ï':
-                    case 'í': // was a plain 'i', which left í unsubstituted
-                        {
-                            buffer[i] = 'i';
-                            break;
-                        }
-                    case 'ö':
-                    case 'ó':
-                        {
-                            buffer[i] = 'o';
-                            break;
-                        }
-                }
-            }
-        }
-
-        /*
-         * An "s"/"se" ending is valid when the character at index (the one in
-         * front of the ending) is neither a vowel nor "j".
-         */
-        private bool isValidSEnding(StringBuilder sb, int index)
-        {
-            char c = sb[index];
-            if (isVowel(c) || c == 'j')
-                return false;
-            return true;
-        }
-
-        /*
-         * An "en"/"ene" ending is valid when the character at index (the one in
-         * front of the ending) is not a vowel and the text up to index does not end in "gem".
-         */
-        private bool isValidEnEnding(StringBuilder sb, int index)
-        {
-            char c = sb[index];
-            if (isVowel(c))
-                return false;
-            // The former "c < 3" test compared a character code with 3 and could never
-            // hold for letter-only input, so it has been dropped.
-            // ends with "gem"? Only look back when two characters precede index.
-            if (c == 'm' && index >= 2 && sb[index - 2] == 'g' && sb[index - 1] == 'e')
-                return false;
-            return true;
-        }
-
-        /*
-         * Undoubles the ending of the whole buffer.
-         */
-        private void unDouble(StringBuilder sb)
-        {
-            unDouble(sb, sb.Length);
-        }
-
-        /*
-         * If the first endIndex characters end in kk, tt, dd, nn, mm or ff,
-         * removes the character at endIndex - 1.
-         */
-        private void unDouble(StringBuilder sb, int endIndex)
-        {
-            String s = sb.ToString(0, endIndex);
-            if (s.EndsWith("kk") || s.EndsWith("tt") || s.EndsWith("dd") || s.EndsWith("nn") || s.EndsWith("mm") || s.EndsWith("ff"))
-            {
-                sb.Remove(endIndex - 1, 1);
-            }
-        }
-
-        /*
-         * Finds the start of a region: the index after the first non-vowel that
-         * follows a vowel, searching from start (a start of 0 is treated as 1).
-         * When no such position exists the result lies beyond the end of the buffer.
-         */
-        private int getRIndex(StringBuilder sb, int start)
-        {
-            if (start == 0)
-                start = 1;
-            int i = start;
-            for (; i < sb.Length; i++)
-            {
-                //first non-vowel preceded by a vowel
-                if (!isVowel(sb[i]) && isVowel(sb[i - 1]))
-                {
-                    return i + 1;
-                }
-            }
-            return i + 1;
-        }
-
-        /*
-         * Upper-cases the "y" and "i" characters that must not count as vowels:
-         * an initial "y", a "y" after a vowel, and an "i" between two vowels.
-         * Expects a non-empty buffer (it reads sb[0]).
-         */
-        private void storeYandI(StringBuilder sb)
-        {
-            if (sb[0] == 'y')
-                sb[0] = 'Y';
-
-            int last = sb.Length - 1;
-
-            for (int i = 1; i < last; i++)
-            {
-                switch (sb[i])
-                {
-                    case 'i':
-                        {
-                            if (isVowel(sb[i - 1]) &&
-                                isVowel(sb[i + 1])
-                            )
-                                sb[i] = 'I';
-                            break;
-                        }
-                    case 'y':
-                        {
-                            if (isVowel(sb[i - 1]))
-                                sb[i] = 'Y';
-                            break;
-                        }
-                }
-            }
-            // The loop stops before the last character, so a final "y" is handled here.
-            if (last > 0 && sb[last] == 'y' && isVowel(sb[last - 1]))
-                sb[last] = 'Y';
-        }
-
-        /*
-         * Turns the "I" and "Y" markers set by storeYandI back into lower case.
-         */
-        private void reStoreYandI(StringBuilder sb)
-        {
-            String tmp = sb.ToString();
-            sb.Length = 0;
-            sb.Insert(0, tmp.Replace("I", "i").Replace("Y", "y"));
-        }
-
-        /*
-         * Vowels are a, e, i, o, u, y and è. The upper-case markers "I" and "Y"
-         * are not vowels.
-         */
-        private bool isVowel(char c)
-        {
-            switch (c)
-            {
-                case 'e':
-                case 'a':
-                case 'o':
-                case 'i':
-                case 'u':
-                case 'y':
-                case 'è':
-                    {
-                        return true;
-                    }
-            }
-            return false;
-        }
-
-        /*
-         * Sets the dictionary of word-stem pairs that overrule the algorithm.
-         * The dictionary is stored as passed, not copied; null disables the lookup.
-         */
-        protected internal void SetStemDictionary(IDictionary<string, string> dict)
-        {
-            _stemDict = dict;
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/AbstractEncoder.cs b/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
deleted file mode 100644
index 1c9ffe8..0000000
--- a/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Base class for payload encoders.
-    /// </summary>
-    public abstract class AbstractEncoder : PayloadEncoder
-    {
-        public Payload Encode(char[] buffer)
-        {
-            return Encode(buffer, 0, buffer.Length);
-        }
-
-        public abstract Payload Encode(char[] buffer, int offset, int length);
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs b/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
deleted file mode 100644
index b514735..0000000
--- a/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Characters before the delimiter are the "token", those after are the payload.
-    /// <p/>
-    /// For example, if the delimiter is '|', then for the string "foo|bar", foo is the token
-    /// and "bar" is a payload.
-    /// <p/>
-    /// Note, you can also include a {@link org.apache.lucene.analysis.payloads.PayloadEncoder} to convert the 
-    /// payload in an appropriate way (from characters to bytes).
-    /// <p/>
-    /// Note make sure your Tokenizer doesn't split on the delimiter, or this won't work
-    /// </summary>
-    /// <seealso cref="PayloadEncoder"/>
-    public sealed class DelimitedPayloadTokenFilter : TokenFilter
-    {
-        public static readonly char DEFAULT_DELIMITER = '|';
-        internal char delimiter = DEFAULT_DELIMITER;
-        internal ITermAttribute termAtt;
-        internal IPayloadAttribute payAtt;
-        internal PayloadEncoder encoder;
-
-        /// <summary>
-        /// Construct a token stream filtering the given input.
-        /// </summary>
-        internal DelimitedPayloadTokenFilter(TokenStream input)
-            : this(input, DEFAULT_DELIMITER, new IdentityEncoder())
-        {
-
-        }
-
-
-        public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder)
-            : base(input)
-        {
-            termAtt = AddAttribute<ITermAttribute>();
-            payAtt = AddAttribute<IPayloadAttribute>();
-            this.delimiter = delimiter;
-            this.encoder = encoder;
-        }
-
-        public override bool IncrementToken()
-        {
-            bool result = false;
-            if (input.IncrementToken())
-            {
-                char[] buffer = termAtt.TermBuffer();
-                int length = termAtt.TermLength();
-                //look for the delimiter
-                bool seen = false;
-                for (int i = 0; i < length; i++)
-                {
-                    if (buffer[i] == delimiter)
-                    {
-                        termAtt.SetTermBuffer(buffer, 0, i);
-                        payAtt.Payload = encoder.Encode(buffer, i + 1, (length - (i + 1)));
-                        seen = true;
-                        break;//at this point, we know the whole piece, so we can exit.  If we don't see the delimiter, then the termAtt is the same
-                    }
-                }
-                if (seen == false)
-                {
-                    //no delimiter
-                    payAtt.Payload = null;
-                }
-                result = true;
-            }
-            return result;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/FloatEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/FloatEncoder.cs b/src/contrib/Analyzers/Payloads/FloatEncoder.cs
deleted file mode 100644
index ca9a8a9..0000000
--- a/src/contrib/Analyzers/Payloads/FloatEncoder.cs
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using Lucene.Net.Index;
-using Single = Lucene.Net.Support.Single;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Encode a character array Float as a {@link org.apache.lucene.index.Payload}.
-    /// </summary>
-    /// <seealso cref="PayloadHelper.EncodeFloat(float, byte[], int)"/>
-    public class FloatEncoder : AbstractEncoder, PayloadEncoder
-    {
-        public override Payload Encode(char[] buffer, int offset, int length)
-        {
-            Payload result = new Payload();
-            float payload = Single.Parse(new string(buffer, offset, length)); // TODO: improve this so that we don't have to new Strings
-            byte[] bytes = PayloadHelper.EncodeFloat(payload);
-            result.SetData(bytes);
-            return result;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/IdentityEncoder.cs b/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
deleted file mode 100644
index 5a92eeb..0000000
--- a/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Text;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Does nothing other than convert the char array to a byte array using the specified encoding.
-    /// </summary>
-    public class IdentityEncoder : AbstractEncoder, PayloadEncoder
-    {
-
-        protected internal Encoding encoding = Encoding.UTF8;
-        protected internal String encodingName = "UTF-8";  //argh, stupid 1.4
-
-        public IdentityEncoder()
-        {
-        }
-
-        public IdentityEncoder(Encoding encoding)
-        {
-            this.encoding = encoding;
-            encodingName = encoding.EncodingName;
-        }
-
-
-        public override Payload Encode(char[] buffer, int offset, int length)
-        {
-            //what's the most efficient way to get a byte [] from a char[] array
-            //Do we have to go through String?
-            String tmp = new String(buffer, offset, length);
-            Payload result = null;//Can we avoid allocating by knowing where using the new API?
-            try
-            {
-                result = new Payload(encoding.GetBytes(tmp));
-            }
-            catch (EncoderFallbackException)
-            {
-                //should never hit this, since we get the name from the Charset
-            }
-
-            return result;
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/IntegerEncoder.cs b/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
deleted file mode 100644
index 7b16d50..0000000
--- a/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using Lucene.Net.Index;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Encode a character array Integer as a {@link org.apache.lucene.index.Payload}.
-    /// </summary>
-    /// <seealso cref="PayloadHelper.EncodeInt(int, byte[], int)"/>
-    public class IntegerEncoder : AbstractEncoder, PayloadEncoder
-    {
-        public override Payload Encode(char[] buffer, int offset, int length)
-        {
-            Payload result = new Payload();
-            int payload = ArrayUtil.ParseInt(buffer, offset, length);//TODO: improve this so that we don't have to new Strings
-            byte[] bytes = PayloadHelper.EncodeInt(payload);
-            result.SetData(bytes);
-            return result;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs b/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs
deleted file mode 100644
index 9f5167a..0000000
--- a/src/contrib/Analyzers/Payloads/NumericPayloadTokenFilter.cs
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Assigns a payload to a token based on the <see cref="Token.Type()"/>
-    /// </summary>
-    public class NumericPayloadTokenFilter : TokenFilter
-    {
-        private String typeMatch;
-        private Payload thePayload;
-
-        private IPayloadAttribute payloadAtt;
-        private ITypeAttribute typeAtt;
-
-        public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch)
-            : base(input)
-        {
-            //Need to encode the payload
-            thePayload = new Payload(PayloadHelper.EncodeFloat(payload));
-            this.typeMatch = typeMatch;
-            payloadAtt = AddAttribute<IPayloadAttribute>();
-            typeAtt = AddAttribute<ITypeAttribute>();
-        }
-
-        public sealed override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                if (typeAtt.Type.Equals(typeMatch))
-                    payloadAtt.Payload = thePayload;
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/PayloadEncoder.cs b/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
deleted file mode 100644
index 5a8b6f6..0000000
--- a/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Mainly for use with the DelimitedPayloadTokenFilter, converts char buffers to Payload
-    /// <p/>
-    /// NOTE: this interface is subject to change
-    /// </summary>
-    public interface PayloadEncoder
-    {
-        Payload Encode(char[] buffer);
-
-        /// <summary>
-        /// Convert a char array to a <see cref="Payload"/>
-        /// </summary>
-        /// <returns>An encoded <see cref="Payload"/></returns>
-        Payload Encode(char[] buffer, int offset, int length);
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/PayloadHelper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/PayloadHelper.cs b/src/contrib/Analyzers/Payloads/PayloadHelper.cs
deleted file mode 100644
index a3c5619..0000000
--- a/src/contrib/Analyzers/Payloads/PayloadHelper.cs
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using Lucene.Net.Support;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Utility methods for encoding payloads.
-    /// </summary>
-    public static class PayloadHelper
-    {
-        public static byte[] EncodeFloat(float payload)
-        {
-            return EncodeFloat(payload, new byte[4], 0);
-        }
-
-        public static byte[] EncodeFloat(float payload, byte[] data, int offset)
-        {
-            return EncodeInt(Single.FloatToIntBits(payload), data, offset);
-        }
-
-        public static byte[] EncodeInt(int payload)
-        {
-            return EncodeInt(payload, new byte[4], 0);
-        }
-
-        public static byte[] EncodeInt(int payload, byte[] data, int offset)
-        {
-            data[offset] = (byte) (payload >> 24);
-            data[offset + 1] = (byte) (payload >> 16);
-            data[offset + 2] = (byte) (payload >> 8);
-            data[offset + 3] = (byte) payload;
-            return data;
-        }
-
-        /// <summary>
-        /// <p>Decode the payload that was encoded using encodeFloat(float)</p>
-        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
-        /// </summary>
-        /// <param name="bytes">The bytes to decode</param>
-        /// <returns>the decoded float</returns>
-        public static float DecodeFloat(byte[] bytes)
-        {
-            return DecodeFloat(bytes, 0);
-        }
-
-        /// <summary>
-        /// <p>Decode the payload that was encoded using encodeFloat(float)</p>
-        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
-        /// </summary>
-        /// <param name="bytes">The bytes to decode</param>
-        /// <param name="offset">The offset into the array.</param>
-        /// <returns>The float that was encoded</returns>
-        public static float DecodeFloat(byte[] bytes, int offset)
-        {
-            return Single.IntBitsToFloat(DecodeInt(bytes, offset));
-        }
-
-        public static int DecodeInt(byte[] bytes, int offset)
-        {
-            return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
-                   | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs b/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs
deleted file mode 100644
index a9d9b51..0000000
--- a/src/contrib/Analyzers/Payloads/TokenOffsetPayloadTokenFilter.cs
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Adds the <see cref="Token.StartOffset"/>
-    /// and <see cref="Token.EndOffset"/>
-    /// First 4 bytes are the start
-    /// </summary>
-    public class TokenOffsetPayloadTokenFilter : TokenFilter
-    {
-        protected IOffsetAttribute offsetAtt;
-        protected IPayloadAttribute payAtt;
-
-        public TokenOffsetPayloadTokenFilter(TokenStream input)
-            : base(input)
-        {
-            offsetAtt = AddAttribute<IOffsetAttribute>();
-            payAtt = AddAttribute<IPayloadAttribute>();
-        }
-
-        public sealed override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                byte[] data = new byte[8];
-                PayloadHelper.EncodeInt(offsetAtt.StartOffset, data, 0);
-                PayloadHelper.EncodeInt(offsetAtt.EndOffset, data, 4);
-                Payload payload = new Payload(data);
-                payAtt.Payload = payload;
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilter.cs b/src/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilter.cs
deleted file mode 100644
index 8edc301..0000000
--- a/src/contrib/Analyzers/Payloads/TypeAsPayloadTokenFilter.cs
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Text;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Analysis.Payloads
-{
-    /// <summary>
-    /// Makes the Token.Type() a payload.
-    /// Encodes the type using <see cref="System.Text.Encoding.UTF8"/> as the encoding
-    /// </summary>
-    public class TypeAsPayloadTokenFilter : TokenFilter
-    {
-        private IPayloadAttribute payloadAtt;
-        private ITypeAttribute typeAtt;
-
-        public TypeAsPayloadTokenFilter(TokenStream input)
-            : base(input)
-        {
-            payloadAtt = AddAttribute<IPayloadAttribute>();
-            typeAtt = AddAttribute<ITypeAttribute>();
-        }
-
-        public sealed override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                String type = typeAtt.Type;
-                if (type != null && type.Equals("") == false)
-                {
-                    payloadAtt.Payload = new Payload(Encoding.UTF8.GetBytes(type));
-                }
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Position/PositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Position/PositionFilter.cs b/src/contrib/Analyzers/Position/PositionFilter.cs
deleted file mode 100644
index bbfa0d2..0000000
--- a/src/contrib/Analyzers/Position/PositionFilter.cs
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Position
-{
-    /* Set the positionIncrement of all tokens to the "positionIncrement",
-     * except the first return token which retains its original positionIncrement value.
-     * The default positionIncrement value is zero.
-     */
-    public sealed class PositionFilter : TokenFilter
-    {
-
-        /* Position increment to assign to all but the first token - default = 0 */
-        private int positionIncrement = 0;
-
-        /* The first token must have non-zero positionIncrement **/
-        private bool firstTokenPositioned = false;
-
-        private IPositionIncrementAttribute posIncrAtt;
-
-        /*
-         * Constructs a PositionFilter that assigns a position increment of zero to
-         * all but the first token from the given input stream.
-         * 
-         * @param input the input stream
-         */
-        public PositionFilter(TokenStream input)
-            : base(input)
-        {
-            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-        }
-
-        /*
-         * Constructs a PositionFilter that assigns the given position increment to
-         * all but the first token from the given input stream.
-         * 
-         * @param input the input stream
-         * @param positionIncrement position increment to assign to all but the first
-         *  token from the input stream
-         */
-        public PositionFilter(TokenStream input, int positionIncrement)
-            : this(input)
-        {
-            this.positionIncrement = positionIncrement;
-        }
-
-        public sealed override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                if (firstTokenPositioned)
-                {
-                    posIncrAtt.PositionIncrement = positionIncrement;
-                }
-                else
-                {
-                    firstTokenPositioned = true;
-                }
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-        public override void Reset()
-        {
-            base.Reset();
-            firstTokenPositioned = false;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Properties/AssemblyInfo.cs b/src/contrib/Analyzers/Properties/AssemblyInfo.cs
deleted file mode 100644
index 1263583..0000000
--- a/src/contrib/Analyzers/Properties/AssemblyInfo.cs
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System.Reflection;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Security;
-
-// General Information about an assembly is controlled through the following 
-// set of attributes. Change these attribute values to modify the information
-// associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Contrib.Analyzers")]
-[assembly: AssemblyDescription("")]
-[assembly: AssemblyConfiguration("")]
-[assembly: AssemblyCompany("The Apache Software Foundation")]
-[assembly: AssemblyProduct("Lucene.Net.Contrib.Analyzers")]
-[assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
-[assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
-[assembly: AssemblyCulture("")]
-
-// Setting ComVisible to false makes the types in this assembly not visible 
-// to COM components.  If you need to access a type in this assembly from 
-// COM, set the ComVisible attribute to true on that type.
-[assembly: ComVisible(false)]
-
-// The following GUID is for the ID of the typelib if this project is exposed to COM
-[assembly: Guid("36a962fb-a8be-4238-88c4-32568216e247")]
-
-// Version information for an assembly consists of the following four values:
-//
-//      Major Version
-//      Minor Version 
-//      Build Number
-//      Revision
-//
-// You can specify all the values or you can default the Build and Revision Numbers 
-// by using the '*' as shown below:
-// [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("3.0.3")]
-[assembly: AssemblyFileVersion("3.0.3")]
-[assembly: AllowPartiallyTrustedCallers]
-
-// for testing
-[assembly: InternalsVisibleTo("Lucene.Net.Contrib.Analyzers.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010075a07ce602f88e" +
-                                                         "f263c7db8cb342c58ebd49ecdcc210fac874260b0213fb929ac3dcaf4f5b39744b800f99073eca" +
-                                                         "72aebfac5f7284e1d5f2c82012a804a140f06d7d043d83e830cdb606a04da2ad5374cc92c0a495" +
-                                                         "08437802fb4f8fb80a05e59f80afb99f4ccd0dfe44065743543c4b053b669509d29d332cd32a0c" +
-                                                         "b1e97e84")]
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs b/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
deleted file mode 100644
index ac358c5..0000000
--- a/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Index;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.Query
-{
-/*
- * An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection
- * which prevents very common words from being passed into queries. 
- * <p>
- * For very large indexes the cost
- * of reading TermDocs for a very common word can be high. This analyzer was created after experience with
- * a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for 
- * this term to take 2 seconds.
- * </p>
- * <p>
- * Use the various "AddStopWords" methods in this class to automate the identification and addition of 
- * stop words found in an already existing index.
- * </p>
- */
-public class QueryAutoStopWordAnalyzer : Analyzer {
-  Analyzer _delegate;
-  HashMap<String,ISet<String>> stopWordsPerField = new HashMap<String,ISet<String>>();
-  //The default maximum percentage (40%) of index documents which
-  //can contain a term, after which the term is considered to be a stop word.
-  public const float defaultMaxDocFreqPercent = 0.4f;
-  private readonly Version matchVersion;
-
-  /*
-   * Initializes this analyzer with the Analyzer object that actually produces the tokens.
-   * @param matchVersion Stored and later passed to StopFilter.GetEnablePositionIncrementsVersionDefault
-   * @param _delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering
-   */
-  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer _delegate) 
-  {
-    this._delegate = _delegate;
-    SetOverridesTokenStreamMethod<QueryAutoStopWordAnalyzer>(); // not defined in this class; presumably feeds the overridesTokenStreamMethod check in ReusableTokenStream - confirm
-    this.matchVersion = matchVersion;
-  }
-
-  /*
-   * Automatically adds stop words for all fields with terms exceeding the defaultMaxDocFreqPercent
-   * (0.4f, applied as a fraction of the reader's document count).
-   * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
-   *               exceed the required document frequency
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int AddStopWords(IndexReader reader) 
-  {
-    return AddStopWords(reader, defaultMaxDocFreqPercent); // float constant, so this binds to the (IndexReader, float) overload
-  }
-
-  /*
-   * Automatically adds stop words for all indexed fields, using terms that occur in more than maxDocFreq documents
-   *
-   * @param reader     The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                   exceed the required document frequency
-   * @param maxDocFreq The maximum number of index documents which can contain a term, after which
-   *                   the term is considered to be a stop word
-   * @return The number of stop words identified, summed over all fields.
-   * @throws IOException
-   */
-  public int AddStopWords(IndexReader reader, int maxDocFreq) 
-  {
-    int numStopWords = 0;
-    ICollection<String> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.INDEXED); // only fields reported for FieldOption.INDEXED are examined
-    for (IEnumerator<String> iter = fieldNames.GetEnumerator(); iter.MoveNext();) {
-      String fieldName = iter.Current;
-      numStopWords += AddStopWords(reader, fieldName, maxDocFreq); // per-field overload taking an absolute document count
-    }
-    return numStopWords;
-  }
-
-  /*
-   * Automatically adds stop words for all indexed fields, using terms that exceed the maxPercentDocs threshold
-   *
-   * @param reader        The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                      exceed the required document frequency
-   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
-   *                      contain a term, after which the word is considered to be a stop word.
-   * @return The number of stop words identified, summed over all fields.
-   * @throws IOException
-   */
-  public int AddStopWords(IndexReader reader, float maxPercentDocs) 
-  {
-    int numStopWords = 0;
-    ICollection<String> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.INDEXED); // only fields reported for FieldOption.INDEXED are examined
-    for (IEnumerator<String> iter = fieldNames.GetEnumerator(); iter.MoveNext();) {
-      String fieldName = iter.Current;
-      numStopWords += AddStopWords(reader, fieldName, maxPercentDocs); // per-field overload taking a fraction of the document count
-    }
-    return numStopWords;
-  }
-
-  /*
-   * Automatically adds stop words for the given field with terms exceeding the maxPercentDocs
-   * threshold; the fraction is converted to a document count and handed to the int overload.
-   * @param reader         The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                       exceed the required document frequency
-   * @param fieldName      The field for which stopwords will be added
-   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
-   *                       contain a term, after which the word is considered to be a stop word.
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int AddStopWords(IndexReader reader, String fieldName, float maxPercentDocs) 
-  {
-    return AddStopWords(reader, fieldName, (int) (reader.NumDocs() * maxPercentDocs)); // fraction -> absolute document count; the (int) cast truncates
-  }
-
-  /*
-   * Automatically adds stop words for the given field with terms whose document frequency exceeds maxDocFreq
-   *
-   * @param reader     The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                   exceed the required document frequency
-   * @param fieldName  The field for which stopwords will be added
-   * @param maxDocFreq The maximum number of index documents which
-   *                   can contain a term, after which the term is considered to be a stop word.
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int AddStopWords(IndexReader reader, String fieldName, int maxDocFreq) 
-  {
-      var stopWords = Support.Compatibility.SetFactory.CreateHashSet<string>();
-    String internedFieldName = StringHelper.Intern(fieldName);
-    TermEnum te = reader.Terms(new Term(fieldName)); // NOTE(review): te is never closed in this method - confirm whether TermEnum needs disposing
-    Term term = te.Term;
-    while (term != null) {
-      if (term.Field != internedFieldName) { // a term from a different field ends the scan
-        break;
-      }
-      if (te.DocFreq() > maxDocFreq) { // strictly greater: a term found in exactly maxDocFreq documents is not a stop word
-        stopWords.Add(term.Text);
-      }
-      if (!te.Next()) {
-        break;
-      }
-      term = te.Term;
-    }
-    stopWordsPerField.Add(fieldName, stopWords); // NOTE(review): confirm HashMap.Add replaces an existing entry when the same field is processed twice
-    
-    /* if the stopwords for a field are changed,
-     * then saved streams for that field are erased.
-     */
-    IDictionary<String,SavedStreams> streamMap = (IDictionary<String,SavedStreams>) PreviousTokenStream;
-    if (streamMap != null)
-      streamMap.Remove(fieldName);
-    
-    return stopWords.Count;
-  }
-
-  public override TokenStream TokenStream(String fieldName, TextReader reader) { // returns the delegate's stream, wrapped in a StopFilter when stop words are registered for fieldName
-    TokenStream result;
-    try {
-      result = _delegate.ReusableTokenStream(fieldName, reader);
-    } catch (IOException) { // on IOException fall back to the delegate's non-reusable stream
-      result = _delegate.TokenStream(fieldName, reader);
-    }
-    var stopWords = stopWordsPerField[fieldName]; // the null check below relies on the indexer yielding null for unknown fields - presumably HashMap semantics, confirm
-    if (stopWords != null) {
-      result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                              result, stopWords);
-    }
-    return result;
-  }
-  
-  private class SavedStreams { // per-field pair of streams; instances live in the map stored in PreviousTokenStream
-    /* the underlying stream obtained from the delegate analyzer */
-    protected internal TokenStream Wrapped;
-
-    /*
-     * when there are no stopwords for the field, refers to Wrapped.
-     * if there are stopwords, it is a StopFilter around Wrapped.
-     */
-    protected internal TokenStream WithStopFilter;
-  };
-  
-  public override TokenStream ReusableTokenStream(String fieldName, TextReader reader) // reuse-aware variant of TokenStream: keeps one SavedStreams entry per field
-{
-    if (overridesTokenStreamMethod) {
-      // LUCENE-1678: force fallback to TokenStream() if we
-      // have been subclassed and that subclass overrides
-      // TokenStream but not ReusableTokenStream
-      return TokenStream(fieldName, reader);
-    }
-
-    /* map of SavedStreams for each field; created on first use and stored in PreviousTokenStream */
-    IDictionary<String, SavedStreams> streamMap = (IDictionary<String, SavedStreams>)PreviousTokenStream;
-    if (streamMap == null) {
-      streamMap = new HashMap<String, SavedStreams>();
-      PreviousTokenStream = streamMap;
-    }
-
-    SavedStreams streams = streamMap[fieldName]; // relies on a null result for a field with no entry - presumably HashMap indexer semantics, confirm
-    if (streams == null) {
-      /* an entry for this field does not exist, create one */
-      streams = new SavedStreams();
-      streamMap.Add(fieldName, streams);
-      streams.Wrapped = _delegate.ReusableTokenStream(fieldName, reader);
-
-      /* if there are any stopwords for the field, save the stopfilter */
-      var stopWords = stopWordsPerField[fieldName];
-      if (stopWords != null)
-        streams.WithStopFilter = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.Wrapped, stopWords);
-      else
-        streams.WithStopFilter = streams.Wrapped; // no stop words: hand back the delegate's stream unfiltered
-
-    } else {
-      /*
-       * an entry for this field exists, verify the wrapped stream has not
-       * changed. if it has not, reuse it, otherwise wrap the new stream.
-       */
-      TokenStream result = _delegate.ReusableTokenStream(fieldName, reader);
-      if (result == streams.Wrapped) {
-        /* the wrapped analyzer reused the stream */
-        streams.WithStopFilter.Reset();
-      } else {
-        /*
-         * the wrapped analyzer did not. if there are any stopwords for the
-         * field, create a new StopFilter around the new stream
-         */
-        streams.Wrapped = result;
-        var stopWords = stopWordsPerField[fieldName];
-        if (stopWords != null)
-          streams.WithStopFilter = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                                  streams.Wrapped, stopWords);
-        else
-          streams.WithStopFilter = streams.Wrapped; // no stop words: hand back the delegate's stream unfiltered
-      }
-    }
-
-    return streams.WithStopFilter;
-  }
-
-  /*
-   * Provides information on which stop words have been identified for a field
-   *
-   * @param fieldName The field for which stop words identified in "AddStopWords"
-   *                  method calls will be returned
-   * @return the stop words identified for a field; an empty array when the lookup yields null
-   */
-  public String[] GetStopWords(String fieldName) {
-    String[] result;
-    var stopWords = stopWordsPerField[fieldName];
-    if (stopWords != null) {
-      result = stopWords.ToArray(); // copy of the set's contents as an array
-    } else {
-      result = new String[0]; // callers get an empty array rather than null
-    }
-    return result;
-  }
-
-  /*
-   * Provides information on which stop words have been identified for all fields
-   *
-   * @return the stop words (as terms), one Term per (field, stop word) pair
-   */
-  public Term[] GetStopWords() {
-    List<Term> allStopWords = new List<Term>();
-    foreach(var fieldName in stopWordsPerField.Keys) 
-    {
-      var stopWords = stopWordsPerField[fieldName];
-      foreach(var text in stopWords) {
-        allStopWords.Add(new Term(fieldName, text)); // pair each stop word with the field it was found in
-      }
-    }
-    return allStopWords.ToArray();
-    }
-
-}
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Reverse/ReverseStringFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Reverse/ReverseStringFilter.cs b/src/contrib/Analyzers/Reverse/ReverseStringFilter.cs
deleted file mode 100644
index 607bfea..0000000
--- a/src/contrib/Analyzers/Reverse/ReverseStringFilter.cs
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis.Tokenattributes;
-
-namespace Lucene.Net.Analysis.Reverse
-{
-    /*
-     * Reverse token string, for example "country" => "yrtnuoc".
-     * <p>
-     * If <c>marker</c> is supplied, then tokens will be also prepended by
-     * that character. For example, with a marker of \u0001, "country" =>
-     * "\u0001yrtnuoc". This is useful when implementing efficient leading
-     * wildcards search.
-     * </p>
-     */
-    public sealed class ReverseStringFilter : TokenFilter
-    {
-
-        private ITermAttribute termAtt;
-        private readonly char marker;
-        private const char NOMARKER = '\uFFFF';
-
-        /*
-         * Example marker character: U+0001 (START OF HEADING) 
-         */
-        public const char START_OF_HEADING_MARKER = '\u0001';
-
-        /*
-         * Example marker character: U+001F (INFORMATION SEPARATOR ONE)
-         */
-        public const char INFORMATION_SEPARATOR_MARKER = '\u001F';
-
-        /*
-         * Example marker character: U+EC00 (PRIVATE USE AREA: EC00) 
-         */
-        public const char PUA_EC00_MARKER = '\uEC00';
-
-        /*
-         * Example marker character: U+200F (RIGHT-TO-LEFT MARK)
-         */
-        public const char RTL_DIRECTION_MARKER = '\u200F';
-
-        /*
-         * Create a new ReverseStringFilter that reverses all tokens in the 
-         * supplied {@link TokenStream}.
-         * <p>
-         * The reversed tokens will not be marked. 
-         * </p>
-         * 
-         * @param _in {@link TokenStream} to filter
-         */
-        public ReverseStringFilter(TokenStream _in)
-            : this(_in, NOMARKER) // NOMARKER makes IncrementToken skip the marking step
-        {
-
-        }
-
-        /*
-         * Create a new ReverseStringFilter that reverses and marks all tokens in the
-         * supplied {@link TokenStream}.
-         * <p>
-         * The reversed tokens will be prepended (marked) by the <c>marker</c>
-         * character.
-         * </p>
-         * 
-         * @param _in {@link TokenStream} to filter
-         * @param marker A character used to mark reversed tokens ('\uFFFF' is treated as "no marker")
-         */
-        public ReverseStringFilter(TokenStream _in, char marker)
-            : base(_in)
-        {
-            this.marker = marker;
-            termAtt = AddAttribute<ITermAttribute>(); // term attribute that IncrementToken reads and rewrites
-        }
-
-        public override bool IncrementToken() // advances the wrapped stream and reverses the current term in place; returns false when the wrapped stream does
-        {
-            if (input.IncrementToken())
-            {
-                int len = termAtt.TermLength();
-                if (marker != NOMARKER)
-                {
-                    len++; // one extra slot for the marker
-                    termAtt.ResizeTermBuffer(len);
-                    termAtt.TermBuffer()[len - 1] = marker; // written last, so the reversal below moves it to the front
-                }
-                Reverse(termAtt.TermBuffer(), len);
-                termAtt.SetTermLength(len);
-                return true;
-            }
-            else
-            {
-                return false;
-            }
-        }
-
-        public static String Reverse(String input) // returns a new string holding the chars of input in reverse order
-        {
-            char[] charInput = input.ToCharArray(); // work on a copy of the characters
-            Reverse(charInput);
-            return new String(charInput);
-        }
-
-        public static void Reverse(char[] buffer) // reverses the whole array in place
-        {
-            Reverse(buffer, buffer.Length);
-        }
-
-        public static void Reverse(char[] buffer, int len) // reverses the first len chars of buffer in place
-        {
-            Reverse(buffer, 0, len);
-        }
-
-        public static void Reverse(char[] buffer, int start, int len) // reverses buffer[start .. start+len-1] in place, one char at a time
-        {
-            if (len <= 1) return; // nothing to swap
-            int num = len >> 1; // number of swaps; with an odd len the middle char stays put
-            for (int i = start; i < (start + num); i++)
-            {
-                char c = buffer[i];
-                buffer[i] = buffer[start * 2 + len - i - 1]; // start*2 + len - i - 1 is the mirror position of i within the range
-                buffer[start * 2 + len - i - 1] = c; // NOTE(review): swaps individual chars, so a surrogate pair would end up in swapped order
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/02362804/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Ru/RussianAnalyzer.cs b/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
deleted file mode 100644
index 21ad541..0000000
--- a/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.IO;
-using System.Collections;
-using Lucene.Net.Analysis;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis.Ru
-{
-    /// <summary>
-    /// Analyzer for Russian language. Supports an external list of stopwords (words that
-    /// will not be indexed at all).
-    /// A default set of stopwords is used unless an alternative list is specified.
-    /// </summary>
-    public sealed class RussianAnalyzer : Analyzer
-    {
-        /// <summary>
-        /// List of typical Russian stopwords.
-        /// </summary>
-        private static readonly String[] RUSSIAN_STOP_WORDS = {
-                                                                  "а", "без", "более", "бы", "был", "была", "были",
-                                                                  "было", "быть", "в",
-                                                                  "вам", "вас", "весь", "во", "вот", "все", "всего",
-                                                                  "всех", "вы", "где",
-                                                                  "да", "даже", "для", "до", "его", "ее", "ей", "ею",
-                                                                  "если", "есть",
-                                                                  "еще", "же", "за", "здесь", "и", "из", "или", "им",
-                                                                  "их", "к", "как",
-                                                                  "ко", "когда", "кто", "ли", "либо", "мне", "может",
-                                                                  "мы", "на", "надо",
-                                                                  "наш", "не", "него", "нее", "нет", "ни", "них", "но",
-                                                                  "ну", "о", "об",
-                                                                  "однако", "он", "она", "они", "оно", "от", "очень",
-                                                                  "по", "под", "при",
-                                                                  "с", "со", "так", "также", "такой", "там", "те", "тем"
-                                                                  , "то", "того",
-                                                                  "тоже", "той", "только", "том", "ты", "у", "уже",
-                                                                  "хотя", "чего", "чей",
-                                                                  "чем", "что", "чтобы", "чье", "чья", "эта", "эти",
-                                                                  "это", "я"
-                                                              };
-
-        private static class DefaultSetHolder // holds the default stop set built from RUSSIAN_STOP_WORDS
-        {
-            internal static readonly ISet<string> DEFAULT_STOP_SET = CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)RUSSIAN_STOP_WORDS, false)); // NOTE(review): the false argument is presumably ignoreCase - confirm against CharArraySet
-        }
-
-        /// <summary>
-        /// Contains the stopwords used with the StopFilter.
-        /// </summary>
-        private readonly ISet<string> stopSet;
-
-        private readonly Version matchVersion;
-
-
-        public RussianAnalyzer(Version matchVersion) // builds an analyzer that uses the default Russian stop set
-            : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
-        {
-        }
-
-        /*
-         * Builds an analyzer with the given stop words (converted to a set via StopFilter.MakeStopSet).
-         * @deprecated use {@link #RussianAnalyzer(Version, ISet)} instead
-         */
-        public RussianAnalyzer(Version matchVersion, params string[] stopwords)
-            : this(matchVersion, StopFilter.MakeStopSet(stopwords))
-        {
-
-        }
-
-        /*
-         * Builds an analyzer with the given stop words
-         * 
-         * @param matchVersion
-         *          lucene compatibility version
-         * @param stopwords
-         *          a stopword set
-         */
-        public RussianAnalyzer(Version matchVersion, ISet<string> stopwords)
-        {
-            stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords)); // stores the result of Copy wrapped by UnmodifiableSet, not the caller's set itself
-            this.matchVersion = matchVersion;
-        }
-
-        /*
-         * Builds an analyzer with the given stop words.
-         * Only the dictionary keys are used as stop words; the values are not read.
-         * @deprecated use {@link #RussianAnalyzer(Version, ISet)} instead
-         */
-        public RussianAnalyzer(Version matchVersion, IDictionary<string, string> stopwords)
-            : this(matchVersion, stopwords.Keys.ToArray()) // string[] argument, so this chains to the params string[] constructor
-        {
-        }
-
-        /*
-         * Creates a {@link TokenStream} which tokenizes all the text in the 
-         * provided {@link TextReader}.
-         *
-         * @return  A {@link TokenStream} built from a 
-         *   {@link RussianLetterTokenizer} filtered with 
-         *   {@link LowerCaseFilter}, {@link StopFilter}, 
-         *   and {@link RussianStemFilter}
-         */
-        public override TokenStream TokenStream(String fieldName, TextReader reader) // fieldName is not used: every field gets the same chain
-        {
-            TokenStream result = new RussianLetterTokenizer(reader);
-            result = new LowerCaseFilter(result);
-            result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                    result, stopSet);
-            result = new RussianStemFilter(result); // stemming is applied last, after lower-casing and stop-word removal
-            return result;
-        }
-
-        private class SavedStreams // analysis chain kept in PreviousTokenStream between ReusableTokenStream calls
-        {
-            protected internal Tokenizer source; // head of the chain; reset with the new reader on reuse
-            protected internal TokenStream result; // end of the chain; what ReusableTokenStream returns
-        };
-
-        /*
-         * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text 
-         * in the provided {@link TextReader}.
-         *
-         * @return  A {@link TokenStream} built from a 
-         *   {@link RussianLetterTokenizer} filtered with 
-         *   {@link LowerCaseFilter}, {@link StopFilter}, 
-         *   and {@link RussianStemFilter}
-         */
-        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader) // fieldName is not used: one saved chain serves every field
-        {
-            SavedStreams streams = (SavedStreams)PreviousTokenStream;
-            if (streams == null)
-            { // nothing saved yet: build the chain once and remember it
-                streams = new SavedStreams();
-                streams.source = new RussianLetterTokenizer(reader);
-                streams.result = new LowerCaseFilter(streams.source);
-                streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.result, stopSet);
-                streams.result = new RussianStemFilter(streams.result);
-                PreviousTokenStream = streams;
-            }
-            else
-            {
-                streams.source.Reset(reader); // reuse the saved chain; only the tokenizer is pointed at the new reader
-            }
-            return streams.result;
-        }
-    }
-}
\ No newline at end of file


Mime
View raw message