lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [34/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:38 GMT
Raw porting of Lucene.Net.Analysis.Common


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/99717176
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/99717176
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/99717176

Branch: refs/heads/master
Commit: 9971717653f66f944439ea63add45de338ec21dc
Parents: b8454a3
Author: Itamar Syn-Hershko <itamar@code972.com>
Authored: Sat Nov 8 01:11:56 2014 +0200
Committer: Itamar Syn-Hershko <itamar@code972.com>
Committed: Sat Nov 8 01:11:56 2014 +0200

----------------------------------------------------------------------
 .../Analysis/Ar/ArabicAnalyzer.cs               |  161 ++
 .../Analysis/Ar/ArabicLetterTokenizer.cs        |   83 +
 .../Analysis/Ar/ArabicLetterTokenizerFactory.cs |   54 +
 .../Analysis/Ar/ArabicNormalizationFilter.cs    |   51 +
 .../Ar/ArabicNormalizationFilterFactory.cs      |   64 +
 .../Analysis/Ar/ArabicNormalizer.cs             |  111 +
 .../Analysis/Ar/ArabicStemFilter.cs             |   66 +
 .../Analysis/Ar/ArabicStemFilterFactory.cs      |   55 +
 .../Analysis/Ar/ArabicStemmer.cs                |  163 ++
 .../Analysis/Bg/BulgarianAnalyzer.cs            |  144 ++
 .../Analysis/Bg/BulgarianStemFilter.cs          |   68 +
 .../Analysis/Bg/BulgarianStemFilterFactory.cs   |   55 +
 .../Analysis/Bg/BulgarianStemmer.cs             |  187 ++
 .../Analysis/Br/BrazilianAnalyzer.cs            |  143 ++
 .../Analysis/Br/BrazilianStemFilter.cs          |   90 +
 .../Analysis/Br/BrazilianStemFilterFactory.cs   |   56 +
 .../Analysis/Br/BrazilianStemmer.cs             | 1395 ++++++++++++
 .../Analysis/Ca/CatalanAnalyzer.cs              |  154 ++
 .../Analysis/CharFilter/BaseCharFilter.cs       |  129 ++
 .../CharFilter/HTMLStripCharFilterFactory.cs    |   67 +
 .../Analysis/CharFilter/MappingCharFilter.cs    |  240 ++
 .../CharFilter/MappingCharFilterFactory.cs      |  184 ++
 .../Analysis/CharFilter/NormalizeCharMap.cs     |  162 ++
 .../Analysis/Cjk/CJKAnalyzer.cs                 |  118 +
 .../Analysis/Cjk/CJKBigramFilter.cs             |  420 ++++
 .../Analysis/Cjk/CJKBigramFilterFactory.cs      |   79 +
 .../Analysis/Cjk/CJKTokenizer.cs                |  370 +++
 .../Analysis/Cjk/CJKTokenizerFactory.cs         |   58 +
 .../Analysis/Cjk/CJKWidthFilter.cs              |  113 +
 .../Analysis/Cjk/CJKWidthFilterFactory.cs       |   66 +
 .../Analysis/Ckb/SoraniAnalyzer.cs              |  139 ++
 .../Analysis/Ckb/SoraniNormalizationFilter.cs   |   52 +
 .../Ckb/SoraniNormalizationFilterFactory.cs     |   64 +
 .../Analysis/Ckb/SoraniNormalizer.cs            |  140 ++
 .../Analysis/Ckb/SoraniStemFilter.cs            |   66 +
 .../Analysis/Ckb/SoraniStemFilterFactory.cs     |   55 +
 .../Analysis/Ckb/SoraniStemmer.cs               |  139 ++
 .../Analysis/Cn/ChineseAnalyzer.cs              |   49 +
 .../Analysis/Cn/ChineseFilter.cs                |  104 +
 .../Analysis/Cn/ChineseFilterFactory.cs         |   51 +
 .../Analysis/Cn/ChineseTokenizer.cs             |  199 ++
 .../Analysis/Cn/ChineseTokenizerFactory.cs      |   52 +
 .../Analysis/CommonGrams/CommonGramsFilter.cs   |  199 ++
 .../CommonGrams/CommonGramsFilterFactory.cs     |  104 +
 .../CommonGrams/CommonGramsQueryFilter.cs       |  139 ++
 .../CommonGramsQueryFilterFactory.cs            |   55 +
 .../Compound/CompoundWordTokenFilterBase.cs     |  202 ++
 .../DictionaryCompoundWordTokenFilter.cs        |  137 ++
 .../DictionaryCompoundWordTokenFilterFactory.cs |   81 +
 .../HyphenationCompoundWordTokenFilter.cs       |  255 +++
 ...HyphenationCompoundWordTokenFilterFactory.cs |  125 ++
 .../Analysis/Compound/hyphenation/ByteVector.cs |  151 ++
 .../Analysis/Compound/hyphenation/CharVector.cs |  163 ++
 .../Analysis/Compound/hyphenation/Hyphen.cs     |   76 +
 .../Compound/hyphenation/Hyphenation.cs         |   55 +
 .../Compound/hyphenation/HyphenationTree.cs     |  533 +++++
 .../Compound/hyphenation/PatternConsumer.cs     |   57 +
 .../Compound/hyphenation/PatternParser.cs       |  463 ++++
 .../Compound/hyphenation/TernaryTree.cs         |  780 +++++++
 .../Analysis/Core/KeywordAnalyzer.cs            |   40 +
 .../Analysis/Core/KeywordTokenizer.cs           |  106 +
 .../Analysis/Core/KeywordTokenizerFactory.cs    |   53 +
 .../Analysis/Core/LetterTokenizer.cs            |   84 +
 .../Analysis/Core/LetterTokenizerFactory.cs     |   54 +
 .../Analysis/Core/LowerCaseFilter.cs            |   62 +
 .../Analysis/Core/LowerCaseFilterFactory.cs     |   62 +
 .../Analysis/Core/LowerCaseTokenizer.cs         |   84 +
 .../Analysis/Core/LowerCaseTokenizerFactory.cs  |   63 +
 .../Analysis/Core/SimpleAnalyzer.cs             |   58 +
 .../Analysis/Core/StopAnalyzer.cs               |  104 +
 .../Analysis/Core/StopFilter.cs                 |  129 ++
 .../Analysis/Core/StopFilterFactory.cs          |  162 ++
 .../Analysis/Core/TypeTokenFilter.cs            |   83 +
 .../Analysis/Core/TypeTokenFilterFactory.cs     |   94 +
 .../Analysis/Core/UpperCaseFilter.cs            |   71 +
 .../Analysis/Core/UpperCaseFilterFactory.cs     |   74 +
 .../Analysis/Core/WhitespaceAnalyzer.cs         |   58 +
 .../Analysis/Core/WhitespaceTokenizer.cs        |   75 +
 .../Analysis/Core/WhitespaceTokenizerFactory.cs |   58 +
 .../Analysis/Cz/CzechAnalyzer.cs                |  161 ++
 .../Analysis/Cz/CzechStemFilter.cs              |   67 +
 .../Analysis/Cz/CzechStemFilterFactory.cs       |   55 +
 .../Analysis/Cz/CzechStemmer.cs                 |  157 ++
 .../Analysis/Da/DanishAnalyzer.cs               |  139 ++
 .../Analysis/De/GermanAnalyzer.cs               |  185 ++
 .../Analysis/De/GermanLightStemFilter.cs        |   66 +
 .../Analysis/De/GermanLightStemFilterFactory.cs |   55 +
 .../Analysis/De/GermanLightStemmer.cs           |  177 ++
 .../Analysis/De/GermanMinimalStemFilter.cs      |   66 +
 .../De/GermanMinimalStemFilterFactory.cs        |   55 +
 .../Analysis/De/GermanMinimalStemmer.cs         |  151 ++
 .../Analysis/De/GermanNormalizationFilter.cs    |  130 ++
 .../De/GermanNormalizationFilterFactory.cs      |   65 +
 .../Analysis/De/GermanStemFilter.cs             |   96 +
 .../Analysis/De/GermanStemFilterFactory.cs      |   56 +
 .../Analysis/De/GermanStemmer.cs                |  308 +++
 .../Analysis/El/GreekAnalyzer.cs                |  139 ++
 .../Analysis/El/GreekLowerCaseFilter.cs         |  135 ++
 .../Analysis/El/GreekLowerCaseFilterFactory.cs  |   66 +
 .../Analysis/El/GreekStemFilter.cs              |   72 +
 .../Analysis/El/GreekStemFilterFactory.cs       |   55 +
 .../Analysis/El/GreekStemmer.cs                 |  799 +++++++
 .../Analysis/En/EnglishAnalyzer.cs              |  121 +
 .../Analysis/En/EnglishMinimalStemFilter.cs     |   66 +
 .../En/EnglishMinimalStemFilterFactory.cs       |   55 +
 .../Analysis/En/EnglishMinimalStemmer.cs        |   61 +
 .../Analysis/En/EnglishPossessiveFilter.cs      |   79 +
 .../En/EnglishPossessiveFilterFactory.cs        |   56 +
 .../Analysis/En/KStemData1.cs                   |   55 +
 .../Analysis/En/KStemData2.cs                   |   53 +
 .../Analysis/En/KStemData3.cs                   |   53 +
 .../Analysis/En/KStemData4.cs                   |   53 +
 .../Analysis/En/KStemData5.cs                   |   53 +
 .../Analysis/En/KStemData6.cs                   |   53 +
 .../Analysis/En/KStemData7.cs                   |   53 +
 .../Analysis/En/KStemData8.cs                   |   53 +
 .../Analysis/En/KStemFilter.cs                  |   81 +
 .../Analysis/En/KStemFilterFactory.cs           |   55 +
 .../Analysis/En/KStemmer.cs                     | 2044 +++++++++++++++++
 .../Analysis/En/PorterStemFilter.cs             |   81 +
 .../Analysis/En/PorterStemFilterFactory.cs      |   55 +
 .../Analysis/En/PorterStemmer.cs                |  871 +++++++
 .../Analysis/Es/SpanishAnalyzer.cs              |  155 ++
 .../Analysis/Es/SpanishLightStemFilter.cs       |   66 +
 .../Es/SpanishLightStemFilterFactory.cs         |   55 +
 .../Analysis/Es/SpanishLightStemmer.cs          |  137 ++
 .../Analysis/Eu/BasqueAnalyzer.cs               |  137 ++
 .../Analysis/Fa/PersianAnalyzer.cs              |  155 ++
 .../Analysis/Fa/PersianCharFilter.cs            |   79 +
 .../Analysis/Fa/PersianCharFilterFactory.cs     |   65 +
 .../Analysis/Fa/PersianNormalizationFilter.cs   |   54 +
 .../Fa/PersianNormalizationFilterFactory.cs     |   66 +
 .../Analysis/Fa/PersianNormalizer.cs            |   97 +
 .../Analysis/Fi/FinnishAnalyzer.cs              |  139 ++
 .../Analysis/Fi/FinnishLightStemFilter.cs       |   66 +
 .../Fi/FinnishLightStemFilterFactory.cs         |   55 +
 .../Analysis/Fi/FinnishLightStemmer.cs          |  335 +++
 .../Analysis/Fr/FrenchAnalyzer.cs               |  205 ++
 .../Analysis/Fr/FrenchLightStemFilter.cs        |   66 +
 .../Analysis/Fr/FrenchLightStemFilterFactory.cs |   56 +
 .../Analysis/Fr/FrenchLightStemmer.cs           |  357 +++
 .../Analysis/Fr/FrenchMinimalStemFilter.cs      |   66 +
 .../Fr/FrenchMinimalStemFilterFactory.cs        |   56 +
 .../Analysis/Fr/FrenchMinimalStemmer.cs         |  106 +
 .../Analysis/Fr/FrenchStemFilter.cs             |  102 +
 .../Analysis/Fr/FrenchStemmer.cs                |  785 +++++++
 .../Analysis/Ga/IrishAnalyzer.cs                |  153 ++
 .../Analysis/Ga/IrishLowerCaseFilter.cs         |   95 +
 .../Analysis/Ga/IrishLowerCaseFilterFactory.cs  |   65 +
 .../Analysis/Gl/GalicianAnalyzer.cs             |  137 ++
 .../Analysis/Gl/GalicianMinimalStemFilter.cs    |   66 +
 .../Gl/GalicianMinimalStemFilterFactory.cs      |   55 +
 .../Analysis/Gl/GalicianMinimalStemmer.cs       |   43 +
 .../Analysis/Gl/GalicianStemFilter.cs           |   70 +
 .../Analysis/Gl/GalicianStemFilterFactory.cs    |   55 +
 .../Analysis/Gl/GalicianStemmer.cs              |  102 +
 .../Analysis/Hi/HindiAnalyzer.cs                |  158 ++
 .../Analysis/Hi/HindiNormalizationFilter.cs     |   62 +
 .../Hi/HindiNormalizationFilterFactory.cs       |   64 +
 .../Analysis/Hi/HindiNormalizer.cs              |  193 ++
 .../Analysis/Hi/HindiStemFilter.cs              |   56 +
 .../Analysis/Hi/HindiStemFilterFactory.cs       |   54 +
 .../Analysis/Hi/HindiStemmer.cs                 |   71 +
 .../Analysis/Hu/HungarianAnalyzer.cs            |  139 ++
 .../Analysis/Hu/HungarianLightStemFilter.cs     |   66 +
 .../Hu/HungarianLightStemFilterFactory.cs       |   55 +
 .../Analysis/Hu/HungarianLightStemmer.cs        |  292 +++
 .../Analysis/Hunspell/Dictionary.cs             | 1235 ++++++++++
 .../Analysis/Hunspell/HunspellStemFilter.cs     |  171 ++
 .../Hunspell/HunspellStemFilterFactory.cs       |  116 +
 .../Analysis/Hunspell/ISO8859_14Decoder.cs      |   47 +
 .../Analysis/Hunspell/Stemmer.cs                |  475 ++++
 .../Analysis/Hy/ArmenianAnalyzer.cs             |  137 ++
 .../Analysis/Id/IndonesianAnalyzer.cs           |  138 ++
 .../Analysis/Id/IndonesianStemFilter.cs         |   75 +
 .../Analysis/Id/IndonesianStemFilterFactory.cs  |   57 +
 .../Analysis/Id/IndonesianStemmer.cs            |  334 +++
 .../Analysis/In/IndicNormalizationFilter.cs     |   52 +
 .../In/IndicNormalizationFilterFactory.cs       |   64 +
 .../Analysis/In/IndicNormalizer.cs              |  194 ++
 .../Analysis/In/IndicTokenizer.cs               |   48 +
 .../Analysis/It/ItalianAnalyzer.cs              |  164 ++
 .../Analysis/It/ItalianLightStemFilter.cs       |   66 +
 .../It/ItalianLightStemFilterFactory.cs         |   55 +
 .../Analysis/It/ItalianLightStemmer.cs          |  155 ++
 .../Analysis/Lv/LatvianAnalyzer.cs              |  137 ++
 .../Analysis/Lv/LatvianStemFilter.cs            |   66 +
 .../Analysis/Lv/LatvianStemFilterFactory.cs     |   55 +
 .../Analysis/Lv/LatvianStemmer.cs               |  198 ++
 .../Miscellaneous/ASCIIFoldingFilter.cs         | 2118 ++++++++++++++++++
 .../Miscellaneous/ASCIIFoldingFilterFactory.cs  |   69 +
 .../Miscellaneous/CapitalizationFilter.cs       |  208 ++
 .../CapitalizationFilterFactory.cs              |  117 +
 .../Miscellaneous/CodepointCountFilter.cs       |   82 +
 .../CodepointCountFilterFactory.cs              |   61 +
 .../Analysis/Miscellaneous/EmptyTokenStream.cs  |   34 +
 .../Miscellaneous/HyphenatedWordsFilter.cs      |  164 ++
 .../HyphenatedWordsFilterFactory.cs             |   55 +
 .../Analysis/Miscellaneous/KeepWordFilter.cs    |   67 +
 .../Miscellaneous/KeepWordFilterFactory.cs      |  113 +
 .../Miscellaneous/KeywordMarkerFilter.cs        |   61 +
 .../Miscellaneous/KeywordMarkerFilterFactory.cs |   99 +
 .../Miscellaneous/KeywordRepeatFilter.cs        |   75 +
 .../Miscellaneous/KeywordRepeatFilterFactory.cs |   52 +
 .../Analysis/Miscellaneous/LengthFilter.cs      |   89 +
 .../Miscellaneous/LengthFilterFactory.cs        |   67 +
 .../Miscellaneous/LimitTokenCountAnalyzer.cs    |   68 +
 .../Miscellaneous/LimitTokenCountFilter.cs      |  109 +
 .../LimitTokenCountFilterFactory.cs             |   67 +
 .../Miscellaneous/LimitTokenPositionFilter.cs   |  116 +
 .../LimitTokenPositionFilterFactory.cs          |   66 +
 .../Lucene47WordDelimiterFilter.cs              |  625 ++++++
 .../Analysis/Miscellaneous/PatternAnalyzer.cs   |  566 +++++
 .../Miscellaneous/PatternKeywordMarkerFilter.cs |   60 +
 .../Miscellaneous/PerFieldAnalyzerWrapper.cs    |   93 +
 .../PrefixAndSuffixAwareTokenFilter.cs          |  112 +
 .../Miscellaneous/PrefixAwareTokenFilter.cs     |  246 ++
 .../RemoveDuplicatesTokenFilter.cs              |   99 +
 .../RemoveDuplicatesTokenFilterFactory.cs       |   55 +
 .../Miscellaneous/ScandinavianFoldingFilter.cs  |  135 ++
 .../ScandinavianFoldingFilterFactory.cs         |   53 +
 .../ScandinavianNormalizationFilter.cs          |  145 ++
 .../ScandinavianNormalizationFilterFactory.cs   |   53 +
 .../Miscellaneous/SetKeywordMarkerFilter.cs     |   59 +
 .../Miscellaneous/SingleTokenTokenStream.cs     |   79 +
 .../Miscellaneous/StemmerOverrideFilter.cs      |  265 +++
 .../StemmerOverrideFilterFactory.cs             |   97 +
 .../Analysis/Miscellaneous/TrimFilter.cs        |  114 +
 .../Analysis/Miscellaneous/TrimFilterFactory.cs |   63 +
 .../Miscellaneous/TruncateTokenFilter.cs        |   66 +
 .../Miscellaneous/TruncateTokenFilterFactory.cs |   66 +
 .../Miscellaneous/WordDelimiterFilter.cs        |  761 +++++++
 .../Miscellaneous/WordDelimiterFilterFactory.cs |  270 +++
 .../Miscellaneous/WordDelimiterIterator.cs      |  367 +++
 .../Analysis/Ngram/EdgeNGramFilterFactory.cs    |   61 +
 .../Analysis/Ngram/EdgeNGramTokenFilter.cs      |  266 +++
 .../Analysis/Ngram/EdgeNGramTokenizer.cs        |   71 +
 .../Analysis/Ngram/EdgeNGramTokenizerFactory.cs |   74 +
 .../Ngram/Lucene43EdgeNGramTokenizer.cs         |  328 +++
 .../Analysis/Ngram/Lucene43NGramTokenizer.cs    |  182 ++
 .../Analysis/Ngram/NGramFilterFactory.cs        |   59 +
 .../Analysis/Ngram/NGramTokenFilter.cs          |  251 +++
 .../Analysis/Ngram/NGramTokenizer.cs            |  278 +++
 .../Analysis/Ngram/NGramTokenizerFactory.cs     |   70 +
 .../Analysis/Nl/DutchAnalyzer.cs                |  231 ++
 .../Analysis/Nl/DutchStemFilter.cs              |  129 ++
 .../Analysis/Nl/DutchStemmer.cs                 |  477 ++++
 .../Analysis/No/NorwegianAnalyzer.cs            |  140 ++
 .../Analysis/No/NorwegianLightStemFilter.cs     |   79 +
 .../No/NorwegianLightStemFilterFactory.cs       |   79 +
 .../Analysis/No/NorwegianLightStemmer.cs        |  158 ++
 .../Analysis/No/NorwegianMinimalStemFilter.cs   |   79 +
 .../No/NorwegianMinimalStemFilterFactory.cs     |   79 +
 .../Analysis/No/NorwegianMinimalStemmer.cs      |  121 +
 .../Analysis/Path/PathHierarchyTokenizer.cs     |  242 ++
 .../Path/PathHierarchyTokenizerFactory.cs       |  105 +
 .../Path/ReversePathHierarchyTokenizer.cs       |  214 ++
 .../Pattern/PatternCaptureGroupFilterFactory.cs |   54 +
 .../Pattern/PatternCaptureGroupTokenFilter.cs   |  227 ++
 .../Pattern/PatternReplaceCharFilter.cs         |  179 ++
 .../Pattern/PatternReplaceCharFilterFactory.cs  |   67 +
 .../Analysis/Pattern/PatternReplaceFilter.cs    |   81 +
 .../Pattern/PatternReplaceFilterFactory.cs      |   64 +
 .../Analysis/Pattern/PatternTokenizer.cs        |  185 ++
 .../Analysis/Pattern/PatternTokenizerFactory.cs |   94 +
 .../Analysis/Payloads/AbstractEncoder.cs        |   39 +
 .../Payloads/DelimitedPayloadTokenFilter.cs     |   82 +
 .../DelimitedPayloadTokenFilterFactory.cs       |   85 +
 .../Analysis/Payloads/FloatEncoder.cs           |   41 +
 .../Analysis/Payloads/IdentityEncoder.cs        |   63 +
 .../Analysis/Payloads/IntegerEncoder.cs         |   42 +
 .../Payloads/NumericPayloadTokenFilter.cs       |   70 +
 .../NumericPayloadTokenFilterFactory.cs         |   60 +
 .../Analysis/Payloads/PayloadEncoder.cs         |   43 +
 .../Analysis/Payloads/PayloadHelper.cs          |   81 +
 .../Payloads/TokenOffsetPayloadTokenFilter.cs   |   61 +
 .../TokenOffsetPayloadTokenFilterFactory.cs     |   56 +
 .../Payloads/TypeAsPayloadTokenFilter.cs        |   62 +
 .../Payloads/TypeAsPayloadTokenFilterFactory.cs |   56 +
 .../Analysis/Position/PositionFilter.cs         |  109 +
 .../Analysis/Position/PositionFilterFactory.cs  |   70 +
 .../Analysis/Pt/PortugueseAnalyzer.cs           |  155 ++
 .../Analysis/Pt/PortugueseLightStemFilter.cs    |   66 +
 .../Pt/PortugueseLightStemFilterFactory.cs      |   55 +
 .../Analysis/Pt/PortugueseLightStemmer.cs       |  252 +++
 .../Analysis/Pt/PortugueseMinimalStemFilter.cs  |   66 +
 .../Pt/PortugueseMinimalStemFilterFactory.cs    |   55 +
 .../Analysis/Pt/PortugueseMinimalStemmer.cs     |   44 +
 .../Analysis/Pt/PortugueseStemFilter.cs         |   70 +
 .../Analysis/Pt/PortugueseStemFilterFactory.cs  |   55 +
 .../Analysis/Pt/PortugueseStemmer.cs            |  126 ++
 .../Analysis/Pt/RSLPStemmerBase.cs              |  410 ++++
 .../Analysis/Query/QueryAutoStopWordAnalyzer.cs |  213 ++
 .../Analysis/Reverse/ReverseStringFilter.cs     |  281 +++
 .../Reverse/ReverseStringFilterFactory.cs       |   59 +
 .../Analysis/Ro/RomanianAnalyzer.cs             |  142 ++
 .../Analysis/Ru/RussianAnalyzer.cs              |  172 ++
 .../Analysis/Ru/RussianLetterTokenizer.cs       |   83 +
 .../Ru/RussianLetterTokenizerFactory.cs         |   52 +
 .../Analysis/Ru/RussianLightStemFilter.cs       |   66 +
 .../Ru/RussianLightStemFilterFactory.cs         |   55 +
 .../Analysis/Ru/RussianLightStemmer.cs          |  134 ++
 .../Analysis/Shingle/ShingleAnalyzerWrapper.cs  |  182 ++
 .../Analysis/Shingle/ShingleFilter.cs           |  724 ++++++
 .../Analysis/Shingle/ShingleFilterFactory.cs    |   86 +
 .../Analysis/Sinks/DateRecognizerSinkFilter.cs  |   79 +
 .../Analysis/Sinks/TeeSinkTokenFilter.cs        |  300 +++
 .../Analysis/Sinks/TokenRangeSinkFilter.cs      |   73 +
 .../Analysis/Sinks/TokenTypeSinkFilter.cs       |   50 +
 .../Analysis/Snowball/SnowballAnalyzer.cs       |  102 +
 .../Analysis/Snowball/SnowballFilter.cs         |  129 ++
 .../Snowball/SnowballPorterFilterFactory.cs     |  101 +
 .../Analysis/Standard/ClassicAnalyzer.cs        |  161 ++
 .../Analysis/Standard/ClassicFilter.cs          |   92 +
 .../Analysis/Standard/ClassicFilterFactory.cs   |   55 +
 .../Analysis/Standard/ClassicTokenizer.cs       |  210 ++
 .../Standard/ClassicTokenizerFactory.cs         |   61 +
 .../Analysis/Standard/ClassicTokenizerImpl.cs   |  723 ++++++
 .../Analysis/Standard/StandardAnalyzer.cs       |  162 ++
 .../Analysis/Standard/StandardFilter.cs         |  100 +
 .../Analysis/Standard/StandardFilterFactory.cs  |   56 +
 .../Analysis/Standard/StandardTokenizer.cs      |  257 +++
 .../Standard/StandardTokenizerFactory.cs        |   61 +
 .../Analysis/Standard/StandardTokenizerImpl.cs  |  733 ++++++
 .../Standard/StandardTokenizerInterface.cs      |   77 +
 .../Analysis/Standard/UAX29URLEmailAnalyzer.cs  |  150 ++
 .../Analysis/Standard/UAX29URLEmailTokenizer.cs |  221 ++
 .../Standard/UAX29URLEmailTokenizerFactory.cs   |   61 +
 .../Analysis/Sv/SwedishAnalyzer.cs              |  139 ++
 .../Analysis/Sv/SwedishLightStemFilter.cs       |   66 +
 .../Sv/SwedishLightStemFilterFactory.cs         |   55 +
 .../Analysis/Sv/SwedishLightStemmer.cs          |  114 +
 .../Analysis/Synonym/FSTSynonymFilterFactory.cs |  186 ++
 .../Analysis/Synonym/SlowSynonymFilter.cs       |  317 +++
 .../Synonym/SlowSynonymFilterFactory.cs         |  391 ++++
 .../Analysis/Synonym/SlowSynonymMap.cs          |  210 ++
 .../Analysis/Synonym/SolrSynonymParser.cs       |  218 ++
 .../Analysis/Synonym/SynonymFilter.cs           |  789 +++++++
 .../Analysis/Synonym/SynonymFilterFactory.cs    |  115 +
 .../Analysis/Synonym/SynonymMap.cs              |  430 ++++
 .../Analysis/Synonym/WordnetSynonymParser.cs    |  135 ++
 .../Analysis/Th/ThaiAnalyzer.cs                 |  143 ++
 .../Analysis/Th/ThaiTokenizer.cs                |  116 +
 .../Analysis/Th/ThaiTokenizerFactory.cs         |   56 +
 .../Analysis/Th/ThaiWordFilter.cs               |  172 ++
 .../Analysis/Th/ThaiWordFilterFactory.cs        |   59 +
 .../Analysis/Tr/ApostropheFilter.cs             |   70 +
 .../Analysis/Tr/ApostropheFilterFactory.cs      |   52 +
 .../Analysis/Tr/TurkishAnalyzer.cs              |  145 ++
 .../Analysis/Tr/TurkishLowerCaseFilter.cs       |  151 ++
 .../Tr/TurkishLowerCaseFilterFactory.cs         |   64 +
 .../Analysis/Util/AbstractAnalysisFactory.cs    |  406 ++++
 .../Analysis/Util/AnalysisSPILoader.cs          |  165 ++
 .../Analysis/Util/CharArrayIterator.cs          |  278 +++
 .../Analysis/Util/CharArrayMap.cs               |  928 ++++++++
 .../Analysis/Util/CharArraySet.cs               |  267 +++
 .../Analysis/Util/CharFilterFactory.cs          |   86 +
 .../Analysis/Util/CharTokenizer.cs              |  209 ++
 .../Analysis/Util/CharacterUtils.cs             |  498 ++++
 .../Analysis/Util/ClasspathResourceLoader.cs    |  105 +
 .../Analysis/Util/ElisionFilter.cs              |   80 +
 .../Analysis/Util/ElisionFilterFactory.cs       |   86 +
 .../Analysis/Util/FilesystemResourceLoader.cs   |  113 +
 .../Analysis/Util/FilteringTokenFilter.cs       |  150 ++
 .../Analysis/Util/MultiTermAwareComponent.cs    |   39 +
 .../Analysis/Util/OpenStringBuilder.cs          |  205 ++
 .../Analysis/Util/ResourceLoader.cs             |   49 +
 .../Analysis/Util/ResourceLoaderAware.cs        |   38 +
 .../Analysis/Util/RollingCharBuffer.cs          |  200 ++
 .../Analysis/Util/SegmentingTokenizerBase.cs    |  258 +++
 .../Analysis/Util/StemmerUtil.cs                |  153 ++
 .../Analysis/Util/StopwordAnalyzerBase.cs       |  172 ++
 .../Analysis/Util/TokenFilterFactory.cs         |   86 +
 .../Analysis/Util/TokenizerFactory.cs           |   93 +
 .../Analysis/Util/WordlistLoader.cs             |  305 +++
 .../Analysis/Wikipedia/WikipediaTokenizer.cs    |  343 +++
 .../Wikipedia/WikipediaTokenizerFactory.cs      |   57 +
 .../Collation/CollationAttributeFactory.cs      |   99 +
 .../Collation/CollationKeyAnalyzer.cs           |  129 ++
 .../Collation/CollationKeyFilter.cs             |  112 +
 .../Collation/CollationKeyFilterFactory.cs      |  254 +++
 .../CollatedTermAttributeImpl.cs                |   52 +
 .../Lucene.Net.Analysis.Common.csproj           |  244 ++
 .../Properties/AssemblyInfo.cs                  |   36 +
 src/Lucene.Net.Core/Analysis/Analyzer.cs        |    2 +-
 src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs |    2 +-
 src/Lucene.Net.Core/Analysis/TokenStream.cs     |    2 +
 387 files changed, 60480 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
new file mode 100644
index 0000000..45318a7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicAnalyzer.cs
@@ -0,0 +1,161 @@
+using System;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Arabic. 
+	/// <para>
+	/// This analyzer implements light-stemming as specified by:
+	/// <i>
+	/// Light Stemming for Arabic Information Retrieval
+	/// </i>    
+	/// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf
+	/// </para>
+	/// <para>
+	/// The analysis package contains three primary components:
+	/// <ul>
+	///  <li><seealso cref="ArabicNormalizationFilter"/>: Arabic orthographic normalization.
+	///  <li><seealso cref="ArabicStemFilter"/>: Arabic light stemming
+	///  <li>Arabic stop words file: a set of default Arabic stop words.
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public sealed class ArabicAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// File containing default Arabic stopwords.
+	  /// 
+	  /// Default stopword list is from http://members.unine.ch/jacques.savoy/clef/index.html
+	  /// The stopword list is BSD-Licensed.
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(ArabicAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public ArabicAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// <seealso cref="ArabicStemFilter"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          a set of terms not to be stemmed </param>
+	  public ArabicAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
+	  ///         <seealso cref="ArabicNormalizationFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
+	  ///         if a stem exclusion set is provided and <seealso cref="ArabicStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = matchVersion.onOrAfter(org.apache.lucene.util.Version.LUCENE_31) ? new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader) : new ArabicLetterTokenizer(matchVersion, reader);
+		Tokenizer source = matchVersion.onOrAfter(Version.LUCENE_31) ? new StandardTokenizer(matchVersion, reader) : new ArabicLetterTokenizer(matchVersion, reader);
+		TokenStream result = new LowerCaseFilter(matchVersion, source);
+		// the order here is important: the stopword list is not normalized!
+		result = new StopFilter(matchVersion, result, stopwords);
+		// TODO maybe we should make ArabicNormalization filter also KeywordAttribute aware?!
+		result = new ArabicNormalizationFilter(result);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		return new TokenStreamComponents(source, new ArabicStemFilter(result));
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
new file mode 100644
index 0000000..5427293
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizer.cs
@@ -0,0 +1,83 @@
+using System;
+
+namespace org.apache.lucene.analysis.ar
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LetterTokenizer = org.apache.lucene.analysis.core.LetterTokenizer;
+	using CharTokenizer = org.apache.lucene.analysis.util.CharTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tokenizer that breaks text into runs of letters and diacritics.
+	/// <para>
+	/// The problem with the standard Letter tokenizer is that it fails on diacritics.
+	/// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
+	/// </para>
+	/// <para>
+	/// <a name="version"/>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="ArabicLetterTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="#isTokenChar(int)"/> and
+	/// <seealso cref="#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	/// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead. 
+	[Obsolete("(3.1) Use StandardTokenizer instead.")]
+	public class ArabicLetterTokenizer : LetterTokenizer
+	{
+	  /// <summary>
+	  /// Construct a new ArabicLetterTokenizer. </summary>
+	  /// <param name="matchVersion"> Lucene version
+	  /// to match. See <a href="#version">above</a>. </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public ArabicLetterTokenizer(Version matchVersion, Reader @in) : base(matchVersion, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a new ArabicLetterTokenizer using a given
+	  /// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to match. See <a href="#version">above</a>. </param>
+	  /// <param name="factory">
+	  ///          the attribute factory to use for this Tokenizer </param>
+	  /// <param name="in">
+	  ///          the input to split up into tokens </param>
+	  public ArabicLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader @in) : base(matchVersion, factory, @in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Allows for Letter category or NonspacingMark category. </summary>
+	  /// <seealso cref= org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int) </seealso>
+	  protected internal override bool isTokenChar(int c)
+	  {
+		// The converter emitted Java's Character.getType(int), which does not exist in C#.
+		// Use CharUnicodeInfo via ConvertFromUtf32 so supplementary code points (c is an
+		// int code point, not a char) are classified correctly.
+		return base.isTokenChar(c) || System.Globalization.CharUnicodeInfo.GetUnicodeCategory(char.ConvertFromUtf32(c), 0) == System.Globalization.UnicodeCategory.NonSpacingMark;
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
new file mode 100644
index 0000000..3b6def0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicLetterTokenizerFactory.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+
+
+	/// <summary>
+	/// Factory for <seealso cref="ArabicLetterTokenizer"/> </summary>
+	/// @deprecated (3.1) Use StandardTokenizerFactory instead.
+	///  
+	[Obsolete("(3.1) Use StandardTokenizerFactory instead.")]
+	public class ArabicLetterTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ArabicLetterTokenizerFactory. Rejects any unconsumed
+	  /// configuration parameters. </summary>
+	  public ArabicLetterTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// The tokenizer requires a Lucene match version (passed below as
+		// luceneMatchVersion), so verify one was configured up front.
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ArabicLetterTokenizer create(AttributeFactory factory, Reader input)
+	  {
+		return new ArabicLetterTokenizer(luceneMatchVersion, factory, input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
new file mode 100644
index 0000000..7561878
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilter.cs
@@ -0,0 +1,51 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicNormalizer"/> to normalize the orthography.
+	/// 
+	/// </summary>
+
+	public sealed class ArabicNormalizationFilter : TokenFilter
+	{
+	  // ArabicNormalizer holds no per-token state, so one instance serves the
+	  // whole stream.
+	  private readonly ArabicNormalizer normalizer = new ArabicNormalizer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+	  public ArabicNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  // Normalization mutates the term buffer in place; only the length can
+		  // shrink (diacritics/tatweel are deleted), so propagate the new length
+		  // back to the attribute.
+		  int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  termAtt.Length = newlen;
+		  return true;
+		}
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
new file mode 100644
index 0000000..bb38dd4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizationFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ArabicNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ArabicNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new ArabicNormalizationFilterFactory. Rejects any unconsumed
+	  /// configuration parameters. </summary>
+	  public ArabicNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ArabicNormalizationFilter create(TokenStream input)
+	  {
+		return new ArabicNormalizationFilter(input);
+	  }
+
+	  /// <summary>
+	  /// Returns this factory itself as the multi-term component: presumably the
+	  /// char-level normalization is safe to apply to multi-term (wildcard/fuzzy)
+	  /// query fragments — confirm against MultiTermAwareComponent's contract.
+	  /// </summary>
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
new file mode 100644
index 0000000..05ddad0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicNormalizer.cs
@@ -0,0 +1,111 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	///  Normalizer for Arabic.
+	///  <para>
+	///  Normalization is done in-place for efficiency, operating on a termbuffer.
+	/// </para>
+	///  <para>
+	///  Normalization is defined as:
+	///  <ul>
+	///  <li> Normalization of hamza with alef seat to a bare alef.
+	///  <li> Normalization of teh marbuta to heh
+	///  <li> Normalization of dotless yeh (alef maksura) to yeh.
+	///  <li> Removal of Arabic diacritics (the harakat)
+	///  <li> Removal of tatweel (stretching character).
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public class ArabicNormalizer
+	{
+	  // Alef and its hamza-seated variants (folded to bare alef below).
+	  public const char ALEF = '\u0627';
+	  public const char ALEF_MADDA = '\u0622';
+	  public const char ALEF_HAMZA_ABOVE = '\u0623';
+	  public const char ALEF_HAMZA_BELOW = '\u0625';
+
+	  public const char YEH = '\u064A';
+	  public const char DOTLESS_YEH = '\u0649';
+
+	  public const char TEH_MARBUTA = '\u0629';
+	  public const char HEH = '\u0647';
+
+	  public const char TATWEEL = '\u0640';
+
+	  // Harakat (diacritics) — removed entirely during normalization.
+	  public const char FATHATAN = '\u064B';
+	  public const char DAMMATAN = '\u064C';
+	  public const char KASRATAN = '\u064D';
+	  public const char FATHA = '\u064E';
+	  public const char DAMMA = '\u064F';
+	  public const char KASRA = '\u0650';
+	  public const char SHADDA = '\u0651';
+	  public const char SUKUN = '\u0652';
+
+	  /// <summary>
+	  /// Normalize an input buffer of Arabic text
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int normalize(char[] s, int len)
+	  {
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+		  case ALEF_MADDA:
+		  case ALEF_HAMZA_ABOVE:
+		  case ALEF_HAMZA_BELOW:
+			s[i] = ALEF;
+			break;
+		  case DOTLESS_YEH:
+			s[i] = YEH;
+			break;
+		  case TEH_MARBUTA:
+			s[i] = HEH;
+			break;
+		  case TATWEEL:
+		  case KASRATAN:
+		  case DAMMATAN:
+		  case FATHATAN:
+		  case FATHA:
+		  case DAMMA:
+		  case KASRA:
+		  case SHADDA:
+		  case SUKUN:
+			// Delete the character: the buffer is compacted in place, so step
+			// back one position to re-examine the character shifted into slot i.
+			len = StemmerUtil.delete(s, i, len);
+			i--;
+			break;
+		  default:
+			break;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
new file mode 100644
index 0000000..8dca664
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // javadoc @link
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="ArabicStemmer"/> to stem Arabic words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter  </seealso>
+
+	public sealed class ArabicStemFilter : TokenFilter
+	{
+	  private readonly ArabicStemmer stemmer = new ArabicStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public ArabicStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  // Terms flagged as keywords (e.g. by an upstream SetKeywordMarkerFilter)
+		  // pass through unstemmed.
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			// Stemming is in place on the term buffer; only the length changes.
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
new file mode 100644
index 0000000..851df64
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ar
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ArabicStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ArabicNormalizationFilterFactory"/&gt;
+	///     &lt;filter class="solr.ArabicStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ArabicStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ArabicStemFilterFactory. Rejects any unconsumed
+	  /// configuration parameters. </summary>
+	  public ArabicStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ArabicStemFilter create(TokenStream input)
+	  {
+		return new ArabicStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
new file mode 100644
index 0000000..1776906
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ar/ArabicStemmer.cs
@@ -0,0 +1,163 @@
+namespace org.apache.lucene.analysis.ar
+{
+
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	///  Stemmer for Arabic.
+	///  <para>
+	///  Stemming  is done in-place for efficiency, operating on a termbuffer.
+	/// </para>
+	///  <para>
+	///  Stemming is defined as:
+	///  <ul>
+	///  <li> Removal of attached definite article, conjunction, and prepositions.
+	///  <li> Stemming of common suffixes.
+	/// </ul>
+	/// 
+	/// </para>
+	/// </summary>
+	public class ArabicStemmer
+	{
+	  public const char ALEF = '\u0627';
+	  public const char BEH = '\u0628';
+	  public const char TEH_MARBUTA = '\u0629';
+	  public const char TEH = '\u062A';
+	  public const char FEH = '\u0641';
+	  public const char KAF = '\u0643';
+	  public const char LAM = '\u0644';
+	  public const char NOON = '\u0646';
+	  public const char HEH = '\u0647';
+	  public const char WAW = '\u0648';
+	  public const char YEH = '\u064A';
+
+	  /// <summary>
+	  /// Attached definite article, conjunction and preposition patterns stripped
+	  /// from the start of a word. Restored from the original Lucene Java source:
+	  /// the converter dropped the array contents, leaving the stemmer a no-op.
+	  /// </summary>
+	  public static readonly char[][] prefixes =
+	  {
+		  new char[] {ALEF, LAM},
+		  new char[] {WAW, ALEF, LAM},
+		  new char[] {BEH, ALEF, LAM},
+		  new char[] {KAF, ALEF, LAM},
+		  new char[] {FEH, ALEF, LAM},
+		  new char[] {LAM, LAM},
+		  new char[] {WAW}
+	  };
+
+	  /// <summary>
+	  /// Common suffix patterns stripped from the end of a word. Restored from the
+	  /// original Lucene Java source (see note on <seealso cref="prefixes"/>).
+	  /// </summary>
+	  public static readonly char[][] suffixes =
+	  {
+		  new char[] {HEH, ALEF},
+		  new char[] {ALEF, NOON},
+		  new char[] {ALEF, TEH},
+		  new char[] {WAW, NOON},
+		  new char[] {YEH, NOON},
+		  new char[] {YEH, HEH},
+		  new char[] {YEH, TEH_MARBUTA},
+		  new char[] {HEH},
+		  new char[] {TEH_MARBUTA},
+		  new char[] {YEH}
+	  };
+
+	  /// <summary>
+	  /// Stem an input buffer of Arabic text.
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		len = stemPrefix(s, len);
+		len = stemSuffix(s, len);
+
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Stem a prefix off an Arabic word. </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new length of input buffer after stemming. </returns>
+	  public virtual int stemPrefix(char[] s, int len)
+	  {
+		// At most one prefix is removed: return on the first (longest-listed) match.
+		for (int i = 0; i < prefixes.Length; i++)
+		{
+		  if (startsWithCheckLength(s, len, prefixes[i]))
+		  {
+			return StemmerUtil.deleteN(s, 0, len, prefixes[i].Length);
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Stem suffix(es) off an Arabic word. </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new length of input buffer after stemming </returns>
+	  public virtual int stemSuffix(char[] s, int len)
+	  {
+		// Unlike prefixes, multiple suffixes may be stripped in sequence.
+		for (int i = 0; i < suffixes.Length; i++)
+		{
+		  if (endsWithCheckLength(s, len, suffixes[i]))
+		  {
+			len = StemmerUtil.deleteN(s, len - suffixes[i].Length, len, suffixes[i].Length);
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Returns true if the prefix matches and can be stemmed </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="prefix"> prefix to check </param>
+	  /// <returns> true if the prefix matches and can be stemmed </returns>
+	  internal virtual bool startsWithCheckLength(char[] s, int len, char[] prefix)
+	  {
+		if (prefix.Length == 1 && len < 4) // wa- prefix requires at least 3 characters
+		{
+		  return false;
+		} // other prefixes require only 2.
+		else if (len < prefix.Length + 2)
+		{
+		  return false;
+		}
+		else
+		{
+		  for (int i = 0; i < prefix.Length; i++)
+		  {
+			if (s[i] != prefix[i])
+			{
+			  return false;
+			}
+		  }
+
+		  return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns true if the suffix matches and can be stemmed </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <param name="suffix"> suffix to check </param>
+	  /// <returns> true if the suffix matches and can be stemmed </returns>
+	  internal virtual bool endsWithCheckLength(char[] s, int len, char[] suffix)
+	  {
+		if (len < suffix.Length + 2) // all suffixes require at least 2 characters after stemming
+		{
+		  return false;
+		}
+		else
+		{
+		  for (int i = 0; i < suffix.Length; i++)
+		  {
+			if (s[len - suffix.Length + i] != suffix[i])
+			{
+			  return false;
+			}
+		  }
+
+		  return true;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
new file mode 100644
index 0000000..eb42363
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianAnalyzer.cs
@@ -0,0 +1,144 @@
+using System;
+
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Bulgarian.
+	/// <para>
+	/// This analyzer implements light-stemming as specified by: <i> Searching
+	/// Strategies for the Bulgarian Language </i>
+	/// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
+	/// </para>
+	/// <para>
+	/// </para>
+	/// </summary>
+	public sealed class BulgarianAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Bulgarian stopwords.
+	  /// 
+	  /// Default stopword list is from
+	  /// http://members.unine.ch/jacques.savoy/clef/index.html The stopword list is
+	  /// BSD-Licensed.
+	  /// </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set.
+	  /// </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer
+	  /// class accesses the static final set the first time.;
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(BulgarianAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (System.IO.IOException e)
+		  {
+			// default set should always be present as it is part of the
+			// distribution; preserve the original failure as the inner exception.
+			throw new Exception("Unable to load default stopword set", e);
+		  }
+		}
+	  }
+
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words:
+	  /// <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public BulgarianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words and a stem exclusion set.
+	  /// If a stem exclusion set is provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> 
+	  /// before <seealso cref="BulgarianStemFilter"/>.
+	  /// </summary>
+	  public BulgarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		// Defensive copy so later mutation of the caller's set cannot affect us.
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="BulgarianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		// Mark exclusion-set terms as keywords so BulgarianStemFilter skips them.
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new BulgarianStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
new file mode 100644
index 0000000..d89426c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilter.cs
@@ -0,0 +1,68 @@
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; // for javadoc
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="BulgarianStemmer"/> to stem Bulgarian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class BulgarianStemFilter : TokenFilter
+	{
+	  private readonly BulgarianStemmer stemmer = new BulgarianStemmer();
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  /// <summary>
+	  /// Creates a new BulgarianStemFilter wrapping the given <seealso cref="TokenStream"/>.
+	  /// </summary>
+	  public BulgarianStemFilter(TokenStream input) : base(input)
+	  {
+		// C# field initializers may not call the inherited instance method
+		// addAttribute (compiler error CS0236 in the raw port), so the
+		// attributes are resolved here in the constructor instead.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token and stems the current term in place,
+	  /// unless it has been flagged as a keyword (e.g. by SetKeywordMarkerFilter).
+	  /// </summary>
+	  /// <returns> true if a token was produced; false at end of stream </returns>
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			// The stemmer rewrites the term buffer in place and returns the new length.
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
new file mode 100644
index 0000000..1f09691
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="BulgarianStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.BulgarianStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class BulgarianStemFilterFactory : TokenFilterFactory
+	{
+	  /// <summary>
+	  /// Builds the factory; no configuration arguments are accepted. </summary>
+	  /// <exception cref="System.ArgumentException"> if any unconsumed arguments remain </exception>
+	  public BulgarianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// Anything the base constructor did not consume is an unsupported parameter.
+		if (args.Count != 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps the given stream in a <seealso cref="BulgarianStemFilter"/>. </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new BulgarianStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
new file mode 100644
index 0000000..dda3b5d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Bg/BulgarianStemmer.cs
@@ -0,0 +1,187 @@
+namespace org.apache.lucene.analysis.bg
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Bulgarian.
+	/// <para>
+	/// Implements the algorithm described in:  
+	/// <i>
+	/// Searching Strategies for the Bulgarian Language
+	/// </i>
+	/// http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
+	/// </para>
+	/// </summary>
+	public class BulgarianStemmer
+	{
+
+	  /// <summary>
+	  /// Stem an input buffer of Bulgarian text.
+	  /// </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> length of input buffer after normalization </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 4) // do not stem
+		{
+		  return len;
+		}
+
+		// Longest suffix first: strip "ища" before the article/plural passes run.
+		if (len > 5 && StemmerUtil.EndsWith(s, len, "ища"))
+		{
+		  return len - 3;
+		}
+
+		len = removeArticle(s, len);
+		len = removePlural(s, len);
+
+		// Trailing single-vowel trimming; "я" may be followed by one more vowel removal.
+		if (len > 3)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "я"))
+		  {
+			len--;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "а") || StemmerUtil.EndsWith(s, len, "о") || StemmerUtil.EndsWith(s, len, "е"))
+		  {
+			len--;
+		  }
+		}
+
+		// the rule to rewrite ен -> н is duplicated in the paper.
+		// in the perl implementation referenced by the paper, this is fixed.
+		// (it is fixed here as well)
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ен"))
+		{
+		  s[len - 2] = 'н'; // replace with н
+		  len--;
+		}
+
+		// Drop a penultimate "ъ" by shifting the final letter left one position.
+		if (len > 5 && s[len - 2] == 'ъ')
+		{
+		  s[len - 2] = s[len - 1]; // replace ъN with N
+		  len--;
+		}
+
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Mainly remove the definite article </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new stemmed length </returns>
+	  private int removeArticle(char[] s, int len)
+	  {
+		// Checked longest-to-shortest with decreasing minimum lengths; at most
+		// one article suffix is removed per call.
+		if (len > 6 && StemmerUtil.EndsWith(s, len, "ият"))
+		{
+		  return len - 3;
+		}
+
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "ът") || StemmerUtil.EndsWith(s, len, "то") || StemmerUtil.EndsWith(s, len, "те") || StemmerUtil.EndsWith(s, len, "та") || StemmerUtil.EndsWith(s, len, "ия"))
+		  {
+			return len - 2;
+		  }
+		}
+
+		if (len > 4 && StemmerUtil.EndsWith(s, len, "ят"))
+		{
+		  return len - 2;
+		}
+
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Remove plural endings, in some cases also rewriting the stem's final
+	  /// consonant (palatalization reversal, e.g. ци -> к). </summary>
+	  /// <param name="s"> input buffer </param>
+	  /// <param name="len"> length of input buffer </param>
+	  /// <returns> new stemmed length </returns>
+	  private int removePlural(char[] s, int len)
+	  {
+		if (len > 6)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "овци"))
+		  {
+			return len - 3; // replace with о
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "ове"))
+		  {
+			return len - 3;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "еве"))
+		  {
+			s[len - 3] = 'й'; // replace with й
+			return len - 2;
+		  }
+		}
+
+		if (len > 5)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "ища"))
+		  {
+			return len - 3;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "та"))
+		  {
+			return len - 2;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "ци"))
+		  {
+			s[len - 2] = 'к'; // replace with к
+			return len - 1;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "зи"))
+		  {
+			s[len - 2] = 'г'; // replace with г
+			return len - 1;
+		  }
+
+		  // е…и pattern: restore я and drop the final и.
+		  if (s[len - 3] == 'е' && s[len - 1] == 'и')
+		  {
+			s[len - 3] = 'я'; // replace е with я, remove и
+			return len - 1;
+		  }
+		}
+
+		if (len > 4)
+		{
+		  if (StemmerUtil.EndsWith(s, len, "си"))
+		  {
+			s[len - 2] = 'х'; // replace with х
+			return len - 1;
+		  }
+		  if (StemmerUtil.EndsWith(s, len, "и"))
+		  {
+			return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
new file mode 100644
index 0000000..5cd8bd8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianAnalyzer.cs
@@ -0,0 +1,143 @@
+using System;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Brazilian Portuguese language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all) and an external list of exclusions (words that will
+	/// not be stemmed, but indexed).
+	/// </para>
+	/// 
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>.</para>
+	/// </summary>
+	public sealed class BrazilianAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Brazilian Portuguese stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  // Lazy holder so the stopword file is only read on first use of the default set.
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			// NOTE(review): StandardCharsets is a Java type with no C# equivalent in
+			// scope — presumably this should become System.Text.Encoding.UTF8, but the
+			// ported IOUtils.getDecodingReader signature must be confirmed first.
+			DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(BrazilianAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
+		  }
+		  // NOTE(review): IOException is unqualified and only 'using System;' is in
+		  // scope — verify 'using System.IO;' is added when this file is compiled.
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Contains words that should be indexed but not stemmed.
+	  /// </summary>
+	  private CharArraySet excltable = CharArraySet.EMPTY_SET;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words (<seealso cref="#getDefaultStopSet()"/>).
+	  /// </summary>
+	  public BrazilianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords) : base(matchVersion, stopwords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words and stemming exclusion words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          words to index without stemming; copied into an unmodifiable set </param>
+	  public BrazilianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : this(matchVersion, stopwords)
+	  {
+		excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="LowerCaseFilter"/>, <seealso cref="StandardFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , and <seealso cref="BrazilianStemFilter"/>. </returns>
+	  // NOTE(review): declared 'protected internal' while BulgarianAnalyzer declares the
+	  // same override 'public' — confirm which accessibility the base class requires.
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		// NOTE: LowerCaseFilter runs before StandardFilter here (opposite order to
+		// BulgarianAnalyzer) — presumably faithful to the Java original; verify.
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new LowerCaseFilter(matchVersion, source);
+		result = new StandardFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (excltable != null && !excltable.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, excltable);
+		}
+		return new TokenStreamComponents(source, new BrazilianStemFilter(result));
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
new file mode 100644
index 0000000..4ea054b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilter.cs
@@ -0,0 +1,90 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="BrazilianStemmer"/>.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para> </summary>
+	/// <seealso cref= SetKeywordMarkerFilter
+	///  </seealso>
+	public sealed class BrazilianStemFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// <seealso cref="BrazilianStemmer"/> in use by this filter.
+	  /// </summary>
+	  private readonly BrazilianStemmer stemmer = new BrazilianStemmer();
+
+	  /// <summary>
+	  /// Terms that must never be stemmed; null means no exclusions.
+	  /// (The raw port's Java wildcard type 'Set&lt;?&gt;' is not valid C#;
+	  /// the elements compared against are term strings.)
+	  /// </summary>
+	  private ISet<string> exclusions = null;
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  /// <summary>
+	  /// Creates a new BrazilianStemFilter 
+	  /// </summary>
+	  /// <param name="in"> the source <seealso cref="TokenStream"/>  </param>
+	  public BrazilianStemFilter(TokenStream @in) : base(@in)
+	  {
+		// Attribute lookup moved out of the field initializers: C# cannot call
+		// the inherited addAttribute there (compiler error CS0236 in the raw port).
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token and replaces the term with its stem, unless
+	  /// the term is flagged as a keyword or listed in the exclusion set.
+	  /// </summary>
+	  /// <returns> true if a token was produced; false at end of stream </returns>
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  string term = termAtt.ToString();
+		  // Check the exclusion table.
+		  if (!keywordAttr.Keyword && (exclusions == null || !exclusions.Contains(term)))
+		  {
+			string s = stemmer.stem(term);
+			// If not stemmed, don't waste the time adjusting the token.
+			if ((s != null) && !s.Equals(term))
+			{
+			  termAtt.setEmpty().append(s);
+			}
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
new file mode 100644
index 0000000..17a5fce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Br/BrazilianStemFilterFactory.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.br
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="BrazilianStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.BrazilianStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class BrazilianStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new BrazilianStemFilterFactory </summary>
+	  /// <exception cref="System.ArgumentException"> if any unconsumed arguments remain </exception>
+	  public BrazilianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps the given stream in a <seealso cref="BrazilianStemFilter"/>. </summary>
+	  // Return type widened to TokenStream: C# (before C# 9) does not allow covariant
+	  // override return types, and the sibling factories all return TokenStream.
+	  public override TokenStream create(TokenStream @in)
+	  {
+		return new BrazilianStemFilter(@in);
+	  }
+	}
+
+
+}
\ No newline at end of file


Mime
View raw message