lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mhern...@apache.org
Subject [37/50] [abbrv] git commit: Finish up Lucene.Net.Analysis.Core
Date Tue, 24 Sep 2013 18:33:13 GMT
Finish up Lucene.Net.Analysis.Core


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/98e877d5
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/98e877d5
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/98e877d5

Branch: refs/heads/branch_4x
Commit: 98e877d50c803e381fcc92250068f366c1dc6c4c
Parents: d72f5c1
Author: Paul Irwin <paulirwin@gmail.com>
Authored: Wed Aug 7 15:02:40 2013 -0400
Committer: Paul Irwin <paulirwin@gmail.com>
Committed: Wed Aug 7 15:02:40 2013 -0400

----------------------------------------------------------------------
 src/contrib/Analyzers/Contrib.Analyzers.csproj  | 136 ++++++-------------
 src/contrib/Analyzers/Core/LetterTokenizer.cs   |   4 +-
 src/contrib/Analyzers/Core/LowerCaseFilter.cs   |  34 +++++
 .../Analyzers/Core/LowerCaseFilterFactory.cs    |  31 +++++
 .../Analyzers/Core/LowerCaseTokenizer.cs        |  27 ++++
 .../Analyzers/Core/LowerCaseTokenizerFactory.cs |  32 +++++
 src/contrib/Analyzers/Core/SimpleAnalyzer.cs    |  23 ++++
 src/contrib/Analyzers/Core/StopAnalyzer.cs      |  55 ++++++++
 src/contrib/Analyzers/Core/StopFilter.cs        |  53 ++++++++
 src/contrib/Analyzers/Core/StopFilterFactory.cs |  81 +++++++++++
 src/contrib/Analyzers/Core/TypeTokenFilter.cs   |  34 +++++
 .../Analyzers/Core/TypeTokenFilterFactory.cs    |  63 +++++++++
 .../Analyzers/Core/WhitespaceAnalyzer.cs        |  23 ++++
 .../Analyzers/Core/WhitespaceTokenizer.cs       |  28 ++++
 .../Core/WhitespaceTokenizerFactory.cs          |  26 ++++
 src/contrib/Analyzers/Support/AbstractSet.cs    |   2 +-
 .../Analyzers/Util/AbstractAnalysisFactory.cs   |  11 +-
 src/contrib/Analyzers/Util/CharArrayMap.cs      |  73 +++++++++-
 src/contrib/Analyzers/Util/CharArraySet.cs      |  17 ++-
 src/contrib/Analyzers/Util/CharTokenizer.cs     |   4 +-
 src/contrib/Analyzers/Util/CharacterUtils.cs    |   2 +-
 .../Analyzers/Util/FilteringTokenFilter.cs      |  77 +++++++++++
 .../Analyzers/Util/IMultiTermAwareComponent.cs  |  12 ++
 .../Analyzers/Util/IResourceLoaderAware.cs      |  12 ++
 .../Analyzers/Util/StopwordAnalyzerBase.cs      |  10 +-
 .../Analyzers/Util/TokenFilterFactory.cs        |  44 ++++++
 src/contrib/Analyzers/Util/WordlistLoader.cs    |   4 +-
 27 files changed, 792 insertions(+), 126 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Contrib.Analyzers.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Contrib.Analyzers.csproj b/src/contrib/Analyzers/Contrib.Analyzers.csproj
index 8613c88..74b0f63 100644
--- a/src/contrib/Analyzers/Contrib.Analyzers.csproj
+++ b/src/contrib/Analyzers/Contrib.Analyzers.csproj
@@ -103,116 +103,39 @@
     <Reference Condition="'$(Framework)' == 'NET35'" Include="System.Core" />
   </ItemGroup>
   <ItemGroup>
-    <Compile Include="AR\ArabicAnalyzer.cs" />
-    <Compile Include="AR\ArabicLetterTokenizer.cs" />
-    <Compile Include="AR\ArabicNormalizationFilter.cs" />
-    <Compile Include="AR\ArabicNormalizer.cs" />
-    <Compile Include="AR\ArabicStemFilter.cs" />
-    <Compile Include="AR\ArabicStemmer.cs" />
-    <Compile Include="BR\BrazilianAnalyzer.cs" />
-    <Compile Include="BR\BrazilianStemFilter.cs" />
-    <Compile Include="BR\BrazilianStemmer.cs" />
-    <Compile Include="CJK\CJKAnalyzer.cs" />
-    <Compile Include="CJK\CJKTokenizer.cs" />
-    <Compile Include="Cn\ChineseAnalyzer.cs" />
-    <Compile Include="Cn\ChineseFilter.cs" />
-    <Compile Include="Cn\ChineseTokenizer.cs" />
-    <Compile Include="Compound\CompoundWordTokenFilterBase.cs" />
-    <Compile Include="Compound\DictionaryCompoundWordTokenFilter.cs" />
-    <Compile Include="Compound\HyphenationCompoundWordTokenFilter.cs" />
-    <Compile Include="Compound\Hyphenation\ByteVector.cs" />
-    <Compile Include="Compound\Hyphenation\CharVector.cs" />
-    <Compile Include="Compound\Hyphenation\Hyphen.cs" />
-    <Compile Include="Compound\Hyphenation\Hyphenation.cs" />
-    <Compile Include="Compound\Hyphenation\HyphenationException.cs" />
-    <Compile Include="Compound\Hyphenation\HyphenationTree.cs" />
-    <Compile Include="Compound\Hyphenation\PatternConsumer.cs" />
-    <Compile Include="Compound\Hyphenation\PatternParser.cs" />
-    <Compile Include="Compound\Hyphenation\TernaryTree.cs" />
     <Compile Include="Core\KeywordAnalyzer.cs" />
     <Compile Include="Core\KeywordTokenizer.cs" />
     <Compile Include="Core\KeywordTokenizerFactory.cs" />
     <Compile Include="Core\LetterTokenizer.cs" />
     <Compile Include="Core\LetterTokenizerFactory.cs" />
-    <Compile Include="Cz\CzechAnalyzer.cs" />
-    <Compile Include="De\GermanAnalyzer.cs" />
-    <Compile Include="De\GermanStemFilter.cs" />
-    <Compile Include="De\GermanStemmer.cs" />
-    <Compile Include="De\GermanDIN2Stemmer.cs" />
-    <Compile Include="El\GreekAnalyzer.cs" />
-    <Compile Include="El\GreekLowerCaseFilter.cs" />
-    <Compile Include="Fa\PersianAnalyzer.cs" />
-    <Compile Include="Fa\PersianNormalizationFilter.cs" />
-    <Compile Include="Fa\PersianNormalizer.cs" />
-    <Compile Include="Fr\ElisionFilter.cs" />
-    <Compile Include="Fr\FrenchAnalyzer.cs" />
-    <Compile Include="Fr\FrenchStemFilter.cs" />
-    <Compile Include="Fr\FrenchStemmer.cs" />
-    <Compile Include="Hunspell\HunspellAffix.cs" />
-    <Compile Include="Hunspell\HunspellDictionary.cs" />
-    <Compile Include="Hunspell\HunspellStem.cs" />
-    <Compile Include="Hunspell\HunspellStemFilter.cs" />
-    <Compile Include="Hunspell\HunspellStemmer.cs" />
-    <Compile Include="Hunspell\HunspellWord.cs" />
-    <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
-    <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
-    <Compile Include="Miscellaneous\PatternAnalyzer.cs" />
-    <Compile Include="Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
-    <Compile Include="Miscellaneous\PrefixAwareTokenStream.cs" />
-    <Compile Include="Miscellaneous\SingleTokenTokenStream.cs" />
-    <Compile Include="NGram\EdgeNGramTokenFilter.cs" />
-    <Compile Include="NGram\EdgeNGramTokenizer.cs" />
-    <Compile Include="NGram\NGramTokenFilter.cs" />
-    <Compile Include="NGram\NGramTokenizer.cs" />
-    <Compile Include="Nl\DutchAnalyzer.cs" />
-    <Compile Include="Nl\DutchStemFilter.cs" />
-    <Compile Include="Nl\DutchStemmer.cs" />
-    <Compile Include="Payloads\AbstractEncoder.cs" />
-    <Compile Include="Payloads\DelimitedPayloadTokenFilter.cs" />
-    <Compile Include="Payloads\FloatEncoder.cs" />
-    <Compile Include="Payloads\IdentityEncoder.cs" />
-    <Compile Include="Payloads\IntegerEncoder.cs" />
-    <Compile Include="Payloads\NumericPayloadTokenFilter.cs" />
-    <Compile Include="Payloads\PayloadEncoder.cs" />
-    <Compile Include="Payloads\PayloadHelper.cs" />
-    <Compile Include="Payloads\TokenOffsetPayloadTokenFilter.cs" />
-    <Compile Include="Payloads\TypeAsPayloadTokenFilter.cs" />
-    <Compile Include="Position\PositionFilter.cs" />
-    <Compile Include="Query\QueryAutoStopWordAnalyzer.cs" />
-    <Compile Include="Reverse\ReverseStringFilter.cs" />
-    <Compile Include="Ru\RussianAnalyzer.cs" />
-    <Compile Include="Ru\RussianLetterTokenizer.cs" />
-    <Compile Include="Ru\RussianLowerCaseFilter.cs" />
-    <Compile Include="Ru\RussianStemFilter.cs" />
-    <Compile Include="Ru\RussianStemmer.cs" />
+    <Compile Include="Core\LowerCaseFilter.cs" />
+    <Compile Include="Core\LowerCaseFilterFactory.cs" />
+    <Compile Include="Core\LowerCaseTokenizer.cs" />
+    <Compile Include="Core\LowerCaseTokenizerFactory.cs" />
+    <Compile Include="Core\SimpleAnalyzer.cs" />
+    <Compile Include="Core\StopAnalyzer.cs" />
+    <Compile Include="Core\StopFilter.cs" />
+    <Compile Include="Core\StopFilterFactory.cs" />
+    <Compile Include="Core\TypeTokenFilter.cs" />
+    <Compile Include="Core\TypeTokenFilterFactory.cs" />
+    <Compile Include="Core\WhitespaceAnalyzer.cs" />
+    <Compile Include="Core\WhitespaceTokenizer.cs" />
+    <Compile Include="Core\WhitespaceTokenizerFactory.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
-    <Compile Include="Shingle\Matrix\Column.cs" />
-    <Compile Include="Shingle\Matrix\Matrix.cs" />
-    <Compile Include="Shingle\Matrix\MatrixPermutationIterator.cs" />
-    <Compile Include="Shingle\Matrix\Row.cs" />
-    <Compile Include="Shingle\ShingleAnalyzerWrapper.cs" />
-    <Compile Include="Shingle\ShingleFilter.cs" />
-    <Compile Include="Shingle\ShingleMatrixFilter.cs" />
-    <Compile Include="Shingle\TokenPositioner.cs" />
-    <Compile Include="Shingle\Codec\OneDimensionalNonWeightedTokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Codec\SimpleThreeDimensionalTokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
-    <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
-    <Compile Include="Sinks\DateRecognizerSinkFilter.cs" />
-    <Compile Include="Sinks\TokenRangeSinkFilter.cs" />
-    <Compile Include="Sinks\TokenTypeSinkFilter.cs" />
     <Compile Include="Support\AbstractSet.cs" />
     <Compile Include="Support\StringExtensions.cs" />
-    <Compile Include="Th\ThaiAnalyzer.cs" />
-    <Compile Include="Th\ThaiWordFilter.cs" />
     <Compile Include="Util\AbstractAnalysisFactory.cs" />
     <Compile Include="Util\AnalysisSPILoader.cs" />
     <Compile Include="Util\CharacterUtils.cs" />
     <Compile Include="Util\CharArrayMap.cs" />
     <Compile Include="Util\CharArraySet.cs" />
     <Compile Include="Util\CharTokenizer.cs" />
+    <Compile Include="Util\FilteringTokenFilter.cs" />
+    <Compile Include="Util\IMultiTermAwareComponent.cs" />
     <Compile Include="Util\IResourceLoader.cs" />
+    <Compile Include="Util\IResourceLoaderAware.cs" />
     <Compile Include="Util\StopwordAnalyzerBase.cs" />
+    <Compile Include="Util\TokenFilterFactory.cs" />
     <Compile Include="Util\TokenizerFactory.cs" />
     <Compile Include="Util\WordlistLoader.cs" />
     <Compile Include="WordlistLoader.cs" />
@@ -227,9 +150,32 @@
     </ProjectReference>
   </ItemGroup>
   <ItemGroup>
-    <None Include="Compound\Hyphenation\hyphenation.dtd" />
     <None Include="Lucene.Net.snk" />
   </ItemGroup>
+  <ItemGroup>
+    <Folder Include="BR\" />
+    <Folder Include="CJK\" />
+    <Folder Include="Cn\" />
+    <Folder Include="Compound\Hyphenation\" />
+    <Folder Include="Cz\" />
+    <Folder Include="De\" />
+    <Folder Include="El\" />
+    <Folder Include="Fa\" />
+    <Folder Include="Fr\" />
+    <Folder Include="Hunspell\" />
+    <Folder Include="Miscellaneous\" />
+    <Folder Include="NGram\" />
+    <Folder Include="Nl\" />
+    <Folder Include="Payloads\" />
+    <Folder Include="Position\" />
+    <Folder Include="Query\" />
+    <Folder Include="Reverse\" />
+    <Folder Include="Ru\" />
+    <Folder Include="Shingle\Codec\" />
+    <Folder Include="Shingle\Matrix\" />
+    <Folder Include="Sinks\" />
+    <Folder Include="Th\" />
+  </ItemGroup>
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
        Other similar extension points exist, see Microsoft.Common.targets.

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/LetterTokenizer.cs b/src/contrib/Analyzers/Core/LetterTokenizer.cs
index 669d8dc..a4e4938 100644
--- a/src/contrib/Analyzers/Core/LetterTokenizer.cs
+++ b/src/contrib/Analyzers/Core/LetterTokenizer.cs
@@ -10,12 +10,12 @@ namespace Lucene.Net.Analysis.Core
 {
     public class LetterTokenizer : CharTokenizer
     {
-        public LetterTokenizer(Version matchVersion, TextReader input)
+        public LetterTokenizer(Version? matchVersion, TextReader input)
             : base(matchVersion, input)
         {
         }
 
-        public LetterTokenizer(Version matchVersion, AttributeFactory factory, TextReader input)
+        public LetterTokenizer(Version? matchVersion, AttributeFactory factory, TextReader input)
             : base(matchVersion, factory, input)
         {
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/LowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/LowerCaseFilter.cs b/src/contrib/Analyzers/Core/LowerCaseFilter.cs
new file mode 100644
index 0000000..d0157f5
--- /dev/null
+++ b/src/contrib/Analyzers/Core/LowerCaseFilter.cs
@@ -0,0 +1,34 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class LowerCaseFilter : TokenFilter
+    {
+        private readonly CharacterUtils charUtils;
+        private readonly ICharTermAttribute termAtt; // = addAttribute(CharTermAttribute.class);
+
+        public LowerCaseFilter(Version? matchVersion, TokenStream input)
+            : base(input)
+        {
+            charUtils = CharacterUtils.GetInstance(matchVersion);
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                charUtils.ToLowerCase(termAtt.Buffer, 0, termAtt.Length);
+                return true;
+            }
+            else
+                return false;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/LowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/LowerCaseFilterFactory.cs b/src/contrib/Analyzers/Core/LowerCaseFilterFactory.cs
new file mode 100644
index 0000000..6ea42e9
--- /dev/null
+++ b/src/contrib/Analyzers/Core/LowerCaseFilterFactory.cs
@@ -0,0 +1,31 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public class LowerCaseFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+    {
+        public LowerCaseFilterFactory(IDictionary<String, String> args)
+            : base(args)
+        {
+            AssureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+        
+        public override TokenStream Create(TokenStream input)
+        {
+            return new LowerCaseFilter(luceneMatchVersion, input);
+        }
+
+        public AbstractAnalysisFactory MultiTermComponent
+        {
+            get { return this; }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/LowerCaseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/LowerCaseTokenizer.cs b/src/contrib/Analyzers/Core/LowerCaseTokenizer.cs
new file mode 100644
index 0000000..34d4a23
--- /dev/null
+++ b/src/contrib/Analyzers/Core/LowerCaseTokenizer.cs
@@ -0,0 +1,27 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class LowerCaseTokenizer : LetterTokenizer
+    {
+        public LowerCaseTokenizer(Version? matchVersion, TextReader input)
+            : base(matchVersion, input)
+        {
+        }
+
+        public LowerCaseTokenizer(Version? matchVersion, AttributeFactory factory, TextReader input)
+            : base(matchVersion, factory, input)
+        {
+        }
+
+        protected override int Normalize(int c)
+        {
+            return (int)char.ToLower((char)c);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/LowerCaseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/LowerCaseTokenizerFactory.cs b/src/contrib/Analyzers/Core/LowerCaseTokenizerFactory.cs
new file mode 100644
index 0000000..316f775
--- /dev/null
+++ b/src/contrib/Analyzers/Core/LowerCaseTokenizerFactory.cs
@@ -0,0 +1,32 @@
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public class LowerCaseTokenizerFactory : TokenizerFactory, IMultiTermAwareComponent
+    {
+        public LowerCaseTokenizerFactory(IDictionary<String, String> args)
+            : base(args)
+        {
+            AssureMatchVersion();
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public override Tokenizer Create(Net.Util.AttributeSource.AttributeFactory factory, System.IO.TextReader input)
+        {
+            return new LowerCaseTokenizer(luceneMatchVersion, factory, input);
+        }
+        
+        public AbstractAnalysisFactory MultiTermComponent
+        {
+            get { return new LowerCaseFilterFactory(new HashMap<String, String>(OriginalArgs)); }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/SimpleAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/SimpleAnalyzer.cs b/src/contrib/Analyzers/Core/SimpleAnalyzer.cs
new file mode 100644
index 0000000..2b2b97d
--- /dev/null
+++ b/src/contrib/Analyzers/Core/SimpleAnalyzer.cs
@@ -0,0 +1,23 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class SimpleAnalyzer : Analyzer
+    {
+        private readonly Version? matchVersion;
+
+        public SimpleAnalyzer(Version? matchVersion)
+        {
+            this.matchVersion = matchVersion;
+        }
+        
+        public override Analyzer.TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
+        {
+            return new TokenStreamComponents(new LowerCaseTokenizer(matchVersion, reader));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/StopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/StopAnalyzer.cs b/src/contrib/Analyzers/Core/StopAnalyzer.cs
new file mode 100644
index 0000000..ed41f02
--- /dev/null
+++ b/src/contrib/Analyzers/Core/StopAnalyzer.cs
@@ -0,0 +1,55 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class StopAnalyzer : StopwordAnalyzerBase
+    {
+        public static readonly CharArraySet ENGLISH_STOP_WORDS_SET;
+
+        static StopAnalyzer()
+        {
+            string[] stopWords = new string[] {
+              "a", "an", "and", "are", "as", "at", "be", "but", "by",
+              "for", "if", "in", "into", "is", "it",
+              "no", "not", "of", "on", "or", "such",
+              "that", "the", "their", "then", "there", "these",
+              "they", "this", "to", "was", "will", "with"
+            };
+            CharArraySet stopSet = new CharArraySet(Version.LUCENE_CURRENT, stopWords, false);
+            ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
+        }
+
+        public StopAnalyzer(Version? matchVersion)
+            : this(matchVersion, ENGLISH_STOP_WORDS_SET)
+        {
+        }
+
+        public StopAnalyzer(Version? matchVersion, CharArraySet stopWords)
+            : base(matchVersion, stopWords)
+        {
+        }
+
+        public StopAnalyzer(Version? matchVersion, Stream stopwordsFile)
+            : this(matchVersion, LoadStopwordSet(stopwordsFile, matchVersion))
+        {
+        }
+
+        public StopAnalyzer(Version? matchVersion, TextReader stopwords)
+            : this(matchVersion, LoadStopwordSet(stopwords, matchVersion))
+        {
+        }
+
+        public override Analyzer.TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+        {
+            Tokenizer source = new LowerCaseTokenizer(matchVersion, reader);
+            return new TokenStreamComponents(source, new StopFilter(matchVersion,
+                  source, stopwords));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/StopFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/StopFilter.cs b/src/contrib/Analyzers/Core/StopFilter.cs
new file mode 100644
index 0000000..c9a193b
--- /dev/null
+++ b/src/contrib/Analyzers/Core/StopFilter.cs
@@ -0,0 +1,53 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class StopFilter : FilteringTokenFilter
+    {
+        private readonly CharArraySet stopWords;
+        private readonly ICharTermAttribute termAtt; // = addAttribute(CharTermAttribute.class);
+
+        public StopFilter(Version? matchVersion, TokenStream input, CharArraySet stopWords)
+            : base(true, input)
+        {
+            this.stopWords = stopWords;
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
+
+        public static CharArraySet MakeStopSet(Version? matchVersion, params String[] stopWords)
+        {
+            return MakeStopSet(matchVersion, stopWords, false);
+        }
+
+        public static CharArraySet MakeStopSet(Version? matchVersion, List<object> stopWords)
+        {
+            return MakeStopSet(matchVersion, stopWords, false);
+        }
+
+        public static CharArraySet MakeStopSet(Version? matchVersion, String[] stopWords, bool ignoreCase)
+        {
+            CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Length, ignoreCase);
+            stopSet.AddAll(stopWords);
+            return stopSet;
+        }
+
+        public static CharArraySet MakeStopSet(Version? matchVersion, List<object> stopWords, bool ignoreCase)
+        {
+            CharArraySet stopSet = new CharArraySet(matchVersion, stopWords.Count, ignoreCase);
+            stopSet.AddAll(stopWords);
+            return stopSet;
+        }
+
+        protected override bool Accept()
+        {
+            return !stopWords.Contains(termAtt.Buffer, 0, termAtt.Length);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/StopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/StopFilterFactory.cs b/src/contrib/Analyzers/Core/StopFilterFactory.cs
new file mode 100644
index 0000000..907c383
--- /dev/null
+++ b/src/contrib/Analyzers/Core/StopFilterFactory.cs
@@ -0,0 +1,81 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public class StopFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private CharArraySet stopWords;
+        private readonly String stopWordFiles;
+        private readonly String format;
+        private readonly bool ignoreCase;
+        private readonly bool enablePositionIncrements;
+
+        public StopFilterFactory(IDictionary<String, String> args)
+            : base(args)
+        {
+            AssureMatchVersion();
+            stopWordFiles = Get(args, "words");
+            format = Get(args, "format");
+            ignoreCase = GetBoolean(args, "ignoreCase", false);
+            enablePositionIncrements = GetBoolean(args, "enablePositionIncrements", false);
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public void Inform(IResourceLoader loader)
+        {
+            if (stopWordFiles != null)
+            {
+                if ("snowball".EqualsIgnoreCase(format))
+                {
+                    stopWords = GetSnowballWordSet(loader, stopWordFiles, ignoreCase);
+                }
+                else
+                {
+                    stopWords = GetWordSet(loader, stopWordFiles, ignoreCase);
+                }
+            }
+            else
+            {
+                stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+            }
+        }
+
+        public bool IsEnablePositionIncrements
+        {
+            get
+            {
+                return enablePositionIncrements;
+            }
+        }
+
+        public bool IsIgnoreCase
+        {
+            get
+            {
+                return ignoreCase;
+            }
+        }
+
+        public CharArraySet StopWords
+        {
+            get
+            {
+                return stopWords;
+            }
+        }
+        
+        public override TokenStream Create(TokenStream input)
+        {
+            StopFilter stopFilter = new StopFilter(luceneMatchVersion, input, stopWords);
+            stopFilter.EnablePositionIncrements = enablePositionIncrements;
+            return stopFilter;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/TypeTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/TypeTokenFilter.cs b/src/contrib/Analyzers/Core/TypeTokenFilter.cs
new file mode 100644
index 0000000..f6ea7dd
--- /dev/null
+++ b/src/contrib/Analyzers/Core/TypeTokenFilter.cs
@@ -0,0 +1,34 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class TypeTokenFilter : FilteringTokenFilter
+    {
+        private readonly ISet<String> stopTypes;
+        private readonly ITypeAttribute typeAttribute; // = addAttribute(TypeAttribute.class);
+        private readonly bool useWhiteList;
+
+        public TypeTokenFilter(bool enablePositionIncrements, TokenStream input, ISet<String> stopTypes, bool useWhiteList)
+            : base(enablePositionIncrements, input)
+        {
+            this.stopTypes = stopTypes;
+            this.useWhiteList = useWhiteList;
+            typeAttribute = AddAttribute<ITypeAttribute>();
+        }
+
+        public TypeTokenFilter(bool enablePositionIncrements, TokenStream input, ISet<String> stopTypes)
+            : this(enablePositionIncrements, input, stopTypes, false)
+        {
+        }
+
+        protected override bool Accept()
+        {
+            return useWhiteList == stopTypes.Contains(typeAttribute.Type);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/TypeTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/TypeTokenFilterFactory.cs b/src/contrib/Analyzers/Core/TypeTokenFilterFactory.cs
new file mode 100644
index 0000000..1552375
--- /dev/null
+++ b/src/contrib/Analyzers/Core/TypeTokenFilterFactory.cs
@@ -0,0 +1,63 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public class TypeTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private readonly bool useWhitelist;
+        private readonly bool enablePositionIncrements;
+        private readonly String stopTypesFiles;
+        private ISet<String> stopTypes;
+
+        public TypeTokenFilterFactory(IDictionary<String, String> args)
+            : base(args)
+        {
+            stopTypesFiles = Require(args, "types");
+            enablePositionIncrements = GetBoolean(args, "enablePositionIncrements", false);
+            useWhitelist = GetBoolean(args, "useWhitelist", false);
+            if (args.Count > 0)
+            {
+                throw new ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public void Inform(IResourceLoader loader)
+        {
+            IList<String> files = SplitFileNames(stopTypesFiles);
+            if (files.Count > 0)
+            {
+                stopTypes = new HashSet<String>();
+                foreach (String file in files)
+                {
+                    IList<String> typesLines = GetLines(loader, file.Trim());
+                    stopTypes.UnionWith(typesLines);
+                }
+            }
+        }
+
+        public bool IsEnablePositionIncrements
+        {
+            get
+            {
+                return enablePositionIncrements;
+            }
+        }
+
+        public ISet<String> StopTypes
+        {
+            get
+            {
+                return stopTypes;
+            }
+        }
+
+        public override TokenStream Create(TokenStream input)
+        {
+            return new TypeTokenFilter(enablePositionIncrements, input, stopTypes, useWhitelist);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/WhitespaceAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/WhitespaceAnalyzer.cs b/src/contrib/Analyzers/Core/WhitespaceAnalyzer.cs
new file mode 100644
index 0000000..180329e
--- /dev/null
+++ b/src/contrib/Analyzers/Core/WhitespaceAnalyzer.cs
@@ -0,0 +1,23 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    public sealed class WhitespaceAnalyzer : Analyzer
+    {
+        private readonly Version? matchVersion;
+
+        public WhitespaceAnalyzer(Version? matchVersion)
+        {
+            this.matchVersion = matchVersion;
+        }
+        
+        public override Analyzer.TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
+        {
+            return new TokenStreamComponents(new WhitespaceTokenizer(matchVersion, reader));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/WhitespaceTokenizer.cs b/src/contrib/Analyzers/Core/WhitespaceTokenizer.cs
new file mode 100644
index 0000000..87909a2
--- /dev/null
+++ b/src/contrib/Analyzers/Core/WhitespaceTokenizer.cs
@@ -0,0 +1,28 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Core
+{
+    /// <summary>
+    /// A <see cref="CharTokenizer"/> whose token characters are exactly those that
+    /// <see cref="char.IsWhiteSpace(char)"/> does not classify as whitespace.
+    /// </summary>
+    public sealed class WhitespaceTokenizer : CharTokenizer
+    {
+        /// <summary>Creates a tokenizer over <paramref name="input"/>; both arguments are forwarded to the base class.</summary>
+        public WhitespaceTokenizer(Version? matchVersion, TextReader input)
+            : base(matchVersion, input)
+        {
+        }
+
+        /// <summary>Creates a tokenizer over <paramref name="input"/> using the given attribute factory; all arguments are forwarded to the base class.</summary>
+        public WhitespaceTokenizer(Version? matchVersion, AttributeFactory factory, TextReader input)
+            : base(matchVersion, factory, input)
+        {
+        }
+
+        /// <summary>
+        /// Decides whether <paramref name="c"/> belongs to a token.
+        /// </summary>
+        /// <param name="c">Character value to classify.</param>
+        /// <returns><c>false</c> for whitespace, <c>true</c> for everything else.</returns>
+        protected override bool IsTokenChar(int c)
+        {
+            // A value above char.MaxValue does not fit in one UTF-16 code unit. Casting it to char
+            // would silently wrap (0x10020 becomes ' ') and misreport it as whitespace.
+            // char.IsWhiteSpace only accepts a 16-bit char, so nothing above that range can be whitespace.
+            if (c > char.MaxValue)
+            {
+                return true;
+            }
+
+            return !char.IsWhiteSpace((char)c);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Core/WhitespaceTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Core/WhitespaceTokenizerFactory.cs b/src/contrib/Analyzers/Core/WhitespaceTokenizerFactory.cs
new file mode 100644
index 0000000..378d30f
--- /dev/null
+++ b/src/contrib/Analyzers/Core/WhitespaceTokenizerFactory.cs
@@ -0,0 +1,26 @@
+using Lucene.Net.Analysis.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Core
+{
+    /// <summary>
+    /// Factory that produces <see cref="WhitespaceTokenizer"/> instances.
+    /// </summary>
+    public class WhitespaceTokenizerFactory : TokenizerFactory
+    {
+        /// <summary>
+        /// Creates the factory and rejects any argument still present in <paramref name="args"/>
+        /// once the base constructor has run.
+        /// </summary>
+        /// <param name="args">Configuration arguments; must be empty by the end of construction.</param>
+        /// <exception cref="ArgumentException">Entries remain in <paramref name="args"/>.</exception>
+        public WhitespaceTokenizerFactory(IDictionary<String, String> args)
+            : base(args)
+        {
+            AssureMatchVersion();
+            if (args.Count > 0)
+            {
+                // Concatenating the dictionary itself would only print its type name in .NET,
+                // so spell out the leftover entries to make the message actionable.
+                string leftovers = string.Join(", ", args.Select(kvp => kvp.Key + "=" + kvp.Value).ToArray());
+                throw new ArgumentException("Unknown parameters: {" + leftovers + "}");
+            }
+        }
+
+        /// <summary>
+        /// Creates a <see cref="WhitespaceTokenizer"/> over <paramref name="input"/> using this factory's match version.
+        /// </summary>
+        public override Tokenizer Create(Net.Util.AttributeSource.AttributeFactory factory, System.IO.TextReader input)
+        {
+            return new WhitespaceTokenizer(luceneMatchVersion, factory, input);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Support/AbstractSet.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Support/AbstractSet.cs b/src/contrib/Analyzers/Support/AbstractSet.cs
index f732d08..a9249d5 100644
--- a/src/contrib/Analyzers/Support/AbstractSet.cs
+++ b/src/contrib/Analyzers/Support/AbstractSet.cs
@@ -5,7 +5,7 @@ using System.Text;
 
 namespace Lucene.Net.Analysis.Support
 {
-    public class AbstractSet<T> : ISet<T>
+    public abstract class AbstractSet<T> : ISet<T>
     {
         public virtual bool Add(T item)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/AbstractAnalysisFactory.cs b/src/contrib/Analyzers/Util/AbstractAnalysisFactory.cs
index ab0b117..b815eb6 100644
--- a/src/contrib/Analyzers/Util/AbstractAnalysisFactory.cs
+++ b/src/contrib/Analyzers/Util/AbstractAnalysisFactory.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Support;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
@@ -15,7 +16,7 @@ namespace Lucene.Net.Analysis.Util
 
         private readonly IDictionary<string, string> originalArgs;
 
-        protected readonly Lucene.Net.Util.Version luceneMatchVersion;
+        protected readonly Lucene.Net.Util.Version? luceneMatchVersion;
 
         private bool isExplicitLuceneMatchVersion = false;
 
@@ -23,7 +24,7 @@ namespace Lucene.Net.Analysis.Util
         {
             originalArgs = new HashMap<String, String>(args);
             String version = Get(args, LUCENE_MATCH_VERSION_PARAM);
-            luceneMatchVersion = version == null ? (Lucene.Net.Util.Version)null : version.ParseLeniently();
+            luceneMatchVersion = version == null ? (Lucene.Net.Util.Version?)null : version.ParseLeniently();
             args.Remove(CLASS_NAME);  // consume the class arg
         }
 
@@ -44,7 +45,7 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
-        public Lucene.Net.Util.Version LuceneMatchVersion
+        public Lucene.Net.Util.Version? LuceneMatchVersion
         {
             get
             {
@@ -274,7 +275,7 @@ namespace Lucene.Net.Analysis.Util
                 foreach (String file in files)
                 {
                     IList<String> wlist = GetLines(loader, file.Trim());
-                    words.UnionWith(StopFilter.MakeStopSet(luceneMatchVersion, wlist,
+                    words.UnionWith(StopFilter.MakeStopSet(luceneMatchVersion, wlist.Cast<object>().ToList(),
                         ignoreCase));
                 }
             }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/CharArrayMap.cs b/src/contrib/Analyzers/Util/CharArrayMap.cs
index e124451..fb7ee13 100644
--- a/src/contrib/Analyzers/Util/CharArrayMap.cs
+++ b/src/contrib/Analyzers/Util/CharArrayMap.cs
@@ -16,11 +16,11 @@ namespace Lucene.Net.Analysis.Util
         private readonly CharacterUtils charUtils;
         private bool ignoreCase;
         private int count;
-        internal readonly Lucene.Net.Util.Version matchVersion; // package private because used in CharArraySet
+        internal readonly Lucene.Net.Util.Version? matchVersion; // package private because used in CharArraySet
         internal char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
         internal V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
 
-        public CharArrayMap(Lucene.Net.Util.Version matchVersion, int startSize, bool ignoreCase)
+        public CharArrayMap(Lucene.Net.Util.Version? matchVersion, int startSize, bool ignoreCase)
         {
             this.ignoreCase = ignoreCase;
             int size = INIT_SIZE;
@@ -28,11 +28,11 @@ namespace Lucene.Net.Analysis.Util
                 size <<= 1;
             keys = new char[size][];
             values = new V[size];
-            this.charUtils = CharacterUtils.GetInstance(matchVersion);
+            this.charUtils = CharacterUtils.GetInstance(matchVersion.GetValueOrDefault());
             this.matchVersion = matchVersion;
         }
 
-        public CharArrayMap(Lucene.Net.Util.Version matchVersion, IDictionary<object, V> c, bool ignoreCase)
+        public CharArrayMap(Lucene.Net.Util.Version? matchVersion, IDictionary<object, V> c, bool ignoreCase)
             : this(matchVersion, c.Count, ignoreCase)
         {
             foreach (var kvp in c)
@@ -367,7 +367,7 @@ namespace Lucene.Net.Analysis.Util
                 if (keySet == null)
                 {
                     // prevent adding of entries
-                    keySet = new AnonymousCharArraySet(this);
+                    keySet = new AnonymousCharArraySet(new CharArrayMap<object>(matchVersion, this.ToDictionary(i => (object)i.Key, i => (object)i.Value), ignoreCase));
                 }
 
                 return keySet;
@@ -376,7 +376,7 @@ namespace Lucene.Net.Analysis.Util
 
         private sealed class AnonymousCharArraySet : CharArraySet
         {
-            public AnonymousCharArraySet(CharArrayMap<V> map)
+            public AnonymousCharArraySet(CharArrayMap<object> map)
                 : base(map)
             {
             }
@@ -581,6 +581,65 @@ namespace Lucene.Net.Analysis.Util
                 parent.Clear();
             }
         }
+
+        /// <summary>
+        /// Stores <paramref name="value"/> under <paramref name="key"/> by forwarding to <c>Put</c>;
+        /// whatever <c>Put</c> returns is discarded.
+        /// </summary>
+        public void Add(object key, V value)
+        {
+            Put(key, value);
+        }
+
+        /// <summary>
+        /// Removes <paramref name="key"/> through the class's own <c>Remove</c> method.
+        /// </summary>
+        /// <returns><c>true</c> only when the key was present before the removal.</returns>
+        bool IDictionary<object, V>.Remove(object key)
+        {
+            // Capture presence first: reporting success unconditionally would claim
+            // a removal even for keys that were never in the map.
+            bool wasPresent = ContainsKey(key);
+            Remove(key);
+            return wasPresent;
+        }
+
+        /// <summary>
+        /// Looks up <paramref name="key"/> without throwing when it is absent.
+        /// </summary>
+        /// <param name="key">Key to look up.</param>
+        /// <param name="value">The stored value when found; otherwise <c>default(V)</c>.</param>
+        /// <returns><c>true</c> when the key is present, regardless of the stored value.</returns>
+        public bool TryGetValue(object key, out V value)
+        {
+            // Presence must come from the key. Testing the value against null reports every
+            // lookup as a hit for value-type V and hides entries whose stored value is null.
+            if (ContainsKey(key))
+            {
+                value = Get(key);
+                return true;
+            }
+
+            value = default(V);
+            return false;
+        }
+
+        /// <summary>
+        /// Gets a read-only snapshot of the values of the entries this map enumerates.
+        /// </summary>
+        public ICollection<V> Values
+        {
+            get
+            {
+                // The backing array is sized to the table capacity, so handing it out directly
+                // would expose unused slots. Collect values from the map's enumeration instead.
+                var snapshot = new List<V>();
+                foreach (KeyValuePair<object, V> entry in this)
+                {
+                    snapshot.Add(entry.Value);
+                }
+
+                return snapshot.AsReadOnly();
+            }
+        }
+
+        /// <summary>
+        /// Stores the pair's value under the pair's key by forwarding to <c>Put</c>;
+        /// whatever <c>Put</c> returns is discarded.
+        /// </summary>
+        public void Add(KeyValuePair<object, V> item)
+        {
+            Put(item.Key, item.Value);
+        }
+
+        /// <summary>
+        /// Determines whether the map holds the given key with the given value.
+        /// </summary>
+        /// <returns><c>true</c> when the key is present and its stored value equals <c>item.Value</c>.</returns>
+        public bool Contains(KeyValuePair<object, V> item)
+        {
+            // A pair matches only when both halves match; checking the key alone would
+            // report pairs whose value differs from the stored one.
+            return ContainsKey(item.Key)
+                && EqualityComparer<V>.Default.Equals(Get(item.Key), item.Value);
+        }
+
+        /// <summary>
+        /// Copies the map's entries into <paramref name="array"/>, starting at <paramref name="arrayIndex"/>.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative.</exception>
+        /// <exception cref="ArgumentException">The destination has too little room after <paramref name="arrayIndex"/>.</exception>
+        public void CopyTo(KeyValuePair<object, V>[] array, int arrayIndex)
+        {
+            if (array == null)
+            {
+                throw new ArgumentNullException("array");
+            }
+
+            if (arrayIndex < 0)
+            {
+                throw new ArgumentOutOfRangeException("arrayIndex");
+            }
+
+            // Fill the buffer with foreach rather than new List<T>(this): that constructor
+            // would call back into this very method and recurse.
+            var buffer = new List<KeyValuePair<object, V>>();
+            foreach (KeyValuePair<object, V> entry in this)
+            {
+                buffer.Add(entry);
+            }
+
+            // List<T>.CopyTo throws ArgumentException when the destination is too small.
+            buffer.CopyTo(array, arrayIndex);
+        }
+
+        /// <summary>
+        /// Always reports <c>false</c>.
+        /// </summary>
+        public bool IsReadOnly
+        {
+            get { return false; }
+        }
+
+        /// <summary>
+        /// Removes the entry stored under the pair's key; the pair's value is not consulted.
+        /// </summary>
+        /// <returns><c>true</c> only when the key was present before the removal.</returns>
+        public bool Remove(KeyValuePair<object, V> item)
+        {
+            // Capture presence first so the result reflects whether anything was there to remove.
+            bool wasPresent = ContainsKey(item.Key);
+            Remove(item.Key);
+            return wasPresent;
+        }
+
+        /// <summary>
+        /// Returns the enumerator of the set produced by <c>GetEntrySet()</c>.
+        /// </summary>
+        public IEnumerator<KeyValuePair<object, V>> GetEnumerator()
+        {
+            return GetEntrySet().GetEnumerator();
+        }
+
+        /// <summary>
+        /// Non-generic enumeration; delegates to the generic <c>GetEnumerator()</c>.
+        /// </summary>
+        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
+        {
+            return GetEnumerator();
+        }
     }
 
     // .NET Port: non-generic static clas to hold nested types and static methods
@@ -597,7 +656,7 @@ namespace Lucene.Net.Analysis.Util
             return new UnmodifiableCharArrayMap<V>(map);
         }
 
-        public static CharArrayMap<V> Copy<V>(Lucene.Net.Util.Version matchVersion, IDictionary<object, V> map)
+        public static CharArrayMap<V> Copy<V>(Lucene.Net.Util.Version? matchVersion, IDictionary<object, V> map)
         {
             if (map == CharArrayMap<V>.EMPTY_MAP)
                 return EmptyMap<V>();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/CharArraySet.cs b/src/contrib/Analyzers/Util/CharArraySet.cs
index 522bcaa..23eb0ea 100644
--- a/src/contrib/Analyzers/Util/CharArraySet.cs
+++ b/src/contrib/Analyzers/Util/CharArraySet.cs
@@ -14,12 +14,12 @@ namespace Lucene.Net.Analysis.Util
 
         private readonly CharArrayMap<object> map;
 
-        public CharArraySet(Lucene.Net.Util.Version matchVersion, int startSize, bool ignoreCase)
+        public CharArraySet(Lucene.Net.Util.Version? matchVersion, int startSize, bool ignoreCase)
             : this(new CharArrayMap<Object>(matchVersion, startSize, ignoreCase))
         {
         }
 
-        public CharArraySet(Lucene.Net.Util.Version matchVersion, ICollection<object> c, bool ignoreCase)
+        public CharArraySet(Lucene.Net.Util.Version? matchVersion, ICollection<object> c, bool ignoreCase)
             : this(matchVersion, c.Count, ignoreCase)
         {
             AddAll(c);
@@ -55,17 +55,17 @@ namespace Lucene.Net.Analysis.Util
             return map.Put(o, PLACEHOLDER) == null;
         }
 
-        public bool Add(ICharSequence text)
+        public virtual bool Add(ICharSequence text)
         {
             return map.Put(text, PLACEHOLDER) == null;
         }
 
-        public bool Add(string text)
+        public virtual bool Add(string text)
         {
             return map.Put(text, PLACEHOLDER) == null;
         }
 
-        public bool Add(char[] text)
+        public virtual bool Add(char[] text)
         {
             return map.Put(text, PLACEHOLDER) == null;
         }
@@ -86,7 +86,7 @@ namespace Lucene.Net.Analysis.Util
             return new CharArraySet(CharArrayMap.UnmodifiableMap(set.map));
         }
 
-        public static CharArraySet Copy(Lucene.Net.Util.Version matchVersion, ICollection<object> set)
+        public static CharArraySet Copy(Lucene.Net.Util.Version? matchVersion, ICollection<object> set)
         {
             if (set == EMPTY_SET)
                 return EMPTY_SET;
@@ -121,5 +121,10 @@ namespace Lucene.Net.Analysis.Util
             }
             return sb.Append(']').ToString();
         }
+
+        /// <summary>
+        /// Not implemented: every call throws <see cref="NotImplementedException"/>.
+        /// </summary>
+        /// <exception cref="NotImplementedException">Always.</exception>
+        public override bool Remove(object item)
+        {
+            throw new NotImplementedException();
+        }
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/CharTokenizer.cs b/src/contrib/Analyzers/Util/CharTokenizer.cs
index b0029fa..0a31781 100644
--- a/src/contrib/Analyzers/Util/CharTokenizer.cs
+++ b/src/contrib/Analyzers/Util/CharTokenizer.cs
@@ -11,7 +11,7 @@ namespace Lucene.Net.Analysis.Util
 {
     public abstract class CharTokenizer : Tokenizer
     {
-        public CharTokenizer(Version matchVersion, TextReader input)
+        public CharTokenizer(Version? matchVersion, TextReader input)
             : base(input)
         {
             charUtils = CharacterUtils.GetInstance(matchVersion);
@@ -19,7 +19,7 @@ namespace Lucene.Net.Analysis.Util
             offsetAtt = AddAttribute<IOffsetAttribute>();
         }
 
-        public CharTokenizer(Version matchVersion, AttributeFactory factory, TextReader input)
+        public CharTokenizer(Version? matchVersion, AttributeFactory factory, TextReader input)
             : base(factory, input)
         {
             charUtils = CharacterUtils.GetInstance(matchVersion);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/CharacterUtils.cs b/src/contrib/Analyzers/Util/CharacterUtils.cs
index 223d8f0..5fdc78f 100644
--- a/src/contrib/Analyzers/Util/CharacterUtils.cs
+++ b/src/contrib/Analyzers/Util/CharacterUtils.cs
@@ -16,7 +16,7 @@ namespace Lucene.Net.Analysis.Util
         // .NET Port: we never changed how we handle strings and chars :-)
         private static readonly DotNetCharacterUtils DOTNET = new DotNetCharacterUtils();
 
-        public static CharacterUtils GetInstance(Lucene.Net.Util.Version matchVersion)
+        public static CharacterUtils GetInstance(Lucene.Net.Util.Version? matchVersion)
         {
             //return matchVersion.OnOrAfter(Lucene.Net.Util.Version.LUCENE_31) ? JAVA_5 : JAVA_4;
             return DOTNET;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/FilteringTokenFilter.cs b/src/contrib/Analyzers/Util/FilteringTokenFilter.cs
new file mode 100644
index 0000000..d06af92
--- /dev/null
+++ b/src/contrib/Analyzers/Util/FilteringTokenFilter.cs
@@ -0,0 +1,77 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Util
+{
+    /// <summary>
+    /// Base class for token filters that drop tokens: subclasses decide per token via
+    /// <see cref="Accept"/>, and this class fixes up position increments for what remains.
+    /// </summary>
+    public abstract class FilteringTokenFilter : TokenFilter
+    {
+        private readonly IPositionIncrementAttribute posIncrAtt;
+        private bool enablePositionIncrements; // always assigned by the constructor
+        private bool first = true; // consulted only when position increments are disabled
+
+        /// <summary>
+        /// Wraps <paramref name="input"/> and registers the position-increment attribute.
+        /// </summary>
+        /// <param name="enablePositionIncrements">Initial value of <see cref="EnablePositionIncrements"/>.</param>
+        /// <param name="input">Stream whose tokens are filtered.</param>
+        public FilteringTokenFilter(bool enablePositionIncrements, TokenStream input)
+            : base(input)
+        {
+            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+            this.enablePositionIncrements = enablePositionIncrements;
+        }
+
+        /// <summary>Returns true when the current token should be kept.</summary>
+        protected abstract bool Accept();
+
+        /// <summary>
+        /// Advances to the next accepted token.
+        /// </summary>
+        /// <returns><c>false</c> once the wrapped stream is exhausted.</returns>
+        public override bool IncrementToken()
+        {
+            return enablePositionIncrements ? AdvanceKeepingGaps() : AdvanceIgnoringGaps();
+        }
+
+        // Adds the increments of all rejected tokens onto the next accepted token.
+        private bool AdvanceKeepingGaps()
+        {
+            int skipped = 0;
+            while (input.IncrementToken())
+            {
+                if (!Accept())
+                {
+                    skipped += posIncrAtt.PositionIncrement;
+                    continue;
+                }
+
+                if (skipped != 0)
+                {
+                    posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skipped;
+                }
+                return true;
+            }
+
+            // wrapped stream exhausted
+            return false;
+        }
+
+        // Discards rejected tokens without accounting for them; only the very first
+        // accepted token is adjusted, because an increment of 0 is illegal there.
+        private bool AdvanceIgnoringGaps()
+        {
+            while (input.IncrementToken())
+            {
+                if (!Accept())
+                {
+                    continue;
+                }
+
+                if (first)
+                {
+                    if (posIncrAtt.PositionIncrement == 0)
+                    {
+                        posIncrAtt.PositionIncrement = 1;
+                    }
+                    first = false;
+                }
+                return true;
+            }
+
+            // wrapped stream exhausted
+            return false;
+        }
+
+        /// <summary>Resets the base stream and re-arms the first-token check.</summary>
+        public override void Reset()
+        {
+            base.Reset();
+            first = true;
+        }
+
+        /// <summary>
+        /// Whether rejected tokens contribute their position increment to the next accepted token.
+        /// </summary>
+        public bool EnablePositionIncrements
+        {
+            get
+            {
+                return enablePositionIncrements;
+            }
+            set
+            {
+                enablePositionIncrements = value;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/IMultiTermAwareComponent.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/IMultiTermAwareComponent.cs b/src/contrib/Analyzers/Util/IMultiTermAwareComponent.cs
new file mode 100644
index 0000000..3e256c3
--- /dev/null
+++ b/src/contrib/Analyzers/Util/IMultiTermAwareComponent.cs
@@ -0,0 +1,12 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Util
+{
+    /// <summary>
+    /// Contract exposing a single read-only property that yields an <see cref="AbstractAnalysisFactory"/>.
+    /// </summary>
+    public interface IMultiTermAwareComponent
+    {
+        // NOTE(review): presumably the factory to use when analysing multi-term queries
+        // (wildcard, range, ...) - confirm against the implementers.
+        AbstractAnalysisFactory MultiTermComponent { get; }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/IResourceLoaderAware.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/IResourceLoaderAware.cs b/src/contrib/Analyzers/Util/IResourceLoaderAware.cs
new file mode 100644
index 0000000..8ff35bf
--- /dev/null
+++ b/src/contrib/Analyzers/Util/IResourceLoaderAware.cs
@@ -0,0 +1,12 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Util
+{
+    /// <summary>
+    /// Contract for components that accept an <see cref="IResourceLoader"/> through a callback.
+    /// </summary>
+    public interface IResourceLoaderAware
+    {
+        // NOTE(review): presumably invoked once after construction so the component can
+        // load external resources - confirm against the callers.
+        void Inform(IResourceLoader loader);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/StopwordAnalyzerBase.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/StopwordAnalyzerBase.cs b/src/contrib/Analyzers/Util/StopwordAnalyzerBase.cs
index f6e9194..a041e65 100644
--- a/src/contrib/Analyzers/Util/StopwordAnalyzerBase.cs
+++ b/src/contrib/Analyzers/Util/StopwordAnalyzerBase.cs
@@ -12,7 +12,7 @@ namespace Lucene.Net.Analysis.Util
     {
         protected readonly CharArraySet stopwords;
 
-        protected readonly Version matchVersion;
+        protected readonly Version? matchVersion;
 
         public CharArraySet StopwordSet
         {
@@ -22,7 +22,7 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
-        protected StopwordAnalyzerBase(Version version, CharArraySet stopwords)
+        protected StopwordAnalyzerBase(Version? version, CharArraySet stopwords)
         {
             matchVersion = version;
             // analyzers should use char array set for stopwords!
@@ -30,7 +30,7 @@ namespace Lucene.Net.Analysis.Util
                 .UnmodifiableSet(CharArraySet.Copy(version, stopwords));
         }
 
-        protected StopwordAnalyzerBase(Version version)
+        protected StopwordAnalyzerBase(Version? version)
             : this(version, null)
         {
         }
@@ -49,7 +49,7 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
-        protected static CharArraySet LoadStopwordSet(Stream stopwords, Version matchVersion)
+        protected static CharArraySet LoadStopwordSet(Stream stopwords, Version? matchVersion)
         {
             TextReader reader = null;
             try
@@ -63,7 +63,7 @@ namespace Lucene.Net.Analysis.Util
             }
         }
 
-        protected static CharArraySet LoadStopwordSet(TextReader stopwords, Version matchVersion)
+        protected static CharArraySet LoadStopwordSet(TextReader stopwords, Version? matchVersion)
         {
             try
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/TokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/TokenFilterFactory.cs b/src/contrib/Analyzers/Util/TokenFilterFactory.cs
new file mode 100644
index 0000000..fcb674e
--- /dev/null
+++ b/src/contrib/Analyzers/Util/TokenFilterFactory.cs
@@ -0,0 +1,44 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Util
+{
+    /// <summary>
+    /// Base class for factories that wrap one <see cref="TokenStream"/> in another.
+    /// The static members forward to a single shared <c>AnalysisSPILoader</c>.
+    /// </summary>
+    public abstract class TokenFilterFactory : AbstractAnalysisFactory
+    {
+        // Shared loader, constructed from this type and the two strings listed below.
+        private static readonly AnalysisSPILoader<TokenFilterFactory> loader =
+            new AnalysisSPILoader<TokenFilterFactory>(
+                typeof(TokenFilterFactory),
+                new[] { "TokenFilterFactory", "FilterFactory" });
+
+        /// <summary>Passes <paramref name="args"/> straight to the base class.</summary>
+        protected TokenFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+        }
+
+        /// <summary>Gets the loader's <c>AvailableServices</c> collection.</summary>
+        public static ICollection<string> AvailableTokenFilters
+        {
+            get { return loader.AvailableServices; }
+        }
+
+        /// <summary>Asks the loader for a new instance registered under <paramref name="name"/>.</summary>
+        public static TokenFilterFactory ForName(string name, IDictionary<string, string> args)
+        {
+            TokenFilterFactory created = loader.NewInstance(name, args);
+            return created;
+        }
+
+        /// <summary>Asks the loader for the type registered under <paramref name="name"/>.</summary>
+        public static Type LookupClass(string name)
+        {
+            Type resolved = loader.LookupClass(name);
+            return resolved;
+        }
+
+        /// <summary>Tells the loader to reload.</summary>
+        public static void ReloadTokenFilters()
+        {
+            loader.Reload();
+        }
+
+        /// <summary>Wraps <paramref name="input"/> in the filter this factory produces.</summary>
+        public abstract TokenStream Create(TokenStream input);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/98e877d5/src/contrib/Analyzers/Util/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Util/WordlistLoader.cs b/src/contrib/Analyzers/Util/WordlistLoader.cs
index e78ea9b..c2430d0 100644
--- a/src/contrib/Analyzers/Util/WordlistLoader.cs
+++ b/src/contrib/Analyzers/Util/WordlistLoader.cs
@@ -31,12 +31,12 @@ namespace Lucene.Net.Analysis.Util
             return result;
         }
 
-        public static CharArraySet GetWordSet(TextReader reader, Lucene.Net.Util.Version matchVersion)
+        public static CharArraySet GetWordSet(TextReader reader, Lucene.Net.Util.Version? matchVersion)
         {
             return GetWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
         }
 
-        public static CharArraySet GetWordSet(TextReader reader, String comment, Lucene.Net.Util.Version matchVersion)
+        public static CharArraySet GetWordSet(TextReader reader, String comment, Lucene.Net.Util.Version? matchVersion)
         {
             return GetWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
         }


Mime
View raw message