lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [37/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed NGram in Git
Date Sat, 04 Feb 2017 20:32:56 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs
new file mode 100644
index 0000000..4ccecfa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/EdgeNGramTokenizerTest.cs
@@ -0,0 +1,278 @@
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+using Reader = System.IO.TextReader;
+using Version = Lucene.Net.Util.LuceneVersion;
+
+namespace Lucene.Net.Analysis.NGram
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Tests <see cref="EdgeNGramTokenizer"/> for correctness.
+    /// </summary>
+    public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
+    {
+        private StringReader input;
+
+        public override void SetUp()
+        {
+            base.SetUp(); // base fixture set-up runs first
+            this.input = new StringReader("abcde"); // default text handed to the tokenizers under test
+        }
+
+        /// <summary>
+        /// Gram bounds of (0, 0) must make the <see cref="EdgeNGramTokenizer"/> constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 0, 0);
+            });
+        }
+
+        /// <summary>
+        /// A minimum (2) larger than the maximum (1) must make the constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput2()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
+            });
+        }
+
+        /// <summary>
+        /// A negative minimum (-1) must make the constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput3()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, -1, 2);
+            });
+        }
+
+        [Test]
+        public virtual void TestFrontUnigram()
+        {
+            // min = max = 1 over "abcde": the only expected term is "a".
+            AssertTokenStreamContents(new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1), new[] { "a" }, new[] { 0 }, new[] { 1 }, 5);
+        }
+
+        [Test]
+        public virtual void TestBackUnigram()
+        {
+#pragma warning disable 612, 618
+            Tokenizer backEdge = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 1);
+#pragma warning restore 612, 618
+            AssertTokenStreamContents(backEdge, new[] { "e" }, new[] { 4 }, new[] { 5 }, 5); // only the last character of "abcde"
+        }
+
+        [Test]
+        public virtual void TestOversizedNgrams()
+        {
+            // A gram size of 6 is longer than the 5 characters of "abcde", so no terms are expected.
+            AssertTokenStreamContents(new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 6, 6), new string[0], new int[0], new int[0], 5);
+        }
+
+        [Test]
+        public virtual void TestFrontRangeOfNgrams()
+        {
+            // min = 1, max = 3: the expected terms are the prefixes of "abcde" up to length 3.
+            AssertTokenStreamContents(new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3), new[] { "a", "ab", "abc" }, new[] { 0, 0, 0 }, new[] { 1, 2, 3 }, 5);
+        }
+
+        [Test]
+        public virtual void TestBackRangeOfNgrams()
+        {
+#pragma warning disable 612, 618
+            Tokenizer backEdge = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 3);
+#pragma warning restore 612, 618
+            AssertTokenStreamContents(backEdge, new[] { "e", "de", "cde" }, new[] { 4, 3, 2 }, new[] { 5, 5, 5 }, null, null, null, 5, false); // suffixes of "abcde"
+        }
+
+        [Test]
+        public virtual void TestReset()
+        {
+            var edgeGrams = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
+            AssertTokenStreamContents(edgeGrams, new[] { "a", "ab", "abc" }, new[] { 0, 0, 0 }, new[] { 1, 2, 3 }, 5); // first pass
+            edgeGrams.SetReader(new StringReader("abcde")); // same text again, on the same tokenizer instance
+            AssertTokenStreamContents(edgeGrams, new[] { "a", "ab", "abc" }, new[] { 0, 0, 0 }, new[] { 1, 2, 3 }, 5); // second pass must match the first
+        }
+
+        /// <summary>
+        /// Feeds random text through analyzers built on the current and the 4.3 back-edge tokenizer. </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            int round = 0;
+            while (round < 10)
+            {
+                int minGram = TestUtil.NextInt(Random(), 2, 10);
+                int maxGram = TestUtil.NextInt(Random(), minGram, 20);
+                Analyzer frontEdge = new AnalyzerAnonymousInnerClassHelper(this, minGram, maxGram);
+                CheckRandomData(Random(), frontEdge, 100 * RANDOM_MULTIPLIER, 20);
+                CheckRandomData(Random(), frontEdge, 10 * RANDOM_MULTIPLIER, 8192);
+                round++;
+            }
+            Analyzer backEdge = new AnalyzerAnonymousInnerClassHelper2(this);
+            CheckRandomData(Random(), backEdge, 1000 * RANDOM_MULTIPLIER, 20, false, false);
+            CheckRandomData(Random(), backEdge, 100 * RANDOM_MULTIPLIER, 8192, false, false);
+        }
+
+        /// <summary>
+        /// Analyzer whose only component is an <see cref="EdgeNGramTokenizer"/> built with the given gram bounds. </summary>
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            private readonly int min;
+            private readonly int max;
+
+            // outerInstance stays in the signature for existing callers; nothing in this class reads it.
+            public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenizerTest outerInstance, int min, int max)
+            {
+                this.min = min;
+                this.max = max;
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
+            {
+                Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
+                return new TokenStreamComponents(tokenizer, tokenizer);
+            }
+        }
+
+        /// <summary>
+        /// Analyzer whose only component is a 4.3 back-edge tokenizer with gram bounds 2..4. </summary>
+        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+        {
+            // outerInstance stays in the signature for existing callers; nothing in this class reads it.
+            public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenizerTest outerInstance)
+            {
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
+            {
+#pragma warning disable 612, 618
+                Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, reader, Lucene43EdgeNGramTokenizer.Side.BACK, 2, 4);
+#pragma warning restore 612, 618
+                return new TokenStreamComponents(tokenizer, tokenizer);
+            }
+        }
+
+        [Test]
+        public virtual void TestTokenizerPositions()
+        {
+#pragma warning disable 612, 618
+            Tokenizer legacy = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3);
+#pragma warning restore 612, 618
+            AssertTokenStreamContents(legacy, new[] { "a", "ab", "abc" }, new[] { 0, 0, 0 }, new[] { 1, 2, 3 }, null, new[] { 1, 0, 0 }, null, null, false);
+            // Same terms from the current tokenizer, but checked against { 1, 1, 1 } instead of { 1, 0, 0 }.
+            Tokenizer current = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3);
+            AssertTokenStreamContents(current, new[] { "a", "ab", "abc" }, new[] { 0, 0, 0 }, new[] { 1, 2, 3 }, null, new[] { 1, 1, 1 }, null, null, false);
+        }
+
+        private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
+        {
+            // LUCENENET TODO: Changed randomizing strategy - not sure if this is right...
+            // (it used to be: RandomStrings.randomAsciiOfLength(Random(), length))
+            // The generated text is handed to the string-based overload.
+            TestNGrams(minGram, maxGram, TestUtil.RandomAnalysisString(Random(), length, true), nonTokenChars);
+        }
+
+        private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
+        {
+            NGramTokenizerTest.TestNGrams(minGram, maxGram, s, nonTokenChars, edgesOnly: true); // shared checker, edge grams only
+        }
+
+        [Test]
+        public virtual void TestLargeInput()
+        {
+            int lower = TestUtil.NextInt(Random(), 1, 100);
+            int upper = TestUtil.NextInt(Random(), lower, 100);
+            int length = TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024); // test sliding
+            TestNGrams(lower, upper, length, "");
+        }
+
+        [Test]
+        public virtual void TestLargeMaxGram()
+        {
+            int lower = TestUtil.NextInt(Random(), 1290, 1300);
+            int upper = TestUtil.NextInt(Random(), lower, 1300);
+            int length = TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024); // test sliding with maxGram > 1024
+            TestNGrams(lower, upper, length, "");
+        }
+
+        [Test]
+        public virtual void TestPreTokenization()
+        {
+            int lower = TestUtil.NextInt(Random(), 1, 100);
+            int upper = TestUtil.NextInt(Random(), lower, 100);
+            TestNGrams(lower, upper, length: TestUtil.NextInt(Random(), 0, 4 * 1024), nonTokenChars: "a"); // 'a' is the only non-token char
+        }
+
+        [Test]
+        public virtual void TestHeavyPreTokenization()
+        {
+            int lower = TestUtil.NextInt(Random(), 1, 100);
+            int upper = TestUtil.NextInt(Random(), lower, 100);
+            TestNGrams(lower, upper, length: TestUtil.NextInt(Random(), 0, 4 * 1024), nonTokenChars: "abcdef"); // a-f are all non-token chars
+        }
+
+        /// <summary>
+        /// Runs the n-gram check on a long text made mostly of spaces (the non-token char here),
+        /// with an 'a' at roughly one position in ten. </summary>
+        [Test]
+        public virtual void TestFewTokenChars()
+        {
+            char[] text = new char[TestUtil.NextInt(Random(), 4000, 5000)];
+            for (int pos = 0; pos < text.Length; ++pos)
+            {
+                bool isLetter = Random().NextDouble() < 0.1;
+                text[pos] = isLetter ? 'a' : ' ';
+            }
+            int lower = TestUtil.NextInt(Random(), 1, 2);
+            int upper = TestUtil.NextInt(Random(), lower, 2);
+            TestNGrams(lower, upper, new string(text), " ");
+        }
+
+        [Test]
+        public virtual void TestFullUTF8Range()
+        {
+            int lower = TestUtil.NextInt(Random(), 1, 100);
+            int upper = TestUtil.NextInt(Random(), lower, 100);
+            string text = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
+            TestNGrams(lower, upper, text, ""); // no non-token chars at all
+            TestNGrams(lower, upper, text, "abcdef"); // same text, with a-f as non-token chars
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs
new file mode 100644
index 0000000..e485fc0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenFilterTest.cs
@@ -0,0 +1,249 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Tests <see cref="NGramTokenFilter"/> for correctness.
+    /// </summary>
+    public class NGramTokenFilterTest : BaseTokenStreamTestCase
+    {
+        private TokenStream input;
+
+        public override void SetUp()
+        {
+            base.SetUp(); // base fixture set-up runs first
+            this.input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false); // default token source for the filters under test
+        }
+
+        /// <summary>
+        /// A minimum (2) larger than the maximum (1) must make the <see cref="NGramTokenFilter"/> constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 1);
+            });
+        }
+
+        /// <summary>
+        /// A minimum of 0 must make the <see cref="NGramTokenFilter"/> constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput2()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new NGramTokenFilter(TEST_VERSION_CURRENT, input, 0, 1);
+            });
+        }
+
+        [Test]
+        public virtual void TestUnigrams()
+        {
+            // min = max = 1: one expected term per character of "abcde".
+            AssertTokenStreamContents(new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 1), new[] { "a", "b", "c", "d", "e" }, new[] { 0, 0, 0, 0, 0 }, new[] { 5, 5, 5, 5, 5 }, new[] { 1, 0, 0, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestBigrams()
+        {
+            // min = max = 2: the expected terms are the four adjacent character pairs of "abcde".
+            AssertTokenStreamContents(new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2), new[] { "ab", "bc", "cd", "de" }, new[] { 0, 0, 0, 0 }, new[] { 5, 5, 5, 5 }, new[] { 1, 0, 0, 0 });
+        }
+
+        [Test]
+        public virtual void TestNgrams()
+        {
+            // min = 1, max = 3: for each start character the expected grams come shortest first.
+            AssertTokenStreamContents(new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3), new[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
+        }
+
+        [Test]
+        public virtual void TestNgramsNoIncrement()
+        {
+            // Same bounds (1..3) and the same expectations as TestNgrams.
+            AssertTokenStreamContents(new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3), new[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
+        }
+
+        [Test]
+        public virtual void TestOversizedNgrams()
+        {
+            // Gram sizes 6..7 are longer than the 5 characters of "abcde", so no terms are expected.
+            AssertTokenStreamContents(new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7), new string[0], new int[0], new int[0]);
+        }
+
+        [Test]
+        public virtual void TestSmallTokenInStream()
+        {
+            // "de" is shorter than the gram size of 3, so only "abc" and "fgh" are expected.
+            input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
+            AssertTokenStreamContents(new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3), new[] { "abc", "fgh" }, new[] { 0, 7 }, new[] { 3, 10 }, new[] { 1, 2 });
+        }
+
+        [Test]
+        public virtual void TestReset()
+        {
+            var source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
+            var unigrams = new NGramTokenFilter(TEST_VERSION_CURRENT, source, 1, 1);
+            AssertTokenStreamContents(unigrams, new[] { "a", "b", "c", "d", "e" }, new[] { 0, 0, 0, 0, 0 }, new[] { 5, 5, 5, 5, 5 }, new[] { 1, 0, 0, 0, 0 }); // first pass
+            source.SetReader(new StringReader("abcde")); // same text again, through the same filter instance
+            AssertTokenStreamContents(unigrams, new[] { "a", "b", "c", "d", "e" }, new[] { 0, 0, 0, 0, 0 }, new[] { 5, 5, 5, 5, 5 }, new[] { 1, 0, 0, 0, 0 }); // second pass must match the first
+        }
+
+        // LUCENE-3642
+        // Blindly adding the term length to the offset can point outside the original text
+        // when an earlier filter has made the word longer (here the folding of æ into ae),
+        // so in that case the filter behaves like WDF and preserves the modified offsets.
+        [Test]
+        public virtual void TestInvalidOffsets()
+        {
+            Analyzer foldingBigrams = new AnalyzerAnonymousInnerClassHelper(this);
+            AssertAnalyzesTo(foldingBigrams, "mosfellsbær", new[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" }, new[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }, new[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
+        }
+
+        /// <summary>
+        /// Analyzer chaining a whitespace MockTokenizer, an ASCIIFoldingFilter and a 2..2 NGramTokenFilter. </summary>
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            // outerInstance stays in the signature for existing callers; nothing in this class reads it.
+            public AnalyzerAnonymousInnerClassHelper(NGramTokenFilterTest outerInstance)
+            {
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
+                filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
+                return new TokenStreamComponents(tokenizer, filters);
+            }
+        }
+
+        /// <summary>
+        /// Feeds random text through the n-gram filter analyzer, ten times with freshly drawn gram bounds. </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            for (int round = 0; round < 10; ++round)
+            {
+                int minGram = TestUtil.NextInt(Random(), 2, 10);
+                int maxGram = TestUtil.NextInt(Random(), minGram, 20);
+                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, minGram, maxGram);
+                CheckRandomData(Random(), analyzer, 200 * RANDOM_MULTIPLIER, 20);
+            }
+        }
+
+        /// <summary>
+        /// Analyzer feeding a whitespace MockTokenizer into an NGramTokenFilter with the given gram bounds. </summary>
+        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+        {
+            private readonly int min;
+            private readonly int max;
+
+            // outerInstance stays in the signature for existing callers; nothing in this class reads it.
+            public AnalyzerAnonymousInnerClassHelper2(NGramTokenFilterTest outerInstance, int min, int max)
+            {
+                this.min = min;
+                this.max = max;
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+                return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
+            }
+        }
+
+        [Test]
+        public virtual void TestEmptyTerm()
+        {
+            Random rnd = Random();
+            Analyzer keywordNGrams = new AnalyzerAnonymousInnerClassHelper3(this);
+            CheckAnalysisConsistency(rnd, keywordNGrams, rnd.nextBoolean(), ""); // the empty string is the text under test
+        }
+
+        /// <summary>
+        /// Analyzer feeding a KeywordTokenizer into an NGramTokenFilter with gram bounds 2..15. </summary>
+        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+        {
+            // outerInstance stays in the signature for existing callers; nothing in this class reads it.
+            public AnalyzerAnonymousInnerClassHelper3(NGramTokenFilterTest outerInstance)
+            {
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new KeywordTokenizer(reader);
+                return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15));
+            }
+        }
+
+        [Test]
+        public virtual void TestLucene43()
+        {
+#pragma warning disable 612, 618
+            var legacyFilter = new NGramTokenFilter(LuceneVersion.LUCENE_43, input, 2, 3);
+#pragma warning restore 612, 618
+            AssertTokenStreamContents(legacyFilter, new[] { "ab", "bc", "cd", "de", "abc", "bcd", "cde" }, new[] { 0, 1, 2, 3, 0, 1, 2 }, new[] { 2, 3, 4, 5, 3, 4, 5 }, null, new[] { 1, 1, 1, 1, 1, 1, 1 }, null, null, false); // with LUCENE_43, all bigrams are expected before the trigrams
+        }
+
+        [Test]
+        public virtual void TestSupplementaryCharacters()
+        {
+            string text = TestUtil.RandomUnicodeString(Random(), 10);
+            int cpCount = text.CodePointCount(0, text.Length);
+            int lower = TestUtil.NextInt(Random(), 1, 3);
+            int upper = TestUtil.NextInt(Random(), lower, 10);
+            TokenStream stream = new NGramTokenFilter(TEST_VERSION_CURRENT, new KeywordTokenizer(new StringReader(text)), lower, upper);
+            ICharTermAttribute term = stream.AddAttribute<ICharTermAttribute>();
+            IOffsetAttribute offsets = stream.AddAttribute<IOffsetAttribute>();
+            stream.Reset();
+            for (int first = 0; first < cpCount; ++first)
+            {
+                int from = Character.OffsetByCodePoints(text, 0, first); // char index where this gram starts
+                int lastEnd = Math.Min(cpCount, first + upper); // grams may not run past the end of the text
+                for (int end = first + lower; end <= lastEnd; ++end)
+                {
+                    assertTrue(stream.IncrementToken());
+                    assertEquals(0, offsets.StartOffset);
+                    assertEquals(text.Length, offsets.EndOffset);
+                    int to = Character.OffsetByCodePoints(text, 0, end);
+                    assertEquals(text.Substring(from, to - from), term.ToString());
+                }
+            }
+            assertFalse(stream.IncrementToken());
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs
new file mode 100644
index 0000000..2fc1356
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/NGramTokenizerTest.cs
@@ -0,0 +1,303 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+
+namespace Lucene.Net.Analysis.NGram
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Tests <see cref="NGramTokenizer"/> for correctness.
+    /// </summary>
+    public class NGramTokenizerTest : BaseTokenStreamTestCase
+    {
+        private StringReader input;
+
+        public override void SetUp()
+        {
+            base.SetUp(); // base fixture set-up runs first
+            this.input = new StringReader("abcde"); // default text handed to the tokenizers under test
+        }
+
+        /// <summary>
+        /// A minimum (2) larger than the maximum (1) must make the <see cref="NGramTokenizer"/> constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
+            });
+        }
+
+        /// <summary>
+        /// A minimum of 0 must make the <see cref="NGramTokenizer"/> constructor throw.
+        /// </summary>
+        [Test]
+        public virtual void TestInvalidInput2()
+        {
+            // Assert.Catch accepts ArgumentException or any type derived from it,
+            // and fails the test with a descriptive message when nothing is thrown.
+            // (Assert.Throws would demand the exact exception type instead.)
+            Assert.Catch<System.ArgumentException>(() =>
+            {
+                new NGramTokenizer(TEST_VERSION_CURRENT, input, 0, 1);
+            });
+        }
+
+        [Test]
+        public virtual void TestUnigrams()
+        {
+            // min = max = 1: one expected term per character of "abcde".
+            AssertTokenStreamContents(new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1), new[] { "a", "b", "c", "d", "e" }, new[] { 0, 1, 2, 3, 4 }, new[] { 1, 2, 3, 4, 5 }, 5);
+        }
+
+        [Test]
+        public virtual void TestBigrams()
+        {
+            // min = max = 2: the expected terms are the four adjacent character pairs of "abcde".
+            AssertTokenStreamContents(new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 2), new[] { "ab", "bc", "cd", "de" }, new[] { 0, 1, 2, 3 }, new[] { 2, 3, 4, 5 }, 5);
+        }
+
+        [Test]
+        public virtual void TestNgrams()
+        {
+            // min = 1, max = 3: for each start character the expected grams come shortest first.
+            AssertTokenStreamContents(new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3), new[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4 }, new[] { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 }, null, null, null, 5, false);
+        }
+
+        [Test]
+        public virtual void TestOversizedNgrams()
+        {
+            // Gram sizes 6..7 are longer than the 5 characters of "abcde", so no terms are expected.
+            AssertTokenStreamContents(new NGramTokenizer(TEST_VERSION_CURRENT, input, 6, 7), new string[0], new int[0], new int[0], 5);
+        }
+
+        [Test]
+        public virtual void TestReset()
+        {
+            var unigrams = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
+            AssertTokenStreamContents(unigrams, new[] { "a", "b", "c", "d", "e" }, new[] { 0, 1, 2, 3, 4 }, new[] { 1, 2, 3, 4, 5 }, 5); // first pass
+            unigrams.SetReader(new StringReader("abcde")); // same text again, on the same tokenizer instance
+            AssertTokenStreamContents(unigrams, new[] { "a", "b", "c", "d", "e" }, new[] { 0, 1, 2, 3, 4 }, new[] { 1, 2, 3, 4, 5 }, 5); // second pass must match the first
+        }
+
+        /// <summary>
+        /// Feeds random text through the n-gram tokenizer analyzer, ten times with freshly drawn gram bounds. </summary>
+        [Test]
+        public virtual void TestRandomStrings()
+        {
+            for (int round = 0; round < 10; ++round)
+            {
+                int minGram = TestUtil.NextInt(Random(), 2, 10);
+                int maxGram = TestUtil.NextInt(Random(), minGram, 20);
+                Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, minGram, maxGram);
+                CheckRandomData(Random(), analyzer, 200 * RANDOM_MULTIPLIER, 20);
+                CheckRandomData(Random(), analyzer, 10 * RANDOM_MULTIPLIER, 1027);
+            }
+        }
+
+        /// <summary>
+        /// Analyzer whose only component is an <see cref="NGramTokenizer"/> built with the given gram bounds. </summary>
+        private class AnalyzerAnonymousInnerClassHelper : Analyzer
+        {
+            private readonly int min;
+            private readonly int max;
+
+            // outerInstance stays in the signature for existing callers; nothing in this class reads it.
+            public AnalyzerAnonymousInnerClassHelper(NGramTokenizerTest outerInstance, int min, int max)
+            {
+                this.min = min;
+                this.max = max;
+            }
+
+            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+            {
+                Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
+                return new TokenStreamComponents(tokenizer, tokenizer);
+            }
+        }
+
+        private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
+        {
+            // Previously: RandomStrings.randomAsciiOfLength(Random(), length).
+            // The generated text is handed to the string-based overload.
+            TestNGrams(minGram, maxGram, TestUtil.RandomAnalysisString(Random(), length, true), nonTokenChars);
+        }
+
+        private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
+        {
+            TestNGrams(minGram, maxGram, s, nonTokenChars, edgesOnly: false); // all grams, not only edge grams
+        }
+
+        internal static int[] toCodePoints(string s)
+        {
+            int[] result = new int[Character.CodePointCount(s, 0, s.Length)];
+            for (int charIndex = 0, n = 0; charIndex < s.Length; n++)
+            {
+                int cp = result[n] = Character.CodePointAt(s, charIndex); // n-th code point of s
+                charIndex += Character.CharCount(cp); // advance by the char count reported for it
+            }
+            return result;
+        }
+
+        /// <summary>Returns <c>false</c> when <paramref name="codePoint"/> occurs in <paramref name="nonTokenChars"/>, <c>true</c> otherwise.</summary>
+        internal static bool isTokenChar(string nonTokenChars, int codePoint)
+        {
+            // Decoded with Character.CodePointAt, the same helper toCodePoints uses, instead of char.ConvertToUtf32.
+            for (int i = 0, cp; i < nonTokenChars.Length; i += Character.CharCount(cp))
+            {
+                cp = Character.CodePointAt(nonTokenChars, i);
+                if (cp == codePoint)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Tokenizes <paramref name="s"/> and checks each emitted gram against the grams enumerated directly from its code points. </summary>
+        internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
+        {
+            // code points of s, plus the char offset at which each of them starts
+            int[] codePoints = toCodePoints(s);
+            int[] offsets = new int[codePoints.Length + 1];
+            for (int k = 1; k <= codePoints.Length; ++k)
+            {
+                offsets[k] = offsets[k - 1] + Character.CharCount(codePoints[k - 1]);
+            }
+            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
+            ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
+            IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
+            IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
+            IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
+            grams.Reset();
+            for (int start = 0; start < codePoints.Length; ++start)
+            {
+                for (int end = start + minGram; end <= start + maxGram && end <= codePoints.Length; ++end)
+                {
+                    // With edgesOnly, a gram that directly follows a token char is not on an edge and is skipped;
+                    // any gram containing a non-token char is skipped as well.
+                    bool expected = !(edgesOnly && start > 0 && isTokenChar(nonTokenChars, codePoints[start - 1]));
+                    for (int j = start; expected && j < end; ++j)
+                    {
+                        expected = isTokenChar(nonTokenChars, codePoints[j]);
+                    }
+                    if (!expected)
+                    {
+                        continue;
+                    }
+                    assertTrue(grams.IncrementToken());
+                    assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
+                    assertEquals(1, posIncAtt.PositionIncrement);
+                    assertEquals(1, posLenAtt.PositionLength);
+                    assertEquals(offsets[start], offsetAtt.StartOffset);
+                    assertEquals(offsets[end], offsetAtt.EndOffset);
+                }
+            }
+            assertFalse(grams.IncrementToken());
+            // After End(), both offsets are expected to equal the length of the input.
+            grams.End();
+            assertEquals(s.Length, offsetAtt.StartOffset);
+            assertEquals(s.Length, offsetAtt.EndOffset);
+        }
+
+        private class NGramTokenizerAnonymousInnerClassHelper : NGramTokenizer // test tokenizer: a code point is a token char unless it occurs in nonTokenChars
+        {
+            private readonly string nonTokenChars;
+            // All tokenizer arguments are forwarded to NGramTokenizer unchanged; only nonTokenChars is kept for IsTokenChar.
+            public NGramTokenizerAnonymousInnerClassHelper(LuceneVersion TEST_VERSION_CURRENT, StringReader java, int minGram, int maxGram, bool edgesOnly, string nonTokenChars)
+                  : base(TEST_VERSION_CURRENT, java, minGram, maxGram, edgesOnly)
+            {
+                this.nonTokenChars = nonTokenChars;
+            }
+            // chr is a full code point. A bare (char) cast would truncate supplementary code points (e.g. U+10061 -> 'a') and wrongly match nonTokenChars, so compare it as a string.
+            protected override bool IsTokenChar(int chr)
+            {
+                return nonTokenChars.IndexOf(chr > char.MaxValue ? char.ConvertFromUtf32(chr) : ((char)chr).ToString(), System.StringComparison.Ordinal) < 0;
+            }
+        }
+
+        [Test]
+        public virtual void TestLargeInput()
+        {
+            // test sliding
+            int minGram = TestUtil.NextInt(Random(), 1, 100);
+            int maxGram = TestUtil.NextInt(Random(), minGram, 100); // lower bound is minGram, so maxGram is never drawn below it
+            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), ""); // random input sized from the 3*1024..4*1024 range; empty nonTokenChars, so no char is excluded
+        }
+
+        [Test]
+        public virtual void TestLargeMaxGram()
+        {
+            // test sliding with maxGram > 1024
+            int minGram = TestUtil.NextInt(Random(), 1290, 1300); // both gram sizes are drawn from the 1290..1300 range
+            int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
+            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), ""); // empty nonTokenChars, so no char is excluded
+        }
+
+        [Test]
+        public virtual void TestPreTokenization()
+        {
+            int minGram = TestUtil.NextInt(Random(), 1, 100);
+            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a"); // 'a' is the only non-token char; the length range starts at 0
+        }
+
+        [Test]
+        public virtual void TestHeavyPreTokenization()
+        {
+            int minGram = TestUtil.NextInt(Random(), 1, 100);
+            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef"); // same as TestPreTokenization but with six non-token chars ('a'..'f')
+        }
+
+        [Test]
+        public virtual void TestFewTokenChars()
+        {
+            char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
+            Arrays.Fill(chrs, ' '); // start with all spaces; space is the non-token char passed below
+            for (int i = 0; i < chrs.Length; ++i)
+            {
+                if (Random().NextDouble() < 0.1) // each position is overwritten when the random draw is below 0.1
+                {
+                    chrs[i] = 'a';
+                }
+            }
+            int minGram = TestUtil.NextInt(Random(), 1, 2);
+            int maxGram = TestUtil.NextInt(Random(), minGram, 2);
+            TestNGrams(minGram, maxGram, new string(chrs), " "); // only the scattered 'a' chars are token chars
+        }
+
+        [Test]
+        public virtual void TestFullUTF8Range()
+        {
+            int minGram = TestUtil.NextInt(Random(), 1, 100);
+            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
+            string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024); // one random Unicode string, reused for both checks below
+            TestNGrams(minGram, maxGram, s, ""); // first with no non-token chars
+            TestNGrams(minGram, maxGram, s, "abcdef"); // then with 'a'..'f' treated as non-token chars
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs
new file mode 100644
index 0000000..c0683a6
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/NGram/TestNGramFilters.cs
@@ -0,0 +1,196 @@
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System.IO;
+using Reader = System.IO.TextReader;
+
+namespace Lucene.Net.Analysis.NGram
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Simple tests to ensure the NGram filter factories are working: each test obtains a tokenizer or filter by factory name and checks the terms it produces.
+    /// </summary>
+    public class TestNGramFilters : BaseTokenStreamFactoryTestCase
+    {
+        /// <summary>
+        /// Test NGramTokenizerFactory
+        /// </summary>
+        [Test]
+        public virtual void TestNGramTokenizer()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = TokenizerFactory("NGram").Create(reader); // no size options are passed
+            AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" }); // expected: the 1- and 2-character grams of "test", grouped by start position
+        }
+
+        /// <summary>
+        /// Test NGramTokenizerFactory with min and max gram options
+        /// </summary>
+        [Test]
+        public virtual void TestNGramTokenizer2()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = TokenizerFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(reader);
+            AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" }); // expected: the 2- and 3-character grams of "test"
+        }
+
+        /// <summary>
+        /// Test the NGramFilterFactory
+        /// </summary>
+        [Test]
+        public virtual void TestNGramFilter()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("NGram").Create(stream); // wrap the tokenizer output with the filter under test
+            AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st", "t" }); // same expected terms as the tokenizer variant above
+        }
+
+        /// <summary>
+        /// Test the NGramFilterFactory with min and max gram options
+        /// </summary>
+        [Test]
+        public virtual void TestNGramFilter2()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(stream);
+            AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st" }); // expected: the 2- and 3-character grams of "test"
+        }
+
+        /// <summary>
+        /// Test EdgeNGramTokenizerFactory
+        /// </summary>
+        [Test]
+        public virtual void TestEdgeNGramTokenizer()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = TokenizerFactory("EdgeNGram").Create(reader); // no size options are passed
+            AssertTokenStreamContents(stream, new string[] { "t" }); // expected: only the first character of "test"
+        }
+
+        /// <summary>
+        /// Test EdgeNGramTokenizerFactory with min and max gram size
+        /// </summary>
+        [Test]
+        public virtual void TestEdgeNGramTokenizer2()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = TokenizerFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(reader);
+            AssertTokenStreamContents(stream, new string[] { "t", "te" }); // expected: the leading 1- and 2-character prefixes of "test"
+        }
+
+        /// <summary>
+        /// Test EdgeNGramTokenizerFactory with side option
+        /// </summary>
+        [Test]
+        public virtual void TestEdgeNGramTokenizer3()
+        {
+            Reader reader = new StringReader("ready");
+#pragma warning disable 612, 618
+            TokenStream stream = TokenizerFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(reader); // the "side"="back" option is requested together with LUCENE_43
+#pragma warning restore 612, 618
+            AssertTokenStreamContents(stream, new string[] { "y" }); // expected: the last character of "ready"
+        }
+
+        /// <summary>
+        /// Test EdgeNGramFilterFactory
+        /// </summary>
+        [Test]
+        public virtual void TestEdgeNGramFilter()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("EdgeNGram").Create(stream); // no size options are passed
+            AssertTokenStreamContents(stream, new string[] { "t" }); // expected: only the first character of "test"
+        }
+
+        /// <summary>
+        /// Test EdgeNGramFilterFactory with min and max gram size
+        /// </summary>
+        [Test]
+        public virtual void TestEdgeNGramFilter2()
+        {
+            Reader reader = new StringReader("test");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+            stream = TokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(stream);
+            AssertTokenStreamContents(stream, new string[] { "t", "te" }); // expected: the leading 1- and 2-character prefixes of "test"
+        }
+
+        /// <summary>
+        /// Test EdgeNGramFilterFactory with side option
+        /// </summary>
+        [Test]
+        public virtual void TestEdgeNGramFilter3()
+        {
+            Reader reader = new StringReader("ready");
+            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+#pragma warning disable 612, 618
+            stream = TokenFilterFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(stream); // the "side"="back" option is requested together with LUCENE_43
+#pragma warning restore 612, 618
+            AssertTokenStreamContents(stream, new string[] { "y" }); // expected: the last character of "ready"
+        }
+
+        /// <summary>
+        /// Test that an unrecognized argument makes each of the four factory lookups throw an ArgumentException whose message contains "Unknown parameters". </summary>
+        [Test]
+        public virtual void TestBogusArguments()
+        {
+            try
+            {
+                TokenizerFactory("NGram", "bogusArg", "bogusValue");
+                fail(); // reached only if no exception was thrown
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+
+            try
+            {
+                TokenizerFactory("EdgeNGram", "bogusArg", "bogusValue");
+                fail(); // reached only if no exception was thrown
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+
+            try
+            {
+                TokenFilterFactory("NGram", "bogusArg", "bogusValue");
+                fail(); // reached only if no exception was thrown
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+
+            try
+            {
+                TokenFilterFactory("EdgeNGram", "bogusArg", "bogusValue");
+                fail(); // reached only if no exception was thrown
+            }
+            catch (System.ArgumentException expected)
+            {
+                assertTrue(expected.Message.Contains("Unknown parameters"));
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
deleted file mode 100644
index ea6fbd7..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilterTest.cs
+++ /dev/null
@@ -1,390 +0,0 @@
-using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.Shingle;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// Tests <seealso cref="EdgeNGramTokenFilter"/> for correctness.
-    /// </summary>
-    public class EdgeNGramTokenFilterTest : BaseTokenStreamTestCase
-    {
-        private TokenStream input;
-
-        public override void SetUp()
-        {
-            base.SetUp();
-            input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput()
-        {
-            bool gotException = false;
-            try
-            {
-#pragma warning disable 612, 618
-                new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 0, 0);
-#pragma warning restore 612, 618
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput2()
-        {
-            bool gotException = false;
-            try
-            {
-#pragma warning disable 612, 618
-                new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 2, 1);
-#pragma warning restore 612, 618
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput3()
-        {
-            bool gotException = false;
-            try
-            {
-#pragma warning disable 612, 618
-                new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, -1, 2);
-#pragma warning restore 612, 618
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestFrontUnigram()
-        {
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 5 });
-        }
-
-        [Test]
-        public virtual void TestBackUnigram()
-        {
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 1);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 });
-        }
-
-        [Test]
-        public virtual void TestOversizedNgrams()
-        {
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0]);
-        }
-
-        [Test]
-        public virtual void TestFrontRangeOfNgrams()
-        {
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
-        }
-
-        [Test]
-        public virtual void TestBackRangeOfNgrams()
-        {
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, null, false);
-        }
-
-        [Test]
-        public virtual void TestFilterPositions()
-        {
-            TokenStream ts = new MockTokenizer(new StringReader("abcde vwxyz"), MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "v", "vw", "vwx" }, new int[] { 0, 0, 0, 6, 6, 6 }, new int[] { 5, 5, 5, 11, 11, 11 }, null, new int[] { 1, 0, 0, 1, 0, 0 }, null, null, false);
-        }
-
-        private class PositionFilter : TokenFilter
-        {
-
-            internal readonly IPositionIncrementAttribute posIncrAtt;
-            internal bool started;
-
-            internal PositionFilter(TokenStream input) : base(input)
-            {
-                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-            }
-
-            public override sealed bool IncrementToken()
-            {
-                if (m_input.IncrementToken())
-                {
-                    if (started)
-                    {
-                        posIncrAtt.PositionIncrement = 0;
-                    }
-                    else
-                    {
-                        started = true;
-                    }
-                    return true;
-                }
-                else
-                {
-                    return false;
-                }
-            }
-
-            public override void Reset()
-            {
-                base.Reset();
-                started = false;
-            }
-        }
-
-        [Test]
-        public virtual void TestFirstTokenPositionIncrement()
-        {
-            TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
-            ts = new PositionFilter(ts); // All but first token will get 0 position increment
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, EdgeNGramTokenFilter.Side.FRONT, 2, 3);
-#pragma warning restore 612, 618
-            // The first token "a" will not be output, since it's smaller than the mingram size of 2.
-            // The second token on input to EdgeNGramTokenFilter will have position increment of 0,
-            // which should be increased to 1, since this is the first output token in the stream.
-            AssertTokenStreamContents(filter, new string[] { "ab", "abc" }, new int[] { 2, 2 }, new int[] { 5, 5 }, new int[] { 1, 0 });
-        }
-
-        [Test]
-        public virtual void TestSmallTokenInStream()
-        {
-            input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
-        }
-
-        [Test]
-        public virtual void TestReset()
-        {
-            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
-#pragma warning disable 612, 618
-            EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
-            tokenizer.SetReader(new StringReader("abcde"));
-            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 5, 5, 5 });
-        }
-
-        // LUCENE-3642
-        // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
-        // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
-        // so in this case we behave like WDF, and preserve any modified offsets
-        [Test]
-        public virtual void TestInvalidOffsets()
-        {
-            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
-            AssertAnalyzesTo(analyzer, "mosfellsbær", new string[] { "mo", "mos", "mosf", "mosfe", "mosfel", "mosfell", "mosfells", "mosfellsb", "mosfellsba", "mosfellsbae", "mosfellsbaer" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 });
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper : Analyzer
-        {
-            private readonly EdgeNGramTokenFilterTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenFilterTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-                TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
-#pragma warning disable 612, 618
-                filters = new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, filters, EdgeNGramTokenFilter.Side.FRONT, 2, 15);
-#pragma warning restore 612, 618
-                return new TokenStreamComponents(tokenizer, filters);
-            }
-        }
-
-        /// <summary>
-        /// blast some random strings through the analyzer </summary>
-        [Test]
-        public virtual void TestRandomStrings()
-        {
-            for (int i = 0; i < 10; i++)
-            {
-                int min = TestUtil.NextInt(Random(), 2, 10);
-                int max = TestUtil.NextInt(Random(), min, 20);
-
-                Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, min, max);
-                CheckRandomData(Random(), a, 100 * RANDOM_MULTIPLIER);
-            }
-
-            Analyzer b = new AnalyzerAnonymousInnerClassHelper3(this);
-            CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
-        {
-            private readonly EdgeNGramTokenFilterTest outerInstance;
-
-            private int min;
-            private int max;
-
-            public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenFilterTest outerInstance, int min, int max)
-            {
-                this.outerInstance = outerInstance;
-                this.min = min;
-                this.max = max;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-                return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
-            }
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
-        {
-            private readonly EdgeNGramTokenFilterTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper3(EdgeNGramTokenFilterTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
-                return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 4));
-#pragma warning restore 612, 618 
-            }
-        }
-
-        [Test]
-        public virtual void TestEmptyTerm()
-        {
-            Random random = Random();
-            Analyzer a = new AnalyzerAnonymousInnerClassHelper4(this);
-            CheckAnalysisConsistency(random, a, random.nextBoolean(), "");
-
-            Analyzer b = new AnalyzerAnonymousInnerClassHelper5(this);
-            CheckAnalysisConsistency(random, b, random.nextBoolean(), "");
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
-        {
-            private readonly EdgeNGramTokenFilterTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper4(EdgeNGramTokenFilterTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new KeywordTokenizer(reader);
-#pragma warning disable 612, 618
-                return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, EdgeNGramTokenFilter.Side.FRONT, 2, 15));
-#pragma warning restore 612, 618
-            }
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
-        {
-            private readonly EdgeNGramTokenFilterTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper5(EdgeNGramTokenFilterTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new KeywordTokenizer(reader);
-#pragma warning disable 612, 618
-                return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(LuceneVersion.LUCENE_43, tokenizer, EdgeNGramTokenFilter.Side.BACK, 2, 15));
-#pragma warning restore 612, 618
-            }
-        }
-
-        [Test]
-        public virtual void TestGraphs()
-        {
-            TokenStream tk = new LetterTokenizer(TEST_VERSION_CURRENT, new StringReader("abc d efgh ij klmno p q"));
-            tk = new ShingleFilter(tk);
-            tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, 7, 10);
-            AssertTokenStreamContents(tk, new string[] { "efgh ij", "ij klmn", "ij klmno", "klmno p" }, new int[] { 6, 11, 11, 14 }, new int[] { 13, 19, 19, 21 }, new int[] { 3, 1, 0, 1 }, new int[] { 2, 2, 2, 2 }, 23);
-        }
-
-        [Test]
-        public virtual void TestSupplementaryCharacters()
-        {
-            string s = TestUtil.RandomUnicodeString(Random(), 10);
-            int codePointCount = s.CodePointCount(0, s.Length);
-            int minGram = TestUtil.NextInt(Random(), 1, 3);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 10);
-            TokenStream tk = new KeywordTokenizer(new StringReader(s));
-            tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
-            ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
-            IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
-            tk.Reset();
-            for (int i = minGram; i <= Math.Min(codePointCount, maxGram); ++i)
-            {
-                assertTrue(tk.IncrementToken());
-                assertEquals(0, offsetAtt.StartOffset);
-                assertEquals(s.Length, offsetAtt.EndOffset);
-                int end = Character.OffsetByCodePoints(s, 0, i);
-                assertEquals(s.Substring(0, end), termAtt.ToString());
-            }
-            assertFalse(tk.IncrementToken());
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
deleted file mode 100644
index 4ccecfa..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerTest.cs
+++ /dev/null
@@ -1,278 +0,0 @@
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-using Reader = System.IO.TextReader;
-using Version = Lucene.Net.Util.LuceneVersion;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// Tests <seealso cref="EdgeNGramTokenizer"/> for correctness.
-    /// </summary>
-    public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
-    {
-        private StringReader input;
-
-        public override void SetUp()
-        {
-            base.SetUp();
-            input = new StringReader("abcde");
-        }
-
-        [Test]
-        public virtual void TestInvalidInput()
-        {
-            bool gotException = false;
-            try
-            {
-                new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 0, 0);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput2()
-        {
-            bool gotException = false;
-            try
-            {
-                new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput3()
-        {
-            bool gotException = false;
-            try
-            {
-                new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, -1, 2);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestFrontUnigram()
-        {
-            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 1);
-            AssertTokenStreamContents(tokenizer, new string[] { "a" }, new int[] { 0 }, new int[] { 1 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestBackUnigram()
-        {
-#pragma warning disable 612, 618
-            Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 1);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "e" }, new int[] { 4 }, new int[] { 5 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestOversizedNgrams()
-        {
-            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 6, 6);
-            AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestFrontRangeOfNgrams()
-        {
-            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestBackRangeOfNgrams()
-        {
-#pragma warning disable 612, 618
-            Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.BACK, 1, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, null, null, null, 5, false); // abcde
-        }
-
-        [Test]
-        public virtual void TestReset()
-        {
-            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, input, 1, 3);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
-            tokenizer.SetReader(new StringReader("abcde"));
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5); // abcde
-        }
-
-        /// <summary>
-        /// blast some random strings through the analyzer </summary>
-        [Test]
-        public virtual void TestRandomStrings()
-        {
-            for (int i = 0; i < 10; i++)
-            {
-                int min = TestUtil.NextInt(Random(), 2, 10);
-                int max = TestUtil.NextInt(Random(), min, 20);
-
-                Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
-                CheckRandomData(Random(), a, 100 * RANDOM_MULTIPLIER, 20);
-                CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 8192);
-            }
-
-            Analyzer b = new AnalyzerAnonymousInnerClassHelper2(this);
-            CheckRandomData(Random(), b, 1000 * RANDOM_MULTIPLIER, 20, false, false);
-            CheckRandomData(Random(), b, 100 * RANDOM_MULTIPLIER, 8192, false, false);
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper : Analyzer
-        {
-            private readonly EdgeNGramTokenizerTest outerInstance;
-
-            private int min;
-            private int max;
-
-            public AnalyzerAnonymousInnerClassHelper(EdgeNGramTokenizerTest outerInstance, int min, int max)
-            {
-                this.outerInstance = outerInstance;
-                this.min = min;
-                this.max = max;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
-            {
-                Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, min, max);
-                return new TokenStreamComponents(tokenizer, tokenizer);
-            }
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
-        {
-            private readonly EdgeNGramTokenizerTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper2(EdgeNGramTokenizerTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
-            {
-#pragma warning disable 612, 618
-                Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, reader, Lucene43EdgeNGramTokenizer.Side.BACK, 2, 4);
-#pragma warning restore 612, 618
-                return new TokenStreamComponents(tokenizer, tokenizer);
-            }
-        }
-
-        [Test]
-        public virtual void TestTokenizerPositions()
-        {
-#pragma warning disable 612, 618
-            Tokenizer tokenizer = new Lucene43EdgeNGramTokenizer(Version.LUCENE_43, input, Lucene43EdgeNGramTokenizer.Side.FRONT, 1, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 0, 0 }, null, null, false);
-
-            tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"), 1, 3);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, null, new int[] { 1, 1, 1 }, null, null, false);
-        }
-
-        private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
-        {
-            // LUCENENET TODO: Changed randomizing strategy - not sure if this is right...
-            //string s = RandomStrings.randomAsciiOfLength(Random(), length);
-            string s = TestUtil.RandomAnalysisString(Random(), length, true);
-            TestNGrams(minGram, maxGram, s, nonTokenChars);
-        }
-
-        private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
-        {
-            NGramTokenizerTest.TestNGrams(minGram, maxGram, s, nonTokenChars, true);
-        }
-
-        [Test]
-        public virtual void TestLargeInput()
-        {
-            // test sliding
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
-        }
-
-        [Test]
-        public virtual void TestLargeMaxGram()
-        {
-            // test sliding with maxGram > 1024
-            int minGram = TestUtil.NextInt(Random(), 1290, 1300);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024), "");
-        }
-
-        [Test]
-        public virtual void TestPreTokenization()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
-        }
-
-        [Test]
-        public virtual void TestHeavyPreTokenization()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
-        }
-
-        [Test]
-        public virtual void TestFewTokenChars()
-        {
-            char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
-            Arrays.Fill(chrs, ' ');
-            for (int i = 0; i < chrs.Length; ++i)
-            {
-                if (Random().NextDouble() < 0.1)
-                {
-                    chrs[i] = 'a';
-                }
-            }
-            int minGram = TestUtil.NextInt(Random(), 1, 2);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 2);
-            TestNGrams(minGram, maxGram, new string(chrs), " ");
-        }
-
-        [Test]
-        public virtual void TestFullUTF8Range()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
-            TestNGrams(minGram, maxGram, s, "");
-            TestNGrams(minGram, maxGram, s, "abcdef");
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
deleted file mode 100644
index e485fc0..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenFilterTest.cs
+++ /dev/null
@@ -1,249 +0,0 @@
-using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Miscellaneous;
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// Tests <seealso cref="NGramTokenFilter"/> for correctness.
-    /// </summary>
-    public class NGramTokenFilterTest : BaseTokenStreamTestCase
-    {
-        private TokenStream input;
-
-        public override void SetUp()
-        {
-            base.SetUp();
-            input = new MockTokenizer(new StringReader("abcde"), MockTokenizer.WHITESPACE, false);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput()
-        {
-            bool gotException = false;
-            try
-            {
-                new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput2()
-        {
-            bool gotException = false;
-            try
-            {
-                new NGramTokenFilter(TEST_VERSION_CURRENT, input, 0, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestUnigrams()
-        {
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 1);
-            AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
-        }
-
-        [Test]
-        public virtual void TestBigrams()
-        {
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 2, 2);
-            AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de" }, new int[] { 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0 });
-        }
-
-        [Test]
-        public virtual void TestNgrams()
-        {
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
-            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
-        }
-
-        [Test]
-        public virtual void TestNgramsNoIncrement()
-        {
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 1, 3);
-            AssertTokenStreamContents(filter, new string[] { "a", "ab", "abc", "b", "bc", "bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }, null, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, null, null, false);
-        }
-
-        [Test]
-        public virtual void TestOversizedNgrams()
-        {
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 6, 7);
-            AssertTokenStreamContents(filter, new string[0], new int[0], new int[0]);
-        }
-
-        [Test]
-        public virtual void TestSmallTokenInStream()
-        {
-            input = new MockTokenizer(new StringReader("abc de fgh"), MockTokenizer.WHITESPACE, false);
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, input, 3, 3);
-            AssertTokenStreamContents(filter, new string[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 }, new int[] { 1, 2 });
-        }
-
-        [Test]
-        public virtual void TestReset()
-        {
-            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("abcde"));
-            NGramTokenFilter filter = new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 1, 1);
-            AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
-            tokenizer.SetReader(new StringReader("abcde"));
-            AssertTokenStreamContents(filter, new string[] { "a", "b", "c", "d", "e" }, new int[] { 0, 0, 0, 0, 0 }, new int[] { 5, 5, 5, 5, 5 }, new int[] { 1, 0, 0, 0, 0 });
-        }
-
-        // LUCENE-3642
-        // EdgeNgram blindly adds term length to offset, but this can take things out of bounds
-        // wrt original text if a previous filter increases the length of the word (in this case æ -> ae)
-        // so in this case we behave like WDF, and preserve any modified offsets
-        [Test]
-        public virtual void TestInvalidOffsets()
-        {
-            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this);
-            AssertAnalyzesTo(analyzer, "mosfellsbær", new string[] { "mo", "os", "sf", "fe", "el", "ll", "ls", "sb", "ba", "ae", "er" }, new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, new int[] { 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 }, new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper : Analyzer
-        {
-            private readonly NGramTokenFilterTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper(NGramTokenFilterTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-                TokenFilter filters = new ASCIIFoldingFilter(tokenizer);
-                filters = new NGramTokenFilter(TEST_VERSION_CURRENT, filters, 2, 2);
-                return new TokenStreamComponents(tokenizer, filters);
-            }
-        }
-
-        /// <summary>
-        /// blast some random strings through the analyzer </summary>
-        [Test]
-        public virtual void TestRandomStrings()
-        {
-            for (int i = 0; i < 10; i++)
-            {
-                int min = TestUtil.NextInt(Random(), 2, 10);
-                int max = TestUtil.NextInt(Random(), min, 20);
-                Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, min, max);
-                CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
-            }
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
-        {
-            private readonly NGramTokenFilterTest outerInstance;
-
-            private int min;
-            private int max;
-
-            public AnalyzerAnonymousInnerClassHelper2(NGramTokenFilterTest outerInstance, int min, int max)
-            {
-                this.outerInstance = outerInstance;
-                this.min = min;
-                this.max = max;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-                return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, min, max));
-            }
-        }
-
-        [Test]
-        public virtual void TestEmptyTerm()
-        {
-            Random random = Random();
-            Analyzer a = new AnalyzerAnonymousInnerClassHelper3(this);
-            CheckAnalysisConsistency(random, a, random.nextBoolean(), "");
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
-        {
-            private readonly NGramTokenFilterTest outerInstance;
-
-            public AnalyzerAnonymousInnerClassHelper3(NGramTokenFilterTest outerInstance)
-            {
-                this.outerInstance = outerInstance;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
-            {
-                Tokenizer tokenizer = new KeywordTokenizer(reader);
-                return new TokenStreamComponents(tokenizer, new NGramTokenFilter(TEST_VERSION_CURRENT, tokenizer, 2, 15));
-            }
-        }
-
-        [Test]
-        public virtual void TestLucene43()
-        {
-#pragma warning disable 612, 618
-            NGramTokenFilter filter = new NGramTokenFilter(LuceneVersion.LUCENE_43, input, 2, 3);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(filter, new string[] { "ab", "bc", "cd", "de", "abc", "bcd", "cde" }, new int[] { 0, 1, 2, 3, 0, 1, 2 }, new int[] { 2, 3, 4, 5, 3, 4, 5 }, null, new int[] { 1, 1, 1, 1, 1, 1, 1 }, null, null, false);
-        }
-
-        [Test]
-        public virtual void TestSupplementaryCharacters()
-        {
-            string s = TestUtil.RandomUnicodeString(Random(), 10);
-            int codePointCount = s.CodePointCount(0, s.Length);
-            int minGram = TestUtil.NextInt(Random(), 1, 3);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 10);
-            TokenStream tk = new KeywordTokenizer(new StringReader(s));
-            tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
-            ICharTermAttribute termAtt = tk.AddAttribute<ICharTermAttribute>();
-            IOffsetAttribute offsetAtt = tk.AddAttribute<IOffsetAttribute>();
-            tk.Reset();
-            for (int start = 0; start < codePointCount; ++start)
-            {
-                for (int end = start + minGram; end <= Math.Min(codePointCount, start + maxGram); ++end)
-                {
-                    assertTrue(tk.IncrementToken());
-                    assertEquals(0, offsetAtt.StartOffset);
-                    assertEquals(s.Length, offsetAtt.EndOffset);
-                    int startIndex = Character.OffsetByCodePoints(s, 0, start);
-                    int endIndex = Character.OffsetByCodePoints(s, 0, end);
-                    assertEquals(s.Substring(startIndex, endIndex - startIndex), termAtt.ToString());
-                }
-            }
-            assertFalse(tk.IncrementToken());
-        }
-    }
-}
\ No newline at end of file


Mime
View raw message