lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [36/39] lucenenet git commit: Lucene.Net.Analysis.Ngram - renamed NGram in Git
Date Sat, 04 Feb 2017 20:32:55 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
deleted file mode 100644
index 2fc1356..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
+++ /dev/null
@@ -1,303 +0,0 @@
-using Lucene.Net.Analysis.TokenAttributes;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// Tests <seealso cref="NGramTokenizer"/> for correctness.
-    /// </summary>
-    public class NGramTokenizerTest : BaseTokenStreamTestCase
-    {
-        private StringReader input;
-
-        public override void SetUp()
-        {
-            base.SetUp();
-            input = new StringReader("abcde");
-        }
-
-        [Test]
-        public virtual void TestInvalidInput()
-        {
-            bool gotException = false;
-            try
-            {
-                new NGramTokenizer(TEST_VERSION_CURRENT, input, 2, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestInvalidInput2()
-        {
-            bool gotException = false;
-            try
-            {
-                new NGramTokenizer(TEST_VERSION_CURRENT, input, 0, 1);
-            }
-            catch (System.ArgumentException)
-            {
-                gotException = true;
-            }
-            assertTrue(gotException);
-        }
-
-        [Test]
-        public virtual void TestUnigrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1,
1);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" },
new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestBigrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 2,
2);
-            AssertTokenStreamContents(tokenizer, new string[] { "ab", "bc", "cd", "de" },
new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5); // abcde
-        }
-
-        [Test]
-        public virtual void TestNgrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1,
3);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "ab", "abc", "b", "bc",
"bcd", "c", "cd", "cde", "d", "de", "e" }, new int[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4
}, new int[] { 1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 5 }, null, null, null, 5, false); // abcde
-        }
-
-        [Test]
-        public virtual void TestOversizedNgrams()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 6,
7);
-            AssertTokenStreamContents(tokenizer, new string[0], new int[0], new int[0], 5);
// abcde
-        }
-
-        [Test]
-        public virtual void TestReset()
-        {
-            NGramTokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, input, 1,
1);
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" },
new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
-            tokenizer.SetReader(new StringReader("abcde"));
-            AssertTokenStreamContents(tokenizer, new string[] { "a", "b", "c", "d", "e" },
new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5); // abcde
-        }
-
-        /// <summary>
-        /// blast some random strings through the analyzer </summary>
-        [Test]
-        public virtual void TestRandomStrings()
-        {
-            for (int i = 0; i < 10; i++)
-            {
-                int min = TestUtil.NextInt(Random(), 2, 10);
-                int max = TestUtil.NextInt(Random(), min, 20);
-                Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, min, max);
-                CheckRandomData(Random(), a, 200 * RANDOM_MULTIPLIER, 20);
-                CheckRandomData(Random(), a, 10 * RANDOM_MULTIPLIER, 1027);
-            }
-        }
-
-        private class AnalyzerAnonymousInnerClassHelper : Analyzer
-        {
-            private readonly NGramTokenizerTest outerInstance;
-
-            private int min;
-            private int max;
-
-            public AnalyzerAnonymousInnerClassHelper(NGramTokenizerTest outerInstance, int
min, int max)
-            {
-                this.outerInstance = outerInstance;
-                this.min = min;
-                this.max = max;
-            }
-
-            protected internal override TokenStreamComponents CreateComponents(string fieldName,
TextReader reader)
-            {
-                Tokenizer tokenizer = new NGramTokenizer(TEST_VERSION_CURRENT, reader, min,
max);
-                return new TokenStreamComponents(tokenizer, tokenizer);
-            }
-        }
-
-        private static void TestNGrams(int minGram, int maxGram, int length, string nonTokenChars)
-        {
-            //string s = RandomStrings.randomAsciiOfLength(Random(), length);
-            string s = TestUtil.RandomAnalysisString(Random(), length, true);
-            TestNGrams(minGram, maxGram, s, nonTokenChars);
-        }
-
-        private static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars)
-        {
-            TestNGrams(minGram, maxGram, s, nonTokenChars, false);
-        }
-
-        internal static int[] toCodePoints(string s)
-        {
-            int[] codePoints = new int[Character.CodePointCount(s, 0, s.Length)];
-            for (int i = 0, j = 0; i < s.Length; ++j)
-            {
-                codePoints[j] = Character.CodePointAt(s, i);
-                i += Character.CharCount(codePoints[j]);
-            }
-            return codePoints;
-        }
-
-        internal static bool isTokenChar(string nonTokenChars, int codePoint)
-        {
-            for (int i = 0; i < nonTokenChars.Length;)
-            {
-                int cp = char.ConvertToUtf32(nonTokenChars, i);
-                if (cp == codePoint)
-                {
-                    return false;
-                }
-                i += Character.CharCount(cp);
-            }
-            return true;
-        }
-
-        internal static void TestNGrams(int minGram, int maxGram, string s, string nonTokenChars,
bool edgesOnly)
-        {
-            // convert the string to code points
-            int[] codePoints = toCodePoints(s);
-            int[] offsets = new int[codePoints.Length + 1];
-            for (int i = 0; i < codePoints.Length; ++i)
-            {
-                offsets[i + 1] = offsets[i] + Character.CharCount(codePoints[i]);
-            }
-            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT,
new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
-            ICharTermAttribute termAtt = grams.AddAttribute<ICharTermAttribute>();
-            IPositionIncrementAttribute posIncAtt = grams.AddAttribute<IPositionIncrementAttribute>();
-            IPositionLengthAttribute posLenAtt = grams.AddAttribute<IPositionLengthAttribute>();
-            IOffsetAttribute offsetAtt = grams.AddAttribute<IOffsetAttribute>();
-            grams.Reset();
-            for (int start = 0; start < codePoints.Length; ++start)
-            {
-                for (int end = start + minGram; end <= start + maxGram && end
<= codePoints.Length; ++end)
-                {
-                    if (edgesOnly && start > 0 && isTokenChar(nonTokenChars,
codePoints[start - 1]))
-                    {
-                        // not on an edge
-                        goto nextGramContinue;
-                    }
-                    for (int j = start; j < end; ++j)
-                    {
-                        if (!isTokenChar(nonTokenChars, codePoints[j]))
-                        {
-                            goto nextGramContinue;
-                        }
-                    }
-                    assertTrue(grams.IncrementToken());
-                    assertArrayEquals(Arrays.CopyOfRange(codePoints, start, end), toCodePoints(termAtt.ToString()));
-                    assertEquals(1, posIncAtt.PositionIncrement);
-                    assertEquals(1, posLenAtt.PositionLength);
-                    assertEquals(offsets[start], offsetAtt.StartOffset);
-                    assertEquals(offsets[end], offsetAtt.EndOffset);
-                    nextGramContinue:;
-                }
-                //nextGramBreak:;
-            }
-            assertFalse(grams.IncrementToken());
-            grams.End();
-            assertEquals(s.Length, offsetAtt.StartOffset);
-            assertEquals(s.Length, offsetAtt.EndOffset);
-        }
-
-        private class NGramTokenizerAnonymousInnerClassHelper : NGramTokenizer
-        {
-            private string nonTokenChars;
-
-            public NGramTokenizerAnonymousInnerClassHelper(LuceneVersion TEST_VERSION_CURRENT,
StringReader java, int minGram, int maxGram, bool edgesOnly, string nonTokenChars)
-                  : base(TEST_VERSION_CURRENT, java, minGram, maxGram, edgesOnly)
-            {
-                this.nonTokenChars = nonTokenChars;
-            }
-
-            protected override bool IsTokenChar(int chr)
-            {
-                return nonTokenChars.IndexOf((char)chr) < 0;
-            }
-        }
-
-        [Test]
-        public virtual void TestLargeInput()
-        {
-            // test sliding
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024),
"");
-        }
-
-        [Test]
-        public virtual void TestLargeMaxGram()
-        {
-            // test sliding with maxGram > 1024
-            int minGram = TestUtil.NextInt(Random(), 1290, 1300);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 1300);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 3 * 1024, 4 * 1024),
"");
-        }
-
-        [Test]
-        public virtual void TestPreTokenization()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "a");
-        }
-
-        [Test]
-        public virtual void TestHeavyPreTokenization()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            TestNGrams(minGram, maxGram, TestUtil.NextInt(Random(), 0, 4 * 1024), "abcdef");
-        }
-
-        [Test]
-        public virtual void TestFewTokenChars()
-        {
-            char[] chrs = new char[TestUtil.NextInt(Random(), 4000, 5000)];
-            Arrays.Fill(chrs, ' ');
-            for (int i = 0; i < chrs.Length; ++i)
-            {
-                if (Random().NextDouble() < 0.1)
-                {
-                    chrs[i] = 'a';
-                }
-            }
-            int minGram = TestUtil.NextInt(Random(), 1, 2);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 2);
-            TestNGrams(minGram, maxGram, new string(chrs), " ");
-        }
-
-        [Test]
-        public virtual void TestFullUTF8Range()
-        {
-            int minGram = TestUtil.NextInt(Random(), 1, 100);
-            int maxGram = TestUtil.NextInt(Random(), minGram, 100);
-            string s = TestUtil.RandomUnicodeString(Random(), 4 * 1024);
-            TestNGrams(minGram, maxGram, s, "");
-            TestNGrams(minGram, maxGram, s, "abcdef");
-        }
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ab81d913/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
deleted file mode 100644
index c0683a6..0000000
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/TestNGramFilters.cs
+++ /dev/null
@@ -1,196 +0,0 @@
-using Lucene.Net.Analysis.Util;
-using Lucene.Net.Util;
-using NUnit.Framework;
-using System.IO;
-using Reader = System.IO.TextReader;
-
-namespace Lucene.Net.Analysis.NGram
-{
-    /*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
-
-    /// <summary>
-    /// Simple tests to ensure the NGram filter factories are working.
-    /// </summary>
-    public class TestNGramFilters : BaseTokenStreamFactoryTestCase
-    {
-        /// <summary>
-        /// Test NGramTokenizerFactory
-        /// </summary>
-        [Test]
-        public virtual void TestNGramTokenizer()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("NGram").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st",
"t" });
-        }
-
-        /// <summary>
-        /// Test NGramTokenizerFactory with min and max gram options
-        /// </summary>
-        [Test]
-        public virtual void TestNGramTokenizer2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("NGram", "minGramSize", "2", "maxGramSize",
"3").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st"
});
-        }
-
-        /// <summary>
-        /// Test the NGramFilterFactory
-        /// </summary>
-        [Test]
-        public virtual void TestNGramFilter()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("NGram").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "t", "te", "e", "es", "s", "st",
"t" });
-        }
-
-        /// <summary>
-        /// Test the NGramFilterFactory with min and max gram options
-        /// </summary>
-        [Test]
-        public virtual void TestNGramFilter2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("NGram", "minGramSize", "2", "maxGramSize", "3").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "te", "tes", "es", "est", "st"
});
-        }
-
-        /// <summary>
-        /// Test EdgeNGramTokenizerFactory
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramTokenizer()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("EdgeNGram").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "t" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramTokenizerFactory with min and max gram size
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramTokenizer2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = TokenizerFactory("EdgeNGram", "minGramSize", "1", "maxGramSize",
"2").Create(reader);
-            AssertTokenStreamContents(stream, new string[] { "t", "te" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramTokenizerFactory with side option
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramTokenizer3()
-        {
-            Reader reader = new StringReader("ready");
-#pragma warning disable 612, 618
-            TokenStream stream = TokenizerFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side",
"back").Create(reader);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(stream, new string[] { "y" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramFilterFactory
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramFilter()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("EdgeNGram").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "t" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramFilterFactory with min and max gram size
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramFilter2()
-        {
-            Reader reader = new StringReader("test");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-            stream = TokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").Create(stream);
-            AssertTokenStreamContents(stream, new string[] { "t", "te" });
-        }
-
-        /// <summary>
-        /// Test EdgeNGramFilterFactory with side option
-        /// </summary>
-        [Test]
-        public virtual void TestEdgeNGramFilter3()
-        {
-            Reader reader = new StringReader("ready");
-            TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-#pragma warning disable 612, 618
-            stream = TokenFilterFactory("EdgeNGram", LuceneVersion.LUCENE_43, "side", "back").Create(stream);
-#pragma warning restore 612, 618
-            AssertTokenStreamContents(stream, new string[] { "y" });
-        }
-
-        /// <summary>
-        /// Test that bogus arguments result in exception </summary>
-        [Test]
-        public virtual void TestBogusArguments()
-        {
-            try
-            {
-                TokenizerFactory("NGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-
-            try
-            {
-                TokenizerFactory("EdgeNGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-
-            try
-            {
-                TokenFilterFactory("NGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-
-            try
-            {
-                TokenFilterFactory("EdgeNGram", "bogusArg", "bogusValue");
-                fail();
-            }
-            catch (System.ArgumentException expected)
-            {
-                assertTrue(expected.Message.Contains("Unknown parameters"));
-            }
-        }
-    }
-}
\ No newline at end of file


Mime
View raw message