lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From si...@apache.org
Subject svn commit: r1347076 [9/9] - in /incubator/lucene.net/trunk: src/contrib/Analyzers/ src/contrib/Analyzers/Hunspell/ test/contrib/Analyzers/ test/contrib/Analyzers/Hunspell/ test/contrib/Analyzers/Hunspell/Dictionaries/
Date Wed, 06 Jun 2012 19:46:00 GMT
Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/nl_NL.dic
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,44 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis.Hunspell;
+
+namespace Lucene.Net.Analyzers.Hunspell {
+    public static class HunspellDictionaryLoader {
+        public static Stream Stream(String contentName) {
+            var resourceName = "Lucene.Net.Analyzers.Hunspell.Dictionaries." + contentName;
+
+            var stream = typeof(HunspellDictionaryLoader).Assembly.GetManifestResourceStream(resourceName);
+            if (stream == null)
+                throw new ArgumentException(String.Format("Failed to read resource '{0}'", resourceName));
+
+            return stream;
+        }
+
+        public static HunspellDictionary Dictionary(String baseName) {
+            using (var affixStream = Stream(baseName + ".aff"))
+            using (var dictStream = Stream(baseName + ".dic"))
+                return new HunspellDictionary(affixStream, dictStream);
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,42 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Linq;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Hunspell {
+    [TestFixture]
+    public class TestHunspellDictionary {
+        [Test(Description = "en_US affix and dict files are loaded without error, with 2 suffixes for 'ings' being loaded, 2 prefixes for 'in' and 1 word for 'drink' ")]
+        public void TestHunspellDictionary_LoadEnUSDict() {
+            var dictionary = HunspellDictionaryLoader.Dictionary("en_US");
+
+            Assert.AreEqual(2, dictionary.LookupSuffix(new[] { 'i', 'n', 'g', 's' }, 0, 4).Count());
+            Assert.AreEqual(1, dictionary.LookupPrefix(new[] { 'i', 'n' }, 0, 2).Count());
+            Assert.AreEqual(1, dictionary.LookupWord("drink").Count());
+        }
+
+        [Test(Description = "fr-moderne affix and dict files are loaded without error")]
+        public void TestHunspellDictionary_LoadFrModerneDict() {
+            Assert.DoesNotThrow(() => HunspellDictionaryLoader.Dictionary("fr-moderne"));
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,92 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Hunspell;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Test.Analysis;
+using NUnit.Framework;
+using LuceneVersion = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analyzers.Hunspell {
+    [TestFixture]
+    public class TestHunspellStemFilter : BaseTokenStreamTestCase {
+        private class DutchAnalyzer : Analyzer {
+            private readonly HunspellDictionary _dictionary;
+
+            public DutchAnalyzer() {
+                _dictionary = HunspellDictionaryLoader.Dictionary("nl_NL");
+            }
+
+            public override TokenStream TokenStream(String fieldName, TextReader reader) {
+                TokenStream stream = new StandardTokenizer(LuceneVersion.LUCENE_29, reader);
+                stream = new LowerCaseFilter(stream);
+                stream = new HunspellStemFilter(stream, _dictionary);
+                return stream;
+            }
+
+            public override TokenStream ReusableTokenStream(string fieldName, TextReader reader) {
+                var streams = (SavedStreams)PreviousTokenStream;
+                if (streams == null) {
+                    streams = new SavedStreams();
+                    streams.Tokenizer = new StandardTokenizer(LuceneVersion.LUCENE_29, reader);
+                    streams.Filter = new HunspellStemFilter(new LowerCaseFilter(streams.Tokenizer), _dictionary);
+                    PreviousTokenStream = streams;
+                } else {
+                    streams.Tokenizer.Reset(reader);
+                    streams.Filter.Reset();
+                }
+
+                return streams.Filter;
+            }
+
+            #region Nested type: SavedStreams
+
+            private class SavedStreams {
+                public Tokenizer Tokenizer { get; set; }
+
+                public TokenStream Filter { get; set; }
+            }
+
+            #endregion
+        };
+
+        private readonly DutchAnalyzer _dutchAnalyzer = new DutchAnalyzer();
+
+        [Test]
+        public void TestDutch() {
+            AssertAnalyzesTo(_dutchAnalyzer, "huizen",
+                new[] { "huizen", "huis" },
+                new[] { 1, 0 });
+            AssertAnalyzesTo(_dutchAnalyzer, "huis",
+                new[] { "huis", "hui" },
+                new[] { 1, 0 });
+            AssertAnalyzesToReuse(_dutchAnalyzer, "huizen huis",
+                new[] { "huizen", "huis", "huis", "hui" },
+                new[] { 1, 0, 1, 0 });
+            AssertAnalyzesToReuse(_dutchAnalyzer, "huis huizen",
+                new[] { "huis", "hui", "huizen", "huis" },
+                new[] { 1, 0, 1, 0 });
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,96 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System.Linq;
+using Lucene.Net.Analysis.Hunspell;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analyzers.Hunspell {
+    [TestFixture]
+    public class TestHunspellStemmer {
+        [Test(Description = "Word 'drinkable' should be stemmed to 'drink' with the suffix 'able' being stripped")]
+        public void TestStem_SimpleSuffix_EnUS() {
+            var dictionary = HunspellDictionaryLoader.Dictionary("en_US");
+
+            var stemmer = new HunspellStemmer(dictionary);
+            var stems = stemmer.Stem("drinkable").ToList();
+
+            Assert.AreEqual(2, stems.Count);
+            Assert.AreEqual("drinkable", stems[0].Stem);
+            Assert.AreEqual("drink", stems[1].Stem);
+        }
+
+        [Test(Description = "Word 'remove' should be stemmed to 'move' with the prefix 're' being stripped")]
+        public void TestStem_SimplePrefix_EnUS() {
+            var dictionary = HunspellDictionaryLoader.Dictionary("en_US");
+
+            var stemmer = new HunspellStemmer(dictionary);
+            var stems = stemmer.Stem("remove").ToList();
+
+            Assert.AreEqual(1, stems.Count);
+            Assert.AreEqual("move", stems[0].Stem);
+        }
+
+        [Test(Description = "Word 'drinkables' should be stemmed to 'drink' with the suffixes 's' and 'able' being removed recursively")]
+        public void TestStem_RecursiveSuffix_EnUS() {
+            var dictionary = HunspellDictionaryLoader.Dictionary("en_US");
+
+            var stemmer = new HunspellStemmer(dictionary);
+            var stems = stemmer.Stem("drinkables").ToList();
+
+            Assert.AreEqual(1, stems.Count);
+            Assert.AreEqual("drink", stems[0].Stem);
+        }
+
+        [Test(Description = "Word 'fietsen' should be stemmed to 'fiets' ('en' suffix stripped) while fiets should be stemmed to itself")]
+        public void TestStem_fietsenFiets_NlNL() {
+            var dictionary = HunspellDictionaryLoader.Dictionary("nl_NL");
+
+            var stemmer = new HunspellStemmer(dictionary);
+            var stems = stemmer.Stem("fietsen").ToList();
+
+            Assert.AreEqual(2, stems.Count);
+            Assert.AreEqual("fietsen", stems[0].Stem);
+            Assert.AreEqual("fiets", stems[1].Stem);
+
+            stems = stemmer.Stem("fiets").ToList();
+            Assert.AreEqual(1, stems.Count);
+            Assert.AreEqual("fiets", stems[0].Stem);
+        }
+
+        [Test(Description = "Word 'huizen' should be stemmed to 'huis' ('en' suffix stripped) while huis should be stemmed to huis and hui")]
+        public void TestStem_huizenHuis_NlNL() {
+            var dictionary = HunspellDictionaryLoader.Dictionary("nl_NL");
+
+            var stemmer = new HunspellStemmer(dictionary);
+            var stems = stemmer.Stem("huizen").ToList();
+
+            Assert.AreEqual(2, stems.Count);
+            Assert.AreEqual("huizen", stems[0].Stem);
+            Assert.AreEqual("huis", stems[1].Stem);
+
+            stems = stemmer.Stem("huis").ToList();
+            Assert.AreEqual(2, stems.Count);
+            Assert.AreEqual("huis", stems[0].Stem);
+            Assert.AreEqual("hui", stems[1].Stem);
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message