lucenenet-commits mailing list archives

From d...@apache.org
Subject [Lucene.Net] svn commit: r1147679 [2/2] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers: ./ Filters/ Miscellaneous/ Properties/ Shingle/
Date Sun, 17 Jul 2011 16:32:30 GMT
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs?rev=1147679&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs
(added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Shingle/TestShingleMatrixFilter.cs
Sun Jul 17 16:32:29 2011
@@ -0,0 +1,594 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Miscellaneous;
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Analysis.Shingle.Codec;
+using Lucene.Net.Analysis.Shingle.Matrix;
+using NUnit.Framework;
+using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
+
+namespace Lucene.Net.Analysis.Shingle
+{
+    public class TestShingleMatrixFilter : BaseTokenStreamTestCase
+    {
+        public TestShingleMatrixFilter() : this(typeof (TestShingleMatrixFilter).Name)
+        {
+        }
+
+        // use this ctor, because SingleTokenTokenStream only uses next(Token), so exclude it
+        public TestShingleMatrixFilter(String name) :
+            base(
+            name,
+            new Hashtable(
+                new Dictionary<string, string[]>
+                    {
+                        {
+                            "TestShingleMatrixFilter",
+                            new[]
+                                {
+                                    "testBehavingAsShingleFilter",
+                                    "testMatrix",
+                                    "testIterator"
+                                }
+                            }
+                    }
+                ))
+        {
+        }
+
+        [Test]
+        public void TestIterator()
+        {
+            var wst = new WhitespaceTokenizer(new StringReader("one two three four five"));
+            var smf = new ShingleMatrixFilter(wst, 2, 2, '_', false,
+                                              new OneDimensionalNonWeightedTokenSettingsCodec());
+
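+            // five whitespace-separated input tokens with min/max shingle size 2 should yield four bi-gram shingles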
+            int i;
+            for (i = 0; smf.IncrementToken(); i++) { }
+
+            Assert.AreEqual(4, i);
+
+            // call next once more. this should return false again rather than throwing an exception (LUCENE-1939)
+            Assert.IsFalse(smf.IncrementToken());
+
+            //System.DateTime.Now;
+        }
+
+        [Test]
+        public void TestBehavingAsShingleFilter()
+        {
+            ShingleMatrixFilter.DefaultSettingsCodec = null;
+
+            TokenStream ts = new ShingleMatrixFilter(new EmptyTokenStream(), 1, 2, ' ', false,
+                                                     new OneDimensionalNonWeightedTokenSettingsCodec());
+            Assert.IsFalse(ts.IncrementToken());
+
+            // test a plain old token stream with synonyms translated to rows.
+
+            var tokens = new LinkedList<Token>();
+            tokens.AddLast(CreateToken("please", 0, 6));
+            tokens.AddLast(CreateToken("divide", 7, 13));
+            tokens.AddLast(CreateToken("this", 14, 18));
+            tokens.AddLast(CreateToken("sentence", 19, 27));
+            tokens.AddLast(CreateToken("into", 28, 32));
+            tokens.AddLast(CreateToken("shingles", 33, 39));
+
+            var tls = new TokenListStream(tokens);
+
+            // bi-grams
+
+            ts = new ShingleMatrixFilter(tls, 1, 2, ' ', false, new OneDimensionalNonWeightedTokenSettingsCodec());
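+            // with a minimum shingle size of 1, unigrams are emitted interleaved with the bi-grams, as asserted below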
+
+            //for (Token token = ts.Next(new Token()); token != null; token = ts.Next(token))
+            //{
+            //    Console.Out.WriteLine("AssertNext(ts, \"" + token.Term() + "\", " + token.GetPositionIncrement() + ", " + (token.GetPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.GetPayload().GetData()).ToString()) + "f, " + token.StartOffset() + ", " + token.EndOffset() + ");");
+            //    token.Clear();
+            //}
+
+            AssertTokenStreamContents(ts,
+                                      new[]
+                                          {
+                                              "please", "please divide", "divide", "divide this",
+                                              "this", "this sentence", "sentence", "sentence into", "into",
+                                              "into shingles", "shingles"
+                                          },
+                                      new[] {0, 0, 7, 7, 14, 14, 19, 19, 28, 28, 33},
+                                      new[] {6, 13, 13, 18, 18, 27, 27, 32, 32, 39, 39});
+        }
+
+
+        /// <summary>
+        /// Extracts a matrix from a token stream.
+        /// </summary>
+        [Test]
+        public void TestTokenStream()
+        {
+            ShingleMatrixFilter.DefaultSettingsCodec = null;
+            //new ShingleMatrixFilter.SimpleThreeDimensionalTokenSettingsCodec();
+
+            // test a plain old token stream with synonyms translated to rows.
+
+            var tokens = new LinkedList<Token>();
+            tokens.AddLast(TokenFactory("hello", 1, 0, 4));
+            tokens.AddLast(TokenFactory("greetings", 0, 0, 4));
+            tokens.AddLast(TokenFactory("world", 1, 5, 10));
+            tokens.AddLast(TokenFactory("earth", 0, 5, 10));
+            tokens.AddLast(TokenFactory("tellus", 0, 5, 10));
+
+            TokenStream tls = new TokenListStream(tokens);
+
+            // bi-grams
+
+            TokenStream ts = new ShingleMatrixFilter(tls, 2, 2, '_', false,
+                                                     new TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
+
+            AssertNext(ts, "hello_world");
+            AssertNext(ts, "greetings_world");
+            AssertNext(ts, "hello_earth");
+            AssertNext(ts, "greetings_earth");
+            AssertNext(ts, "hello_tellus");
+            AssertNext(ts, "greetings_tellus");
+            Assert.IsFalse(ts.IncrementToken());
+
+            // bi-grams with no spacer character, start offset, end offset
+
+            tls.Reset();
+            ts = new ShingleMatrixFilter(tls, 2, 2, null, false,
+                                         new TwoDimensionalNonWeightedSynonymTokenSettingsCodec());
+            AssertNext(ts, "helloworld", 0, 10);
+            AssertNext(ts, "greetingsworld", 0, 10);
+            AssertNext(ts, "helloearth", 0, 10);
+            AssertNext(ts, "greetingsearth", 0, 10);
+            AssertNext(ts, "hellotellus", 0, 10);
+            AssertNext(ts, "greetingstellus", 0, 10);
+            Assert.IsFalse(ts.IncrementToken());
+
+
+            // add ^_prefix_and_suffix_$
+            //
+            // using 3d codec as it supports weights
+
+            ShingleMatrixFilter.DefaultSettingsCodec =
+                new SimpleThreeDimensionalTokenSettingsCodec();
+
+            tokens = new LinkedList<Token>();
+            tokens.AddLast(TokenFactory("hello", 1, 1f, 0, 4, TokenPositioner.NewColumn));
+            tokens.AddLast(TokenFactory("greetings", 0, 1f, 0, 4, TokenPositioner.NewRow));
+            tokens.AddLast(TokenFactory("world", 1, 1f, 5, 10, TokenPositioner.NewColumn));
+            tokens.AddLast(TokenFactory("earth", 0, 1f, 5, 10, TokenPositioner.NewRow));
+            tokens.AddLast(TokenFactory("tellus", 0, 1f, 5, 10, TokenPositioner.NewRow));
+
+            tls = new TokenListStream(tokens);
+
+            // bi-grams, position increment, weight, start offset, end offset
+
+            ts = new PrefixAndSuffixAwareTokenFilter(
+                new SingleTokenTokenStream(TokenFactory("^", 1, 100f, 0, 0)),
+                tls,
+                new SingleTokenTokenStream(TokenFactory("$", 1, 50f, 0, 0))
+                );
+            tls = new CachingTokenFilter(ts);
+
+            ts = new ShingleMatrixFilter(tls, 2, 2, '_', false);
+
+            //for (Token token = ts.Next(new Token()); token != null; token = ts.Next(token)) {
+            //    Console.Out.WriteLine("AssertNext(ts, \"" + token.Term() + "\", " + token.GetPositionIncrement() + ", " + (token.GetPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.GetPayload().GetData()).ToString()) + "f, " + token.StartOffset() + ", " + token.EndOffset() + ");");
+            //    token.Clear();
+            //}
+
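+            // the expected payloads below appear to follow sqrt(sum of token weights), e.g. "^_hello" = sqrt(100 + 1) ≈ 10.049875, since the "^" prefix token was created with weight 100f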
+            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
+            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
+            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
+            Assert.IsFalse(ts.IncrementToken());
+
+            // test unlimited size and allow single boundary token as shingle
+            tls.Reset();
+
+            ts = new ShingleMatrixFilter(tls, 1, Int32.MaxValue, '_', false);
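+            // with the last constructor argument false, the lone boundary tokens "^" and "$" are also emitted as single-token shingles (compare the run further down where it is true)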
+
+
+            //for (Token token = ts.Next(new Token()); token != null; token = ts.Next(token))
+            //{
+            //    Console.Out.WriteLine("AssertNext(ts, \"" + token.Term() + "\", " + token.GetPositionIncrement() + ", " + (token.GetPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.GetPayload().GetData()).ToString()) + "f, " + token.StartOffset() + ", " + token.EndOffset() + ");");
+            //    token.Clear();
+            //}
+
+            AssertNext(ts, "^", 1, 10.0f, 0, 0);
+            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
+            AssertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "hello", 1, 1.0f, 0, 4);
+            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "world", 1, 1.0f, 5, 10);
+            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "$", 1, 7.071068f, 10, 10);
+            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
+            AssertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "greetings", 1, 1.0f, 0, 4);
+            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "earth", 1, 1.0f, 5, 10);
+            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "tellus", 1, 1.0f, 5, 10);
+            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);
+
+            Assert.IsFalse(ts.IncrementToken());
+
+            // test unlimited size but don't allow single boundary token as shingle
+
+            tls.Reset();
+            ts = new ShingleMatrixFilter(tls, 1, Int32.MaxValue, '_', true);
+
+            //  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+            //      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+            //      token.clear();
+            //    }
+
+            AssertNext(ts, "^_hello", 1, 10.049875f, 0, 4);
+            AssertNext(ts, "^_hello_world", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_hello_world_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "hello", 1, 1.0f, 0, 4);
+            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_world_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "world", 1, 1.0f, 5, 10);
+            AssertNext(ts, "world_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "^_greetings", 1, 10.049875f, 0, 4);
+            AssertNext(ts, "^_greetings_world", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_greetings_world_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "greetings", 1, 1.0f, 0, 4);
+            AssertNext(ts, "greetings_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_world_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "^_hello_earth", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_hello_earth_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_earth_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "earth", 1, 1.0f, 5, 10);
+            AssertNext(ts, "earth_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "^_greetings_earth", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_greetings_earth_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "greetings_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_earth_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "^_hello_tellus", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_hello_tellus_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_tellus_$", 1, 7.2111025f, 0, 10);
+            AssertNext(ts, "tellus", 1, 1.0f, 5, 10);
+            AssertNext(ts, "tellus_$", 1, 7.1414285f, 5, 10);
+            AssertNext(ts, "^_greetings_tellus", 1, 10.099504f, 0, 10);
+            AssertNext(ts, "^_greetings_tellus_$", 1, 12.328828f, 0, 10);
+            AssertNext(ts, "greetings_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_tellus_$", 1, 7.2111025f, 0, 10);
+
+
+            Assert.IsFalse(ts.IncrementToken());
+
+            //System.currentTimeMillis();
+
+            // multi-token synonyms
+            //
+            // Token[][][] {
+            //    {{hello}, {greetings, and, salutations},
+            //    {{world}, {earth}, {tellus}}
+            // }
+            //
+
+
+            tokens = new LinkedList<Token>();
+            tokens.AddLast(TokenFactory("hello", 1, 1f, 0, 4, TokenPositioner.NewColumn));
+            tokens.AddLast(TokenFactory("greetings", 1, 1f, 0, 4, TokenPositioner.NewRow));
+            tokens.AddLast(TokenFactory("and", 1, 1f, 0, 4, TokenPositioner.SameRow));
+            tokens.AddLast(TokenFactory("salutations", 1, 1f, 0, 4, TokenPositioner.SameRow));
+            tokens.AddLast(TokenFactory("world", 1, 1f, 5, 10, TokenPositioner.NewColumn));
+            tokens.AddLast(TokenFactory("earth", 1, 1f, 5, 10, TokenPositioner.NewRow));
+            tokens.AddLast(TokenFactory("tellus", 1, 1f, 5, 10, TokenPositioner.NewRow));
+
+            tls = new TokenListStream(tokens);
+
+            // 2-3 grams
+
+            ts = new ShingleMatrixFilter(tls, 2, 3, '_', false);
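+            // the multi-token synonym "greetings and salutations" shares a column with "hello", so the 2- and 3-gram paths step through it token by token (e.g. "greetings_and_salutations" below)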
+
+            //  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+            //      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+            //      token.clear();
+            //    }
+
+            // shingle, position increment, weight, start offset, end offset
+
+            AssertNext(ts, "hello_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "greetings_and", 1, 1.4142135f, 0, 4);
+            AssertNext(ts, "greetings_and_salutations", 1, 1.7320508f, 0, 4);
+            AssertNext(ts, "and_salutations", 1, 1.4142135f, 0, 4);
+            AssertNext(ts, "and_salutations_world", 1, 1.7320508f, 0, 10);
+            AssertNext(ts, "salutations_world", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "and_salutations_earth", 1, 1.7320508f, 0, 10);
+            AssertNext(ts, "salutations_earth", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "hello_tellus", 1, 1.4142135f, 0, 10);
+            AssertNext(ts, "and_salutations_tellus", 1, 1.7320508f, 0, 10);
+            AssertNext(ts, "salutations_tellus", 1, 1.4142135f, 0, 10);
+
+            Assert.IsFalse(ts.IncrementToken());
+
+            //System.currentTimeMillis();
+        }
+
+        /// <summary>
+        /// Tests creating shingles from a pre-assembled matrix.
+        /// 
+        /// Tests the row token z-axis, i.e. multi-token synonyms.
+        /// </summary>
+        [Test]
+        public void TestMatrix()
+        {
+            // some other tests set this to null.
+            // set it here in case tests are run out of the usual order.
+            ShingleMatrixFilter.DefaultSettingsCodec = new SimpleThreeDimensionalTokenSettingsCodec();
+
+            var matrix = new Matrix.Matrix();
+
+            new Column(TokenFactory("no", 1), matrix);
+            new Column(TokenFactory("surprise", 1), matrix);
+            new Column(TokenFactory("to", 1), matrix);
+            new Column(TokenFactory("see", 1), matrix);
+            new Column(TokenFactory("england", 1), matrix);
+            new Column(TokenFactory("manager", 1), matrix);
+
+            var col = new Column(matrix);
+
+            // sven göran eriksson is a multi-token synonym for svennis
+            new Row(col).Tokens.AddLast(TokenFactory("svennis", 1));
+
+            var row = new Row(col);
+            row.Tokens.AddLast(TokenFactory("sven", 1));
+            row.Tokens.AddLast(TokenFactory("göran", 1));
+            row.Tokens.AddLast(TokenFactory("eriksson", 1));
+
+            new Column(TokenFactory("in", 1), matrix);
+            new Column(TokenFactory("the", 1), matrix);
+            new Column(TokenFactory("croud", 1), matrix);
+
+            TokenStream ts = new ShingleMatrixFilter(matrix, 2, 4, '_', true,
+                                                     new SimpleThreeDimensionalTokenSettingsCodec());
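+            // shingle sizes 2..4 over the pre-built matrix; the synonym column contributes both the "svennis" row and the "sven göran eriksson" row to the shingle paths asserted below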
+
+            //  for (Token token = ts.next(new Token()); token != null; token = ts.next(token)) {
+            //      System.out.println("assertNext(ts, \"" + token.term() + "\", " + token.getPositionIncrement() + ", " + (token.getPayload() == null ? "1.0" : PayloadHelper.decodeFloat(token.getPayload().getData())) + "f, " + token.startOffset() + ", " + token.endOffset() + ");");
+            //      token.clear();
+            //    }
+
+            AssertNext(ts, "no_surprise", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "no_surprise_to", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "no_surprise_to_see", 1, 2.0f, 0, 0);
+            AssertNext(ts, "surprise_to", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "surprise_to_see", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "surprise_to_see_england", 1, 2.0f, 0, 0);
+            AssertNext(ts, "to_see", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "to_see_england", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "to_see_england_manager", 1, 2.0f, 0, 0);
+            AssertNext(ts, "see_england", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "see_england_manager", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "see_england_manager_svennis", 1, 2.0f, 0, 0);
+            AssertNext(ts, "england_manager", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "england_manager_svennis", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "england_manager_svennis_in", 1, 2.0f, 0, 0);
+            AssertNext(ts, "manager_svennis", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "manager_svennis_in", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "manager_svennis_in_the", 1, 2.0f, 0, 0);
+            AssertNext(ts, "svennis_in", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "svennis_in_the", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "svennis_in_the_croud", 1, 2.0f, 0, 0);
+            AssertNext(ts, "in_the", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "in_the_croud", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "the_croud", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "see_england_manager_sven", 1, 2.0f, 0, 0);
+            AssertNext(ts, "england_manager_sven", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "england_manager_sven_göran", 1, 2.0f, 0, 0);
+            AssertNext(ts, "manager_sven", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "manager_sven_göran", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "manager_sven_göran_eriksson", 1, 2.0f, 0, 0);
+            AssertNext(ts, "sven_göran", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "sven_göran_eriksson", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "sven_göran_eriksson_in", 1, 2.0f, 0, 0);
+            AssertNext(ts, "göran_eriksson", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "göran_eriksson_in", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "göran_eriksson_in_the", 1, 2.0f, 0, 0);
+            AssertNext(ts, "eriksson_in", 1, 1.4142135f, 0, 0);
+            AssertNext(ts, "eriksson_in_the", 1, 1.7320508f, 0, 0);
+            AssertNext(ts, "eriksson_in_the_croud", 1, 2.0f, 0, 0);
+
+            Assert.IsFalse(ts.IncrementToken());
+        }
+
+        private static Token TokenFactory(String text, int posIncr, int startOffset, int endOffset)
+        {
+            var token = new Token(startOffset, endOffset);
+
+            token.SetTermBuffer(text);
+            token.SetPositionIncrement(posIncr);
+
+            return token;
+        }
+        
+        private static Token TokenFactory(String text, int posIncr)
+        {
+            return TokenFactory(text, posIncr, 1f, 0, 0);
+        }
+
+        private static Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset)
+        {
+            var token = new Token(startOffset, endOffset);
+
+            token.SetTermBuffer(text);
+            token.SetPositionIncrement(posIncr);
+
+            ShingleMatrixFilter.DefaultSettingsCodec.SetWeight(token, weight);
+
+            return token;
+        }
+
+        private static Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset,
+                                          TokenPositioner positioner)
+        {
+            var token = new Token(startOffset, endOffset);
+
+            token.SetTermBuffer(text);
+            token.SetPositionIncrement(posIncr);
+
+            ShingleMatrixFilter.DefaultSettingsCodec.SetWeight(token, weight);
+            ShingleMatrixFilter.DefaultSettingsCodec.SetTokenPositioner(token, positioner);
+
+            return token;
+        }
+
+        // assert-methods start here
+
+        private static void AssertNext(TokenStream ts, String text)
+        {
+            var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));
+
+            Assert.IsTrue(ts.IncrementToken());
+            Assert.AreEqual(text, termAtt.Term());
+        }
+
+        private static void AssertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset)
+        {
+            var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));
+            var posIncrAtt = (PositionIncrementAttribute) ts.AddAttribute(typeof (PositionIncrementAttribute));
+            var payloadAtt = (PayloadAttribute) ts.AddAttribute(typeof (PayloadAttribute));
+            var offsetAtt = (OffsetAttribute) ts.AddAttribute(typeof (OffsetAttribute));
+
+            Assert.IsTrue(ts.IncrementToken());
+            Assert.AreEqual(text, termAtt.Term());
+            Assert.AreEqual(positionIncrement, posIncrAtt.GetPositionIncrement());
+            Assert.AreEqual(boost,
+                            payloadAtt.GetPayload() == null
+                                ? 1f
+                                : PayloadHelper.DecodeFloat(payloadAtt.GetPayload().GetData()), 0);
+            Assert.AreEqual(startOffset, offsetAtt.StartOffset());
+            Assert.AreEqual(endOffset, offsetAtt.EndOffset());
+        }
+
+        private static void AssertNext(TokenStream ts, String text, int startOffset, int endOffset)
+        {
+            var termAtt = (TermAttribute) ts.AddAttribute(typeof (TermAttribute));
+            var offsetAtt = (OffsetAttribute) ts.AddAttribute(typeof (OffsetAttribute));
+
+            Assert.IsTrue(ts.IncrementToken());
+            Assert.AreEqual(text, termAtt.Term());
+            Assert.AreEqual(startOffset, offsetAtt.StartOffset());
+            Assert.AreEqual(endOffset, offsetAtt.EndOffset());
+        }
+
+        private static Token CreateToken(String term, int start, int offset)
+        {
+            var token = new Token(start, offset);
+            token.SetTermBuffer(term);
+            return token;
+        }
+
+        #region Nested type: TokenListStream
+
+        public sealed class TokenListStream : TokenStream
+        {
+            private readonly FlagsAttribute _flagsAtt;
+            private readonly OffsetAttribute _offsetAtt;
+            private readonly PayloadAttribute _payloadAtt;
+            private readonly PositionIncrementAttribute _posIncrAtt;
+            private readonly TermAttribute _termAtt;
+            private readonly ICollection<Token> _tokens;
+            private readonly TypeAttribute _typeAtt;
+
+            private IEnumerator<Token> _iterator;
+
+            public TokenListStream(ICollection<Token> tokens)
+            {
+                _tokens = tokens;
+                _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
+                _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
+                _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
+                _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
+                _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
+                _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+            }
+
+            public override bool IncrementToken()
+            {
+                if (_iterator == null)
+                    _iterator = _tokens.GetEnumerator();
+
+                if (!_iterator.MoveNext())
+                    return false;
+
+                Token prototype = _iterator.Current;
+
+                ClearAttributes();
+
+                _termAtt.SetTermBuffer(prototype.TermBuffer(), 0, prototype.TermLength());
+                _posIncrAtt.SetPositionIncrement(prototype.GetPositionIncrement());
+                _flagsAtt.SetFlags(prototype.GetFlags());
+                _offsetAtt.SetOffset(prototype.StartOffset(), prototype.EndOffset());
+                _typeAtt.SetType(prototype.Type());
+                _payloadAtt.SetPayload(prototype.GetPayload());
+
+                return true;
+            }
+
+
+            public override void Reset()
+            {
+                _iterator = null;
+            }
+        }
+
+        #endregion
+    }
+}
\ No newline at end of file


