lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject [Lucene.Net] svn commit: r1147678 [1/2] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers: ./ AR/ Filters/ Miscellaneous/ Payloads/ Properties/ Shingle/ Shingle/Codec/ Shingle/Matrix/
Date Sun, 17 Jul 2011 16:31:31 GMT
Author: digy
Date: Sun Jul 17 16:31:29 2011
New Revision: 1147678

URL: http://svn.apache.org/viewvc?rev=1147678&view=rev
Log:
[LUCENENET-437] for 2.9.4g

Added:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/TokenPositioner.cs
Removed:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Filters/
Modified:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs?rev=1147678&r1=1147677&r2=1147678&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs Sun Jul 17 16:31:29 2011
@@ -80,7 +80,7 @@ namespace Lucene.Net.Analysis.AR
         {
             this.matchVersion = matchVersion;
 
-            using (StreamReader reader = new StreamReader(System.Reflection.Assembly.GetAssembly(this.GetType()).GetManifestResourceStream("Lucene.Net.Analyzers.AR." + DEFAULT_STOPWORD_FILE)))
+            using (StreamReader reader = new StreamReader(System.Reflection.Assembly.GetAssembly(this.GetType()).GetManifestResourceStream("Lucene.Net.Analysis.AR." + DEFAULT_STOPWORD_FILE)))
             {
                 while (!reader.EndOfStream)
                 {

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1147678&r1=1147677&r2=1147678&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj Sun Jul 17 16:31:29 2011
@@ -8,7 +8,7 @@
     <ProjectGuid>{4286E961-9143-4821-B46D-3D39D3736386}</ProjectGuid>
     <OutputType>Library</OutputType>
     <AppDesignerFolder>Properties</AppDesignerFolder>
-    <RootNamespace>Lucene.Net.Analyzers</RootNamespace>
+    <RootNamespace>Lucene.Net.Analysis</RootNamespace>
     <AssemblyName>Lucene.Net.Analyzers</AssemblyName>
     <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
     <FileAlignment>512</FileAlignment>
@@ -22,7 +22,7 @@
     <DebugSymbols>true</DebugSymbols>
     <DebugType>full</DebugType>
     <Optimize>false</Optimize>
-    <OutputPath>..\..\..\bin\contrib\Analyzers\Debug\</OutputPath>
+    <OutputPath>..\..\..\bin\contrib\Analyzers\</OutputPath>
     <DefineConstants>DEBUG;TRACE</DefineConstants>
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
@@ -60,10 +60,15 @@
     <Compile Include="De\GermanStemFilter.cs" />
     <Compile Include="De\GermanStemmer.cs" />
     <Compile Include="De\WordlistLoader.cs" />
-    <Compile Include="Filters\ChainedFilter.cs" />
+    <Compile Include="Miscellaneous\ChainedFilter.cs" />
     <Compile Include="Fr\FrenchAnalyzer.cs" />
     <Compile Include="Fr\FrenchStemFilter.cs" />
     <Compile Include="Fr\FrenchStemmer.cs" />
+    <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
+    <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
+    <Compile Include="Miscellaneous\PrefixAndSuffixAwareTokenFilter.cs" />
+    <Compile Include="Miscellaneous\PrefixAwareTokenStream.cs" />
+    <Compile Include="Miscellaneous\SingleTokenTokenStream.cs" />
     <Compile Include="NGram\EdgeNGramTokenFilter.cs" />
     <Compile Include="NGram\EdgeNGramTokenizer.cs" />
     <Compile Include="NGram\NGramTokenFilter.cs" />
@@ -72,6 +77,7 @@
     <Compile Include="Nl\DutchStemFilter.cs" />
     <Compile Include="Nl\DutchStemmer.cs" />
     <Compile Include="Nl\WordlistLoader.cs" />
+    <Compile Include="Payloads\PayloadHelper.cs" />
     <Compile Include="Ru\RussianAnalyzer.cs" />
     <Compile Include="Ru\RussianCharsets.cs" />
     <Compile Include="Ru\RussianLetterTokenizer.cs" />
@@ -79,6 +85,18 @@
     <Compile Include="Ru\RussianStemFilter.cs" />
     <Compile Include="Ru\RussianStemmer.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Shingle\Codec\OneDimensionalNonWeightedTokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Codec\SimpleThreeDimensionalTokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Codec\TokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Codec\TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs" />
+    <Compile Include="Shingle\Matrix\Column.cs" />
+    <Compile Include="Shingle\Matrix\Matrix.cs" />
+    <Compile Include="Shingle\Matrix\MatrixPermutationIterator.cs" />
+    <Compile Include="Shingle\Matrix\Row.cs" />
+    <Compile Include="Shingle\ShingleAnalyzerWrapper.cs" />
+    <Compile Include="Shingle\ShingleFilter.cs" />
+    <Compile Include="Shingle\ShingleMatrixFilter.cs" />
+    <Compile Include="Shingle\TokenPositioner.cs" />
     <Compile Include="WordlistLoader.cs" />
   </ItemGroup>
   <ItemGroup>
@@ -90,6 +108,7 @@
       <Name>Lucene.Net</Name>
     </ProjectReference>
   </ItemGroup>
+  <ItemGroup />
   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
   <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
        Other similar extension points exist, see Microsoft.Common.targets.

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/ChainedFilter.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+
+    /// <summary>
+    /// Allows multiple <see cref="Filter"/>s to be chained.
+    /// <para>
+    /// Logical operations such as <b>NOT</b> and <b>XOR</b> are applied between
+    /// filters. One operation can be used for all filters, or a specific
+    /// operation can be declared for each filter.
+    /// </para>
+    /// <para>
+    /// Order in which filters are called depends on the position of the filter
+    /// in the chain. It's probably more efficient to place the most restrictive
+    /// filters / least computationally-intensive filters first.
+    /// </para>
+    /// </summary>
+    public class ChainedFilter : Filter
+    {
+        /// <summary>
+        /// Logical operation used to merge a filter's documents into the running result.
+        /// <c>NONE</c> means "not declared"; it is treated as <see cref="DEFAULT"/>.
+        /// </summary>
+        public enum Logic
+        {
+            NONE = -1,
+            OR = 0,
+            AND = 1,
+            ANDNOT = 2,
+            XOR = 3
+        }
+
+        /// <summary>Logical operation when none is declared. Defaults to OR.</summary>
+        public const Logic DEFAULT = Logic.OR;
+
+        /// <summary>The filter chain, in evaluation order.</summary>
+        private readonly Filter[] chain;
+
+        /// <summary>Per-filter operations; null unless the array constructor was used.</summary>
+        private readonly Logic[] logicArray;
+
+        /// <summary>Single operation for all filters; NONE unless the single-logic constructor was used.</summary>
+        private readonly Logic logic = Logic.NONE;
+
+        /// <summary>Creates a chain whose filters are combined with <see cref="DEFAULT"/>.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <exception cref="ArgumentNullException"><paramref name="chain"/> is null.</exception>
+        public ChainedFilter(Filter[] chain)
+        {
+            // Fail at construction time instead of with a NullReferenceException
+            // on the first call to GetDocIdSet or ToString.
+            if (chain == null)
+            {
+                throw new ArgumentNullException("chain");
+            }
+            this.chain = chain;
+        }
+
+        /// <summary>Creates a chain with one logical operation declared per filter.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logicArray">
+        /// Logical operations to apply between filters. May be null, in which case
+        /// <see cref="DEFAULT"/> is used for every filter.
+        /// </param>
+        /// <exception cref="ArgumentNullException"><paramref name="chain"/> is null.</exception>
+        public ChainedFilter(Filter[] chain, Logic[] logicArray)
+            : this(chain)
+        {
+            this.logicArray = logicArray;
+        }
+
+        /// <summary>Creates a chain with a single logical operation for all filters.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logic">Logical operation to apply to ALL filters</param>
+        /// <exception cref="ArgumentNullException"><paramref name="chain"/> is null.</exception>
+        public ChainedFilter(Filter[] chain, Logic logic)
+            : this(chain)
+        {
+            this.logic = logic;
+        }
+
+        /// <summary>
+        /// Evaluates every filter of the chain against <paramref name="reader"/> and
+        /// combines the results. A single declared operation takes precedence, then
+        /// the per-filter array, then <see cref="DEFAULT"/>.
+        /// </summary>
+        /// <param name="reader">Reader the filters are evaluated against.</param>
+        /// <returns>The combined document id set.</returns>
+        /// <exception cref="ArgumentException">
+        /// The per-filter logic array does not have one entry per filter.
+        /// </exception>
+        /// <seealso cref="Filter.GetDocIdSet(IndexReader)"/>
+        public override DocIdSet GetDocIdSet(IndexReader reader)
+        {
+            if (logic != Logic.NONE)
+            {
+                return ChainWithSingleLogic(reader, logic);
+            }
+            if (logicArray != null)
+            {
+                return ChainWithLogicArray(reader, logicArray);
+            }
+            return ChainWithSingleLogic(reader, DEFAULT);
+        }
+
+        /// <summary>
+        /// Returns the iterator of <paramref name="docIdSet"/>, substituting the empty
+        /// iterator when the set or its iterator is null.
+        /// </summary>
+        private static DocIdSetIterator IteratorOrEmpty(DocIdSet docIdSet)
+        {
+            DocIdSetIterator iter = docIdSet == null ? null : docIdSet.Iterator();
+            return iter ?? DocIdSet.EMPTY_DOCIDSET.Iterator();
+        }
+
+        /// <summary>Runs <paramref name="filter"/> and returns a never-null iterator over its documents.</summary>
+        private DocIdSetIterator GetDISI(Filter filter, IndexReader reader)
+        {
+            return IteratorOrEmpty(filter.GetDocIdSet(reader));
+        }
+
+        /// <summary>
+        /// Builds the bit set the chain starts from.
+        /// </summary>
+        /// <param name="reader">Reader the filters are evaluated against.</param>
+        /// <param name="logic">Operation declared for the first filter.</param>
+        /// <param name="next">Index of the first filter that still has to be applied.</param>
+        private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, out int next)
+        {
+            // An AND (or ANDNOT) against a completely false bit set would always
+            // yield zero results, so for those operations the first filter seeds
+            // the result instead of being combined with an empty set.
+            if (logic == Logic.AND || logic == Logic.ANDNOT)
+            {
+                OpenBitSetDISI seeded = new OpenBitSetDISI(GetDISI(chain[0], reader), reader.MaxDoc());
+                if (logic == Logic.ANDNOT)
+                {
+                    seeded.Flip(0, reader.MaxDoc()); // NOTE: may set bits for deleted docs.
+                }
+                next = 1;
+                return seeded;
+            }
+
+            next = 0;
+            return new OpenBitSetDISI(reader.MaxDoc());
+        }
+
+        /// <summary>
+        /// Hook applied to the combined bit set before it is returned. This
+        /// implementation returns <paramref name="result"/> unchanged and ignores
+        /// <paramref name="maxDocs"/>.
+        /// </summary>
+        /// <remarks>
+        /// Deprecated: either use CachingWrapperFilter, or switch to a different
+        /// DocIdSet implementation yourself. The original documentation states that
+        /// this method will be removed in Lucene 4.0.
+        /// </remarks>
+        /// <param name="result">The combined bit set.</param>
+        /// <param name="maxDocs">Number of documents of the reader (unused).</param>
+        /// <returns><paramref name="result"/>.</returns>
+        protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
+        {
+            return result;
+        }
+
+        /// <summary>Delegates to each filter in the chain, using one operation for all of them.</summary>
+        /// <param name="reader">IndexReader</param>
+        /// <param name="logic">Logical operation</param>
+        /// <returns>DocIdSet</returns>
+        private DocIdSet ChainWithSingleLogic(IndexReader reader, Logic logic)
+        {
+            int first;
+            OpenBitSetDISI result = InitialResult(reader, logic, out first);
+            for (int i = first; i < chain.Length; i++)
+            {
+                DoChain(result, logic, chain[i].GetDocIdSet(reader));
+            }
+            return FinalResult(result, reader.MaxDoc());
+        }
+
+        /// <summary>Delegates to each filter in the chain, using the operation declared at the same index.</summary>
+        /// <param name="reader">IndexReader</param>
+        /// <param name="logics">Logical operations, one per filter</param>
+        /// <returns>DocIdSet</returns>
+        /// <exception cref="ArgumentException">The array length differs from the chain length.</exception>
+        private DocIdSet ChainWithLogicArray(IndexReader reader, Logic[] logics)
+        {
+            if (logics.Length != chain.Length)
+            {
+                throw new ArgumentException("Invalid number of elements in logic array");
+            }
+
+            int first;
+            OpenBitSetDISI result = InitialResult(reader, logics[0], out first);
+            for (int i = first; i < chain.Length; i++)
+            {
+                DoChain(result, logics[i], chain[i].GetDocIdSet(reader));
+            }
+            return FinalResult(result, reader.MaxDoc());
+        }
+
+        /// <summary>Returns "ChainedFilter: [" followed by each filter and a space, then "]".</summary>
+        public override String ToString()
+        {
+            StringBuilder sb = new StringBuilder();
+            sb.Append("ChainedFilter: [");
+            foreach (Filter filter in chain)
+            {
+                sb.Append(filter);
+                sb.Append(' ');
+            }
+            sb.Append(']');
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Merges <paramref name="dis"/> into <paramref name="result"/> using
+        /// <paramref name="logic"/>.
+        /// </summary>
+        /// <param name="result">Running result; modified in place.</param>
+        /// <param name="logic">Operation to apply; anything other than OR, AND, ANDNOT or XOR is treated as <see cref="DEFAULT"/>.</param>
+        /// <param name="dis">Documents of the current filter; null is treated as an empty set.</param>
+        private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis)
+        {
+            // Resolve NONE / undefined values up front so the set's iterator is
+            // requested only once.
+            if (logic != Logic.OR && logic != Logic.AND && logic != Logic.ANDNOT && logic != Logic.XOR)
+            {
+                logic = DEFAULT;
+            }
+
+            OpenBitSet bits = dis as OpenBitSet;
+            if (bits != null)
+            {
+                // optimized case for OpenBitSets
+                switch (logic)
+                {
+                    case Logic.OR:
+                        result.Or(bits);
+                        break;
+                    case Logic.AND:
+                        result.And(bits);
+                        break;
+                    case Logic.ANDNOT:
+                        result.AndNot(bits);
+                        break;
+                    case Logic.XOR:
+                        result.Xor(bits);
+                        break;
+                }
+                return;
+            }
+
+            DocIdSetIterator disi = IteratorOrEmpty(dis);
+            switch (logic)
+            {
+                case Logic.OR:
+                    result.InPlaceOr(disi);
+                    break;
+                case Logic.AND:
+                    result.InPlaceAnd(disi);
+                    break;
+                case Logic.ANDNOT:
+                    result.InPlaceNot(disi);
+                    break;
+                case Logic.XOR:
+                    result.InPlaceXor(disi);
+                    break;
+            }
+        }
+
+    }
+
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// A <see cref="TokenStream"/> that never produces a token.
+    /// </summary>
+    public class EmptyTokenStream : TokenStream
+    {
+        /// <summary>
+        /// Reports end of stream immediately, so consumers of the attribute-based
+        /// API get the answer directly instead of going through the obsolete
+        /// <c>Next(Token)</c> override.
+        /// </summary>
+        /// <returns>Always <c>false</c>.</returns>
+        public override bool IncrementToken()
+        {
+            return false;
+        }
+
+        /// <summary>Obsolete token API: there is never a next token.</summary>
+        /// <param name="reusableToken">Ignored.</param>
+        /// <returns>Always <c>null</c>.</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override Token Next(Token reusableToken)
+        {
+            return null;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// A <see cref="PrefixAwareTokenFilter"/> whose suffix-token update step is
+    /// supplied as a delegate through <see cref="UpdateAction"/> instead of by
+    /// subclassing.
+    /// </summary>
+    public class InjectablePrefixAwareTokenFilter : PrefixAwareTokenFilter
+    {
+        /// <summary>Creates the filter over the given prefix and suffix streams.</summary>
+        /// <param name="prefix">Stream passed to the base class as prefix.</param>
+        /// <param name="suffix">Stream passed to the base class as suffix.</param>
+        public InjectablePrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+        {
+        }
+
+        /// <summary>
+        /// Callback invoked with (suffix token, last prefix token); its return value
+        /// becomes the result of <see cref="UpdateSuffixToken"/>. When left null the
+        /// base class implementation is used.
+        /// </summary>
+        public Func<Token, Token, Token> UpdateAction { get; set; }
+
+        /// <summary>
+        /// Updates a suffix token by delegating to <see cref="UpdateAction"/>.
+        /// </summary>
+        /// <param name="suffixToken">Token handed to the callback as first argument.</param>
+        /// <param name="lastPrefixToken">Token handed to the callback as second argument.</param>
+        /// <returns>The token returned by the callback, or by the base implementation when no callback is set.</returns>
+        public override Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            // Read the property once so a concurrent reset to null cannot slip in
+            // between the check and the invocation.
+            Func<Token, Token, Token> action = UpdateAction;
+            if (action == null)
+            {
+                // No callback configured: use the inherited behaviour rather than
+                // failing with a NullReferenceException.
+                return base.UpdateSuffixToken(suffixToken, lastPrefixToken);
+            }
+            return action(suffixToken, lastPrefixToken);
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// Joins a prefix stream, an input stream and a suffix stream by nesting two
+    /// PrefixAwareTokenFilter instances: (prefix + input) + suffix.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in Lucene.Net.Analysis.Tokenattributes.
+    /// </summary>
+    public class PrefixAndSuffixAwareTokenFilter : TokenStream
+    {
+        // Outer filter of the nesting; every stream operation is forwarded to it.
+        private readonly PrefixAwareTokenFilter _joined;
+
+        /// <summary>Builds the nested filters and wires their update callbacks to this instance.</summary>
+        /// <param name="prefix">Stream emitted first.</param>
+        /// <param name="input">Stream emitted after the prefix.</param>
+        /// <param name="suffix">Stream emitted last; also passed to the base constructor.</param>
+        public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+        {
+            InjectablePrefixAwareTokenFilter prefixThenInput = new InjectablePrefixAwareTokenFilter(prefix, input);
+            prefixThenInput.UpdateAction = UpdateInputToken;
+
+            InjectablePrefixAwareTokenFilter everything = new InjectablePrefixAwareTokenFilter(prefixThenInput, suffix);
+            everything.UpdateAction = UpdateSuffixToken;
+
+            _joined = everything;
+        }
+
+        // Moves both offsets of the token forward by delta and hands the token back.
+        private static Token ShiftOffsets(Token token, int delta)
+        {
+            token.SetStartOffset(delta + token.StartOffset());
+            token.SetEndOffset(delta + token.EndOffset());
+            return token;
+        }
+
+        /// <summary>Shifts the offsets of an input token by the end offset of the last prefix token.</summary>
+        /// <param name="inputToken">Token to adjust; modified in place.</param>
+        /// <param name="lastPrefixToken">Token whose end offset is added.</param>
+        /// <returns><paramref name="inputToken"/>.</returns>
+        public Token UpdateInputToken(Token inputToken, Token lastPrefixToken)
+        {
+            return ShiftOffsets(inputToken, lastPrefixToken.EndOffset());
+        }
+
+        /// <summary>Shifts the offsets of a suffix token by the end offset of the last input token.</summary>
+        /// <param name="suffixToken">Token to adjust; modified in place.</param>
+        /// <param name="lastInputToken">Token whose end offset is added.</param>
+        /// <returns><paramref name="suffixToken"/>.</returns>
+        public Token UpdateSuffixToken(Token suffixToken, Token lastInputToken)
+        {
+            return ShiftOffsets(suffixToken, lastInputToken.EndOffset());
+        }
+
+        /// <summary>Advances the joined stream.</summary>
+        /// <returns>Whatever the nested filter reports.</returns>
+        public override sealed bool IncrementToken()
+        {
+            bool advanced = _joined.IncrementToken();
+            return advanced;
+        }
+
+        /// <summary>
+        /// Obsolete token API; sealed because it should not be overridden. Forwards to the base class.
+        /// </summary>
+        /// <param name="reusableToken">Token passed through to the base implementation.</param>
+        /// <returns>The result of the base implementation.</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            Token produced = base.Next(reusableToken);
+            return produced;
+        }
+
+        /// <summary>
+        /// Obsolete token API; sealed because it should not be overridden. Forwards to the base class.
+        /// </summary>
+        /// <returns>The result of the base implementation.</returns>
+        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+        public override sealed Token Next()
+        {
+            Token produced = base.Next();
+            return produced;
+        }
+
+        /// <summary>Resets the nested filter.</summary>
+        public override void Reset()
+        {
+            _joined.Reset();
+        }
+
+        /// <summary>Closes the nested filter.</summary>
+        public override void Close()
+        {
+            _joined.Close();
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Index;
+using FlagsAttribute = Lucene.Net.Analysis.Tokenattributes.FlagsAttribute;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// Joins two token streams and leaves the last token of the first stream available
+    /// to be used when updating the token values in the second stream based on that token.
+    ///
+    /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+    /// <p/>
+    /// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+    /// the ones located in Lucene.Net.Analysis.Tokenattributes.
+    /// <p/>
+    /// NOTE(review): the prefix attribute references are captured once in the constructor, so assigning
+    /// <see cref="Prefix"/> afterwards does not rebind them. Confirm whether the public setters are meant
+    /// to be used after construction.
+    /// </summary>
+    public class PrefixAwareTokenFilter : TokenStream
+    {
+        // Attributes registered on this stream. The suffix stream is passed to the base constructor,
+        // so these are presumably shared with the suffix - TODO confirm against TokenStream(AttributeSource).
+        private readonly TermAttribute _termAtt;
+        private readonly PositionIncrementAttribute _posIncrAtt;
+        private readonly PayloadAttribute _payloadAtt;
+        private readonly OffsetAttribute _offsetAtt;
+        private readonly TypeAttribute _typeAtt;
+        private readonly FlagsAttribute _flagsAtt;
+
+        // Attributes registered on the prefix stream that was handed to the constructor.
+        private readonly TermAttribute _pTermAtt;
+        private readonly PositionIncrementAttribute _pPosIncrAtt;
+        private readonly PayloadAttribute _pPayloadAtt;
+        private readonly OffsetAttribute _pOffsetAtt;
+        private readonly TypeAttribute _pTypeAtt;
+        private readonly FlagsAttribute _pFlagsAtt;
+
+        // Private copy of the most recent prefix token, and a scratch token reused for every read.
+        private readonly Token _previousPrefixToken = new Token();
+        private readonly Token _reusableToken = new Token();
+
+        // Set once the prefix stream has reported that it has no more tokens.
+        private bool _prefixExhausted;
+
+        /// <summary>
+        /// Creates a filter that first emits the tokens of <paramref name="prefix"/> and then the tokens of
+        /// <paramref name="suffix"/>, the latter passed through <see cref="UpdateSuffixToken"/>.
+        /// </summary>
+        /// <param name="prefix">stream whose tokens are emitted first</param>
+        /// <param name="suffix">stream whose tokens are emitted once the prefix is exhausted</param>
+        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
+        {
+            Suffix = suffix;
+            Prefix = prefix;
+            _prefixExhausted = false;
+
+            // ReSharper disable DoNotCallOverridableMethodsInConstructor
+            _termAtt = (TermAttribute) AddAttribute(typeof (TermAttribute));
+            _posIncrAtt = (PositionIncrementAttribute) AddAttribute(typeof (PositionIncrementAttribute));
+            _payloadAtt = (PayloadAttribute) AddAttribute(typeof (PayloadAttribute));
+            _offsetAtt = (OffsetAttribute) AddAttribute(typeof (OffsetAttribute));
+            _typeAtt = (TypeAttribute) AddAttribute(typeof (TypeAttribute));
+            _flagsAtt = (FlagsAttribute) AddAttribute(typeof (FlagsAttribute));
+            // ReSharper restore DoNotCallOverridableMethodsInConstructor
+
+            _pTermAtt = (TermAttribute) prefix.AddAttribute(typeof (TermAttribute));
+            _pPosIncrAtt = (PositionIncrementAttribute) prefix.AddAttribute(typeof (PositionIncrementAttribute));
+            _pPayloadAtt = (PayloadAttribute) prefix.AddAttribute(typeof (PayloadAttribute));
+            _pOffsetAtt = (OffsetAttribute) prefix.AddAttribute(typeof (OffsetAttribute));
+            _pTypeAtt = (TypeAttribute) prefix.AddAttribute(typeof (TypeAttribute));
+            _pFlagsAtt = (FlagsAttribute) prefix.AddAttribute(typeof (FlagsAttribute));
+        }
+
+        /// <summary>Stream that is consumed first.</summary>
+        public TokenStream Prefix { get; set; }
+
+        /// <summary>Stream that is consumed once <see cref="Prefix"/> has no more tokens.</summary>
+        public TokenStream Suffix { get; set; }
+
+        /// <summary>
+        /// Emits the next prefix token while the prefix stream still has tokens; afterwards emits the
+        /// suffix tokens, each one passed through <see cref="UpdateSuffixToken"/> together with a copy of
+        /// the last prefix token.
+        /// </summary>
+        /// <returns>true if a token was made current, false once the suffix stream is exhausted</returns>
+        public override sealed bool IncrementToken()
+        {
+            if (!_prefixExhausted)
+            {
+                Token nextToken = GetNextPrefixInputToken(_reusableToken);
+                if (nextToken == null)
+                {
+                    _prefixExhausted = true;
+                }
+                else
+                {
+                    _previousPrefixToken.Reinit(nextToken);
+                    // Make it a deep copy: the payload is cloned so the remembered prefix token
+                    // does not keep pointing at the payload instance of the scratch token.
+                    Payload p = _previousPrefixToken.GetPayload();
+                    if (p != null)
+                    {
+                        _previousPrefixToken.SetPayload((Payload) p.Clone());
+                    }
+                    SetCurrentToken(nextToken);
+                    return true;
+                }
+            }
+
+            Token nextSuffixToken = GetNextSuffixInputToken(_reusableToken);
+            if (nextSuffixToken == null)
+            {
+                return false;
+            }
+
+            nextSuffixToken = UpdateSuffixToken(nextSuffixToken, _previousPrefixToken);
+            SetCurrentToken(nextSuffixToken);
+            return true;
+        }
+
+        /// <summary>
+        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
+        /// </summary>
+        /// <param name="reusableToken">token instance handed through to the base implementation</param>
+        /// <returns>the token returned by the base implementation</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /// <summary>
+        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
+        /// </summary>
+        /// <returns>the token returned by the base implementation</returns>
+        [Obsolete("The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API.")]
+        public override sealed Token Next()
+        {
+            return base.Next();
+        }
+
+        /// <summary>
+        /// Clears this stream's attributes and fills them from <paramref name="token"/>.
+        /// A null token is ignored and leaves the attributes untouched.
+        /// </summary>
+        private void SetCurrentToken(Token token)
+        {
+            if (token == null) return;
+            ClearAttributes();
+            _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
+            _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
+            _flagsAtt.SetFlags(token.GetFlags());
+            _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
+            _typeAtt.SetType(token.Type());
+            _payloadAtt.SetPayload(token.GetPayload());
+        }
+
+        /// <summary>
+        /// Advances <see cref="Prefix"/> and copies the prefix attributes into <paramref name="token"/>.
+        /// </summary>
+        /// <returns>the filled token, or null when the prefix stream has no more tokens</returns>
+        private Token GetNextPrefixInputToken(Token token)
+        {
+            if (!Prefix.IncrementToken()) return null;
+            token.SetTermBuffer(_pTermAtt.TermBuffer(), 0, _pTermAtt.TermLength());
+            token.SetPositionIncrement(_pPosIncrAtt.GetPositionIncrement());
+            token.SetFlags(_pFlagsAtt.GetFlags());
+            token.SetOffset(_pOffsetAtt.StartOffset(), _pOffsetAtt.EndOffset());
+            token.SetType(_pTypeAtt.Type());
+            token.SetPayload(_pPayloadAtt.GetPayload());
+            return token;
+        }
+
+        /// <summary>
+        /// Advances <see cref="Suffix"/> and copies this stream's own attributes into <paramref name="token"/>.
+        /// </summary>
+        /// <returns>the filled token, or null when the suffix stream has no more tokens</returns>
+        private Token GetNextSuffixInputToken(Token token)
+        {
+            if (!Suffix.IncrementToken()) return null;
+            token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
+            token.SetPositionIncrement(_posIncrAtt.GetPositionIncrement());
+            token.SetFlags(_flagsAtt.GetFlags());
+            token.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.EndOffset());
+            token.SetType(_typeAtt.Type());
+            token.SetPayload(_payloadAtt.GetPayload());
+            return token;
+        }
+
+        /// <summary>
+        /// The default implementation adds last prefix token end offset to the suffix token start and end offsets.
+        /// </summary>
+        /// <param name="suffixToken">a token from the suffix stream</param>
+        /// <param name="lastPrefixToken">the last token from the prefix stream</param>
+        /// <returns>consumer token</returns>
+        public virtual Token UpdateSuffixToken(Token suffixToken, Token lastPrefixToken)
+        {
+            suffixToken.SetStartOffset(lastPrefixToken.EndOffset() + suffixToken.StartOffset());
+            suffixToken.SetEndOffset(lastPrefixToken.EndOffset() + suffixToken.EndOffset());
+            return suffixToken;
+        }
+
+        /// <summary>
+        /// Closes both wrapped streams. A stream that has been set to null is skipped (mirroring the
+        /// null checks in <see cref="Reset"/>), and the suffix is closed even when closing the prefix throws.
+        /// </summary>
+        public override void Close()
+        {
+            try
+            {
+                if (Prefix != null)
+                {
+                    Prefix.Close();
+                }
+            }
+            finally
+            {
+                if (Suffix != null)
+                {
+                    Suffix.Close();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Resets the base stream and then whichever of the wrapped streams are non-null. The
+        /// prefix-exhausted flag is only cleared when a prefix stream is present.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+
+            if (Prefix != null)
+            {
+                _prefixExhausted = false;
+                Prefix.Reset();
+            }
+
+            if (Suffix != null)
+                Suffix.Reset();
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Diagnostics;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+    /// <summary>
+    /// A TokenStream containing a single token.
+    /// The stream keeps its own clone of the token, and hands out clones, so callers never share
+    /// a Token instance with the stream.
+    /// </summary>
+    public class SingleTokenTokenStream : TokenStream
+    {
+        private readonly AttributeImpl _tokenAtt;
+        private bool _exhausted;
+
+        // The token needs to be immutable, so work with clones!
+        private Token _singleToken;
+
+        /// <summary>
+        /// Creates a stream that yields a clone of <paramref name="token"/> exactly once per reset.
+        /// </summary>
+        /// <param name="token">the token to emit; must not be null</param>
+        /// <exception cref="ArgumentNullException">if <paramref name="token"/> is null</exception>
+        public SingleTokenTokenStream(Token token)
+        {
+            // Debug.Assert is compiled out of release builds, so validate explicitly instead of
+            // letting token.Clone() fail with a NullReferenceException.
+            if (token == null)
+                throw new ArgumentNullException("token");
+
+            _singleToken = (Token) token.Clone();
+
+            // ReSharper disable DoNotCallOverridableMethodsInConstructor
+            _tokenAtt = (AttributeImpl) AddAttribute(typeof (TermAttribute));
+            // ReSharper restore DoNotCallOverridableMethodsInConstructor
+
+            Debug.Assert(_tokenAtt is Token || _tokenAtt.GetType().Name.Equals(typeof (TokenWrapper).Name),
+                         "Token Attribute is the wrong type! Type was: " + _tokenAtt.GetType().Name + " but expected " +
+                         typeof (TokenWrapper).Name);
+        }
+
+        /// <summary>
+        /// Emits the single token on the first call after construction or <see cref="Reset"/>.
+        /// </summary>
+        /// <returns>true for the first call, false for every following call until the stream is reset</returns>
+        public override sealed bool IncrementToken()
+        {
+            if (_exhausted)
+                return false;
+
+            ClearAttributes();
+            _singleToken.CopyTo(_tokenAtt);
+            _exhausted = true;
+
+            return true;
+        }
+
+        /// <summary>
+        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
+        /// </summary>
+        /// <param name="reusableToken">token instance handed through to the base implementation</param>
+        /// <returns>the token returned by the base implementation</returns>
+        [Obsolete("The new IncrementToken() and AttributeSource APIs should be used instead.")]
+        public override sealed Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /// <summary>
+        /// @deprecated Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.
+        /// </summary>
+        /// <returns>the token returned by the base implementation</returns>
+        [Obsolete(
+            "The returned Token is a \"full private copy\" (not re-used across calls to Next()) but will be slower than calling {@link #Next(Token)} or using the new IncrementToken() method with the new AttributeSource API."
+            )]
+        public override sealed Token Next()
+        {
+            return base.Next();
+        }
+
+        /// <summary>
+        /// Makes the token available again for the next call to <see cref="IncrementToken"/>.
+        /// </summary>
+        public override void Reset()
+        {
+            _exhausted = false;
+        }
+
+        /// <summary>
+        /// Returns a clone of the token held by this stream.
+        /// </summary>
+        public Token GetToken()
+        {
+            return (Token) _singleToken.Clone();
+        }
+
+        /// <summary>
+        /// Replaces the token held by this stream with a clone of <paramref name="token"/>.
+        /// The exhausted state is not changed; call <see cref="Reset"/> to emit the new token.
+        /// </summary>
+        /// <param name="token">the new token; must not be null</param>
+        /// <exception cref="ArgumentNullException">if <paramref name="token"/> is null</exception>
+        public void SetToken(Token token)
+        {
+            if (token == null)
+                throw new ArgumentNullException("token");
+
+            _singleToken = (Token) token.Clone();
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Payloads/PayloadHelper.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Payloads
+{
+    /// <summary>
+    /// Utility methods for encoding payloads.
+    /// Integers are written as four bytes, most significant byte first; floats are written as the
+    /// four bytes of their 32 bit pattern in the same layout.
+    /// </summary>
+    public static class PayloadHelper
+    {
+        /// <summary>Encodes <paramref name="payload"/> into a newly allocated 4 byte array.</summary>
+        /// <param name="payload">the float to encode</param>
+        /// <returns>a new array of length 4</returns>
+        public static byte[] EncodeFloat(float payload)
+        {
+            return EncodeFloat(payload, new byte[4], 0);
+        }
+
+        /// <summary>Encodes <paramref name="payload"/> into <paramref name="data"/> starting at <paramref name="offset"/>.</summary>
+        /// <param name="payload">the float to encode</param>
+        /// <param name="data">destination array; must have room for 4 bytes starting at <paramref name="offset"/></param>
+        /// <param name="offset">index of the first byte to write</param>
+        /// <returns>the <paramref name="data"/> array that was passed in</returns>
+        public static byte[] EncodeFloat(float payload, byte[] data, int offset)
+        {
+            return EncodeInt(FloatToIntBits(payload), data, offset);
+        }
+
+        /// <summary>Encodes <paramref name="payload"/> into a newly allocated 4 byte array.</summary>
+        /// <param name="payload">the int to encode</param>
+        /// <returns>a new array of length 4</returns>
+        public static byte[] EncodeInt(int payload)
+        {
+            return EncodeInt(payload, new byte[4], 0);
+        }
+
+        /// <summary>Encodes <paramref name="payload"/> into <paramref name="data"/> starting at <paramref name="offset"/>.</summary>
+        /// <param name="payload">the int to encode</param>
+        /// <param name="data">destination array; must have room for 4 bytes starting at <paramref name="offset"/></param>
+        /// <param name="offset">index of the first byte to write</param>
+        /// <returns>the <paramref name="data"/> array that was passed in</returns>
+        public static byte[] EncodeInt(int payload, byte[] data, int offset)
+        {
+            // The casts deliberately keep only the low 8 bits. Without the explicit unchecked
+            // context they throw OverflowException when the assembly is compiled with /checked.
+            unchecked
+            {
+                data[offset] = (byte) (payload >> 24);
+                data[offset + 1] = (byte) (payload >> 16);
+                data[offset + 2] = (byte) (payload >> 8);
+                data[offset + 3] = (byte) payload;
+            }
+            return data;
+        }
+
+        /// <summary>
+        /// <p>Decode the payload that was encoded using EncodeFloat(float)</p>
+        /// <p>NOTE: the array must be at least 4 bytes long.</p>
+        /// </summary>
+        /// <param name="bytes">The bytes to decode</param>
+        /// <returns>the decoded float</returns>
+        public static float DecodeFloat(byte[] bytes)
+        {
+            return DecodeFloat(bytes, 0);
+        }
+
+        /// <summary>
+        /// <p>Decode the payload that was encoded using EncodeFloat(float)</p>
+        /// <p>NOTE: the length of the array must be at least offset + 4 long.</p>
+        /// </summary>
+        /// <param name="bytes">The bytes to decode</param>
+        /// <param name="offset">The offset into the array.</param>
+        /// <returns>The float that was encoded</returns>
+        public static float DecodeFloat(byte[] bytes, int offset)
+        {
+            return IntBitsToFloat(DecodeInt(bytes, offset));
+        }
+
+        /// <summary>
+        /// Decodes the four bytes starting at <paramref name="offset"/> (most significant byte first) into an int.
+        /// </summary>
+        /// <param name="bytes">The bytes to decode; must hold at least offset + 4 bytes</param>
+        /// <param name="offset">The offset into the array.</param>
+        /// <returns>The int that was encoded</returns>
+        public static int DecodeInt(byte[] bytes, int offset)
+        {
+            return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
+                   | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
+        }
+
+        // Reinterprets the 32 bit pattern of a float as an int (round trip through BitConverter,
+        // so the result does not depend on the platform byte order).
+        private static int FloatToIntBits(float value)
+        {
+            return BitConverter.ToInt32(BitConverter.GetBytes(value), 0);
+        }
+
+        // Inverse of FloatToIntBits: reinterprets an int bit pattern as a float.
+        private static float IntBitsToFloat(int value)
+        {
+            return BitConverter.ToSingle(BitConverter.GetBytes(value), 0);
+        }
+    }
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs?rev=1147678&r1=1147677&r2=1147678&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Properties/AssemblyInfo.cs Sun Jul 17 16:31:29 2011
@@ -5,11 +5,11 @@ using System.Runtime.InteropServices;
 // General Information about an assembly is controlled through the following 
 // set of attributes. Change these attribute values to modify the information
 // associated with an assembly.
-[assembly: AssemblyTitle("Lucene.Net.Contrib.Analyzers")]
+[assembly: AssemblyTitle("Lucene.Net.Analyzers")]
 [assembly: AssemblyDescription("")]
 [assembly: AssemblyConfiguration("")]
 [assembly: AssemblyCompany("The Apache Software Foundation")]
-[assembly: AssemblyProduct("Lucene.Net.Contrib.Analyzers")]
+[assembly: AssemblyProduct("Lucene.Net.Analyzers")]
 [assembly: AssemblyCopyright("Copyright 2006 - 2011 The Apache Software Foundation")]
 [assembly: AssemblyTrademark("Copyright 2006 - 2011 The Apache Software Foundation")]
 [assembly: AssemblyCulture("")]
@@ -32,5 +32,5 @@ using System.Runtime.InteropServices;
 // You can specify all the values or you can default the Build and Revision Numbers 
 // by using the '*' as shown below:
 // [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("2.9.2.1")]
-[assembly: AssemblyFileVersion("2.9.2.1")]
+[assembly: AssemblyVersion("2.9.4.2")]
+[assembly: AssemblyFileVersion("2.9.4.2")]

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+    /// <summary>
+    /// Using this codec makes a ShingleMatrixFilter act like ShingleFilter.
+    /// It produces the most simple sort of shingles, ignoring token position increments, etc.
+    ///
+    /// Every token is reported as a new column and every token has the weight 1; the setters
+    /// store nothing.
+    /// </summary>
+    public class OneDimensionalNonWeightedTokenSettingsCodec : TokenSettingsCodec
+    {
+        /// <summary>Always answers <c>TokenPositioner.NewColumn</c>, whatever the token is.</summary>
+        /// <param name="token">not inspected</param>
+        /// <returns><c>TokenPositioner.NewColumn</c></returns>
+        public override TokenPositioner GetTokenPositioner(Token token)
+        {
+            TokenPositioner positioner = TokenPositioner.NewColumn;
+            return positioner;
+        }
+
+        /// <summary>Does nothing: this codec keeps no positioning information on the token.</summary>
+        /// <param name="token">not modified</param>
+        /// <param name="tokenPositioner">ignored</param>
+        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+        {
+            // intentionally empty
+        }
+
+        /// <summary>Always answers 1, i.e. weights are disabled.</summary>
+        /// <param name="token">not inspected</param>
+        /// <returns>1f</returns>
+        public override float GetWeight(Token token)
+        {
+            const float unweighted = 1f;
+            return unweighted;
+        }
+
+        /// <summary>Does nothing: this codec keeps no weight on the token.</summary>
+        /// <param name="token">not modified</param>
+        /// <param name="weight">ignored</param>
+        public override void SetWeight(Token token, float weight)
+        {
+            // intentionally empty
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Payloads;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+    /// <summary>
+    /// A full featured codec not to be used for something serious.
+    ///
+    /// It takes complete control of
+    /// payload for weight
+    /// and the bit flags for positioning in the matrix.
+    ///
+    /// Mainly exist for demonstrational purposes.
+    /// </summary>
+    public class SimpleThreeDimensionalTokenSettingsCodec : TokenSettingsCodec
+    {
+        /// <summary>
+        /// Maps the token flags to a positioner: 0 is a new column, 1 a new row, 2 the same row.
+        /// </summary>
+        /// <param name="token">token whose flags are read</param>
+        /// <returns>the token flags int value as TokenPosition</returns>
+        /// <exception cref="IOException">if the flags hold any other value</exception>
+        public override TokenPositioner GetTokenPositioner(Token token)
+        {
+            int flags = token.GetFlags();
+
+            if (flags == 0)
+            {
+                return TokenPositioner.NewColumn;
+            }
+            if (flags == 1)
+            {
+                return TokenPositioner.NewRow;
+            }
+            if (flags == 2)
+            {
+                return TokenPositioner.SameRow;
+            }
+
+            throw new IOException("Unknown matrix positioning of token " + token);
+        }
+
+        /// <summary>
+        /// Sets the TokenPositioner as token flags int value.
+        /// </summary>
+        /// <param name="token">token whose flags are overwritten</param>
+        /// <param name="tokenPositioner">positioner whose <c>Index</c> is stored in the flags</param>
+        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+        {
+            int flags = tokenPositioner.Index;
+            token.SetFlags(flags);
+        }
+
+        /// <summary>
+        /// Returns a 32 bit float from the payload, or 1f if the payload or its data is null.
+        /// </summary>
+        /// <param name="token">token whose payload is decoded</param>
+        /// <returns>the decoded weight, or 1f when there is nothing to decode</returns>
+        public override float GetWeight(Token token)
+        {
+            Payload payload = token.GetPayload();
+            if (payload == null)
+            {
+                return 1f;
+            }
+
+            byte[] data = payload.GetData();
+            if (data == null)
+            {
+                return 1f;
+            }
+
+            return PayloadHelper.DecodeFloat(data);
+        }
+
+        /// <summary>
+        /// Stores a 32 bit float in the payload, or sets the payload to null if the weight is 1f.
+        /// </summary>
+        /// <param name="token">token whose payload is replaced</param>
+        /// <param name="weight">weight to store</param>
+        public override void SetWeight(Token token, float weight)
+        {
+            Payload payload = null;
+            if (weight != 1f)
+            {
+                payload = new Payload(PayloadHelper.EncodeFloat(weight));
+            }
+
+            token.SetPayload(payload);
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+    /// <summary>
+    /// Strategy used to code and decode meta data of the tokens from the input stream
+    /// regarding how to position the tokens in the matrix, set and retrieve weight, etc.
+    /// </summary>
+    public abstract class TokenSettingsCodec
+    {
+        /// <summary>
+        /// Retrieves information on how a Token is to be inserted to a ShingleMatrixFilter.Matrix.
+        /// </summary>
+        /// <param name="token">the token to read the positioning information from</param>
+        /// <returns>the positioner that applies to the token</returns>
+        public abstract TokenPositioner GetTokenPositioner(Token token);
+
+        /// <summary>
+        /// Sets information on how a Token is to be inserted to a ShingleMatrixFilter.Matrix.
+        /// </summary>
+        /// <param name="token">the token to store the positioning information on</param>
+        /// <param name="tokenPositioner">the positioner to store</param>
+        public abstract void SetTokenPositioner(Token token, TokenPositioner tokenPositioner);
+
+        /// <summary>
+        /// Have this method return 1f in order to 'disable' weights.
+        /// </summary>
+        /// <param name="token">the token to read the weight from</param>
+        /// <returns>the weight of the token</returns>
+        public abstract float GetWeight(Token token);
+
+        /// <summary>
+        /// Have this method do nothing in order to 'disable' weights.
+        /// </summary>
+        /// <param name="token">the token to store the weight on</param>
+        /// <param name="weight">the weight to store</param>
+        public abstract void SetWeight(Token token, float weight);
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Codec
+{
+    /// <summary>
+    /// A codec that creates a two dimensional matrix
+    /// by treating tokens from the input stream with 0 position increment
+    /// as new rows to the current column.
+    /// Weights are disabled: every token weighs 1 and storing a weight is a no-op.
+    /// </summary>
+    public class TwoDimensionalNonWeightedSynonymTokenSettingsCodec : TokenSettingsCodec
+    {
+        /// <summary>
+        /// Decides the position from the token's position increment.
+        /// </summary>
+        /// <param name="token">token whose position increment is read</param>
+        /// <returns>NewRow for a position increment of 0, NewColumn for any other value</returns>
+        public override TokenPositioner GetTokenPositioner(Token token)
+        {
+            if (token.GetPositionIncrement() == 0)
+            {
+                return TokenPositioner.NewRow;
+            }
+
+            return TokenPositioner.NewColumn;
+        }
+
+        /// <summary>
+        /// Not supported by this codec.
+        /// </summary>
+        /// <exception cref="NotSupportedException">always</exception>
+        public override void SetTokenPositioner(Token token, TokenPositioner tokenPositioner)
+        {
+            throw new NotSupportedException();
+        }
+
+        /// <summary>Always answers 1, i.e. weights are disabled.</summary>
+        /// <param name="token">not inspected</param>
+        /// <returns>1f</returns>
+        public override float GetWeight(Token token)
+        {
+            const float unweighted = 1f;
+            return unweighted;
+        }
+
+        /// <summary>Does nothing: this codec keeps no weight on the token.</summary>
+        /// <param name="token">not modified</param>
+        /// <param name="weight">ignored</param>
+        public override void SetWeight(Token token, float weight)
+        {
+            // intentionally empty
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Column.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+    /// <summary>A column of a matrix, holding a list of rows.</summary>
+    public class Column
+    {
+        /// <summary>Creates a column with one row that contains <paramref name="token"/>.</summary>
+        public Column(Token token, Matrix enclosingInstance)
+            : this(enclosingInstance)
+        {
+            var row = new Row(this); // the Row constructor appends the row to Rows
+            row.Tokens.AddLast(token);
+        }
+
+        /// <summary>Creates a column without rows and appends it to the matrix.</summary>
+        public Column(Matrix enclosingInstance)
+        {
+            Rows = new List<Row>();
+            Matrix = enclosingInstance;
+            // Test and append under one lock so concurrent columns cannot both become first.
+            lock (Matrix)
+            {
+                IsFirst = Matrix.Columns.Count == 0;
+                Matrix.Columns.Add(this);
+            }
+        }
+
+        public Matrix Matrix { get; private set; }
+        public List<Row> Rows { get; private set; }
+
+        /// <summary>Position of this column in the matrix (a linear search).</summary>
+        public int Index { get { return Matrix.Columns.IndexOf(this); } }
+
+        /// <summary>Set by the constructor when the matrix had no columns yet.</summary>
+        public bool IsFirst { get; set; }
+
+        /// <summary>Never assigned by this class.</summary>
+        public bool IsLast { get; set; }
+
+        /// <summary>Debug text with both flags and the text of every row.</summary>
+        public override String ToString()
+        {
+            // A List prints only its type name, so the rows are joined explicitly.
+            var rowTexts = Rows.ConvertAll(row => "" + row).ToArray();
+            return "Column{first=" + IsFirst + ", last=" + IsLast + ", rows=[" + String.Join(", ", rowTexts) + "]}";
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+    /// <summary>
+    /// A column focused matrix in three dimensions:
+    ///
+    /// <pre>
+    /// Token[column][row][z-axis] {
+    ///     {{hello}, {greetings, and, salutations}},
+    ///     {{world}, {earth}, {tellus}}
+    /// };
+    /// </pre>
+    ///
+    /// TODO: consider row groups, to indicate that a shingle may only contain
+    /// permutations of texts from the same row group.
+    /// </summary>
+    public class Matrix
+    {
+        /// <summary>Creates a matrix without columns.</summary>
+        public Matrix() { Columns = new List<Column>(); }
+
+        /// <summary>The columns of the matrix; the Column constructor appends to this list.</summary>
+        public List<Column> Columns { get; private set; }
+
+        /// <summary>Creates a new iterator over the row combinations of this matrix.</summary>
+        public MatrixPermutationIterator PermutationIterator()
+        {
+            return new MatrixPermutationIterator(this);
+        }
+
+        /// <summary>Debug text with the text of every column (a List prints only its type name).</summary>
+        public override string ToString()
+        {
+            var columnTexts = Columns.ConvertAll(column => "" + column).ToArray();
+            return "Matrix{columns=[" + string.Join(", ", columnTexts) + "]}";
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+    /// <summary>
+    /// Iterates the combinations of one row per matrix column, first column changing fastest.
+    /// </summary>
+    public class MatrixPermutationIterator
+    {
+        private readonly Matrix _enclosingInstance;
+        private readonly int[] _columnRowCounters; // index of the next row, per column
+
+        public MatrixPermutationIterator(Matrix enclosingInstance)
+        {
+            _enclosingInstance = enclosingInstance;
+            _columnRowCounters = new int[_enclosingInstance.Columns.Count];
+        }
+
+        /// <summary>True while the last column's counter still addresses one of its rows.</summary>
+        public bool HasNext()
+        {
+            var s = _columnRowCounters.Length;
+            var n = _enclosingInstance.Columns.Count;
+            return s != 0 && n >= s && _columnRowCounters[s - 1] < _enclosingInstance.Columns[s - 1].Rows.Count;
+        }
+
+        /// <summary>Returns the current row of every column, then advances the counters.</summary>
+        /// <exception cref="InvalidOperationException">HasNext() is false.</exception>
+        public Row[] Next()
+        {
+            if (!HasNext())
+                throw new InvalidOperationException("no more elements");
+
+            var rows = new Row[_columnRowCounters.Length];
+            for (int i = 0; i < rows.Length; i++)
+                rows[i] = _enclosingInstance.Columns[i].Rows[_columnRowCounters[i]];
+
+            IncrementColumnRowCounters();
+            return rows;
+        }
+
+        // Odometer step: a counter past its last row wraps to 0 and carries; the last one never wraps.
+        private void IncrementColumnRowCounters()
+        {
+            for (int i = 0; i < _columnRowCounters.Length; i++)
+            {
+                _columnRowCounters[i]++;
+                if (_columnRowCounters[i] != _enclosingInstance.Columns[i].Rows.Count ||
+                    i >= _columnRowCounters.Length - 1)
+                    break;
+                _columnRowCounters[i] = 0;
+            }
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/Matrix/Row.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+
+namespace Lucene.Net.Analysis.Shingle.Matrix
+{
+    /// <summary>A row of a column, holding a linked list of tokens.</summary>
+    public class Row
+    {
+        /// <summary>Creates a row without tokens and appends it to the rows of the column.</summary>
+        public Row(Column enclosingInstance)
+        {
+            Tokens = new LinkedList<Token>();
+            Column = enclosingInstance;
+            Column.Rows.Add(this);
+        }
+
+        public Column Column { get; private set; }
+        public LinkedList<Token> Tokens { get; set; }
+
+        /// <summary>Position of this row in its column (a linear search).</summary>
+        public int Index { get { return Column.Rows.IndexOf(this); } }
+
+        /// <summary>Debug text with the index and every token (a LinkedList prints only its type name).</summary>
+        public override string ToString()
+        {
+            var tokenTexts = new List<string>();
+            foreach (var token in Tokens ?? new LinkedList<Token>()) // Tokens has a public setter
+                tokenTexts.Add("" + token);
+            return "Row{index=" + Index + ", tokens=[" + string.Join(", ", tokenTexts.ToArray()) + "]}";
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs?rev=1147678&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs Sun Jul 17 16:31:29 2011
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+
+namespace Lucene.Net.Analysis.Shingle
+{
+    /// <summary>
+    /// An analyzer that puts a ShingleFilter on top of the token stream of
+    /// another Analyzer.
+    ///
+    /// <p>A shingle is another name for a token based n-gram.</p>
+    /// </summary>
+    public class ShingleAnalyzerWrapper : Analyzer
+    {
+        /// <summary>The analyzer whose token streams get wrapped.</summary>
+        protected Analyzer DefaultAnalyzer;
+
+        /// <summary>Maximum shingle size handed to each filter; 2 unless changed.</summary>
+        protected int MaxShingleSize = 2;
+
+        /// <summary>Unigram flag handed to each filter; true unless changed.</summary>
+        protected bool OutputUnigrams = true;
+
+        /// <summary>
+        /// Wraps StandardAnalyzer.
+        /// </summary>
+        public ShingleAnalyzerWrapper()
+        {
+            DefaultAnalyzer = new StandardAnalyzer();
+            SetOverridesTokenStreamMethod(typeof (ShingleAnalyzerWrapper));
+        }
+
+        /// <summary>Wraps StandardAnalyzer; <paramref name="nGramSize"/> is the max shingle size.</summary>
+        public ShingleAnalyzerWrapper(int nGramSize)
+            : this()
+        {
+            MaxShingleSize = nGramSize;
+        }
+
+        /// <summary>Wraps <paramref name="defaultAnalyzer"/> with the default settings.</summary>
+        public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer)
+        {
+            DefaultAnalyzer = defaultAnalyzer;
+            SetOverridesTokenStreamMethod(typeof (ShingleAnalyzerWrapper));
+        }
+
+        /// <summary>Wraps <paramref name="defaultAnalyzer"/> with the given max shingle size.</summary>
+        public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize)
+            : this(defaultAnalyzer)
+        {
+            MaxShingleSize = maxShingleSize;
+        }
+
+        /// <summary>
+        /// The max shingle (ngram) size
+        /// </summary>
+        /// <returns>the size currently configured on this wrapper</returns>
+        public int GetMaxShingleSize() { return MaxShingleSize; }
+
+        /// <summary>
+        /// Set the maximum size of output shingles
+        /// </summary>
+        /// <param name="maxShingleSize">max shingle size</param>
+        public void SetMaxShingleSize(int maxShingleSize) { MaxShingleSize = maxShingleSize; }
+
+        /// <summary>
+        /// Does the filter pass the original tokens (the "unigrams") to the output stream?
+        /// </summary>
+        public bool IsOutputUnigrams() { return OutputUnigrams; }
+
+        /// <summary>
+        /// Shall the filter pass the original tokens (the "unigrams") to the output
+        /// stream?
+        /// </summary>
+        /// <param name="outputUnigrams">Whether or not the filter shall pass the original tokens to the output stream</param>
+        public void SetOutputUnigrams(bool outputUnigrams) { OutputUnigrams = outputUnigrams; }
+
+        /// <summary>
+        /// Builds a new ShingleFilter over the wrapped analyzer's stream. The
+        /// reusable stream of the wrapped analyzer is tried first; its plain
+        /// stream is used when that attempt throws an IOException.
+        /// </summary>
+        public override TokenStream TokenStream(String fieldName, TextReader reader)
+        {
+            TokenStream source;
+            try
+            {
+                source = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+            }
+            catch (IOException)
+            {
+                source = DefaultAnalyzer.TokenStream(fieldName, reader);
+            }
+
+            var shingles = new ShingleFilter(source);
+            shingles.SetMaxShingleSize(MaxShingleSize);
+            shingles.SetOutputUnigrams(OutputUnigrams);
+
+            return shingles;
+        }
+
+        /// <summary>
+        /// Returns the saved ShingleFilter, after a Reset(), when the wrapped
+        /// analyzer returns the same stream as the saved one; otherwise builds and
+        /// saves a new filter. The current settings are applied in both cases.
+        /// </summary>
+        public override TokenStream ReusableTokenStream(String fieldName, TextReader reader)
+        {
+            // LUCENE-1678: force fallback to tokenStream() if we
+            // have been subclassed and that subclass overrides
+            // tokenStream but not reusableTokenStream
+            if (overridesTokenStreamMethod)
+                return TokenStream(fieldName, reader);
+
+            var saved = (SavedStreams) GetPreviousTokenStream();
+            var current = DefaultAnalyzer.ReusableTokenStream(fieldName, reader);
+
+            if (saved == null)
+            {
+                // no previously saved streams are available
+                saved = new SavedStreams { Wrapped = current };
+                saved.Shingle = new ShingleFilter(current);
+                SetPreviousTokenStream(saved);
+            }
+            else if (current == saved.Wrapped)
+            {
+                // the wrapped analyzer reused the stream
+                saved.Shingle.Reset();
+            }
+            else
+            {
+                // the wrapped analyzer did not, create a new shingle around the new one
+                saved.Wrapped = current;
+                saved.Shingle = new ShingleFilter(current);
+            }
+
+            saved.Shingle.SetMaxShingleSize(MaxShingleSize);
+            saved.Shingle.SetOutputUnigrams(OutputUnigrams);
+            return saved.Shingle;
+        }
+
+        // The wrapped stream and the filter built around it, kept between calls.
+        private class SavedStreams
+        {
+            public TokenStream Wrapped;
+            public ShingleFilter Shingle;
+        }
+    }
+}
\ No newline at end of file



Mime
View raw message