lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject [Lucene.Net] svn commit: r1103482 [1/2] - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: src/contrib/Analyzers/ src/contrib/Analyzers/AR/ src/contrib/Analyzers/BR/ src/contrib/Analyzers/CJK/ src/contrib/Analyzers/Cz/ src/contrib/Analyzers/De/ src/contrib/Anal...
Date Sun, 15 May 2011 17:51:58 GMT
Author: digy
Date: Sun May 15 17:51:57 2011
New Revision: 1103482

URL: http://svn.apache.org/viewvc?rev=1103482&view=rev
Log:
[LUCENENET-405] contrib/Analysis.NGram
[LUCENENET-412] Required changes for Contrib/Analyzers (stopword lists changed from hashtable to ICollection<string> )

Added:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenizer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestNGramTokenizer.cs
Modified:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianStemFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/CJK/CJKAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchStemFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/AR/ArabicAnalyzer.cs Sun May 15 17:51:57 2011
@@ -17,6 +17,7 @@
 
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 
 using Lucene.Net.Analysis;
 using Lucene.Net.Util;
@@ -54,7 +55,7 @@ namespace Lucene.Net.Analysis.AR
         /**
          * Contains the stopwords used with the StopFilter.
          */
-        private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
         /**
          * The comment character in the stopwords file.  All lines prefixed with this will be ignored  
          */
@@ -84,7 +85,7 @@ namespace Lucene.Net.Analysis.AR
                 while (!reader.EndOfStream)
                 {
                     string word = reader.ReadLine();
-                    stoptable.Add(word, word);
+                    stoptable.Add(word);
                 }
             }
         }
@@ -112,16 +113,16 @@ namespace Lucene.Net.Analysis.AR
          *
          * @deprecated Use {@link #ArabicAnalyzer(Version, Hashtable)} instead
          */
-        public ArabicAnalyzer(Hashtable stopwords) : this(Version.LUCENE_24, stopwords)
+        public ArabicAnalyzer(ICollection<string> stopwords) : this(Version.LUCENE_24, stopwords)
         {
         }
 
         /**
          * Builds an analyzer with the given stop words.
          */
-        public ArabicAnalyzer(Version matchVersion, Hashtable stopwords)
+        public ArabicAnalyzer(Version matchVersion, ICollection<string> stopwords)
         {
-            stoptable = new Hashtable(stopwords);
+            stoptable = new List<string>(stopwords);
             this.matchVersion = matchVersion;
         }
 

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianAnalyzer.cs Sun May 15 17:51:57 2011
@@ -16,6 +16,7 @@
  */
 
 using System.Collections;
+using System.Collections.Generic;
 
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.Standard;
@@ -60,12 +61,12 @@ namespace Lucene.Net.Analysis.BR
         /**
          * Contains the stopwords used with the StopFilter.
          */
-        private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
 
         /**
          * Contains words that should be indexed but not stemmed.
          */
-        private Hashtable excltable = new Hashtable();
+        private ICollection<string> excltable = new List<string>();
 
         /**
          * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
@@ -86,7 +87,7 @@ namespace Lucene.Net.Analysis.BR
         /**
          * Builds an analyzer with the given stop words.
          */
-        public BrazilianAnalyzer(Hashtable stopwords)
+        public BrazilianAnalyzer(ICollection<string> stopwords)
         {
             stoptable = stopwords;
         }
@@ -109,7 +110,7 @@ namespace Lucene.Net.Analysis.BR
         /**
          * Builds an exclusionlist from a Hashtable.
          */
-        public void SetStemExclusionTable(Hashtable exclusionlist)
+        public void SetStemExclusionTable(ICollection<string> exclusionlist)
         {
             excltable = exclusionlist;
         }

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianStemFilter.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/BR/BrazilianStemFilter.cs Sun May 15 17:51:57 2011
@@ -17,6 +17,7 @@
 
 using Lucene.Net.Analysis;
 using System.Collections;
+using System.Collections.Generic;
 
 
 /**
@@ -33,7 +34,7 @@ namespace Lucene.Net.Analysis.BR
          * The actual token in the input stream.
          */
         private BrazilianStemmer stemmer = null;
-        private Hashtable exclusions = null;
+        private ICollection<string> exclusions = null;
 
         public BrazilianStemFilter(TokenStream input)
             : base(input)
@@ -41,7 +42,7 @@ namespace Lucene.Net.Analysis.BR
             stemmer = new BrazilianStemmer();
         }
 
-        public BrazilianStemFilter(TokenStream input, Hashtable exclusiontable)
+        public BrazilianStemFilter(TokenStream input, ICollection<string> exclusiontable)
             : this(input)
         {
             this.exclusions = exclusiontable;

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/CJK/CJKAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/CJK/CJKAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/CJK/CJKAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/CJK/CJKAnalyzer.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 using Lucene.Net.Analysis;
 
 namespace Lucene.Net.Analysis.CJK
@@ -91,7 +92,7 @@ namespace Lucene.Net.Analysis.CJK
 		/// <summary>
 		/// stop word list
 		/// </summary>
-		private Hashtable stopTable;
+        private ICollection<string> stopTable;
 
 		//~ Constructors -----------------------------------------------------------
 

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Contrib.Analyzers.csproj Sun May 15 17:51:57 2011
@@ -9,7 +9,7 @@
     <OutputType>Library</OutputType>
     <AppDesignerFolder>Properties</AppDesignerFolder>
     <RootNamespace>Lucene.Net.Analyzers</RootNamespace>
-    <AssemblyName>Lucene.Net.Contrib.Analyzers</AssemblyName>
+    <AssemblyName>Lucene.Net.Analyzers</AssemblyName>
     <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
     <FileAlignment>512</FileAlignment>
     <FileUpgradeFlags>
@@ -63,6 +63,10 @@
     <Compile Include="Fr\FrenchAnalyzer.cs" />
     <Compile Include="Fr\FrenchStemFilter.cs" />
     <Compile Include="Fr\FrenchStemmer.cs" />
+    <Compile Include="NGram\EdgeNGramTokenFilter.cs" />
+    <Compile Include="NGram\EdgeNGramTokenizer.cs" />
+    <Compile Include="NGram\NGramTokenFilter.cs" />
+    <Compile Include="NGram\NGramTokenizer.cs" />
     <Compile Include="Nl\DutchAnalyzer.cs" />
     <Compile Include="Nl\DutchStemFilter.cs" />
     <Compile Include="Nl\DutchStemmer.cs" />

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Cz/CzechAnalyzer.cs Sun May 15 17:51:57 2011
@@ -2,6 +2,7 @@ using System;
 using System.IO;
 using System.Text;
 using System.Collections;
+using System.Collections.Generic;
 
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.De;
@@ -102,7 +103,7 @@ namespace Lucene.Net.Analysis.Cz
 		/// <summary>
 		/// Contains the stopwords used with the StopFilter.
 		/// </summary>
-		private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
 
 		/// <summary>
 		/// Builds an analyzer.
@@ -123,7 +124,7 @@ namespace Lucene.Net.Analysis.Cz
 		/// <summary>
 		/// Builds an analyzer with the given stop words.
 		/// </summary>
-		public CzechAnalyzer( Hashtable stopwords ) 
+        public CzechAnalyzer(ICollection<string> stopwords) 
 		{
 			stoptable = stopwords;
 		}
@@ -145,13 +146,13 @@ namespace Lucene.Net.Analysis.Cz
 		{
 			if ( wordfile == null ) 
 			{
-				stoptable = new Hashtable();
+				stoptable = new List<string>();
 				return;
 			}
 			try 
 			{
 				// clear any previous table (if present)
-				stoptable = new Hashtable();
+				stoptable = new List<string>();
 
 				StreamReader isr;
 				if (encoding == null)
@@ -162,7 +163,7 @@ namespace Lucene.Net.Analysis.Cz
 				String word;
 				while ( ( word = isr.ReadLine() ) != null ) 
 				{
-					stoptable[word] = word;
+					stoptable.Add(word);
 				}
 
 			} 

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanAnalyzer.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 using Lucene.Net.Analysis.Standard;
 using Lucene.Net.Analysis;
 
@@ -37,12 +38,12 @@ namespace Lucene.Net.Analysis.De
 		/// <summary>
 		/// Contains the stopwords used with the StopFilter. 
 		/// </summary>
-		private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
 
 		/// <summary>
 		/// Contains words that should be indexed but not stemmed. 
 		/// </summary>
-		private Hashtable excltable = new Hashtable();
+        private ICollection<string> excltable = new List<string>();
 
 		/// <summary>
 		/// Builds an analyzer. 
@@ -65,7 +66,7 @@ namespace Lucene.Net.Analysis.De
 		/// Builds an analyzer with the given stop words. 
 		/// </summary>
 		/// <param name="stopwords"></param>
-		public GermanAnalyzer( Hashtable stopwords )
+        public GermanAnalyzer(ICollection<string> stopwords)
 		{
 			stoptable = stopwords;
 		}
@@ -92,7 +93,7 @@ namespace Lucene.Net.Analysis.De
 		/// Builds an exclusionlist from a Hashtable. 
 		/// </summary>
 		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable( Hashtable exclusionlist )
+        public void SetStemExclusionTable(ICollection<string> exclusionlist)
 		{
 			excltable = exclusionlist;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/GermanStemFilter.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.De
 {
@@ -16,7 +17,7 @@ namespace Lucene.Net.Analysis.De
 		/// </summary>
 		private Token token = null;
 		private GermanStemmer stemmer = null;
-		private Hashtable exclusions = null;
+        private ICollection<string> exclusions = null;
     
 		public GermanStemFilter( TokenStream _in ) : base(_in)
 		{
@@ -28,7 +29,7 @@ namespace Lucene.Net.Analysis.De
 		/// </summary>
 		/// <param name="_in"></param>
 		/// <param name="exclusiontable"></param>
-		public GermanStemFilter( TokenStream _in, Hashtable exclusiontable ): this(_in)
+        public GermanStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
 		{
 			exclusions = exclusiontable;
 		}
@@ -77,7 +78,7 @@ namespace Lucene.Net.Analysis.De
 		/// Set an alternative exclusion list for this filter. 
 		/// </summary>
 		/// <param name="exclusiontable"></param>
-		public void SetExclusionTable( Hashtable exclusiontable )
+        public void SetExclusionTable(ICollection<string> exclusiontable)
 		{
 			exclusions = exclusiontable;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/De/WordlistLoader.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.De
 {
@@ -16,11 +17,11 @@ namespace Lucene.Net.Analysis.De
 		/// <param name="path">Path to the wordlist</param>
 		/// <param name="wordfile">Name of the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( String path, String wordfile ) 
+        public static ICollection<string> GetWordtable(String path, String wordfile) 
 		{
 			if ( path == null || wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
 			return GetWordtable(new FileInfo(path + "\\" + wordfile));
 		}
@@ -29,11 +30,11 @@ namespace Lucene.Net.Analysis.De
 		/// </summary>
 		/// <param name="wordfile">Complete path to the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( String wordfile ) 
+        public static ICollection<string> GetWordtable(String wordfile) 
 		{
 			if ( wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
 			return GetWordtable( new FileInfo( wordfile ) );
 		}
@@ -43,13 +44,14 @@ namespace Lucene.Net.Analysis.De
 		/// </summary>
 		/// <param name="wordfile">File containing the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( FileInfo wordfile ) 
+        public static ICollection<string> GetWordtable(FileInfo wordfile) 
 		{
 			if ( wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
-			Hashtable result = null;
+
+            ICollection<string> result = null;
 			try 
 			{
 				StreamReader lnr = new StreamReader(wordfile.FullName);
@@ -72,7 +74,7 @@ namespace Lucene.Net.Analysis.De
 				// On error, use an empty table
 			catch (IOException) 
 			{
-				result = new Hashtable();
+				result = new List<string>();
 			}
 			return result;
 		}
@@ -83,12 +85,12 @@ namespace Lucene.Net.Analysis.De
 		/// <param name="words">Word that where read</param>
 		/// <param name="length">Amount of words that where read into <tt>words</tt></param>
 		/// <returns></returns>
-		private static Hashtable MakeWordTable( String[] words, int length ) 
+        private static ICollection<string> MakeWordTable(String[] words, int length) 
 		{
-			Hashtable table = new Hashtable( length );
+			List<string> table = new List<string>( length );
 			for ( int i = 0; i < length; i++ ) 
 			{
-				table.Add(words[i], words[i]);
+				table.Add(words[i]);
 			}
 			return table;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs Sun May 15 17:51:57 2011
@@ -2,6 +2,7 @@ using System;
 using System.IO;
 using System.Text;
 using System.Collections;
+using System.Collections.Generic;
 
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.De;
@@ -108,12 +109,12 @@ namespace Lucene.Net.Analysis.Fr
 		/// <summary>
 		/// Contains the stopwords used with the StopFilter.
 		/// </summary>
-		private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
 
 		/// <summary>
 		/// Contains words that should be indexed but not stemmed.
 		/// </summary>
-		private Hashtable excltable = new Hashtable();
+        private ICollection<string> excltable = new List<string>();
 
 		/// <summary>
 		/// Builds an analyzer.
@@ -134,7 +135,7 @@ namespace Lucene.Net.Analysis.Fr
 		/// <summary>
 		/// Builds an analyzer with the given stop words.
 		/// </summary>
-		public FrenchAnalyzer( Hashtable stopwords ) 
+        public FrenchAnalyzer(ICollection<string> stopwords) 
 		{
 			stoptable = stopwords;
 		}
@@ -158,7 +159,7 @@ namespace Lucene.Net.Analysis.Fr
 		/// <summary>
 		/// Builds an exclusionlist from a Hashtable.
 		/// </summary>
-		public void SetStemExclusionTable( Hashtable exclusionlist ) 
+        public void SetStemExclusionTable(ICollection<string> exclusionlist) 
 		{
 			excltable = exclusionlist;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchStemFilter.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Fr/FrenchStemFilter.cs Sun May 15 17:51:57 2011
@@ -2,6 +2,7 @@ using System;
 using System.IO;
 using System.Text;
 using System.Collections;
+using System.Collections.Generic;
 
 using Lucene.Net.Analysis;
 
@@ -77,7 +78,7 @@ namespace Lucene.Net.Analysis.Fr
 		/// </summary>
 		private Token token = null;
 		private FrenchStemmer stemmer = null;
-		private Hashtable exclusions = null;
+        private ICollection<string> exclusions = null;
 
 		public FrenchStemFilter( TokenStream _in ) : base(_in)
 		{
@@ -87,7 +88,7 @@ namespace Lucene.Net.Analysis.Fr
 		/// <summary>
 		/// Builds a FrenchStemFilter that uses an exclusiontable.
 		/// </summary>
-		public FrenchStemFilter( TokenStream _in, Hashtable exclusiontable ) : 	this( _in )
+        public FrenchStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
 		{
 			exclusions = exclusiontable;
 		}
@@ -135,7 +136,7 @@ namespace Lucene.Net.Analysis.Fr
 		/// <summary>
 		/// Set an alternative exclusion list for this filter.
 		/// </summary>
-		public void SetExclusionTable( Hashtable exclusiontable ) 
+        public void SetExclusionTable(ICollection<string> exclusiontable) 
 		{
 			exclusions = exclusiontable;
 		}

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /**
+     * Tokenizes the given token into n-grams of given size(s).
+     * <p>
+     * This {@link TokenFilter} creates n-grams from the beginning edge or ending edge of an input token.
+     * </p>
+     */
+    public class EdgeNGramTokenFilter : TokenFilter
+    {
+        public static Side DEFAULT_SIDE = Side.FRONT;
+        public static int DEFAULT_MAX_GRAM_SIZE = 1;
+        public static int DEFAULT_MIN_GRAM_SIZE = 1;
+
+        // Replace this with an enum when the Java 1.5 upgrade is made, the impl will be simplified
+        /** Specifies which side of the input the n-gram should be generated from */
+        public class Side
+        {
+            private string label;
+
+            /** Get the n-gram from the front of the input */
+            public static Side FRONT = new Side("front");
+
+            /** Get the n-gram from the end of the input */
+            public static Side BACK = new Side("back");
+
+            // Private ctor
+            private Side(string label) { this.label = label; }
+
+            public string getLabel() { return label; }
+
+            // Get the appropriate Side from a string
+            public static Side getSide(string sideName)
+            {
+                if (FRONT.getLabel().Equals(sideName))
+                {
+                    return FRONT;
+                }
+                else if (BACK.getLabel().Equals(sideName))
+                {
+                    return BACK;
+                }
+                return null;
+            }
+        }
+
+        private int minGram;
+        private int maxGram;
+        private Side side;
+        private char[] curTermBuffer;
+        private int curTermLength;
+        private int curGramSize;
+        private int tokStart;
+
+        private TermAttribute termAtt;
+        private OffsetAttribute offsetAtt;
+
+
+        protected EdgeNGramTokenFilter(TokenStream input) : base(input)
+        {
+            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+        }
+
+        /**
+         * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+         *
+         * @param input {@link TokenStream} holding the input to be tokenized
+         * @param side the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenFilter(TokenStream input, Side side, int minGram, int maxGram)
+            : base(input)
+        {
+
+
+            if (side == null)
+            {
+                throw new System.ArgumentException("sideLabel must be either front or back");
+            }
+
+            if (minGram < 1)
+            {
+                throw new System.ArgumentException("minGram must be greater than zero");
+            }
+
+            if (minGram > maxGram)
+            {
+                throw new System.ArgumentException("minGram must not be greater than maxGram");
+            }
+
+            this.minGram = minGram;
+            this.maxGram = maxGram;
+            this.side = side;
+            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+        }
+
+        /**
+         * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
+         *
+         * @param input {@link TokenStream} holding the input to be tokenized
+         * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenFilter(TokenStream input, string sideLabel, int minGram, int maxGram)
+            : this(input, Side.getSide(sideLabel), minGram, maxGram)
+        {
+
+        }
+
+        public override bool IncrementToken()
+        {
+            while (true)
+            {
+                if (curTermBuffer == null)
+                {
+                    if (!input.IncrementToken())
+                    {
+                        return false;
+                    }
+                    else
+                    {
+                        curTermBuffer = (char[])termAtt.TermBuffer().Clone();
+                        curTermLength = termAtt.TermLength();
+                        curGramSize = minGram;
+                        tokStart = offsetAtt.StartOffset();
+                    }
+                }
+                if (curGramSize <= maxGram)
+                {
+                    if (!(curGramSize > curTermLength         // if the remaining input is too short, we can't generate any n-grams
+                        || curGramSize > maxGram))
+                    {       // if we have hit the end of our n-gram size range, quit
+                        // grab gramSize chars from front or back
+                        int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
+                        int end = start + curGramSize;
+                        ClearAttributes();
+                        offsetAtt.SetOffset(tokStart + start, tokStart + end);
+                        termAtt.SetTermBuffer(curTermBuffer, start, curGramSize);
+                        curGramSize++;
+                        return true;
+                    }
+                }
+                curTermBuffer = null;
+            }
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+         * not be overridden. Delegates to the backwards compatibility layer. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override  Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
+         * not be overridden. Delegates to the backwards compatibility layer. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next()
+        {
+            return base.Next();
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            curTermBuffer = null;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /**
+     * Tokenizes the input from an edge into n-grams of given size(s).
+     * <p>
+     * This {@link Tokenizer} creates n-grams from the beginning edge or ending edge of an input token.
+     * The whole input is read and trimmed on the first IncrementToken() call, so maxGram is not capped at 1024.
+     * </p>
+     */
+    public class EdgeNGramTokenizer : Tokenizer
+    {
+        public static Side DEFAULT_SIDE = Side.FRONT;
+        public static int DEFAULT_MAX_GRAM_SIZE = 1;
+        public static int DEFAULT_MIN_GRAM_SIZE = 1;
+
+        private TermAttribute termAtt;
+        private OffsetAttribute offsetAtt;
+
+        // Type-safe enum pattern kept from the Java original; instances are compared by reference
+        /** Specifies which side of the input the n-gram should be generated from */
+        public class Side
+        {
+            private readonly string label;
+
+            /** Get the n-gram from the front of the input */
+            public static readonly Side FRONT = new Side("front");
+
+            /** Get the n-gram from the end of the input */
+            public static readonly Side BACK = new Side("back");
+
+            // Private ctor: FRONT and BACK are the only instances
+            private Side(string label) { this.label = label; }
+
+            /** Returns the label of this side ("front" or "back") */
+            public string getLabel() { return label; }
+
+            /** Returns the Side whose label equals sideName, or null when neither label matches */
+            public static Side getSide(string sideName)
+            {
+                if (FRONT.getLabel().Equals(sideName))
+                {
+                    return FRONT;
+                }
+                else if (BACK.getLabel().Equals(sideName))
+                {
+                    return BACK;
+                }
+                return null;
+            }
+        }
+
+        // Tokenizer state; gramSize, inLen and inStr are set on the first IncrementToken() call
+        private int minGram;
+        private int maxGram;
+        private int gramSize;
+        private Side side;
+        private bool started = false;
+        private int inLen;
+        private string inStr;
+
+
+        /**
+         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
+         * Throws System.ArgumentException if side is null, minGram is below 1 or minGram exceeds maxGram.
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param side the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenizer(TextReader input, Side side, int minGram, int maxGram)
+            : base(input)
+        {
+            init(side, minGram, maxGram);
+        }
+
+        /**
+         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
+         * Throws System.ArgumentException if side is null, minGram is below 1 or minGram exceeds maxGram.
+         * @param source {@link AttributeSource} to use
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param side the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenizer(AttributeSource source, TextReader input, Side side, int minGram, int maxGram)
+            : base(source, input)
+        {
+            init(side, minGram, maxGram);
+        }
+
+        /**
+         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
+         * Throws System.ArgumentException if side is null, minGram is below 1 or minGram exceeds maxGram.
+         * @param factory {@link AttributeFactory} to use
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param side the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenizer(AttributeFactory factory, TextReader input, Side side, int minGram, int maxGram)
+            : base(factory, input)
+        {
+            init(side, minGram, maxGram);
+        }
+
+        /**
+         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
+         * Throws System.ArgumentException if sideLabel is neither "front" nor "back", or the gram range is invalid.
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenizer(TextReader input, string sideLabel, int minGram, int maxGram)
+            : this(input, Side.getSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /**
+         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
+         * Throws System.ArgumentException if sideLabel is neither "front" nor "back", or the gram range is invalid.
+         * @param source {@link AttributeSource} to use
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenizer(AttributeSource source, TextReader input, string sideLabel, int minGram, int maxGram)
+            : this(source, input, Side.getSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /**
+         * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range.
+         * Throws System.ArgumentException if sideLabel is neither "front" nor "back", or the gram range is invalid.
+         * @param factory {@link AttributeFactory} to use
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param sideLabel the name of the {@link Side} from which to chop off an n-gram
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public EdgeNGramTokenizer(AttributeFactory factory, TextReader input, string sideLabel, int minGram, int maxGram) :
+            this(factory, input, Side.getSide(sideLabel), minGram, maxGram)
+        {
+        }
+
+        /** Validates the arguments, stores them and registers the term and offset attributes. */
+        private void init(Side side, int minGram, int maxGram)
+        {
+            if (side == null)
+            {
+                throw new System.ArgumentException("sideLabel must be either front or back");
+            }
+
+            if (minGram < 1)
+            {
+                throw new System.ArgumentException("minGram must be greater than zero");
+            }
+
+            if (minGram > maxGram)
+            {
+                throw new System.ArgumentException("minGram must not be greater than maxGram");
+            }
+
+            this.minGram = minGram;
+            this.maxGram = maxGram;
+            this.side = side;
+
+            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+        }
+
+        /** Advances to the next n-gram. Returns false once no further n-gram can be produced. */
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            // if we are just starting, read the whole input
+            if (!started)
+            {
+                started = true;
+                // start/end computed below are positions in the trimmed text, not in the raw input
+                inStr = input.ReadToEnd().Trim();  // remove any leading or trailing spaces
+                inLen = inStr.Length;
+                gramSize = minGram;
+            }
+
+            // if the remaining input is too short, we can't generate any n-grams
+            if (gramSize > inLen)
+            {
+                return false;
+            }
+
+            // if we have hit the end of our n-gram size range, quit
+            if (gramSize > maxGram)
+            {
+                return false;
+            }
+
+            // grab gramSize chars from front or back
+            int start = side == Side.FRONT ? 0 : inLen - gramSize;
+            int end = start + gramSize;
+            termAtt.SetTermBuffer(inStr, start, gramSize);
+            offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
+            gramSize++;
+            return true;
+        }
+
+        /** Sets the final offset to the length of the trimmed input, after offset correction. */
+        public override void End()
+        {
+            // run the offset through CorrectOffset, as IncrementToken() does for token offsets
+            int finalOffset = CorrectOffset(inLen);
+            this.offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. Legacy Token-based entry point that only
+         * forwards to the base class. Note: not declared sealed, despite the "final" wording. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. Legacy parameterless entry point that only
+         * forwards to the base class. Note: not declared sealed, despite the "final" wording. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next()
+        {
+            return base.Next();
+        }
+
+        /** Hands the new reader to the base class, then resets this tokenizer's own state. */
+        public override void Reset(TextReader input)
+        {
+            base.Reset(input);
+            Reset();
+        }
+
+        /** Resets the base class and marks the input as unread, so the next IncrementToken() reads it again. */
+        public override void Reset()
+        {
+            base.Reset();
+            started = false;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenFilter.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenFilter.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /**
+     * Splits every term of the wrapped TokenStream into n-grams of the given size(s), smallest size first.
+     */
+    public class NGramTokenFilter : TokenFilter
+    {
+        public static int DEFAULT_MIN_NGRAM_SIZE = 1;
+        public static int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+        private int minGram, maxGram;
+
+        private char[] curTermBuffer;   // copy of the term being split; null means "fetch the next input token"
+        private int curTermLength;      // number of valid chars in curTermBuffer
+        private int curGramSize;        // size of the grams currently being emitted
+        private int curPos;             // start of the next gram inside curTermBuffer
+        private int tokStart;           // start offset of the current input token
+
+        private TermAttribute termAtt;
+        private OffsetAttribute offsetAtt;
+
+        /**
+         * Creates NGramTokenFilter with given min and max n-grams.
+         * Throws System.ArgumentException if minGram is below 1 or minGram exceeds maxGram.
+         * @param input {@link TokenStream} holding the input to be tokenized
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public NGramTokenFilter(TokenStream input, int minGram, int maxGram)
+            : base(input)
+        {
+            if (minGram < 1)
+            {
+                throw new System.ArgumentException("minGram must be greater than zero");
+            }
+            if (minGram > maxGram)
+            {
+                throw new System.ArgumentException("minGram must not be greater than maxGram");
+            }
+            this.minGram = minGram;
+            this.maxGram = maxGram;
+
+            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+        }
+
+        /**
+         * Creates NGramTokenFilter with default min and max n-grams.
+         * @param input {@link TokenStream} holding the input to be tokenized
+         */
+        public NGramTokenFilter(TokenStream input)
+            : this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+        {
+        }
+
+        /** Advances to the next n-gram. Returns false once the wrapped stream has no more tokens. */
+        public override bool IncrementToken()
+        {
+            while (true)
+            {
+                if (curTermBuffer == null)
+                {
+                    if (!input.IncrementToken())
+                    {
+                        return false;
+                    }
+                    else
+                    {
+                        curTermBuffer = (char[])termAtt.TermBuffer().Clone();   // private copy: termAtt is overwritten below
+                        curTermLength = termAtt.TermLength();
+                        curGramSize = minGram;
+                        curPos = 0;
+                        tokStart = offsetAtt.StartOffset();
+                    }
+                }
+                while (curGramSize <= maxGram)
+                {
+                    if (curPos + curGramSize <= curTermLength)
+                    {     // a gram of the current size still fits at curPos
+                        ClearAttributes();
+                        termAtt.SetTermBuffer(curTermBuffer, curPos, curGramSize);
+                        offsetAtt.SetOffset(tokStart + curPos, tokStart + curPos + curGramSize);
+                        curPos++;
+                        return true;
+                    }
+                    curGramSize++;                         // increase n-gram size
+                    curPos = 0;
+                }
+                curTermBuffer = null;                      // term exhausted: fetch the next input token
+            }
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. Legacy Token-based entry point that only
+         * forwards to the base class. Note: not declared sealed, despite the "final" wording. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. Legacy parameterless entry point that only
+         * forwards to the base class. Note: not declared sealed, despite the "final" wording. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next()
+        {
+            return base.Next();
+        }
+
+        /** Resets the base class and drops the buffered term, so IncrementToken() fetches a new input token. */
+        public override void Reset()
+        {
+            base.Reset();
+            curTermBuffer = null;
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenizer.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenizer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/NGram/NGramTokenizer.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /**
+     * Tokenizes the input into n-grams of the given size(s): all grams of the smallest size first, then the next size.
+     */
+    public class NGramTokenizer : Tokenizer
+    {
+        public static int DEFAULT_MIN_NGRAM_SIZE = 1;
+        public static int DEFAULT_MAX_NGRAM_SIZE = 2;
+
+        private int minGram, maxGram;
+        private int gramSize;           // size of the grams currently being emitted
+        private int pos = 0;            // start of the next gram inside inStr
+        private int inLen;              // length of inStr
+        private string inStr;           // whole input, read on the first IncrementToken() call
+        private bool started = false;   // true once the input has been read
+
+        private TermAttribute termAtt;
+        private OffsetAttribute offsetAtt;
+
+        /**
+         * Creates NGramTokenizer with given min and max n-grams (System.ArgumentException on an invalid range).
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public NGramTokenizer(TextReader input, int minGram, int maxGram)
+            : base(input)
+        {
+            init(minGram, maxGram);
+        }
+
+        /**
+         * Creates NGramTokenizer with given min and max n-grams (System.ArgumentException on an invalid range).
+         * @param source {@link AttributeSource} to use
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public NGramTokenizer(AttributeSource source, TextReader input, int minGram, int maxGram)
+            : base(source, input)
+        {
+            init(minGram, maxGram);
+        }
+
+        /**
+         * Creates NGramTokenizer with given min and max n-grams (System.ArgumentException on an invalid range).
+         * @param factory {@link AttributeFactory} to use
+         * @param input {@link TextReader} holding the input to be tokenized
+         * @param minGram the smallest n-gram to generate
+         * @param maxGram the largest n-gram to generate
+         */
+        public NGramTokenizer(AttributeFactory factory, TextReader input, int minGram, int maxGram)
+            : base(factory, input)
+        {
+            init(minGram, maxGram);
+        }
+
+        /**
+         * Creates NGramTokenizer with default min and max n-grams.
+         * @param input {@link TextReader} holding the input to be tokenized
+         */
+        public NGramTokenizer(TextReader input)
+            : this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
+        {
+        }
+
+        /** Validates the gram range, stores it and registers the term and offset attributes. */
+        private void init(int minGram, int maxGram)
+        {
+            if (minGram < 1)
+            {
+                throw new System.ArgumentException("minGram must be greater than zero");
+            }
+            if (minGram > maxGram)
+            {
+                throw new System.ArgumentException("minGram must not be greater than maxGram");
+            }
+            this.minGram = minGram;
+            this.maxGram = maxGram;
+
+            this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute));
+            this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute));
+        }
+
+        /** Advances to the next n-gram. Returns false once no further n-gram can be produced. */
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            if (!started)
+            {
+                started = true;
+                gramSize = minGram;
+                inStr = input.ReadToEnd();  // whole input, used as-is (no trimming is applied here)
+                inLen = inStr.Length;
+            }
+
+            if (pos + gramSize > inLen)
+            {            // if we hit the end of the string
+                pos = 0;                           // reset to beginning of string
+                gramSize++;                        // increase n-gram size
+                if (gramSize > maxGram)            // we are done
+                    return false;
+                if (pos + gramSize > inLen)
+                    return false;
+            }
+
+            int oldPos = pos;
+            pos++;
+            termAtt.SetTermBuffer(inStr, oldPos, gramSize);
+            offsetAtt.SetOffset(CorrectOffset(oldPos), CorrectOffset(oldPos + gramSize));
+            return true;
+        }
+
+        public override void End()
+        {
+            // final offset = input length, corrected like the token offsets in IncrementToken()
+            int finalOffset = CorrectOffset(inLen);
+            this.offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. Legacy Token-based entry point that only
+         * forwards to the base class. Note: not declared sealed, despite the "final" wording. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next(Token reusableToken)
+        {
+            return base.Next(reusableToken);
+        }
+
+        /** @deprecated Will be removed in Lucene 3.0. Legacy parameterless entry point that only
+         * forwards to the base class. Note: not declared sealed, despite the "final" wording. */
+        [System.Obsolete("Will be removed in Lucene 3.0. This method is final, as it should not be overridden. Delegates to the backwards compatibility layer.")]
+        public override Token Next()
+        {
+            return base.Next();
+        }
+
+        /** Hands the new reader to the base class, then resets this tokenizer's own state. */
+        public override void Reset(TextReader input)
+        {
+            base.Reset(input);
+            Reset();
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            started = false;   // the next IncrementToken() reads the input again
+            pos = 0;           // and starts over at the beginning of the text
+        }
+    }
+}
\ No newline at end of file

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchAnalyzer.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 using Lucene.Net.Analysis.Standard;
 
 namespace Lucene.Net.Analysis.Nl
@@ -94,14 +95,14 @@ namespace Lucene.Net.Analysis.Nl
 		/// <summary>
 		/// Contains the stopwords used with the StopFilter. 
 		/// </summary>
-		private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
 
 		/// <summary>
 		/// Contains words that should be indexed but not stemmed. 
 		/// </summary>
-		private Hashtable excltable = new Hashtable();
+        private ICollection<string> excltable = new List<string>();
 
-		private Hashtable _stemdict = new Hashtable();
+        private Dictionary<string,string> _stemdict = new Dictionary<string,string>();
 
 		/// <summary>
 		/// Builds an analyzer. 
@@ -128,7 +129,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// Builds an analyzer with the given stop words. 
 		/// </summary>
 		/// <param name="stopwords"></param>
-		public DutchAnalyzer( Hashtable stopwords )
+        public DutchAnalyzer(ICollection<string> stopwords)
 		{
 			stoptable = stopwords;
 		}
@@ -155,7 +156,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// Builds an exclusionlist from a Hashtable. 
 		/// </summary>
 		/// <param name="exclusionlist"></param>
-		public void SetStemExclusionTable( Hashtable exclusionlist )
+        public void SetStemExclusionTable(ICollection<string> exclusionlist)
 		{
 			excltable = exclusionlist;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemFilter.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Nl
 {
@@ -74,7 +75,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// </summary>
 		private Token token = null;
 		private DutchStemmer stemmer = null;
-		private Hashtable exclusions = null;
+        private ICollection<string> exclusions = null;
     
 		public DutchStemFilter( TokenStream _in ) : base(_in)
 		{
@@ -86,7 +87,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// </summary>
 		/// <param name="_in"></param>
 		/// <param name="exclusiontable"></param>
-		public DutchStemFilter( TokenStream _in, Hashtable exclusiontable ): this(_in)
+        public DutchStemFilter(TokenStream _in, ICollection<string> exclusiontable) : this(_in)
 		{
 			exclusions = exclusiontable;
 		}
@@ -97,7 +98,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// <param name="_in"></param>
 		/// <param name="exclusiontable"></param>
 		/// <param name="stemdictionary">Dictionary of word stem pairs, that overrule the algorithm</param>
-		public DutchStemFilter( TokenStream _in, Hashtable exclusiontable , Hashtable stemdictionary): this(_in, exclusiontable)
+        public DutchStemFilter(TokenStream _in, ICollection<string> exclusiontable, Dictionary<string,string> stemdictionary) : this(_in, exclusiontable)
 		{
 			stemmer.SetStemDictionary(stemdictionary);
 		}
@@ -146,7 +147,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// Set an alternative exclusion list for this filter. 
 		/// </summary>
 		/// <param name="exclusiontable"></param>
-		public void SetExclusionTable( Hashtable exclusiontable )
+        public void SetExclusionTable(ICollection<string> exclusiontable)
 		{
 			exclusions = exclusiontable;
 		}
@@ -156,7 +157,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// so you can correct for a particular unwanted word-stem pair.
 		/// </summary>
 		/// <param name="dict"></param>
-		public void SetStemDictionary(Hashtable dict)
+		public void SetStemDictionary(Dictionary<string,string> dict)
 		{
 			if (stemmer != null)
 				stemmer.SetStemDictionary(dict);

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/DutchStemmer.cs Sun May 15 17:51:57 2011
@@ -2,6 +2,7 @@ using System;
 using System.IO;
 using System.Text;
 using System.Collections;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Nl
 {
@@ -75,7 +76,7 @@ namespace Lucene.Net.Analysis.Nl
 		/// </summary>
 		private StringBuilder sb = new StringBuilder();
 		private bool _removedE;
-		private Hashtable _stemDict;
+		private Dictionary<string,string> _stemDict;
 
 
 		private int _R1;
@@ -92,7 +93,7 @@ namespace Lucene.Net.Analysis.Nl
 			term = term.ToLower();
 			if ( !IsStemmable( term ) )
 				return term;
-			if (_stemDict != null && _stemDict.Contains(term))
+			if (_stemDict != null && _stemDict.ContainsKey(term))
 				return _stemDict[term] as string;
 			// Reset the StringBuilder.
 			sb.Remove(0, sb.Length);
@@ -477,7 +478,7 @@ namespace Lucene.Net.Analysis.Nl
 			return false;
 		}
 
-		internal void SetStemDictionary(Hashtable dict)
+		internal void SetStemDictionary(Dictionary<string,string> dict)
 		{
 			_stemDict = dict;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Nl/WordlistLoader.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Nl
 {
@@ -72,21 +73,21 @@ namespace Lucene.Net.Analysis.Nl
 		/// <param name="path">Path to the wordlist</param>
 		/// <param name="wordfile">Name of the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( String path, String wordfile ) 
+        public static ICollection<string> GetWordtable(String path, String wordfile) 
 		{
 			if ( path == null || wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
 			return GetWordtable(new FileInfo(path + "\\" + wordfile));
 		}
 
 		/// <param name="wordfile">Complete path to the wordlist</param>
-		public static Hashtable GetWordtable( String wordfile ) 
+        public static ICollection<string> GetWordtable(String wordfile) 
 		{
 			if ( wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
 			return GetWordtable( new FileInfo( wordfile ) );
 		}
@@ -98,13 +99,13 @@ namespace Lucene.Net.Analysis.Nl
 		/// </summary>
 		/// <param name="wordstemfile"></param>
 		/// <returns>Stem dictionary that overrules, the stemming algorithm</returns>
-		public static Hashtable GetStemDict( FileInfo wordstemfile)
+        public static Dictionary<string,string> GetStemDict(FileInfo wordstemfile)
 		{
 			if ( wordstemfile == null ) 
 			{
-				return new Hashtable();
+				return new Dictionary<string,string>();
 			}
-			Hashtable result = new Hashtable();
+			Dictionary<string,string> result = new Dictionary<string,string>();
 			try 
 			{
 				StreamReader lnr = new StreamReader(wordstemfile.FullName);
@@ -127,13 +128,13 @@ namespace Lucene.Net.Analysis.Nl
 		/// </summary>
 		/// <param name="wordfile">File containing the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( FileInfo wordfile ) 
+        public static ICollection<string> GetWordtable(FileInfo wordfile) 
 		{
 			if ( wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
-			Hashtable result = null;
+            ICollection<string> result = null;
 			try 
 			{
 				StreamReader lnr = new StreamReader(wordfile.FullName);
@@ -156,7 +157,7 @@ namespace Lucene.Net.Analysis.Nl
 				// On error, use an empty table
 			catch (IOException) 
 			{
-				result = new Hashtable();
+				result = new List<string>();
 			}
 			return result;
 		}
@@ -167,12 +168,12 @@ namespace Lucene.Net.Analysis.Nl
 		/// <param name="words">Words that were read</param>
 		/// <param name="length">Number of words that were read into <tt>words</tt></param>
 		/// <returns></returns>
-		private static Hashtable MakeWordTable( String[] words, int length ) 
+        private static ICollection<string> MakeWordTable(String[] words, int length) 
 		{
-			Hashtable table = new Hashtable( length );
+            List<string> table = new List<string>(length);
 			for ( int i = 0; i < length; i++ ) 
 			{
-				table.Add(words[i], words[i]);
+				table.Add(words[i]);
 			}
 			return table;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/Ru/RussianAnalyzer.cs Sun May 15 17:51:57 2011
@@ -2,6 +2,7 @@ using System;
 using System.Text;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 using Lucene.Net.Analysis;
 
 namespace Lucene.Net.Analysis.Ru
@@ -157,7 +158,7 @@ namespace Lucene.Net.Analysis.Ru
 		/// <summary>
 		/// Contains the stopwords used with the StopFilter.
 		/// </summary>
-		private Hashtable stoptable = new Hashtable();
+        private ICollection<string> stoptable = new List<string>();
 
 		/// <summary>
 		/// Charset for Russian letters.
@@ -224,7 +225,7 @@ namespace Lucene.Net.Analysis.Ru
 		/// </summary>
 		/// <param name="charset"></param>
 		/// <param name="stopwords"></param>
-		public RussianAnalyzer(char[] charset, Hashtable stopwords)
+        public RussianAnalyzer(char[] charset, ICollection<string> stopwords)
 		{
 			this.charset = charset;
 			stoptable = stopwords;

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/Analyzers/WordlistLoader.cs Sun May 15 17:51:57 2011
@@ -1,6 +1,7 @@
 using System;
 using System.IO;
 using System.Collections;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis
 {
@@ -17,11 +18,11 @@ namespace Lucene.Net.Analysis
 		/// <param name="path">Path to the wordlist</param>
 		/// <param name="wordfile">Name of the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( String path, String wordfile ) 
+        public static ICollection<string> GetWordtable(String path, String wordfile) 
 		{
 			if ( path == null || wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
 			return GetWordtable(new FileInfo(path + "\\" + wordfile));
 		}
@@ -31,11 +32,11 @@ namespace Lucene.Net.Analysis
 		/// </summary>
 		/// <param name="wordfile">Complete path to the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( String wordfile ) 
+        public static ICollection<string> GetWordtable(String wordfile) 
 		{
 			if ( wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}
 			return GetWordtable( new FileInfo( wordfile ) );
 		}
@@ -45,11 +46,11 @@ namespace Lucene.Net.Analysis
 		/// </summary>
 		/// <param name="wordfile">File containing the wordlist</param>
 		/// <returns></returns>
-		public static Hashtable GetWordtable( FileInfo wordfile ) 
+		public static ICollection<string> GetWordtable( FileInfo wordfile ) 
 		{
 			if ( wordfile == null ) 
 			{
-				return new Hashtable();
+				return new List<string>();
 			}			
 			StreamReader lnr = new StreamReader(wordfile.FullName);
 			return GetWordtable(lnr);
@@ -63,23 +64,23 @@ namespace Lucene.Net.Analysis
 		/// </summary>
 		/// <param name="reader">Reader containing the wordlist</param>
 		/// <returns>A collection containing the reader's words</returns>
-		public static Hashtable GetWordtable(TextReader reader)
+		public static ICollection<string> GetWordtable(TextReader reader)
 		{
-			Hashtable result = new Hashtable();			
+			ICollection<string> result = new List<string>();			
 			try 
 			{				
-				ArrayList stopWords = new ArrayList();
+				List<string> stopWords = new List<string>();
 				String word = null;
 				while ( ( word = reader.ReadLine() ) != null ) 
 				{
 					stopWords.Add(word.Trim());
 				}
-				result = MakeWordTable( (String[])stopWords.ToArray(typeof(string)), stopWords.Count);
+				result = MakeWordTable(stopWords.ToArray(), stopWords.Count);
 			}
 				// On error, use an empty table
 			catch (IOException) 
 			{
-				result = new Hashtable();
+				result = new List<string>();
 			}
 			return result;
 		}
@@ -91,12 +92,12 @@ namespace Lucene.Net.Analysis
 		/// <param name="words">Words that were read</param>
 		/// <param name="length">Number of words that were read into <tt>words</tt></param>
 		/// <returns></returns>
-		private static Hashtable MakeWordTable( String[] words, int length ) 
+		private static ICollection<string> MakeWordTable( String[] words, int length ) 
 		{
-			Hashtable table = new Hashtable( length );
+			List<string> table = new List<string>( length );
 			for ( int i = 0; i < length; i++ ) 
 			{
-				table.Add(words[i], words[i]);
+				table.Add(words[i]);
 			}
 			return table;
 		}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj?rev=1103482&r1=1103481&r2=1103482&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj Sun May 15 17:51:57 2011
@@ -9,7 +9,7 @@
     <OutputType>Library</OutputType>
     <AppDesignerFolder>Properties</AppDesignerFolder>
     <RootNamespace>Lucene.Net.Analyzers</RootNamespace>
-    <AssemblyName>Lucene.Net.Contrib.Analyzers.Test</AssemblyName>
+    <AssemblyName>Lucene.Net.Analyzers.Test</AssemblyName>
     <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
     <FileAlignment>512</FileAlignment>
     <FileUpgradeFlags>
@@ -60,6 +60,10 @@
     <Compile Include="AR\TestArabicAnalyzer.cs" />
     <Compile Include="AR\TestArabicNormalizationFilter.cs" />
     <Compile Include="AR\TestArabicStemFilter.cs" />
+    <Compile Include="NGram\TestEdgeNGramTokenFilter.cs" />
+    <Compile Include="NGram\TestEdgeNGramTokenizer.cs" />
+    <Compile Include="NGram\TestNGramTokenFilter.cs" />
+    <Compile Include="NGram\TestNGramTokenizer.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
   </ItemGroup>
   <ItemGroup>

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenFilter.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /**
+     * Tests {@link EdgeNGramTokenFilter} for correctness.
+     */
+    [TestFixture]
+    public class TestEdgeNGramTokenFilter : BaseTokenStreamTestCase
+    {
+        private TokenStream input;
+
+        [SetUp]
+        public void SetUp()
+        {
+            base.SetUp();
+            input = new WhitespaceTokenizer(new StringReader("abcde"));
+        }
+
+        [Test]
+        public void TestInvalidInput()
+        {
+            bool gotException = false;
+            try
+            {
+                new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 0, 0);
+            }
+            catch (System.ArgumentException e)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestInvalidInput2()
+        {
+            bool gotException = false;
+            try
+            {
+                new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 2, 1);
+            }
+            catch (System.ArgumentException e)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestInvalidInput3()
+        {
+            bool gotException = false;
+            try
+            {
+                new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, -1, 2);
+            }
+            catch (System.ArgumentException e)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestFrontUnigram()
+        {
+            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 1);
+            AssertTokenStreamContents(tokenizer, new String[] { "a" }, new int[] { 0 }, new int[] { 1 });
+        }
+
+        [Test]
+        public void TestBackUnigram()
+        {
+            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 1);
+            AssertTokenStreamContents(tokenizer, new String[] { "e" }, new int[] { 4 }, new int[] { 5 });
+        }
+
+        [Test]
+        public void TestOversizedNgrams()
+        {
+            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 6, 6);
+            AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0]);
+        }
+
+        [Test]
+        public void TestFrontRangeOfNgrams()
+        {
+            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 });
+        }
+
+        [Test]
+        public void TestBackRangeOfNgrams()
+        {
+            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.BACK, 1, 3);
+            AssertTokenStreamContents(tokenizer, new String[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 });
+        }
+
+        [Test]
+        public void TestSmallTokenInStream()
+        {
+            input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
+            EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
+            AssertTokenStreamContents(tokenizer, new String[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
+        }
+
+        [Test]
+        public void TestReset()
+        {
+            WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
+            EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, EdgeNGramTokenFilter.Side.FRONT, 1, 3);
+            AssertTokenStreamContents(filter, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 });
+            tokenizer.Reset(new StringReader("abcde"));
+            AssertTokenStreamContents(filter, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 });
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs?rev=1103482&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/Analyzers/NGram/TestEdgeNGramTokenizer.cs Sun May 15 17:51:57 2011
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using System.Collections;
+
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+namespace Lucene.Net.Analysis.NGram
+{
+
+    /**
+     * Tests {@link EdgeNGramTokenizer} for correctness.
+     */
+    [TestFixture]
+    public class TestEdgeNGramTokenizer : BaseTokenStreamTestCase
+    {
+        private StringReader input;
+
+        [SetUp]
+        public void SetUp()
+        {
+            base.SetUp();
+            input = new StringReader("abcde");
+        }
+
+        [Test]
+        public void TestInvalidInput()
+        {
+            bool gotException = false;
+            try
+            {
+                new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 0, 0);
+            }
+            catch (System.ArgumentException e)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestInvalidInput2()
+        {
+            bool gotException = false;
+            try
+            {
+                new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 2, 1);
+            }
+            catch (System.ArgumentException e)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestInvalidInput3()
+        {
+            bool gotException = false;
+            try
+            {
+                new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, -1, 2);
+            }
+            catch (System.ArgumentException e)
+            {
+                gotException = true;
+            }
+            Assert.IsTrue(gotException);
+        }
+
+        [Test]
+        public void TestFrontUnigram()
+        {
+            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 1, 1);
+            AssertTokenStreamContents(tokenizer, new String[] { "a" }, new int[] { 0 }, new int[] { 1 }, 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestBackUnigram()
+        {
+            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.BACK, 1, 1);
+            AssertTokenStreamContents(tokenizer, new String[] { "e" }, new int[] { 4 }, new int[] { 5 }, 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestOversizedNgrams()
+        {
+            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 6, 6);
+            AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestFrontRangeOfNgrams()
+        {
+            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 1, 3);
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestBackRangeOfNgrams()
+        {
+            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.BACK, 1, 3);
+            AssertTokenStreamContents(tokenizer, new String[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, 5 /* abcde */);
+        }
+
+        [Test]
+        public void TestReset()
+        {
+            EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 1, 3);
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5 /* abcde */);
+            tokenizer.Reset(new StringReader("abcde"));
+            AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5 /* abcde */);
+        }
+    }
+}
\ No newline at end of file



Mime
View raw message