lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aro...@apache.org
Subject svn commit: r411501 [6/30] - in /incubator/lucene.net/trunk/C#/src: ./ Demo/DeleteFiles/ Demo/DemoLib/ Demo/DemoLib/HTML/ Demo/IndexFiles/ Demo/IndexHtml/ Demo/SearchFiles/ Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Docum...
Date Sun, 04 Jun 2006 02:41:25 GMT
Modified: incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Demo/SearchFiles/SearchFiles.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.cs Sat Jun  3 19:41:13 2006
@@ -13,85 +13,189 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Analyzer = Lucene.Net.Analysis.Analyzer;
 using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
 using Document = Lucene.Net.Documents.Document;
+using FilterIndexReader = Lucene.Net.Index.FilterIndexReader;
+using IndexReader = Lucene.Net.Index.IndexReader;
 using QueryParser = Lucene.Net.QueryParsers.QueryParser;
 using Hits = Lucene.Net.Search.Hits;
 using IndexSearcher = Lucene.Net.Search.IndexSearcher;
 using Query = Lucene.Net.Search.Query;
 using Searcher = Lucene.Net.Search.Searcher;
+
 namespace Lucene.Net.Demo
 {
 	
 	class SearchFiles
 	{
+		
+		/// <summary>Use the norms from one field for all fields.  Norms are read into memory,
+		/// using a byte of memory per document per searched field.  This can cause
+		/// search of large collections with a large number of fields to run out of
+		/// memory.  If all of the fields contain only a single token, then the norms
+		/// are all identical, and a single norm vector may be shared. 
+		/// </summary>
+		private class OneNormsReader:FilterIndexReader
+		{
+			private System.String field;
+			
+			public OneNormsReader(IndexReader in_Renamed, System.String field):base(in_Renamed)
+			{
+				this.field = field;
+			}
+			
+			public override byte[] Norms(System.String field)
+			{
+				return in_Renamed.Norms(this.field);
+			}
+		}
+		
 		[STAThread]
 		public static void  Main(System.String[] args)
 		{
-			try
+			System.String usage = "Usage: " + typeof(SearchFiles) + " [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field]";
+			if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
 			{
-				Searcher searcher = new IndexSearcher(@"index");
-				Analyzer analyzer = new StandardAnalyzer();
-				
-				System.IO.StreamReader in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.Default).CurrentEncoding);
-				while (true)
+				System.Console.Out.WriteLine(usage);
+				System.Environment.Exit(0);
+			}
+			
+			System.String index = "index";
+			System.String field = "contents";
+			System.String queries = null;
+			int repeat = 0;
+			bool raw = false;
+			System.String normsField = null;
+			
+			for (int i = 0; i < args.Length; i++)
+			{
+				if ("-index".Equals(args[i]))
+				{
+					index = args[i + 1];
+					i++;
+				}
+				else if ("-field".Equals(args[i]))
+				{
+					field = args[i + 1];
+					i++;
+				}
+				else if ("-queries".Equals(args[i]))
+				{
+					queries = args[i + 1];
+					i++;
+				}
+				else if ("-repeat".Equals(args[i]))
 				{
+					repeat = System.Int32.Parse(args[i + 1]);
+					i++;
+				}
+				else if ("-raw".Equals(args[i]))
+				{
+					raw = true;
+				}
+				else if ("-norms".Equals(args[i]))
+				{
+					normsField = args[i + 1];
+					i++;
+				}
+			}
+			
+			IndexReader reader = IndexReader.Open(index);
+			
+			if (normsField != null)
+				reader = new OneNormsReader(reader, normsField);
+			
+			Searcher searcher = new IndexSearcher(reader);
+			Analyzer analyzer = new StandardAnalyzer();
+			
+			System.IO.StreamReader in_Renamed = null;
+			if (queries != null)
+			{
+				in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(queries, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(queries, System.Text.Encoding.Default).CurrentEncoding);
+			}
+			else
+			{
+				in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8")).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8")).CurrentEncoding);
+			}
+			
+			while (true)
+			{
+				if (queries == null)
+    				// prompt the user
 					System.Console.Out.Write("Query: ");
-					System.String line = in_Renamed.ReadLine();
-					
-					if (line.Length == - 1)
-						break;
-					
-					Query query = QueryParser.Parse(line, "contents", analyzer);
-					System.Console.Out.WriteLine("Searching for: " + query.ToString("contents"));
-					
-					Hits hits = searcher.Search(query);
-					System.Console.Out.WriteLine(hits.Length() + " total matching documents");
-					
-					int HITS_PER_PAGE = 10;
-					for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
+				
+				System.String line = in_Renamed.ReadLine();
+				
+				if (line == null || line.Length == - 1)
+					break;
+				
+				Query query = QueryParser.Parse(line, field, analyzer);
+				System.Console.Out.WriteLine("Searching for: " + query.ToString(field));
+				
+				Hits hits = searcher.Search(query);
+				
+				if (repeat > 0)
+				{
+					// repeat & time as benchmark
+					System.DateTime start = System.DateTime.Now;
+					for (int i = 0; i < repeat; i++)
+					{
+						hits = searcher.Search(query);
+					}
+					System.DateTime end = System.DateTime.Now;
+					System.Console.Out.WriteLine("Time: " + (end.Ticks - start.Ticks) + "ms");
+				}
+				
+				System.Console.Out.WriteLine(hits.Length() + " total matching documents");
+				
+				int HITS_PER_PAGE = 10;
+				for (int start = 0; start < hits.Length(); start += HITS_PER_PAGE)
+				{
+					int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
+					for (int i = start; i < end; i++)
 					{
-						int end = System.Math.Min(hits.Length(), start + HITS_PER_PAGE);
-						for (int i = start; i < end; i++)
+						
+						if (raw)
 						{
-							Document doc = hits.Doc(i);
-							System.String path = doc.Get("path");
-							if (path != null)
-							{
-								System.Console.Out.WriteLine(i + ". " + path);
-							}
-							else
+							// output raw format
+							System.Console.Out.WriteLine("doc=" + hits.Id(i) + " score=" + hits.Score(i));
+							continue;
+						}
+						
+						Document doc = hits.Doc(i);
+						System.String path = doc.Get("path");
+						if (path != null)
+						{
+							System.Console.Out.WriteLine((i + 1) + ". " + path);
+							System.String title = doc.Get("title");
+							if (title != null)
 							{
-								System.String url = doc.Get("url");
-								if (url != null)
-								{
-									System.Console.Out.WriteLine(i + ". " + url);
-									System.Console.Out.WriteLine("   - " + doc.Get("title"));
-								}
-								else
-								{
-									System.Console.Out.WriteLine(i + ". " + "No path nor URL for this document");
-								}
+								System.Console.Out.WriteLine("   Title: " + doc.Get("title"));
 							}
 						}
-						
-						if (hits.Length() > end)
+						else
 						{
-							System.Console.Out.Write("more (y/n) ? ");
-							line = in_Renamed.ReadLine();
-							if (line.Length == 0 || line[0] == 'n')
-								break;
+							System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
 						}
 					}
+					
+					if (queries != null)
+    					// non-interactive
+						break;
+					
+					if (hits.Length() > end)
+					{
+						System.Console.Out.Write("more (y/n) ? ");
+						line = in_Renamed.ReadLine();
+						if (line.Length == 0 || line[0] == 'n')
+							break;
+					}
 				}
-				searcher.Close();
-			}
-			catch (System.Exception e)
-			{
-				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
 			}
+			reader.Close();
 		}
 	}
-}
\ No newline at end of file
+}

Modified: incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Demo/SearchFiles/SearchFiles.csproj?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.csproj (original)
+++ incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.csproj Sat Jun  3 19:41:13 2006
@@ -87,7 +87,7 @@
                 <Reference
                     Name = "Lucene.Net"
                     AssemblyName = "Lucene.Net"
-                    HintPath = "..\..\bin\Release\Lucene.Net.dll"
+                    HintPath = "..\..\Lucene.Net\bin\Debug\Lucene.Net.dll"
                 />
             </References>
         </Build>

Modified: incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.csproj.user
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Demo/SearchFiles/SearchFiles.csproj.user?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.csproj.user (original)
+++ incubator/lucene.net/trunk/C#/src/Demo/SearchFiles/SearchFiles.csproj.user Sat Jun  3 19:41:13 2006
@@ -1,7 +1,7 @@
 <VisualStudioProject>
     <CSHARP LastOpenVersion = "7.10.3077" >
         <Build>
-            <Settings ReferencePath = "E:\Aroush.proj\Lucene.Net-1.4.3.final-002\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.3.final-001\bin\Release\;E:\Aroush.proj\Lucene.Net-1.4.3.Final-001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.3.RC2-001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.3.RC1-001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.3-beta.002\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.3-beta.001\bin\Release\;E:\Aroush.proj\Lucene.Net-1.4.3-beta.001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.RC1-001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.beta-003\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.beta-001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.alpha-006\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.alpha-005\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.alpha-004\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.alpha-003\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.alpha-002\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.alpha-001\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.3\bin\Debug\;E:\Aroush.proj\Highlight.Net-1.4.
 3.RC1-001\Highlight.Net\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.4.3.final-002\bin\Release\;E:\Aroush.proj\Lucene.Net-1.4.3.final-003\bin\Debug\" >
+            <Settings ReferencePath = "E:\Aroush.proj\Lucene.Net-1.9.rc1-002.src\Lucene.Net\bin\Debug\;E:\Aroush.proj\Lucene.Net-1.9.rc1-003.src\Lucene.Net\bin\Debug\" >
                 <Config
                     Name = "Debug"
                     EnableASPDebugging = "false"
@@ -11,12 +11,12 @@
                     RemoteDebugEnabled = "false"
                     RemoteDebugMachine = ""
                     StartAction = "Project"
-                    StartArguments = ""
+                    StartArguments = "-index E:\Aroush.proj\Lucene.Net-1.9.rc1-002.src\Demo\IndexFiles\bin\Debug\index"
                     StartPage = ""
                     StartProgram = ""
                     StartURL = ""
                     StartWorkingDirectory = ""
-                    StartWithIE = "true"
+                    StartWithIE = "false"
                 />
                 <Config
                     Name = "Release"
@@ -32,7 +32,7 @@
                     StartProgram = ""
                     StartURL = ""
                     StartWorkingDirectory = ""
-                    StartWithIE = "true"
+                    StartWithIE = "false"
                 />
             </Settings>
         </Build>

Modified: incubator/lucene.net/trunk/C#/src/HISTORY.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/HISTORY.txt?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/HISTORY.txt (original)
+++ incubator/lucene.net/trunk/C#/src/HISTORY.txt Sat Jun  3 19:41:13 2006
@@ -1,6 +1,34 @@
 Lucene.Net History
 ------------------
 
+
+03Jun06:
+	- Release Lucene.Net.1.9 RC1 build 003 Alpha
+	- Note: This is the first release of Lucene.Net 1.9 to SVN
+	- Note: Added ZIP compression support via reflection.  Thanks to Eyal Post
+	- Note: Fixed bugs in the code which were exposed via the NUnit "Test" code
+	- Note: NUnit "Test" code has been ported to Lucene.Net.  Out of 307 tests 58 are failing
+	- Note: There is still some remaining port work to be done; look for the text "Aroush" in the code
+	- Issue: There is still some code not fully ported; search for "Aroush" to find it
+	- Issue: The NUnit test code has not been ported yet
+	- Issue: Demo.IndexHtml won't work due to some bug in the area of in-memory stream porting
+
+
+07Feb06:
+	- Release: Lucene.Net.1.9 RC1 build 002 Alpha
+	- Note: This release is based on the current Java code release of 1.9 RC1
+	- Note: This release contains all the fixes currently implemented for 1.4.3 build 004
+	- Note: There is still some remaining port work to be done; look for the text "Aroush" in the code
+	- Issue: The NUnit test code has not been ported yet
+	- Issue: Demo.IndexHtml won't work due to some bug in the area of in-memory stream porting
+
+
+26May05:
+	- Release: Lucene.Net.1.9 RC1 build 001 Alpha
+	- Issue: There is still some code not fully ported; search for "Aroush" to find it.
+	- Issue: The NUnit test code has not been ported yet (the current code is still based on 1.4.3 final)
+
+
 15Dec05:
 	- Release: Lucene.Net.1.4.3 final build 004 to the Apache incubator site
 	- Clean-up: VS.Net project settings.
@@ -16,6 +44,12 @@
 	- Release: Lucene.Net.1.4.3 final build 003
 	- Fix: Memory leak -- Thanks to Marcus.
 	- Fix: Remoting class and other misused of System.Object cast -- Thanks to Jason
+
+
+26May05:
+	- Release: Lucene.Net.1.9 RC1 build 001 Alpha
+	- Issue: There is still some code not fully ported; search for "Aroush" to find it.
+	- Issue: The NUnit test code has not been ported yet (the current code is still based on 1.4.3 final)
 
 
 22Feb05:

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Analyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Analyzer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Analyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Analyzer.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	
@@ -23,19 +25,17 @@
 	/// Typical implementations first build a Tokenizer, which breaks the stream of
 	/// characters from the Reader into raw Tokens.  One or more TokenFilters may
 	/// then be applied to the output of the Tokenizer.
-	/// </p>
 	/// <p>
 	/// WARNING: You must override one of the methods defined by this class in your
 	/// subclass or the Analyzer will enter an infinite loop.
-	/// </p>
 	/// </summary>
 	public abstract class Analyzer
 	{
 		/// <summary>Creates a TokenStream which tokenizes all the text in the provided
 		/// Reader.  Default implementation forwards to tokenStream(Reader) for 
 		/// compatibility with older version.  Override to allow Analyzer to choose 
-		/// strategy based on document and/or Field.  Must be able to handle null
-		/// Field name for backward compatibility. 
+		/// strategy based on document and/or field.  Must be able to handle null
+		/// field name for backward compatibility. 
 		/// </summary>
 		public virtual TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 		{
@@ -46,13 +46,32 @@
 		/// <summary>Creates a TokenStream which tokenizes all the text in the provided
 		/// Reader.  Provided for backward compatibility only.
 		/// </summary>
-		/// <deprecated> use TokenStream(String, Reader) instead.
+		/// <deprecated> use tokenStream(String, Reader) instead.
 		/// </deprecated>
-		/// <seealso cref="Reader)">
+		/// <seealso cref="TokenStream(String, Reader)">
 		/// </seealso>
 		public virtual TokenStream TokenStream(System.IO.TextReader reader)
 		{
 			return TokenStream(null, reader);
+		}
+		
+		/// <summary> Invoked before indexing a Field instance if
+		/// terms have already been added to that field.  This allows custom
+		/// analyzers to place an automatic position increment gap between
+		/// Field instances using the same field name.  The default value
+		/// position increment gap is 0.  With a 0 position increment gap and
+		/// the typical default token position increment of 1, all terms in a field,
+		/// including across Field instances, are in successive positions, allowing
+		/// exact PhraseQuery matches, for instance, across Field instance boundaries.
+		/// 
+		/// </summary>
+		/// <param name="fieldName">Field name being indexed.
+		/// </param>
+		/// <returns> position increment gap, added to the next token emitted from {@link #TokenStream(String,Reader)}
+		/// </returns>
+		public virtual int GetPositionIncrementGap(System.String fieldName)
+		{
+			return 0;
 		}
 	}
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/CharTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/CharTokenizer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/CharTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/CharTokenizer.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/ISOLatin1AccentFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/ISOLatin1AccentFilter.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/ISOLatin1AccentFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/ISOLatin1AccentFilter.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis
+{
+	
+	/// <summary> A filter that replaces accented characters in the ISO Latin 1 character set 
+	/// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
+	/// <p>
+	/// For instance, '&agrave;' will be replaced by 'a'.
+	/// <p>
+	/// </summary>
+	public class ISOLatin1AccentFilter : TokenFilter
+	{
+		public ISOLatin1AccentFilter(TokenStream input) : base(input)
+		{
+		}
+		
+		public override Token Next()
+		{
+			Token t = input.Next();
+			if (t == null)
+				return null;
+			// Return a token with filtered characters.
+			return new Token(RemoveAccents(t.TermText()), t.StartOffset(), t.EndOffset(), t.Type());
+		}
+		
+		/// <summary> To replace accented characters in a String by unaccented equivalents.</summary>
+		public static System.String RemoveAccents(System.String input)
+		{
+			System.Text.StringBuilder output = new System.Text.StringBuilder();
+			for (int i = 0; i < input.Length; i++)
+			{
+                long val = input[i];
+
+				switch (input[i])
+				{
+					
+					case '\u00C0':  // À
+					case '\u00C1':  // Á
+					case '\u00C2':  // Â
+					case '\u00C3':  // Ã
+					case '\u00C4':  // Ä
+					case '\u00C5':  // Å
+						output.Append("A");
+						break;
+					
+					case '\u00C6':  // Æ
+						output.Append("AE");
+						break;
+					
+					case '\u00C7':  // Ç
+						output.Append("C");
+						break;
+					
+					case '\u00C8':  // È
+					case '\u00C9':  // É
+					case '\u00CA':  // Ê
+					case '\u00CB':  // Ë
+						output.Append("E");
+						break;
+					
+					case '\u00CC':  // Ì
+					case '\u00CD':  // Í
+					case '\u00CE':  // Î
+					case '\u00CF':  // Ï
+						output.Append("I");
+						break;
+					
+					case '\u00D0':  // Ð
+						output.Append("D");
+						break;
+					
+					case '\u00D1':  // Ñ
+						output.Append("N");
+						break;
+					
+					case '\u00D2':  // Ò
+					case '\u00D3':  // Ó
+					case '\u00D4':  // Ô
+					case '\u00D5':  // Õ
+					case '\u00D6':  // Ö
+					case '\u00D8':  // Ø
+						output.Append("O");
+						break;
+					
+					case '\u0152':  // Œ
+						output.Append("OE");
+						break;
+					
+					case '\u00DE':  // Þ
+						output.Append("TH");
+						break;
+					
+					case '\u00D9':  // Ù
+					case '\u00DA':  // Ú
+					case '\u00DB':  // Û
+					case '\u00DC':  // Ü
+						output.Append("U");
+						break;
+					
+					case '\u00DD':  // Ý
+					case '\u0178':  // Ÿ
+						output.Append("Y");
+						break;
+					
+					case '\u00E0':  // à
+					case '\u00E1':  // á
+					case '\u00E2':  // â
+					case '\u00E3':  // ã
+					case '\u00E4':  // ä
+					case '\u00E5':  // å
+						output.Append("a");
+						break;
+					
+					case '\u00E6':  // æ
+						output.Append("ae");
+						break;
+					
+					case '\u00E7':  // ç
+						output.Append("c");
+						break;
+					
+					case '\u00E8':  // è
+					case '\u00E9':  // é
+					case '\u00EA':  // ê
+					case '\u00EB':  // ë
+						output.Append("e");
+						break;
+					
+					case '\u00EC':  // ì
+					case '\u00ED':  // í
+					case '\u00EE':  // î
+					case '\u00EF':  // ï
+						output.Append("i");
+						break;
+					
+					case '\u00F0':  // ð
+						output.Append("d");
+						break;
+					
+					case '\u00F1':  // ñ
+						output.Append("n");
+						break;
+					
+					case '\u00F2':  // ò
+					case '\u00F3':  // ó
+					case '\u00F4':  // ô
+					case '\u00F5':  // õ
+					case '\u00F6':  // ö
+					case '\u00F8':  // ø
+						output.Append("o");
+						break;
+					
+					case '\u0153':  // œ
+						output.Append("oe");
+						break;
+					
+					case '\u00DF':  // ß
+						output.Append("ss");
+						break;
+					
+					case '\u00FE':  // þ
+						output.Append("th");
+						break;
+					
+					case '\u00F9':  // ù
+					case '\u00FA':  // ú
+					case '\u00FB':  // û
+					case '\u00FC':  // ü
+						output.Append("u");
+						break;
+					
+					case '\u00FD':  // ý
+					case '\u00FF':  // ÿ
+						output.Append("y");
+						break;
+					
+					default: 
+						output.Append(input[i]);
+						break;
+					
+				}
+			}
+			return output.ToString();
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/KeywordAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/KeywordAnalyzer.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/KeywordAnalyzer.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/KeywordAnalyzer.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis
+{
+	
+	/// <summary> "Tokenizes" the entire stream as a single token. This is useful
+	/// for data like zip codes, ids, and some product names.
+	/// </summary>
+	public class KeywordAnalyzer : Analyzer
+	{
+		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+		{
+			return new KeywordTokenizer(reader);
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/KeywordTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/KeywordTokenizer.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/KeywordTokenizer.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/KeywordTokenizer.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2004-2005 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis
+{
+	
+	/// <summary> Emits the entire input as a single token.</summary>
+	public class KeywordTokenizer : Tokenizer
+	{
+		
+		private const int DEFAULT_BUFFER_SIZE = 256;
+		
+		private bool done;
+		private char[] buffer;
+		
+		public KeywordTokenizer(System.IO.TextReader input) : this(input, DEFAULT_BUFFER_SIZE)
+		{
+		}
+		
+		public KeywordTokenizer(System.IO.TextReader input, int bufferSize) : base(input)
+		{
+			this.buffer = new char[bufferSize];
+			this.done = false;
+		}
+		
+		public override Token Next()
+		{
+			if (!done)
+			{
+				done = true;
+				System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+				int length;
+				while (true)
+				{
+					length = input.Read((System.Char[]) this.buffer, 0, this.buffer.Length);
+					if (length <= 0)
+						break;
+					
+					buffer.Append(this.buffer, 0, length);
+				}
+				System.String text = buffer.ToString();
+				return new Token(text, 0, text.Length);
+			}
+			return null;
+		}
+	}
+}
\ No newline at end of file

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LengthFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/LengthFilter.cs?rev=411501&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LengthFilter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LengthFilter.cs Sat Jun  3 19:41:13 2006
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2004 The Apache Software Foundation
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Analysis
+{
+	
+	/// <summary> Removes words that are too long and too short from the stream.
+	/// 
+	/// </summary>
+	/// <author>  David Spencer
+	/// </author>
+	/// <version>  $Id: LengthFilter.java 347992 2005-11-21 21:41:43Z dnaber $
+	/// </version>
+	public sealed class LengthFilter : TokenFilter
+	{
+		
+		internal int min;
+		internal int max;
+		
+		/// <summary> Build a filter that removes words that are too long or too
+		/// short from the text.
+		/// </summary>
+		public LengthFilter(TokenStream in_Renamed, int min, int max) : base(in_Renamed)
+		{
+			this.min = min;
+			this.max = max;
+		}
+		
+		/// <summary> Returns the next input Token whose termText() is the right len</summary>
+		public override Token Next()
+		{
+			// return the first non-stop word found
+			for (Token token = input.Next(); token != null; token = input.Next())
+			{
+				int len = token.TermText().Length;
+				if (len >= min && len <= max)
+				{
+					return token;
+				}
+				// note: else we ignore it but should we index each part of it?
+			}
+			// reached EOS -- return null
+			return null;
+		}
+	}
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LetterTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/LetterTokenizer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LetterTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LetterTokenizer.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LowerCaseFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/LowerCaseFilter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LowerCaseFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LowerCaseFilter.cs Sat Jun  3 19:41:13 2006
@@ -13,14 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	
 	/// <summary> Normalizes token text to lower case.
 	/// 
 	/// </summary>
-	/// <version>  $Id: LowerCaseFilter.java,v 1.4 2004/03/29 22:48:00 cutting Exp $
+	/// <version>  $Id: LowerCaseFilter.java 150259 2004-03-29 22:48:07Z cutting $
 	/// </version>
 	public sealed class LowerCaseFilter : TokenFilter
 	{

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LowerCaseTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/LowerCaseTokenizer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LowerCaseTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/LowerCaseTokenizer.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PerFieldAnalyzerWrapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/PerFieldAnalyzerWrapper.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PerFieldAnalyzerWrapper.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PerFieldAnalyzerWrapper.cs Sat Jun  3 19:41:13 2006
@@ -13,14 +13,30 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	
 	/// <summary> This analyzer is used to facilitate scenarios where different
 	/// fields require different analysis techniques.  Use {@link #addAnalyzer}
-	/// to add a non-default analyzer on a Field name basis.
-	/// See TestPerFieldAnalyzerWrapper.java for example usage.
+	/// to add a non-default analyzer on a field name basis.
+	/// 
+	/// <p>Example usage:
+	/// 
+	/// <pre>
+	/// PerFieldAnalyzerWrapper aWrapper =
+	/// new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+	/// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+	/// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+	/// </pre>
+	/// 
+	/// <p>In this example, StandardAnalyzer will be used for all fields except "firstname"
+	/// and "lastname", for which KeywordAnalyzer will be used.
+	/// 
+	/// <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+	/// and query parsing.
 	/// </summary>
 	public class PerFieldAnalyzerWrapper:Analyzer
 	{
@@ -39,18 +55,18 @@
 			this.defaultAnalyzer = defaultAnalyzer;
 		}
 		
-		/// <summary> Defines an analyzer to use for the specified Field.
+		/// <summary> Defines an analyzer to use for the specified field.
 		/// 
 		/// </summary>
-		/// <param name="fieldName">Field name requiring a non-default analyzer.
+		/// <param name="fieldName">field name requiring a non-default analyzer
 		/// </param>
-		/// <param name="analyzer">non-default analyzer to use for Field
+		/// <param name="analyzer">non-default analyzer to use for field
 		/// </param>
 		public virtual void  AddAnalyzer(System.String fieldName, Analyzer analyzer)
 		{
 			analyzerMap[fieldName] = analyzer;
 		}
-
+		
 		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 		{
 			Analyzer analyzer = (Analyzer) analyzerMap[fieldName];
@@ -60,6 +76,12 @@
 			}
 			
 			return analyzer.TokenStream(fieldName, reader);
+		}
+		
+		public override System.String ToString()
+		{
+			// {{Aroush-1.9}} 'analyzerMap.ToString()' may return a different value then Java.
+			return "PerFieldAnalyzerWrapper(" + analyzerMap.ToString() + ", default=" + defaultAnalyzer + ")";
 		}
 	}
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/PorterStemFilter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemFilter.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/PorterStemmer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/PorterStemmer.cs Sat Jun  3 19:41:13 2006
@@ -38,7 +38,9 @@
 [ This version is derived from Release 3, modified by Brian Goetz to
 optimize for fewer object creations.  ]
 */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	
@@ -65,7 +67,7 @@
 		}
 		
 		/// <summary> reset() resets the stemmer so it can stem another word.  If you invoke
-		/// the stemmer by calling add(char) and then stem(), you must call reset()
+		/// the stemmer by calling add(char) and then Stem(), you must call reset()
 		/// before starting another word.
 		/// </summary>
 		public virtual void  Reset()
@@ -74,7 +76,7 @@
 		}
 		
 		/// <summary> Add a character to the word being stemmed.  When you are finished
-		/// adding characters, you can call stem(void) to process the word.
+		/// adding characters, you can call Stem(void) to process the word.
 		/// </summary>
 		public virtual void  Add(char ch)
 		{
@@ -185,7 +187,7 @@
 		
 		/* vowelinstem() is true <=> k0,...j contains a vowel */
 		
-		private bool vowelinstem()
+		private bool Vowelinstem()
 		{
 			int i;
 			for (i = k0; i <= j; i++)
@@ -283,7 +285,7 @@
 		
 		*/
 		
-		private void  step1()
+		private void  Step1()
 		{
 			if (b[k] == 's')
 			{
@@ -299,7 +301,7 @@
 				if (M() > 0)
 					k--;
 			}
-			else if ((Ends("ed") || Ends("ing")) && vowelinstem())
+			else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
 			{
 				k = j;
 				if (Ends("at"))
@@ -321,9 +323,9 @@
 		
 		/* step2() turns terminal y to i when there is another vowel in the stem. */
 		
-		private void  step2()
+		private void  Step2()
 		{
-			if (Ends("y") && vowelinstem())
+			if (Ends("y") && Vowelinstem())
 			{
 				b[k] = 'i';
 				dirty = true;
@@ -334,7 +336,7 @@
 		-ation) maps to -ize etc. note that the string before the suffix must give
 		m() > 0. */
 		
-		private void  step3()
+		private void  Step3()
 		{
 			if (k == k0)
 				return ; /* For Bug 1 */
@@ -453,7 +455,7 @@
 		
 		/* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
 		
-		private void  step4()
+		private void  Step4()
 		{
 			switch (b[k])
 			{
@@ -502,7 +504,7 @@
 		
 		/* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
 		
-		private void  step5()
+		private void  Step5()
 		{
 			if (k == k0)
 				return ; /* for Bug 1 */
@@ -593,7 +595,7 @@
 		
 		/* step6() removes a final -e if m() > 1. */
 		
-		private void  step6()
+		private void  Step6()
 		{
 			j = k;
 			if (b[k] == 'e')
@@ -672,7 +674,7 @@
 			k0 = i0;
 			if (k > k0 + 1)
 			{
-				step1(); step2(); step3(); step4(); step5(); step6();
+				Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
 			}
 			// Also, a word is considered dirty if we lopped off letters
 			// Thanks to Ifigenia Vairelles for pointing this out.
@@ -695,7 +697,7 @@
 			{
 				try
 				{
-					System.IO.BinaryReader in_Renamed = new System.IO.BinaryReader(System.IO.File.Open(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read));
+                    System.IO.BinaryReader in_Renamed = new System.IO.BinaryReader(System.IO.File.Open(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read));
 					byte[] buffer = new byte[1024];
 					int bufferLen, offset, ch;
 					
@@ -737,7 +739,7 @@
 					
 					in_Renamed.Close();
 				}
-				catch (System.IO.IOException )
+				catch (System.IO.IOException)
 				{
 					System.Console.Out.WriteLine("error reading " + args[i]);
 				}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/SimpleAnalyzer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/SimpleAnalyzer.cs Sat Jun  3 19:41:13 2006
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
+
 namespace Lucene.Net.Analysis
 {
 	

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/CharStream.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/CharStream.cs Sat Jun  3 19:41:13 2006
@@ -18,6 +18,21 @@
 using System;
 namespace Lucene.Net.Analysis.Standard
 {
+	
+	/// <summary> This interface describes a character stream that maintains line and
+	/// column number positions of the characters.  It also has the capability
+	/// to backup the stream to some extent.  An implementation of this
+	/// interface is used in the TokenManager implementation generated by
+	/// JavaCCParser.
+	/// 
+	/// All the methods except backup can be implemented in any fashion. backup
+	/// needs to be implemented correctly for the correct operation of the lexer.
+	/// Rest of the methods are all used to get information like line number,
+	/// column number and the String that constitutes a token and are not used
+	/// by the lexer. Hence their implementation won't affect the generated lexer's
+	/// operation.
+	/// </summary>
+	
 	public interface CharStream
 	{
 		
@@ -30,14 +45,14 @@
 		/// <summary> Returns the column position of the character last read.</summary>
 		/// <deprecated> 
 		/// </deprecated>
-		/// <seealso cref="#getEndColumn">
+		/// <seealso cref="getEndColumn">
 		/// </seealso>
 		int GetColumn();
 		
 		/// <summary> Returns the line number of the character last read.</summary>
 		/// <deprecated> 
 		/// </deprecated>
-		/// <seealso cref="#getEndLine">
+		/// <seealso cref="getEndLine">
 		/// </seealso>
 		int GetLine();
 		

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/FastCharStream.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/FastCharStream.cs Sat Jun  3 19:41:13 2006
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 namespace Lucene.Net.Analysis.Standard
 {
@@ -20,7 +21,7 @@
 	/// <summary>An efficient implementation of JavaCC's CharStream interface.  <p>Note that
 	/// this does not do line-number counting, but instead keeps track of the
 	/// character position of the token in the input, as required by Lucene's {@link
-	/// Lucene.Net.Analysis.Token} API. 
+	/// Lucene.Net.analysis.Token} API. 
 	/// </summary>
 	public sealed class FastCharStream : CharStream
 	{
@@ -78,7 +79,6 @@
 			bufferStart += tokenStart;
 			tokenStart = 0;
 			
-
 			int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition);
 			if (charsRead <= 0)
 				throw new System.IO.IOException("read past eof");

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/ParseException.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/ParseException.cs Sat Jun  3 19:41:13 2006
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /* Generated By:JavaCC: Do not edit this line. ParseException.java Version 0.7pre6 */
 using System;
 namespace Lucene.Net.Analysis.Standard
@@ -104,13 +105,10 @@
 		/// print the error message in the form:
 		/// ParseException: &lt;result of getMessage&gt;
 		/// </summary>
-		public ParseException(Token currentTokenVal, int[][] expectedTokenSequencesVal, System.String[] tokenImageVal) : base("")
+		public ParseException(Token currentTokenVal, int[][] expectedTokenSequencesVal, System.String[] tokenImageVal):base("")
 		{
-            eol = System.Configuration.ConfigurationSettings.AppSettings.Get("line.separator");
             if (eol == null)
-            {
-                eol = "\n";
-            }
+                eol = @"\n";
 			specialConstructor = true;
 			currentToken = currentTokenVal;
 			expectedTokenSequences = expectedTokenSequencesVal;
@@ -128,24 +126,16 @@
 		
 		public ParseException() : base()
 		{
-            eol = System.Configuration.ConfigurationSettings.AppSettings.Get("line.separator");
             if (eol == null)
-            {
-                eol = "\n";
-            }
-
-			specialConstructor = false;
+                eol = @"\n";
+            specialConstructor = false;
 		}
 		
 		public ParseException(System.String message) : base(message)
 		{
-            eol = System.Configuration.ConfigurationSettings.AppSettings.Get("line.separator");
             if (eol == null)
-            {
-                eol = "\n";
-            }
-
-			specialConstructor = false;
+                eol = @"\n";
+            specialConstructor = false;
 		}
 		
 		/// <summary> This variable determines which constructor was used to create

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardAnalyzer.cs Sat Jun  3 19:41:13 2006
@@ -13,16 +13,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Lucene.Net.Analysis;
+
 namespace Lucene.Net.Analysis.Standard
 {
 	
 	/// <summary> Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
-	/// LowerCaseFilter} and {@link StopFilter}.
+	/// LowerCaseFilter} and {@link StopFilter}, using a list of English stop words.
 	/// 
 	/// </summary>
-	/// <version>  $Id: StandardAnalyzer.java,v 1.8 2004/03/29 22:48:01 cutting Exp $
+	/// <version>  $Id: StandardAnalyzer.java 219090 2005-07-14 20:36:28Z dnaber $
 	/// </version>
 	public class StandardAnalyzer : Analyzer
 	{
@@ -33,15 +35,37 @@
 		/// </summary>
 		public static readonly System.String[] STOP_WORDS;
 		
-		/// <summary>Builds an analyzer. </summary>
+		/// <summary>Builds an analyzer with the default stop words ({@link #STOP_WORDS}). </summary>
 		public StandardAnalyzer() : this(STOP_WORDS)
 		{
 		}
 		
 		/// <summary>Builds an analyzer with the given stop words. </summary>
+		public StandardAnalyzer(System.Collections.Hashtable stopWords)
+		{
+			stopSet = stopWords;
+		}
+		
+		/// <summary>Builds an analyzer with the given stop words. </summary>
 		public StandardAnalyzer(System.String[] stopWords)
 		{
 			stopSet = StopFilter.MakeStopSet(stopWords);
+		}
+		
+		/// <summary>Builds an analyzer with the stop words from the given file.</summary>
+		/// <seealso cref="WordlistLoader.GetWordSet(File)">
+		/// </seealso>
+		public StandardAnalyzer(System.IO.FileInfo stopwords)
+		{
+			stopSet = WordlistLoader.GetWordSet(stopwords);
+		}
+		
+		/// <summary>Builds an analyzer with the stop words from the given reader.</summary>
+		/// <seealso cref="WordlistLoader.GetWordSet(Reader)">
+		/// </seealso>
+		public StandardAnalyzer(System.IO.TextReader stopwords)
+		{
+			stopSet = WordlistLoader.GetWordSet(stopwords);
 		}
 		
 		/// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardFilter.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs Sat Jun  3 19:41:13 2006
@@ -13,8 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 using System;
 using Lucene.Net.Analysis;
+
 namespace Lucene.Net.Analysis.Standard
 {
 	

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs Sat Jun  3 19:41:13 2006
@@ -13,14 +13,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */
 using System;
+
 namespace Lucene.Net.Analysis.Standard
 {
 	
 	/// <summary>A grammar-based tokenizer constructed with JavaCC.
 	/// 
-	/// <p> This should be a good tokenizer for most European-language documents.
+	/// <p> This should be a good tokenizer for most European-language documents:
+	/// 
+	/// <ul>
+	/// <li>Splits words at punctuation characters, removing punctuation. However, a 
+	/// dot that's not followed by whitespace is considered part of a token.
+	/// <li>Splits words at hyphens, unless there's a number in the token, in which case
+	/// the whole token is interpreted as a product number and is not split.
+	/// <li>Recognizes email addresses and internet hostnames as one token.
+	/// </ul>
 	/// 
 	/// <p>Many applications have specific tokenizer needs.  If this tokenizer does
 	/// not suit your application, please consider copying this source code
@@ -28,6 +38,7 @@
 	/// </summary>
 	public class StandardTokenizer : Lucene.Net.Analysis.Tokenizer
 	{
+		
 		/// <summary>Constructs a tokenizer for this Reader. </summary>
 		public StandardTokenizer(System.IO.TextReader reader) : this(new FastCharStream(reader))
 		{
@@ -72,8 +83,8 @@
 					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.NUM);
 					break;
 				
-				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJK: 
-					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJK);
+				case Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ: 
+					token = Jj_consume_token(Lucene.Net.Analysis.Standard.StandardTokenizerConstants.CJ);
 					break;
 				
 				case 0: 
@@ -110,7 +121,7 @@
             base.Close(); 
         }
 
-		public StandardTokenizerTokenManager token_source;
+        public StandardTokenizerTokenManager token_source;
 		public Token token, jj_nt;
 		private int jj_ntk;
 		private int jj_gen;
@@ -125,20 +136,20 @@
 		{
 			token_source = new StandardTokenizerTokenManager(stream);
 			token = new Token();
-			jj_ntk = -1;
+			jj_ntk = - 1;
 			jj_gen = 0;
 			for (int i = 0; i < 1; i++)
-				jj_la1[i] = -1;
+				jj_la1[i] = - 1;
 		}
 		
 		public virtual void  ReInit(CharStream stream)
 		{
 			token_source.ReInit(stream);
 			token = new Token();
-			jj_ntk = -1;
+			jj_ntk = - 1;
 			jj_gen = 0;
 			for (int i = 0; i < 1; i++)
-				jj_la1[i] = -1;
+				jj_la1[i] = - 1;
 		}
 		
 		public StandardTokenizer(StandardTokenizerTokenManager tm)
@@ -218,8 +229,8 @@
 		public virtual ParseException GenerateParseException()
 		{
 			jj_expentries.Clear();
-			bool[] la1tokens = new bool[15];
-			for (int i = 0; i < 15; i++)
+			bool[] la1tokens = new bool[16];
+			for (int i = 0; i < 16; i++)
 			{
 				la1tokens[i] = false;
 			}
@@ -241,7 +252,7 @@
 					}
 				}
 			}
-			for (int i = 0; i < 15; i++)
+			for (int i = 0; i < 16; i++)
 			{
 				if (la1tokens[i])
 				{

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj Sat Jun  3 19:41:13 2006
@@ -1,202 +1,177 @@
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation.  All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- *    if any, must include the following acknowledgment:
- *       "This product includes software developed by the
- *        Apache Software Foundation (http://www.apache.org/)."
- *    Alternately, this acknowledgment may appear in the software itself,
- *    if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- *    "Apache Lucene" must not be used to endorse or promote products
- *    derived from this software without prior written permission. For
- *    written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- *    "Apache Lucene", nor may "Apache" appear in their name, without
- *    prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation.  For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
-
-options {
-  STATIC = false;
-//IGNORE_CASE = true;
-//BUILD_PARSER = false;
-  UNICODE_INPUT = true;
-  USER_CHAR_STREAM = true;
-  OPTIMIZE_TOKEN_MANAGER = true;
-//DEBUG_TOKEN_MANAGER = true;
-}
-PARSER_BEGIN(StandardTokenizer)
-
-package org.apache.lucene.analysis.standard;
-
-import java.io.*;
-
-/** A grammar-based tokenizer constructed with JavaCC.
- *
- * <p> This should be a good tokenizer for most European-language documents.
- *
- * <p>Many applications have specific tokenizer needs.  If this tokenizer does
- * not suit your application, please consider copying this source code
- * directory to your project and maintaining your own grammar-based tokenizer.
- */
-public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer {
-
-  /** Constructs a tokenizer for this Reader. */
-  public StandardTokenizer(Reader reader) {
-    this(new FastCharStream(reader));
-    this.input = reader;
-  }
-}
-
-PARSER_END(StandardTokenizer)
-
-TOKEN : {					  // token patterns
-
-  // basic word: a sequence of digits & letters
-  <ALPHANUM: (<LETTER>|<DIGIT>)+ >
-
-  // internal apostrophes: O'Reilly, you're, O'Reilly's
-  // use a post-filter to remove possesives
-| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ >
-
-  // acronyms: U.S.A., I.B.M., etc.
-  // use a post-filter to remove dots
-| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ >
-
-  // company names like AT&T and Excite@Home.
-| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> >
-
-  // email addresses
-| <EMAIL: <ALPHANUM> (("."|"-"|"_") <ALPHANUM>)* "@" <ALPHANUM> (("."|"-") <ALPHANUM>)+ >
-
-  // hostname
-| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
-
-  // floating point, serial, model numbers, ip addresses, etc.
-  // every other segment must have at least one digit
-| <NUM: (<ALPHANUM> <P> <HAS_DIGIT>
-       | <HAS_DIGIT> <P> <ALPHANUM>
-       | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
-       | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
-       | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
-       | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
-        )
-  >
-| <#P: ("_"|"-"|"/"|"."|",") >
-| <#HAS_DIGIT:					  // at least one digit
-    (<LETTER>|<DIGIT>)*
-    <DIGIT>
-    (<LETTER>|<DIGIT>)*
-  >
-
-| < #ALPHA: (<LETTER>)+>
-| < #LETTER:					  // unicode letters
-      [
-       "\u0041"-"\u005a",
-       "\u0061"-"\u007a",
-       "\u00c0"-"\u00d6",
-       "\u00d8"-"\u00f6",
-       "\u00f8"-"\u00ff",
-       "\u0100"-"\u1fff"
-      ]
-  >
-| < CJK:                                          // non-alphabets
-      [
-       "\u3040"-"\u318f",
-       "\u3300"-"\u337f",
-       "\u3400"-"\u3d2d",
-       "\u4e00"-"\u9fff",
-       "\uf900"-"\ufaff"
-      ]
-  >
-| < #DIGIT:					  // unicode digits
-      [
-       "\u0030"-"\u0039",
-       "\u0660"-"\u0669",
-       "\u06f0"-"\u06f9",
-       "\u0966"-"\u096f",
-       "\u09e6"-"\u09ef",
-       "\u0a66"-"\u0a6f",
-       "\u0ae6"-"\u0aef",
-       "\u0b66"-"\u0b6f",
-       "\u0be7"-"\u0bef",
-       "\u0c66"-"\u0c6f",
-       "\u0ce6"-"\u0cef",
-       "\u0d66"-"\u0d6f",
-       "\u0e50"-"\u0e59",
-       "\u0ed0"-"\u0ed9",
-       "\u1040"-"\u1049"
-      ]
-  >
-}
-
-SKIP : {					  // skip unrecognized chars
- <NOISE: ~[] >
-}
-
-/** Returns the next token in the stream, or null at EOS.
- * <p>The returned token's type is set to an element of {@link
- * StandardTokenizerConstants#tokenImage}.
- */
-org.apache.lucene.analysis.Token next() throws IOException :
-{
-  Token token = null;
-}
-{
-  ( token = <ALPHANUM> |
-    token = <APOSTROPHE> |
-    token = <ACRONYM> |
-    token = <COMPANY> |
-    token = <EMAIL> |
-    token = <HOST> |
-    token = <NUM> |
-    token = <CJK> |
-    token = <EOF>
-   )
-    {
-      if (token.kind == EOF) {
-	return null;
-      } else {
-	return
-	  new org.apache.lucene.analysis.Token(token.image,
-					token.beginColumn,token.endColumn,
-					tokenImage[token.kind]);
-      }
-    }
-}
+/**f
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+options {
+  STATIC = false;
+//IGNORE_CASE = true;
+//BUILD_PARSER = false;
+  UNICODE_INPUT = true;
+  USER_CHAR_STREAM = true;
+  OPTIMIZE_TOKEN_MANAGER = true;
+//DEBUG_TOKEN_MANAGER = true;
+}
+PARSER_BEGIN(StandardTokenizer)
+
+package org.apache.lucene.analysis.standard;
+
+import java.io.*;
+
+/** A grammar-based tokenizer constructed with JavaCC.
+ *
+ * <p> This should be a good tokenizer for most European-language documents:
+ *
+ * <ul>
+ *   <li>Splits words at punctuation characters, removing punctuation. However, a 
+ *     dot that's not followed by whitespace is considered part of a token.
+ *   <li>Splits words at hyphens, unless there's a number in the token, in which case
+ *     the whole token is interpreted as a product number and is not split.
+ *   <li>Recognizes email addresses and internet hostnames as one token.
+ * </ul>
+ *
+ * <p>Many applications have specific tokenizer needs.  If this tokenizer does
+ * not suit your application, please consider copying this source code
+ * directory to your project and maintaining your own grammar-based tokenizer.
+ */
+public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer {
+
+  /** Constructs a tokenizer for this Reader. */
+  public StandardTokenizer(Reader reader) {
+    this(new FastCharStream(reader));
+    this.input = reader;
+  }
+}
+
+PARSER_END(StandardTokenizer)
+
+TOKEN : {					  // token patterns
+
+  // basic word: a sequence of digits & letters
+  <ALPHANUM: (<LETTER>|<DIGIT>|<KOREAN>)+ >
+
+  // internal apostrophes: O'Reilly, you're, O'Reilly's
+  // use a post-filter to remove possesives
+| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ >
+
+  // acronyms: U.S.A., I.B.M., etc.
+  // use a post-filter to remove dots
+| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ >
+
+  // company names like AT&T and Excite@Home.
+| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> >
+
+  // email addresses
+| <EMAIL: <ALPHANUM> (("."|"-"|"_") <ALPHANUM>)* "@" <ALPHANUM> (("."|"-") <ALPHANUM>)+ >
+
+  // hostname
+| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
+
+  // floating point, serial, model numbers, ip addresses, etc.
+  // every other segment must have at least one digit
+| <NUM: (<ALPHANUM> <P> <HAS_DIGIT>
+       | <HAS_DIGIT> <P> <ALPHANUM>
+       | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+       | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+        )
+  >
+| <#P: ("_"|"-"|"/"|"."|",") >
+| <#HAS_DIGIT:					  // at least one digit
+    (<LETTER>|<DIGIT>)*
+    <DIGIT>
+    (<LETTER>|<DIGIT>)*
+  >
+
+| < #ALPHA: (<LETTER>)+>
+| < #LETTER:					  // unicode letters
+      [
+       "\u0041"-"\u005a",
+       "\u0061"-"\u007a",
+       "\u00c0"-"\u00d6",
+       "\u00d8"-"\u00f6",
+       "\u00f8"-"\u00ff",
+       "\u0100"-"\u1fff"
+      ]
+  >
+| < CJ:                                          // Chinese, Japanese
+      [
+       "\u3040"-"\u318f",
+       "\u3300"-"\u337f",
+       "\u3400"-"\u3d2d",
+       "\u4e00"-"\u9fff",
+       "\uf900"-"\ufaff"
+      ]
+  >
+| < KOREAN:                                          // Korean
+      [
+       "\uac00"-"\ud7af"
+      ]
+  >
+| < #DIGIT:					  // unicode digits
+      [
+       "\u0030"-"\u0039",
+       "\u0660"-"\u0669",
+       "\u06f0"-"\u06f9",
+       "\u0966"-"\u096f",
+       "\u09e6"-"\u09ef",
+       "\u0a66"-"\u0a6f",
+       "\u0ae6"-"\u0aef",
+       "\u0b66"-"\u0b6f",
+       "\u0be7"-"\u0bef",
+       "\u0c66"-"\u0c6f",
+       "\u0ce6"-"\u0cef",
+       "\u0d66"-"\u0d6f",
+       "\u0e50"-"\u0e59",
+       "\u0ed0"-"\u0ed9",
+       "\u1040"-"\u1049"
+      ]
+  >
+}
+
+SKIP : {					  // skip unrecognized chars
+ <NOISE: ~[] >
+}
+
+/** Returns the next token in the stream, or null at EOS.
+ * <p>The returned token's type is set to an element of {@link
+ * StandardTokenizerConstants#tokenImage}.
+ */
+org.apache.lucene.analysis.Token next() throws IOException :
+{
+  Token token = null;
+}
+{
+  ( token = <ALPHANUM> |
+    token = <APOSTROPHE> |
+    token = <ACRONYM> |
+    token = <COMPANY> |
+    token = <EMAIL> |
+    token = <HOST> |
+    token = <NUM> |
+    token = <CJ> |
+    token = <EOF>
+   )
+    {
+      if (token.kind == EOF) {
+	return null;
+      } else {
+	return
+	  new org.apache.lucene.analysis.Token(token.image,
+					token.beginColumn,token.endColumn,
+					tokenImage[token.kind]);
+      }
+    }
+}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs?rev=411501&r1=411500&r2=411501&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs Sat Jun  3 19:41:13 2006
@@ -13,8 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /* Generated By:JavaCC: Do not edit this line. StandardTokenizerConstants.java */
 using System;
+
 namespace Lucene.Net.Analysis.Standard
 {
 	
@@ -32,16 +34,11 @@
 		public const int HAS_DIGIT = 9;
 		public const int ALPHA = 10;
 		public const int LETTER = 11;
-		public const int CJK = 12;
-		public const int DIGIT = 13;
-		public const int NOISE = 14;
+		public const int CJ = 12;
+		public const int KOREAN = 13;
+		public const int DIGIT = 14;
+		public const int NOISE = 15;
 		public const int DEFAULT = 0;
-		public static System.String[] tokenImage = new System.String[]
-            {
-                "<EOF>", "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", 
-                "<EMAIL>", "<HOST>", "<NUM>", "<P>", "<HAS_DIGIT>", "<ALPHA>", 
-                "<LETTER>", "<CJK>", "<DIGIT>", "<NOISE>"
-            };
+		public static System.String[] tokenImage = new System.String[]{"<EOF>", "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<P>", "<HAS_DIGIT>", "<ALPHA>", "<LETTER>", "<CJ>", "<KOREAN>", "<DIGIT>", "<NOISE>"};
 	}
-
 }



Mime
View raw message