lucenenet-commits mailing list archives

From dougs...@apache.org
Subject svn commit: r798995 [2/35] - in /incubator/lucene.net/trunk/C#/src: Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/ Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net/...
Date Wed, 29 Jul 2009 18:04:24 GMT
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardFilter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardFilter.cs Wed Jul 29 18:04:12 2009
@@ -25,38 +25,46 @@
 {
 	
 	/// <summary>Normalizes tokens extracted with {@link StandardTokenizer}. </summary>
-	
 	public sealed class StandardFilter:TokenFilter
 	{
+        private static readonly System.String APOSTROPHE_TYPE;
+        private static readonly System.String ACRONYM_TYPE;
+
+        static StandardFilter()
+        {
+            APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
+            ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+        }
 		
-		
-		/// <summary>Construct filtering <i>in</i>. </summary>
-		public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
-		{
-		}
-		
-		private static readonly System.String APOSTROPHE_TYPE;
-		private static readonly System.String ACRONYM_TYPE;
-		
-		/// <summary>Returns the next token in the stream, or null at EOS.
+        /// <summary>Construct filtering <i>in</i>. </summary>
+        public StandardFilter(TokenStream in_Renamed)
+            : base(in_Renamed)
+        {
+        }
+
+        /// <summary>Returns the next token in the stream, or null at EOS.
 		/// <p>Removes <tt>'s</tt> from the end of words.
 		/// <p>Removes dots from acronyms.
 		/// </summary>
-		public override Token Next(Token result)
+		public override Token Next(/* in */ Token reusableToken)
 		{
-			Token t = input.Next(result);
+            System.Diagnostics.Debug.Assert(reusableToken != null);
+			Token nextToken = input.Next(reusableToken);
 			
-			if (t == null)
+			if (nextToken == null)
 				return null;
 			
-			char[] buffer = t.TermBuffer();
-			int bufferLength = t.TermLength();
-			System.String type = t.Type();
+			char[] buffer = nextToken.TermBuffer();
+			int bufferLength = nextToken.TermLength();
+			System.String type = nextToken.Type();
 			
-			if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
+			if (type == APOSTROPHE_TYPE && 
+                bufferLength >= 2 &&
+                buffer[bufferLength - 2] == '\'' &&
+                (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
 			{
 				// Strip last 2 characters off
-				t.SetTermLength(bufferLength - 2);
+				nextToken.SetTermLength(bufferLength - 2);
 			}
 			else if (type == ACRONYM_TYPE)
 			{
@@ -68,15 +76,10 @@
 					if (c != '.')
 						buffer[upto++] = c;
 				}
-				t.SetTermLength(upto);
+				nextToken.SetTermLength(upto);
 			}
 			
-			return t;
-		}
-		static StandardFilter()
-		{
-			APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
-			ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+			return nextToken;
 		}
 	}
 }
\ No newline at end of file
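
The filters in this commit all move to a single-Token reuse contract: the caller allocates one Token and passes it into Next(Token) repeatedly, working with whatever reference comes back.  A minimal consumer sketch, assuming the Lucene.Net.Analysis namespace and following the same for-loop shape StopFilter adopts below; "analyzer" and "reader" are illustrative inputs, not part of this commit:

    // Drive any TokenStream through the reusable-token API.
    public static void DumpTokens(Analyzer analyzer, System.IO.TextReader reader)
    {
        TokenStream stream = analyzer.TokenStream("field", reader);
        Token reusableToken = new Token();
        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
        {
            // Always use the returned reference: a producer may hand back
            // reusableToken itself or a different (e.g. cached) instance.
            System.Console.WriteLine(new System.String(nextToken.TermBuffer(), 0, nextToken.TermLength()) + " [" + nextToken.Type() + "]");
        }
    }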

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.cs Wed Jul 29 18:04:12 2009
@@ -135,8 +135,9 @@
 		*
 		* @see Lucene.Net.Analysis.TokenStream#next()
 		*/
-		public override Token Next(Token result)
+		public override Token Next(/* in */ Token reusableToken)
 		{
+            System.Diagnostics.Debug.Assert(reusableToken != null);
 			int posIncr = 1;
 			
 			while (true)
@@ -150,12 +151,12 @@
 				
 				if (scanner.Yylength() <= maxTokenLength)
 				{
-					result.Clear();
-					result.SetPositionIncrement(posIncr);
-					scanner.GetText(result);
+					reusableToken.Clear();
+					reusableToken.SetPositionIncrement(posIncr);
+					scanner.GetText(reusableToken);
 					int start = scanner.Yychar();
-					result.SetStartOffset(start);
-					result.SetEndOffset(start + result.TermLength());
+					reusableToken.SetStartOffset(start);
+					reusableToken.SetEndOffset(start + reusableToken.TermLength());
 					// This 'if' should be removed in the next release. For now, it converts
 					// invalid acronyms to HOST. When removed, only the 'else' part should
 					// remain.
@@ -163,19 +164,19 @@
 					{
 						if (replaceInvalidAcronym)
 						{
-							result.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
-							result.SetTermLength(result.TermLength() - 1); // remove extra '.'
+							reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
+							reusableToken.SetTermLength(reusableToken.TermLength() - 1); // remove extra '.'
 						}
 						else
 						{
-							result.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
+							reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
 						}
 					}
 					else
 					{
-						result.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
+						reusableToken.SetType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
 					}
-					return result;
+					return reusableToken;
 				}
 				// When we skip a too-long term, we still increment the
 				// position increment
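
(For context: the posIncr bookkeeping above means a skipped over-long term still counts as a position.  If the scanner yields "aa", then a term longer than maxTokenLength, then "bb", the over-long term is dropped but "bb" comes back with SetPositionIncrement(2), preserving the gap for phrase queries.)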

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.cs Wed Jul 29 18:04:12 2009
@@ -15,7 +15,16 @@
  * limitations under the License.
  */
 
-/* The following code was generated by JFlex 1.4.1 on 12/18/07 9:22 PM */
+/*
+
+NOTE: if you change this file and need to regenerate the tokenizer,
+      remember to use JRE 1.4 when running jflex (before Lucene 3.0).
+      This grammar now uses constructs (eg :digit:) whose meaning can
+      vary according to the JRE used to run jflex.  See
+      https://issues.apache.org/jira/browse/LUCENE-1126 for details
+
+*/
+
 using System;
 
 using Token = Lucene.Net.Analysis.Token;
@@ -42,19 +51,140 @@
 		public const int YYINITIAL = 0;
 		
 		/// <summary> Translates characters to character classes</summary>
-		private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000E\x0001\x0000\x0001\x0000\x0001\x000D\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0003" + "\x0001\x0001\x0004\x0000\x0001\x0007\x0001\x0005\x0001\x0002\x0001\x0007\x000A\x0009\x0006\x0000\x0001\x0004\x001A\x0008" + "\x0004\x0000\x0001\x0006\x0001\x0000\x001A\x0008\x0045\x0000\x0017\x0008\x0001\x0000\x001F\x0008\x0001\x0000\u0568\x0008" + "\x000A\x000A\x0086\x0008\x000A\x000A\u026c\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008" + "\x000A\x000A\x0076\x0008\x000A\x000A\x0077\x0008\x0009\x000A\x0076\x0008\x000A\x000A\x0076\x0008\x000A\x000A\x0076\x0008" + "\x000A\x000A\x00E0\x0008\x000A\x000A\x0076\x0008\x000A\x000A\u0166\x0008\x000A\x000A\x00B6\x0008\u0100\x0008\u0e00\x0008" + "\u1040\x0000\u0150\x000C\x0060\x0000\x0010\x000C\u0100\x0000\x0080\x000C\x0080\x0000\u19c0\x000C\x0040\x0000\u5200\x000C" + "\u0c00\x0000\u2bb0\x000B\u2150\x0000\u0200\x000C\u0465\x0000\x003B\x000C\x003D\x0008\x0023\x0000";
-		
+		private const System.String ZZ_CMAP_PACKED = 
+            "\x0009\x0000\x0001\x0000\x0001\x000d\x0001\x0000\x0001\x0000"+
+            "\x0001\x000c\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0005"+
+            "\x0001\x0003\x0004\x0000\x0001\x0009\x0001\x0007\x0001\x0004"+
+            "\x0001\x0009\x000a\x0002\x0006\x0000\x0001\x0006\x001a\x000a"+
+            "\x0004\x0000\x0001\x0008\x0001\x0000\x001a\x000a\x002f\x0000"+
+            "\x0001\x000a\x000a\x0000\x0001\x000a\x0004\x0000\x0001\x000a"+
+            "\x0005\x0000\x0017\x000a\x0001\x0000\x001f\x000a\x0001\x0000"+
+            "\u0128\x000a\x0002\x0000\x0012\x000a\x001c\x0000\x005e\x000a"+
+            "\x0002\x0000\x0009\x000a\x0002\x0000\x0007\x000a\x000e\x0000"+
+            "\x0002\x000a\x000e\x0000\x0005\x000a\x0009\x0000\x0001\x000a"+
+            "\x008b\x0000\x0001\x000a\x000b\x0000\x0001\x000a\x0001\x0000"+
+            "\x0003\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x0014\x000a"+
+            "\x0001\x0000\x002c\x000a\x0001\x0000\x0008\x000a\x0002\x0000"+
+            "\x001a\x000a\x000c\x0000\x0082\x000a\x000a\x0000\x0039\x000a"+
+            "\x0002\x0000\x0002\x000a\x0002\x0000\x0002\x000a\x0003\x0000"+
+            "\x0026\x000a\x0002\x0000\x0002\x000a\x0037\x0000\x0026\x000a"+
+            "\x0002\x0000\x0001\x000a\x0007\x0000\x0027\x000a\x0048\x0000"+
+            "\x001b\x000a\x0005\x0000\x0003\x000a\x002e\x0000\x001a\x000a"+
+            "\x0005\x0000\x000b\x000a\x0015\x0000\x000a\x0002\x0007\x0000"+
+            "\x0063\x000a\x0001\x0000\x0001\x000a\x000f\x0000\x0002\x000a"+
+            "\x0009\x0000\x000a\x0002\x0003\x000a\x0013\x0000\x0001\x000a"+
+            "\x0001\x0000\x001b\x000a\x0053\x0000\x0026\x000a\u015f\x0000"+
+            "\x0035\x000a\x0003\x0000\x0001\x000a\x0012\x0000\x0001\x000a"+
+            "\x0007\x0000\x000a\x000a\x0004\x0000\x000a\x0002\x0015\x0000"+
+            "\x0008\x000a\x0002\x0000\x0002\x000a\x0002\x0000\x0016\x000a"+
+            "\x0001\x0000\x0007\x000a\x0001\x0000\x0001\x000a\x0003\x0000"+
+            "\x0004\x000a\x0022\x0000\x0002\x000a\x0001\x0000\x0003\x000a"+
+            "\x0004\x0000\x000a\x0002\x0002\x000a\x0013\x0000\x0006\x000a"+
+            "\x0004\x0000\x0002\x000a\x0002\x0000\x0016\x000a\x0001\x0000"+
+            "\x0007\x000a\x0001\x0000\x0002\x000a\x0001\x0000\x0002\x000a"+
+            "\x0001\x0000\x0002\x000a\x001f\x0000\x0004\x000a\x0001\x0000"+
+            "\x0001\x000a\x0007\x0000\x000a\x0002\x0002\x0000\x0003\x000a"+
+            "\x0010\x0000\x0007\x000a\x0001\x0000\x0001\x000a\x0001\x0000"+
+            "\x0003\x000a\x0001\x0000\x0016\x000a\x0001\x0000\x0007\x000a"+
+            "\x0001\x0000\x0002\x000a\x0001\x0000\x0005\x000a\x0003\x0000"+
+            "\x0001\x000a\x0012\x0000\x0001\x000a\x000f\x0000\x0001\x000a"+
+            "\x0005\x0000\x000a\x0002\x0015\x0000\x0008\x000a\x0002\x0000"+
+            "\x0002\x000a\x0002\x0000\x0016\x000a\x0001\x0000\x0007\x000a"+
+            "\x0001\x0000\x0002\x000a\x0002\x0000\x0004\x000a\x0003\x0000"+
+            "\x0001\x000a\x001e\x0000\x0002\x000a\x0001\x0000\x0003\x000a"+
+            "\x0004\x0000\x000a\x0002\x0015\x0000\x0006\x000a\x0003\x0000"+
+            "\x0003\x000a\x0001\x0000\x0004\x000a\x0003\x0000\x0002\x000a"+
+            "\x0001\x0000\x0001\x000a\x0001\x0000\x0002\x000a\x0003\x0000"+
+            "\x0002\x000a\x0003\x0000\x0003\x000a\x0003\x0000\x0008\x000a"+
+            "\x0001\x0000\x0003\x000a\x002d\x0000\x0009\x0002\x0015\x0000"+
+            "\x0008\x000a\x0001\x0000\x0003\x000a\x0001\x0000\x0017\x000a"+
+            "\x0001\x0000\x000a\x000a\x0001\x0000\x0005\x000a\x0026\x0000"+
+            "\x0002\x000a\x0004\x0000\x000a\x0002\x0015\x0000\x0008\x000a"+
+            "\x0001\x0000\x0003\x000a\x0001\x0000\x0017\x000a\x0001\x0000"+
+            "\x000a\x000a\x0001\x0000\x0005\x000a\x0024\x0000\x0001\x000a"+
+            "\x0001\x0000\x0002\x000a\x0004\x0000\x000a\x0002\x0015\x0000"+
+            "\x0008\x000a\x0001\x0000\x0003\x000a\x0001\x0000\x0017\x000a"+
+            "\x0001\x0000\x0010\x000a\x0026\x0000\x0002\x000a\x0004\x0000"+
+            "\x000a\x0002\x0015\x0000\x0012\x000a\x0003\x0000\x0018\x000a"+
+            "\x0001\x0000\x0009\x000a\x0001\x0000\x0001\x000a\x0002\x0000"+
+            "\x0007\x000a\x0039\x0000\x0001\x0001\x0030\x000a\x0001\x0001"+
+            "\x0002\x000a\x000c\x0001\x0007\x000a\x0009\x0001\x000a\x0002"+
+            "\x0027\x0000\x0002\x000a\x0001\x0000\x0001\x000a\x0002\x0000"+
+            "\x0002\x000a\x0001\x0000\x0001\x000a\x0002\x0000\x0001\x000a"+
+            "\x0006\x0000\x0004\x000a\x0001\x0000\x0007\x000a\x0001\x0000"+
+            "\x0003\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x0001\x000a"+
+            "\x0002\x0000\x0002\x000a\x0001\x0000\x0004\x000a\x0001\x0000"+
+            "\x0002\x000a\x0009\x0000\x0001\x000a\x0002\x0000\x0005\x000a"+
+            "\x0001\x0000\x0001\x000a\x0009\x0000\x000a\x0002\x0002\x0000"+
+            "\x0002\x000a\x0022\x0000\x0001\x000a\x001f\x0000\x000a\x0002"+
+            "\x0016\x0000\x0008\x000a\x0001\x0000\x0022\x000a\x001d\x0000"+
+            "\x0004\x000a\x0074\x0000\x0022\x000a\x0001\x0000\x0005\x000a"+
+            "\x0001\x0000\x0002\x000a\x0015\x0000\x000a\x0002\x0006\x0000"+
+            "\x0006\x000a\x004a\x0000\x0026\x000a\x000a\x0000\x0027\x000a"+
+            "\x0009\x0000\x005a\x000a\x0005\x0000\x0044\x000a\x0005\x0000"+
+            "\x0052\x000a\x0006\x0000\x0007\x000a\x0001\x0000\x003f\x000a"+
+            "\x0001\x0000\x0001\x000a\x0001\x0000\x0004\x000a\x0002\x0000"+
+            "\x0007\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x0004\x000a"+
+            "\x0002\x0000\x0027\x000a\x0001\x0000\x0001\x000a\x0001\x0000"+
+            "\x0004\x000a\x0002\x0000\x001f\x000a\x0001\x0000\x0001\x000a"+
+            "\x0001\x0000\x0004\x000a\x0002\x0000\x0007\x000a\x0001\x0000"+
+            "\x0001\x000a\x0001\x0000\x0004\x000a\x0002\x0000\x0007\x000a"+
+            "\x0001\x0000\x0007\x000a\x0001\x0000\x0017\x000a\x0001\x0000"+
+            "\x001f\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x0004\x000a"+
+            "\x0002\x0000\x0007\x000a\x0001\x0000\x0027\x000a\x0001\x0000"+
+            "\x0013\x000a\x000e\x0000\x0009\x0002\x002e\x0000\x0055\x000a"+
+            "\x000c\x0000\u026c\x000a\x0002\x0000\x0008\x000a\x000a\x0000"+
+            "\x001a\x000a\x0005\x0000\x004b\x000a\x0095\x0000\x0034\x000a"+
+            "\x002c\x0000\x000a\x0002\x0026\x0000\x000a\x0002\x0006\x0000"+
+            "\x0058\x000a\x0008\x0000\x0029\x000a\u0557\x0000\x009c\x000a"+
+            "\x0004\x0000\x005a\x000a\x0006\x0000\x0016\x000a\x0002\x0000"+
+            "\x0006\x000a\x0002\x0000\x0026\x000a\x0002\x0000\x0006\x000a"+
+            "\x0002\x0000\x0008\x000a\x0001\x0000\x0001\x000a\x0001\x0000"+
+            "\x0001\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x001f\x000a"+
+            "\x0002\x0000\x0035\x000a\x0001\x0000\x0007\x000a\x0001\x0000"+
+            "\x0001\x000a\x0003\x0000\x0003\x000a\x0001\x0000\x0007\x000a"+
+            "\x0003\x0000\x0004\x000a\x0002\x0000\x0006\x000a\x0004\x0000"+
+            "\x000d\x000a\x0005\x0000\x0003\x000a\x0001\x0000\x0007\x000a"+
+            "\x0082\x0000\x0001\x000a\x0082\x0000\x0001\x000a\x0004\x0000"+
+            "\x0001\x000a\x0002\x0000\x000a\x000a\x0001\x0000\x0001\x000a"+
+            "\x0003\x0000\x0005\x000a\x0006\x0000\x0001\x000a\x0001\x0000"+
+            "\x0001\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x0004\x000a"+
+            "\x0001\x0000\x0003\x000a\x0001\x0000\x0007\x000a\u0ecb\x0000"+
+            "\x0002\x000a\x002a\x0000\x0005\x000a\x000a\x0000\x0001\x000b"+
+            "\x0054\x000b\x0008\x000b\x0002\x000b\x0002\x000b\x005a\x000b"+
+            "\x0001\x000b\x0003\x000b\x0006\x000b\x0028\x000b\x0003\x000b"+
+            "\x0001\x0000\x005e\x000a\x0011\x0000\x0018\x000a\x0038\x0000"+
+            "\x0010\x000b\u0100\x0000\x0080\x000b\x0080\x0000\u19b6\x000b"+
+            "\x000a\x000b\x0040\x0000\u51a6\x000b\x005a\x000b\u048d\x000a"+
+            "\u0773\x0000\u2ba4\x000a\u215c\x0000\u012e\x000b\x00d2\x000b"+
+            "\x0007\x000a\x000c\x0000\x0005\x000a\x0005\x0000\x0001\x000a"+
+            "\x0001\x0000\x000a\x000a\x0001\x0000\x000d\x000a\x0001\x0000"+
+            "\x0005\x000a\x0001\x0000\x0001\x000a\x0001\x0000\x0002\x000a"+
+            "\x0001\x0000\x0002\x000a\x0001\x0000\x006c\x000a\x0021\x0000"+
+            "\u016b\x000a\x0012\x0000\x0040\x000a\x0002\x0000\x0036\x000a"+
+            "\x0028\x0000\x000c\x000a\x0074\x0000\x0003\x000a\x0001\x0000"+
+            "\x0001\x000a\x0001\x0000\x0087\x000a\x0013\x0000\x000a\x0002"+
+            "\x0007\x0000\x001a\x000a\x0006\x0000\x001a\x000a\x000a\x0000"+
+            "\x0001\x000b\x003a\x000b\x001f\x000a\x0003\x0000\x0006\x000a"+
+            "\x0002\x0000\x0006\x000a\x0002\x0000\x0006\x000a\x0002\x0000"+
+            "\x0003\x000a\x0023\x0000";
+
 		/// <summary> Translates characters to character classes</summary>
 		private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
 		
 		/// <summary> Translates DFA states to action switch labels.</summary>
 		private static readonly int[] ZZ_ACTION = ZzUnpackAction();
 		
-		private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0004\x0002\x0001\x0003\x0001\x0001\x0006\x0000\x0002\x0002\x0006\x0000" + "\x0001\x0004\x0004\x0005\x0002\x0006\x0002\x0000\x0001\x0007\x0001\x0000\x0001\x0007\x0003\x0005" + "\x0006\x0007\x0003\x0005\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x0000\x0001\x0008\x0001\x0009" + "\x0001\x0000\x0002\x0009\x0002\x0008\x0002\x0005\x0001\x000A";
+		private const System.String ZZ_ACTION_PACKED_0 =
+            "\x0001\x0000\x0001\x0001\x0003\x0002\x0001\x0003\x0001\x0001"+
+            "\x000b\x0000\x0001\x0002\x0003\x0004\x0002\x0000\x0001\x0005"+
+            "\x0001\x0000\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006"+
+            "\x0001\x0004\x0002\x0007\x0001\x0008\x0001\x0000\x0001\x0008"+
+            "\x0003\x0000\x0002\x0008\x0001\x0009\x0001\x000a\x0001\x0004";
 		
 		private static int[] ZzUnpackAction()
 		{
-			int[] result = new int[61];
+			int[] result = new int[51];
 			int offset = 0;
 			offset = ZzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
 			return result;
@@ -80,11 +210,22 @@
 		/// <summary> Translates a state to a row index in the transition table</summary>
 		private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
 		
-		private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000F\x0000\x001E\x0000\x002D\x0000\x003C\x0000\x004B\x0000\x000F\x0000\x005A" + "\x0000\x0069\x0000\x0078\x0000\x0087\x0000\x0096\x0000\x00A5\x0000\x00B4\x0000\x00C3\x0000\x00D2" + "\x0000\x00E1\x0000\x00F0\x0000\x00FF\x0000\u010e\x0000\u011d\x0000\u012c\x0000\u013b\x0000\u014a" + "\x0000\u0159\x0000\u0168\x0000\u0177\x0000\x0087\x0000\u0186\x0000\u0195\x0000\u01a4\x0000\u01b3" + "\x0000\u01c2\x0000\u01d1\x0000\u01e0\x0000\u01ef\x0000\u01fe\x0000\u020d\x0000\u021c\x0000\u022b" + "\x0000\u023a\x0000\u0249\x0000\u0258\x0000\u0267\x0000\u0276\x0000\u0285\x0000\u0294\x0000\u02a3" + "\x0000\u02b2\x0000\u02c1\x0000\u02d0\x0000\u02df\x0000\u02ee\x0000\u02fd\x0000\u012c\x0000\x00E1" + "\x0000\x0078\x0000\u011d\x0000\u030c\x0000\u031b\x0000\u032a";
-		
+		private const System.String ZZ_ROWMAP_PACKED_0 =
+            "\x0000\x0000\x0000\x000e\x0000\x001c\x0000\x002a\x0000\x0038"+
+            "\x0000\x000e\x0000\x0046\x0000\x0054\x0000\x0062\x0000\x0070"+
+            "\x0000\x007e\x0000\x008c\x0000\x009a\x0000\x00a8\x0000\x00b6"+
+            "\x0000\x00c4\x0000\x00d2\x0000\x00e0\x0000\x00ee\x0000\x00fc"+
+            "\x0000\u010a\x0000\u0118\x0000\u0126\x0000\u0134\x0000\u0142"+
+            "\x0000\u0150\x0000\u015e\x0000\u016c\x0000\u017a\x0000\u0188"+
+            "\x0000\u0196\x0000\u01a4\x0000\u01b2\x0000\u01c0\x0000\u01ce"+
+            "\x0000\u01dc\x0000\u01ea\x0000\u01f8\x0000\x00d2\x0000\u0206"+
+            "\x0000\u0214\x0000\u0222\x0000\u0230\x0000\u023e\x0000\u024c"+
+            "\x0000\u025a\x0000\x0054\x0000\x008c\x0000\u0268\x0000\u0276"+
+            "\x0000\u0284";
+
 		private static int[] ZzUnpackRowMap()
 		{
-			int[] result = new int[61];
+			int[] result = new int[51];
 			int offset = 0;
 			offset = ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
 			return result;
@@ -106,13 +247,75 @@
 		/// <summary> The transition table of the DFA</summary>
 		private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
 		
-		private const System.String ZZ_TRANS_PACKED_0 = "\x0008\x0002\x0001\x0003\x0001\x0004\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0008\x0001\x0002" + "\x0010\x0000\x0001\x0009\x0001\x000A\x0001\x000B\x0001\x000C\x0002\x000D\x0001\x000E\x0001\x000F" + "\x0001\x0004\x0001\x0010\x0001\x0006\x0005\x0000\x0001\x0011\x0001\x0000\x0001\x0012\x0002\x0013" + "\x0001\x0014\x0003\x0004\x0001\x0006\x0004\x0000\x0001\x0009\x0001\x0015\x0001\x000B\x0001\x000C" + "\x0002\x0013\x0001\x0014\x0001\x0010\x0001\x0004\x0001\x0010\x0001\x0006\x0005\x0000\x0001\x0016" + "\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0006\x0011\x0000\x0001\x0002\x0008\x0000" + "\x0001\x0017\x0001\x0000\x0001\x0017\x000C\x0000\x0001\x0018\x0001\x0019\x0001\x001A\x0001\x001B" + "\x000B\x0000\x0001\x001C\x0001\x0000\x0001\x001C\x000C\x0000\x0001\x001D\x0001\x001E\x0001\x001D" + "\x0001\x001E\x000B\x0000\x0001\x001F\x0002\x0020\x0001\x0021\x000B\x0000\x0001\x000E\x0002\x0022" + "\x0005\x0000\x0001\x0009\x0001\x0016\x0001\x000B\x0001\x000C\x0002\x000D\x0001\x000E\x0001\x000F" + "\x0001\x0004\x0001\x0010\x0001\x0006\x0004\x0000\x0001\x0009\x0001\x0011\x0001\x000B\x0001\x000C" + "\x0002\x0013\x0001\x0014\x0001\x0010\x0001\x0004\x0001\x0010\x0001\x0006\x000B\x0000\x0001\x0023" + "\x0002\x0024\x0001\x0025\x000B\x0000\x0004\x001E\x000B\x0000\x0001\x0026\x0002\x0027\x0001\x0028" + "\x000B\x0000\x0001\x0029\x0002\x002A\x0001\x002B\x000B\x0000\x0001\x002C\x0001\x0024\x0001\x002D" + "\x0001\x0025\x000B\x0000\x0001\x002E\x0002\x0019\x0001\x001B\x0004\x0000\x0001\x0009\x0006\x0000" + "\x0001\x0017\x0001\x0000\x0001\x0017\x0006\x0000\x0001\x002F\x0001\x0000\x0001\x0012\x0002\x0030" + "\x0001\x0000\x0001\x002E\x0002\x0019\x0001\x001B\x0005\x0000\x0001\x0031\x0001\x0000\x0001\x0012" + "\x0002\x0032\x0001\x0033\x0003\x0019\x0001\x001B\x0005\x0000\x0001\x0034\x0001\x0000\x0001\x0012" + "\x0002\x0032\x0001\x0033\x0003\x0019\x0001\x001B\x0005\x0000\x0001\x0035\x0001\x0000\x0001\x0012" + 
-			"\x0002\x0030\x0001\x0000\x0004\x001B\x0005\x0000\x0001\x0036\x0002\x0000\x0001\x0036\x0002\x0000" + "\x0001\x001D\x0001\x001E\x0001\x001D\x0001\x001E\x0005\x0000\x0001\x0036\x0002\x0000\x0001\x0036" + "\x0002\x0000\x0004\x001E\x0005\x0000\x0001\x0030\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000" + "\x0001\x001F\x0002\x0020\x0001\x0021\x0005\x0000\x0001\x0032\x0001\x0000\x0001\x0012\x0002\x0032" + "\x0001\x0033\x0003\x0020\x0001\x0021\x0005\x0000\x0001\x0030\x0001\x0000\x0001\x0012\x0002\x0030" + "\x0001\x0000\x0004\x0021\x0005\x0000\x0001\x0033\x0002\x0000\x0003\x0033\x0003\x0022\x0006\x0000" + "\x0001\x0037\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0023\x0002\x0024\x0001\x0025" + "\x0005\x0000\x0001\x0038\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014\x0003\x0024\x0001\x0025" + "\x0005\x0000\x0001\x0037\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0025\x0005\x0000" + "\x0001\x000D\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0026\x0002\x0027\x0001\x0028" + "\x0005\x0000\x0001\x0013\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014\x0003\x0027\x0001\x0028" + "\x0005\x0000\x0001\x000D\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0028\x0005\x0000" + "\x0001\x000E\x0002\x0000\x0003\x000E\x0001\x0029\x0002\x002A\x0001\x002B\x0005\x0000\x0001\x0014" + "\x0002\x0000\x0003\x0014\x0003\x002A\x0001\x002B\x0005\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0004\x002B\x0005\x0000\x0001\x0039\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0001\x0023" + "\x0002\x0024\x0001\x0025\x0005\x0000\x0001\x003A\x0001\x0000\x0001\x0012\x0002\x0013\x0001\x0014" + "\x0003\x0024\x0001\x0025\x0005\x0000\x0001\x0035\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000" + "\x0001\x002E\x0002\x0019\x0001\x001B\x000B\x0000\x0001\x003B\x0001\x001B\x0001\x003B\x0001\x001B" + "\x000B\x0000\x0004\x0021\x000B\x0000\x0004\x0025\x000B\x0000\x0004\x0028\x000B\x0000\x0004\x002B" + "\x000B\x0000\x0001\x003C\x0001\x0025\x0001\x003C\x0001\x0025\x000B\x0000\x0004\x001B\x000B\x0000" + 
-			"\x0004\x003D\x0005\x0000\x0001\x002F\x0001\x0000\x0001\x0012\x0002\x0030\x0001\x0000\x0004\x001B" + "\x0005\x0000\x0001\x0039\x0001\x0000\x0001\x0012\x0002\x000D\x0001\x000E\x0004\x0025\x0005\x0000" + "\x0001\x0036\x0002\x0000\x0001\x0036\x0002\x0000\x0004\x003D\x0003\x0000";
-		
+		private const System.String ZZ_TRANS_PACKED_0 =
+            "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005"+
+            "\x0001\x0006\x0001\x0007\x0001\x0002\x000f\x0000\x0002\x0003"+
+            "\x0001\x0000\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x000a"+
+            "\x0001\x000b\x0001\x0003\x0004\x0000\x0001\x0003\x0001\x0004"+
+            "\x0001\x0000\x0001\x000c\x0001\x0000\x0001\x0009\x0002\x000d"+
+            "\x0001\x000e\x0001\x0004\x0004\x0000\x0001\x0003\x0001\x0004"+
+            "\x0001\x000f\x0001\x0010\x0001\x0011\x0001\x0012\x0002\x000a"+
+            "\x0001\x000b\x0001\x0013\x0010\x0000\x0001\x0002\x0001\x0000"+
+            "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0016\x0004\x0000"+
+            "\x0002\x0017\x0007\x0000\x0001\x0017\x0004\x0000\x0001\x0018"+
+            "\x0001\x0019\x0007\x0000\x0001\x001a\x0005\x0000\x0001\x001b"+
+            "\x0007\x0000\x0001\x000b\x0004\x0000\x0001\x001c\x0001\x001d"+
+            "\x0007\x0000\x0001\x001e\x0004\x0000\x0001\x001f\x0001\x0020"+
+            "\x0007\x0000\x0001\x0021\x0004\x0000\x0001\x0022\x0001\x0023"+
+            "\x0007\x0000\x0001\x0024\x000d\x0000\x0001\x0025\x0004\x0000"+
+            "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0026\x000d\x0000"+
+            "\x0001\x0027\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0028"+
+            "\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x000f\x0001\x0008"+
+            "\x0001\x0011\x0001\x0012\x0002\x000a\x0001\x000b\x0001\x0013"+
+            "\x0004\x0000\x0002\x0014\x0001\x0000\x0001\x0029\x0001\x0000"+
+            "\x0001\x0009\x0002\x002a\x0001\x0000\x0001\x0014\x0004\x0000"+
+            "\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x002b\x0001\x0000"+
+            "\x0001\x0009\x0002\x002c\x0001\x002d\x0001\x0015\x0004\x0000"+
+            "\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x0029\x0001\x0000"+
+            "\x0001\x0009\x0002\x002a\x0001\x0000\x0001\x0016\x0004\x0000"+
+            "\x0002\x0017\x0001\x0000\x0001\x002e\x0002\x0000\x0001\x002e"+
+            "\x0002\x0000\x0001\x0017\x0004\x0000\x0002\x0018\x0001\x0000"+
+            "\x0001\x002a\x0001\x0000\x0001\x0009\x0002\x002a\x0001\x0000"+
+            "\x0001\x0018\x0004\x0000\x0001\x0018\x0001\x0019\x0001\x0000"+
+            "\x0001\x002c\x0001\x0000\x0001\x0009\x0002\x002c\x0001\x002d"+
+            "\x0001\x0019\x0004\x0000\x0001\x0018\x0001\x0019\x0001\x0000"+
+            "\x0001\x002a\x0001\x0000\x0001\x0009\x0002\x002a\x0001\x0000"+
+            "\x0001\x001a\x0005\x0000\x0001\x001b\x0001\x0000\x0001\x002d"+
+            "\x0002\x0000\x0003\x002d\x0001\x001b\x0004\x0000\x0002\x001c"+
+            "\x0001\x0000\x0001\x002f\x0001\x0000\x0001\x0009\x0002\x000a"+
+            "\x0001\x000b\x0001\x001c\x0004\x0000\x0001\x001c\x0001\x001d"+
+            "\x0001\x0000\x0001\x0030\x0001\x0000\x0001\x0009\x0002\x000d"+
+            "\x0001\x000e\x0001\x001d\x0004\x0000\x0001\x001c\x0001\x001d"+
+            "\x0001\x0000\x0001\x002f\x0001\x0000\x0001\x0009\x0002\x000a"+
+            "\x0001\x000b\x0001\x001e\x0004\x0000\x0002\x001f\x0001\x0000"+
+            "\x0001\x000a\x0001\x0000\x0001\x0009\x0002\x000a\x0001\x000b"+
+            "\x0001\x001f\x0004\x0000\x0001\x001f\x0001\x0020\x0001\x0000"+
+            "\x0001\x000d\x0001\x0000\x0001\x0009\x0002\x000d\x0001\x000e"+
+            "\x0001\x0020\x0004\x0000\x0001\x001f\x0001\x0020\x0001\x0000"+
+            "\x0001\x000a\x0001\x0000\x0001\x0009\x0002\x000a\x0001\x000b"+
+            "\x0001\x0021\x0004\x0000\x0002\x0022\x0001\x0000\x0001\x000b"+
+            "\x0002\x0000\x0003\x000b\x0001\x0022\x0004\x0000\x0001\x0022"+
+            "\x0001\x0023\x0001\x0000\x0001\x000e\x0002\x0000\x0003\x000e"+
+            "\x0001\x0023\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000"+
+            "\x0001\x000b\x0002\x0000\x0003\x000b\x0001\x0024\x0006\x0000"+
+            "\x0001\x000f\x0006\x0000\x0001\x0025\x0004\x0000\x0001\x0014"+
+            "\x0001\x0015\x0001\x0000\x0001\x0031\x0001\x0000\x0001\x0009"+
+            "\x0002\x002a\x0001\x0000\x0001\x0016\x0004\x0000\x0002\x0017"+
+            "\x0001\x0000\x0001\x002e\x0002\x0000\x0001\x002e\x0002\x0000"+
+            "\x0001\x0028\x0004\x0000\x0002\x0014\x0007\x0000\x0001\x0014"+
+            "\x0004\x0000\x0002\x0018\x0007\x0000\x0001\x0018\x0004\x0000"+
+            "\x0002\x001c\x0007\x0000\x0001\x001c\x0004\x0000\x0002\x001f"+
+            "\x0007\x0000\x0001\x001f\x0004\x0000\x0002\x0022\x0007\x0000"+
+            "\x0001\x0022\x0004\x0000\x0002\x0032\x0007\x0000\x0001\x0032"+
+            "\x0004\x0000\x0002\x0014\x0007\x0000\x0001\x0033\x0004\x0000"+
+            "\x0002\x0032\x0001\x0000\x0001\x002e\x0002\x0000\x0001\x002e"+
+            "\x0002\x0000\x0001\x0032\x0004\x0000\x0002\x0014\x0001\x0000"+
+            "\x0001\x0031\x0001\x0000\x0001\x0009\x0002\x002a\x0001\x0000"+
+            "\x0001\x0014\x0003\x0000";
+
 		private static int[] ZzUnpackTrans()
 		{
-			int[] result = new int[825];
+			int[] result = new int[658];
 			int offset = 0;
 			offset = ZzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
 			return result;
@@ -135,7 +338,6 @@
 			return j;
 		}
 		
-		
 		/* error codes */
 		private const int ZZ_UNKNOWN_ERROR = 0;
 		private const int ZZ_NO_MATCH = 1;
@@ -147,11 +349,14 @@
 		/// <summary> ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code></summary>
 		private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
 		
-		private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0004\x0001\x0001\x0009\x0001\x0001\x0006\x0000\x0002\x0001\x0006\x0000" + "\x0007\x0001\x0002\x0000\x0001\x0001\x0001\x0000\x000E\x0001\x0001\x0000\x0001\x0001\x0002\x0000" + "\x0002\x0001\x0001\x0000\x0007\x0001";
+		private const System.String ZZ_ATTRIBUTE_PACKED_0 =
+            "\x0001\x0000\x0001\x0009\x0003\x0001\x0001\x0009\x0001\x0001"+
+            "\x000b\x0000\x0004\x0001\x0002\x0000\x0001\x0001\x0001\x0000"+
+            "\x000f\x0001\x0001\x0000\x0001\x0001\x0003\x0000\x0005\x0001";
 		
 		private static int[] ZzUnpackAttribute()
 		{
-			int[] result = new int[61];
+			int[] result = new int[51];
 			int offset = 0;
 			offset = ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
 			return result;
@@ -284,7 +489,7 @@
 			char[] map = new char[0x10000];
 			int i = 0; /* index in packed string  */
 			int j = 0; /* index in unpacked array */
-			while (i < 156)
+			while (i < 1154)
 			{
 				int count = packed[i++];
 				char value_Renamed = packed[i++];
@@ -572,7 +777,7 @@
 				switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction])
 				{
 					
-					case 5: 
+					case 4: 
 					{
 						return HOST;
 					}
@@ -581,14 +786,14 @@
 					
 					case 9: 
 					{
-						return ACRONYM_DEP;
+						return ACRONYM;
 					}
 					
 					case 12:  break;
 					
 					case 8: 
 					{
-						return ACRONYM;
+                        return ACRONYM_DEP;
 					}
 					
 					case 13:  break;
@@ -601,7 +806,7 @@
 					
 					case 14:  break;
 					
-					case 7: 
+					case 5: 
 					{
 						return NUM;
 					}
@@ -622,14 +827,14 @@
 					
 					case 17:  break;
 					
-					case 6: 
+					case 7: 
 					{
 						return COMPANY;
 					}
 					
 					case 18:  break;
 					
-					case 4: 
+					case 6: 
 					{
 						return APOSTROPHE;
 					}
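
All of the Zz* unpack helpers above share one run-length scheme: the packed string is a sequence of (count, value) char pairs expanded into consecutive array cells, which is also why the loop bound in ZzUnpackCMap moves from 156 to 1154 to track the longer packed CMAP literal.  A standalone sketch of the decoder (an illustrative rewrite, not the generated code itself):

    // Expand a JFlex-style packed string of (count, value) pairs.
    private static char[] UnpackRle(System.String packed, int unpackedLength)
    {
        char[] map = new char[unpackedLength];
        int i = 0; // index into the packed string
        int j = 0; // index into the unpacked array
        while (i < packed.Length)
        {
            int count = packed[i++];  // run length
            char value = packed[i++]; // value to repeat
            do { map[j++] = value; } while (--count > 0);
        }
        return map;
    }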

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerImpl.jflex Wed Jul 29 18:04:12 2009
@@ -17,6 +17,17 @@
  * limitations under the License.
  */
 
+/*
+
+NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate
+      the tokenizer, remember to use JRE 1.4 to run jflex (before
+      Lucene 3.0).  This grammar now uses constructs (eg :digit:,
+      :letter:) whose meaning can vary according to the JRE used to
+      run jflex.  See
+      https://issues.apache.org/jira/browse/LUCENE-1126 for details.
+
+*/
+
 import org.apache.lucene.analysis.Token;
 
 %%
@@ -60,8 +71,10 @@
 }
 %}
 
-// basic word: a sequence of digits & letters
-ALPHANUM   = ({LETTER}|{DIGIT}|{KOREAN})+
+THAI       = [\u0E00-\u0E59]
+
+// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
+ALPHANUM   = ({LETTER}|{THAI}|[:digit:])+
 
 // internal apostrophes: O'Reilly, you're, O'Reilly's
 // use a post-filter to remove possessives
@@ -95,22 +108,15 @@
 P	         = ("_"|"-"|"/"|"."|",")
 
 // at least one digit
-HAS_DIGIT  =
-    ({LETTER}|{DIGIT})*
-    {DIGIT}
-    ({LETTER}|{DIGIT})*
+HAS_DIGIT  = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*
 
 ALPHA      = ({LETTER})+
 
+// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
+LETTER     = !(![:letter:]|{CJ})
 
-LETTER     = [\u0041-\u005a\u0061-\u007a\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff\u0100-\u1fff\uffa0-\uffdc]
-
-DIGIT      = [\u0030-\u0039\u0660-\u0669\u06f0-\u06f9\u0966-\u096f\u09e6-\u09ef\u0a66-\u0a6f\u0ae6-\u0aef\u0b66-\u0b6f\u0be7-\u0bef\u0c66-\u0c6f\u0ce6-\u0cef\u0d66-\u0d6f\u0e50-\u0e59\u0ed0-\u0ed9\u1040-\u1049]
-
-KOREAN     = [\uac00-\ud7af\u1100-\u11ff]
-
-// Chinese, Japanese
-CJ         = [\u3040-\u318f\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
+// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
+CJ         = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
 
 WHITESPACE = \r\n | [ \r\n\t\f]
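
(In other words, LETTER is now "any [:letter:] character except the CJ ranges": Korean, newly covered by [:letter:], flows into ALPHANUM as part of ordinary words, while Chinese and Japanese continue to be matched by the separate CJ rule.)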
 

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/StopFilter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs Wed Jul 29 18:04:12 2009
@@ -117,22 +117,23 @@
 			return stopSet;
 		}
 		
-		/// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
-		public override Token Next(Token result)
+		/// <summary> Returns the next input Token whose term() is not a stop word.</summary>
+        public override Token Next(/* in */ Token reusableToken)
 		{
+            System.Diagnostics.Debug.Assert(reusableToken != null);
 			// return the first non-stop word found
 			int skippedPositions = 0;
-			while ((result = input.Next(result)) != null)
-			{
-				if (!stopWords.Contains(result.TermBuffer(), 0, result.termLength))
+            for (Token nextToken = input.Next(reusableToken); nextToken != null; nextToken = input.Next(reusableToken))
+            {
+				if (!stopWords.Contains(nextToken.TermBuffer(), 0, nextToken.TermLength()))
 				{
 					if (enablePositionIncrements)
 					{
-						result.SetPositionIncrement(result.GetPositionIncrement() + skippedPositions);
+						nextToken.SetPositionIncrement(nextToken.GetPositionIncrement() + skippedPositions);
 					}
-					return result;
+					return nextToken;
 				}
-				skippedPositions += result.GetPositionIncrement();
+				skippedPositions += nextToken.GetPositionIncrement();
 			}
 			// reached EOS -- return null
 			return null;
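
A worked example of the skippedPositions bookkeeping above: with stop set {the} and input "the quick the the fox", the first call skips "the" (skippedPositions = 1) and, with enablePositionIncrements on, returns "quick" with position increment 2; the next call skips two stop words and returns "fox" with increment 3, so token positions stay aligned with the original text.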

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TeeTokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/TeeTokenFilter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TeeTokenFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TeeTokenFilter.cs Wed Jul 29 18:04:12 2009
@@ -62,11 +62,12 @@
 			this.sink = sink;
 		}
 		
-		public override Token Next(Token result)
+		public override Token Next(/* in */ Token reusableToken)
 		{
-			Token t = input.Next(result);
-			sink.Add(t);
-			return t;
+            System.Diagnostics.Debug.Assert(reusableToken != null);
+			Token nextToken = input.Next(reusableToken);
+			sink.Add(nextToken);
+			return nextToken;
 		}
 	}
 }
\ No newline at end of file
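
Note that TeeTokenFilter.Next hands the (possibly reused) returned token straight to sink.Add, so the sink side is expected to copy what it needs before the next call overwrites the buffer; this matches the reuse note added to Token.cs below: "When caching a reusable token, clone it."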

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Token.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Token.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Token.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Token.cs Wed Jul 29 18:04:12 2009
@@ -18,6 +18,7 @@
 using System;
 
 using Payload = Lucene.Net.Index.Payload;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
 
 namespace Lucene.Net.Analysis
 {
@@ -31,7 +32,7 @@
 	/// browser, or to show matching text fragments in a KWIC (KeyWord In Context)
 	/// display, etc.
 	/// <p>
-	/// The type is an interned string, assigned by a lexical analyzer
+	/// The type is a string, assigned by a lexical analyzer
 	/// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
 	/// belongs to.  For example an end of sentence marker token might be implemented
 	/// with type "eos".  The default token type is "word".  
@@ -49,7 +50,7 @@
 	/// <p><b>NOTE:</b> As of 2.3, Token stores the term text
 	/// internally as a malleable char[] termBuffer instead of
 	/// String termText.  The indexing code and core tokenizers
-	/// have been changed re-use a single Token instance, changing
+	/// have been changed to re-use a single Token instance, changing
 	/// its buffer and other fields in-place as the Token is
 	/// processed.  This provides substantially better indexing
 	/// performance as it saves the GC cost of new'ing a Token and
@@ -57,19 +58,59 @@
 	/// termText are still available but a warning about the
 	/// associated performance cost has been added (below).  The
 	/// {@link #TermText()} method has been deprecated.</p>
-	/// </summary>
-	/// <summary><p>Tokenizers and filters should try to re-use a Token
+    /// <p>Tokenizers and filters should try to re-use a Token
 	/// instance when possible for best performance, by
 	/// implementing the {@link TokenStream#Next(Token)} API.
 	/// Failing that, to create a new Token you should first use
-	/// one of the constructors that starts with null text.  Then
-	/// you should call either {@link #TermBuffer()} or {@link
-	/// #ResizeTermBuffer(int)} to retrieve the Token's
-	/// termBuffer.  Fill in the characters of your term into this
-	/// buffer, and finally call {@link #SetTermLength(int)} to
-	/// set the length of the term text.  See <a target="_top"
-	/// href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
-	/// for details.</p>
+    /// one of the constructors that starts with null text.  To load
+    /// the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
+    /// To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}.
+    ///  Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()},
+    ///  if you know that your text is shorter than the capacity of the termBuffer
+    /// or {@link #resizeTermBuffer(int)}, if there is any possibility
+    /// that you may need to grow the buffer. Fill in the characters of your term into this
+    /// buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
+    /// or with {@link System#arraycopy(object, int, object, int, int)}, and finally call {@link #setTermLength(int)} to
+    /// set the length of the term text.  See <a target="_top"
+    /// href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
+    /// for details.</p>
+    /// <p>Typical reuse patterns:
+    /// <ul>
+    /// <li> Copying text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+    ///  <pre>
+    ///    return reusableToken.reinit(string, startOffset, endOffset[, type]);
+    /// </pre>
+    /// </li>
+    /// <li> Copying some text from a string (type is reset to #DEFAULT_TYPE if not specified):<br/>
+    ///  <pre>
+    /// return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+    /// </pre>
+    /// </li>
+    /// <li> Copying text from char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+    ///  <pre>
+    /// return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+    /// </pre>
+    /// </li>
+    /// <li> Copying some text from a char[] buffer (type is reset to #DEFAULT_TYPE if not specified):<br/>
+    /// <pre>
+    /// return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+    /// </pre>
+    /// </li>
+    /// <li> Copying from one Token to another (type is reset to #DEFAULT_TYPE if not specified):<br/>
+    /// <pre>
+    /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+    /// </pre>
+    /// </li>
+    ///  </ul>
+    /// A few things to note:
+    /// <ul>
+    ///  <li>clear() initializes most of the fields to default values, but not startOffset, endOffset and type.</li>
+    /// <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
+    /// <li>The startOffset and endOffset represent the start and end offset in the source text. So be careful in adjusting them.</li>
+    /// <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
+    /// </ul>
+    /// </p>
 	/// </summary>
 	/// <seealso cref="Lucene.Net.Index.Payload">
 	/// </seealso>
@@ -77,97 +118,192 @@
     {
 		
 		public const System.String DEFAULT_TYPE = "word";
+
 		private static int MIN_BUFFER_SIZE = 10;
 		
-		/// <deprecated>: we will remove this when we remove the
-		/// deprecated APIs 
+		/// <deprecated>
+        /// We will remove this when we remove the deprecated APIs. 
 		/// </deprecated>
 		private System.String termText;
-		
-		internal char[] termBuffer; // characters for the term text
-		internal int termLength; // length of term text in buffer
-		
-		internal int startOffset; // start in source text
-		internal int endOffset; // end in source text
-		internal System.String type = DEFAULT_TYPE; // lexical type
-		
-		internal Payload payload;
-		
+
+        /// <summary>
+        /// Characters for the term text.
+        /// </summary>
+        /// <deprecated>
+        /// This will be made private.  Instead, use:
+        /// {@link #setTermBuffer(char[], int, int)},
+        /// {@link #setTermBuffer(String)}, or
+        /// {@link #setTermBuffer(String, int, int)},
+        /// </deprecated>
+        internal char[] termBuffer;
+
+        /// <summary>
+        /// Length of term text in the buffer.
+        /// </summary>
+        /// <deprecated>
+        /// This will be made private.  Instead, use:
+        /// {@link termLength()} or {@link setTermLength(int)}
+        /// </deprecated>
+        internal int termLength;
+
+        /// <summary>
+        /// Start in source text.
+        /// </summary>
+        /// <deprecated>
+        /// This will be made private.  Instead, use:
+        /// {@link startOffset()} or {@link setStartOffset(int)}
+        /// </deprecated>
+        internal int startOffset;
+
+        /// <summary>
+        /// End in source text.
+        /// </summary>
+        /// <deprecated>
+        /// This will be made private.  Instead, use:
+        /// {@link endOffset()} or {@link setEndOffset(int)}
+        /// </deprecated>
+        internal int endOffset;
+
+        /// <summary>
+        /// The lexical type of the token.
+        /// </summary>
+        /// <deprecated>
+        /// This will be made private.  Instead, use:
+        /// {@link type()} or {@link setType(String)}
+        /// </deprecated>
+        internal System.String type = DEFAULT_TYPE;
+
+        private int flags;
+        
+        /// <deprecated>
+        /// This will be made private. Instead, use:
+        /// {@link getPayload()} or {@link setPayload(Payload)}.
+        /// </deprecated>
+        internal Payload payload;
+		
+        /// <deprecated>
+        /// This will be made private. Instead, use:
+        /// {@link getPositionIncrement()} or {@link setPositionIncrement(int)}.
+        /// </deprecated>
 		internal int positionIncrement = 1;
-		
+
 		/// <summary>Constructs a Token with null text. </summary>
 		public Token()
 		{
 		}
 		
-		/// <summary>Constructs a Token with null text and start & end
-		/// offsets.
+		/// <summary>
+        /// Constructs a Token with null text and start & end offsets.
 		/// </summary>
-		/// <param name="start">start offset
-		/// </param>
-		/// <param name="end">end offset 
-		/// </param>
+		/// <param name="start">start offset in the source text</param>
+        /// <param name="end">end offset in the source text</param>
 		public Token(int start, int end)
 		{
 			startOffset = start;
 			endOffset = end;
 		}
-		
-		/// <summary>Constructs a Token with null text and start & end
-		/// offsets plus the Token type.
-		/// </summary>
-		/// <param name="start">start offset
-		/// </param>
-		/// <param name="end">end offset 
-		/// </param>
-		public Token(int start, int end, System.String typ)
+
+        /// <summary>
+        /// Constructs a Token with null text and start & end offsets plus the Token type.
+        /// </summary>
+        /// <param name="start">start offset in the source text</param>
+        /// <param name="end">end offset in the source text</param>
+        /// <param name="typ">the lexical type of this Token</param>
+        public Token(int start, int end, System.String typ)
 		{
 			startOffset = start;
 			endOffset = end;
 			type = typ;
 		}
-		
-		/// <summary>Constructs a Token with the given term text, and start
-		/// & end offsets.  The type defaults to "word."
+
+        /// <summary>
+        /// Constructs a Token with null text and start & end offsets plus flags.
+        /// NOTE: flags is EXPERIMENTAL.
+        /// </summary>
+        /// <param name="start">start offset in the source text</param>
+        /// <param name="end">end offset in the source text</param>
+        /// <param name="flags">the bits to set for this Token</param>
+        public Token(int start, int end, int flags)
+        {
+            startOffset = start;
+            endOffset = end;
+            this.flags = flags;
+        }
+
+        /// <summary>
+        /// Constructs a Token with the given term text, and start
+		/// and end offsets.  The type defaults to "word."
 		/// <b>NOTE:</b> for better indexing speed you should
 		/// instead use the char[] termBuffer methods to set the
 		/// term text.
 		/// </summary>
-		/// <param name="text">term text
-		/// </param>
-		/// <param name="start">start offset
-		/// </param>
-		/// <param name="end">end offset 
-		/// </param>
+        /// <param name="text">term text</param>
+		/// <param name="start">start offset</param>
+		/// <param name="end">end offset</param>
+        /// <deprecated></deprecated>
 		public Token(System.String text, int start, int end)
 		{
 			termText = text;
 			startOffset = start;
 			endOffset = end;
 		}
-		
-		/// <summary>Constructs a Token with the given text, start and end
-		/// offsets, & type.  <b>NOTE:</b> for better indexing
-		/// speed you should instead use the char[] termBuffer
-		/// methods to set the term text.
-		/// </summary>
-		/// <param name="text">term text
-		/// </param>
-		/// <param name="start">start offset
-		/// </param>
-		/// <param name="end">end offset
-		/// </param>
-		/// <param name="typ">token type 
-		/// </param>
-		public Token(System.String text, int start, int end, System.String typ)
+
+        /// <summary>
+        /// Constructs a Token with the given term text, start
+        /// and end offsets, and type.
+        /// <b>NOTE:</b> for better indexing speed you should
+        /// instead use the char[] termBuffer methods to set the
+        /// term text.
+        /// </summary>
+        /// <param name="text">term text</param>
+        /// <param name="start">start offset</param>
+        /// <param name="end">end offset</param>
+        /// <param name="typ">token type</param>
+        /// <deprecated></deprecated>
+        public Token(System.String text, int start, int end, System.String typ)
 		{
 			termText = text;
 			startOffset = start;
 			endOffset = end;
 			type = typ;
 		}
-		
-		/// <summary>Set the position increment.  This determines the position of this token
+
+        /// <summary>
+        /// Constructs a Token with the given term text, start
+        /// and end offsets, and flags.
+        /// <b>NOTE:</b> for better indexing speed you should
+        /// instead use the char[] termBuffer methods to set the
+        /// term text.
+        /// </summary>
+        /// <param name="text">term text</param>
+        /// <param name="start">start offset</param>
+        /// <param name="end">end offset</param>
+        /// <param name="flags">the bits to set for this Token</param>
+        /// <deprecated></deprecated>
+        public Token(System.String text, int start, int end, int flags)
+        {
+            termText = text;
+            startOffset = start;
+            endOffset = end;
+            this.flags = flags;
+        }
+
+        /// <summary>
+        /// Constructs a Token with the given term buffer (offset and length), start and end offsets.
+        /// </summary>
+        /// <param name="startTermBuffer">buffer containing the term text</param>
+        /// <param name="termBufferOffset">index of the first character of the term in the buffer</param>
+        /// <param name="termBufferLength">number of characters of term text in the buffer</param>
+        /// <param name="start">start offset in the source text</param>
+        /// <param name="end">end offset in the source text</param>
+        public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end)
+        {
+            SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
+            startOffset = start;
+            endOffset = end;
+        }
+
+        /// <summary>Set the position increment.  This determines the position of this token
 		/// relative to the previous Token in a {@link TokenStream}, used in phrase
 		/// searching.
 		/// 
@@ -191,6 +327,7 @@
 		/// 
 		/// </ul>
 		/// </summary>
+        /// <param name="positionIncrement">the distance from the prior term</param>
 		/// <seealso cref="Lucene.Net.Index.TermPositions">
 		/// </seealso>
 		public virtual void  SetPositionIncrement(int positionIncrement)
@@ -212,41 +349,102 @@
 		/// indexing speed you should instead use the char[]
 		/// termBuffer methods to set the term text. 
 		/// </summary>
+        /// <deprecated>
+        /// use {@link #setTermBuffer(char[], int, int)}, 
+        ///     {@link #setTermBuffer(string)}, or
+        ///     {@link #setTermBuffer(string, int, int)}.
+        /// </deprecated>
 		public virtual void  SetTermText(System.String text)
 		{
 			termText = text;
 			termBuffer = null;
 		}
-		
-		/// <summary>Returns the Token's term text.
-		/// 
-		/// </summary>
-		/// <deprecated> Use {@link #TermBuffer()} and {@link
-		/// #TermLength()} instead. 
-		/// </deprecated>
-		public System.String TermText()
+
+        /// <summary>
+        /// Returns the Token's term text.
+        /// This method has a performance penalty because the text is stored
+        /// internally in a char[].  If possible, use {@link #termBuffer()}
+        /// and {@link #termLength()} directly instead.  If you really need
+        /// a string, use {@link #Term()}.
+        /// </summary>
+        /// <returns>the Token's term text</returns>
+        public System.String TermText()
 		{
 			if (termText == null && termBuffer != null)
 				termText = new System.String(termBuffer, 0, termLength);
 			return termText;
 		}
-		
-		/// <summary>Copies the contents of buffer, starting at offset for
-		/// length characters, into the termBuffer
-		/// array. <b>NOTE:</b> for better indexing speed you
-		/// should instead retrieve the termBuffer, using {@link
-		/// #TermBuffer()} or {@link #ResizeTermBuffer(int)}, and
-		/// fill it in directly to set the term text.  This saves
-		/// an extra copy. 
-		/// </summary>
+
+        /// <summary>
+        /// Returns the Token's term text.
+        /// This method has a performance penalty because the text is stored
+        /// internally in a char[].  If possible, use {@link #termBuffer()}
+        /// and {@link #termLength()} directly instead.  If you really need
+        /// a string, use this method which is nothing more than a
+        /// convenience call to <b>new String(token.TermBuffer(), 0, token.TermLength())</b>.
+        /// </summary>
+        /// <returns>the Token's term text as a string</returns>
+        public string Term()
+        {
+            if (termText != null)
+                return termText;
+            InitTermBuffer();
+            return new String(termBuffer, 0, termLength);
+        }
+
+		/// <summary>
+        /// Copies the contents of buffer, starting at offset for
+		/// length characters, into the termBuffer array.
+		/// </summary>
+        /// <param name="buffer"/>
+        /// <param name="offset"/>
+        /// <param name="length"/>
 		public void  SetTermBuffer(char[] buffer, int offset, int length)
 		{
-			ResizeTermBuffer(length);
-			Array.Copy(buffer, offset, termBuffer, 0, length);
+            termText = null;
+            char[] newCharBuffer = GrowTermBuffer(length);
+            if (newCharBuffer != null)
+                termBuffer = newCharBuffer;
+            Array.Copy(buffer, offset, termBuffer, 0, length);
 			termLength = length;
 		}
-		
-		/// <summary>Returns the internal termBuffer character array which
+
+        /// <summary>
+        /// Copies the contents of buffer into the termBuffer array.
+        /// </summary>
+        /// <param name="buffer"/>
+        public void SetTermBuffer(string buffer)
+        {
+            termText = null;
+            int length = buffer.Length;
+            char[] newCharBuffer = GrowTermBuffer(length);
+            if (newCharBuffer != null)
+                termBuffer = newCharBuffer;
+            buffer.CopyTo(0, termBuffer, 0, length);
+            termLength = length;
+        }
+
+        /// <summary>
+        /// Copies the contents of buffer, starting at offset for
+        /// length characters, into the termBuffer array.
+        /// </summary>
+        /// <param name="buffer"/>
+        /// <param name="offset"/>
+        /// <param name="length"/>
+        public void SetTermBuffer(string buffer, int offset, int length)
+        {
+            System.Diagnostics.Debug.Assert(offset <= buffer.Length);
+            System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
+            termText = null;
+            char[] newCharBuffer = GrowTermBuffer(length);
+            if (newCharBuffer != null)
+                termBuffer = newCharBuffer;
+            buffer.CopyTo(offset, termBuffer, 0, length);
+            termLength = length;
+        }
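A short sketch of the three overloads above (token is an assumed, already-constructed Token):

    char[] chars = "foobar".ToCharArray();
    token.SetTermBuffer(chars, 0, chars.Length);  // from a char[] slice
    token.SetTermBuffer("foobar");                // from a whole string
    token.SetTermBuffer("foobarbaz", 3, 3);       // from a string slice: "bar"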
+
+        /// <summary>Returns the internal termBuffer character array which
 		/// you can then directly alter.  If the array is too
 		/// small for your token, use {@link
 		/// #ResizeTermBuffer(int)} to increase it.  After
@@ -260,11 +458,17 @@
 			return termBuffer;
 		}
 		
-		/// <summary>Grows the termBuffer to at least size newSize.</summary>
-		/// <param name="newSize">minimum size of the new termBuffer
-		/// </param>
-		/// <returns> newly created termBuffer with length >= newSize
-		/// </returns>
+		/// <summary>
+        /// Grows the termBuffer to at least size newSize, preserving the
+        /// existing content.  Note: If the next operation is to change
+        /// the contents of the term buffer use
+        /// {@link #SetTermBuffer(char[], int, int)},
+        /// {@link #SetTermBuffer(String)}, or
+        /// {@link #SetTermBuffer(String, int, int)},
+        /// to optimally combine the resize with the setting of the termBuffer.
+        /// </summary>
+		/// <param name="newSize">minimum size of the new termBuffer</param>
+		/// <returns> newly created termBuffer with length >= newSize</returns>
 		public virtual char[] ResizeTermBuffer(int newSize)
 		{
 			InitTermBuffer();
@@ -279,7 +483,43 @@
 			}
 			return termBuffer;
 		}
-		
+
+        /// <summary>
+        /// Allocates a buffer char[] of at least newSize.
+        /// </summary>
+        /// <param name="newSize">minimum size of the buffer</param>
+        /// <returns>newly created buffer with length >= newSize or null if the current termBuffer is big enough</returns>
+        private char[] GrowTermBuffer(int newSize)
+        {
+            if (termBuffer != null)
+            {
+                if (termBuffer.Length >= newSize)
+                    // Already big enough 
+                    return null;
+                else
+                    // Not big enough; create a new array with slight
+                    // over-allocation
+                    return new char[ArrayUtil.GetNextSize(newSize)];
+            }
+            else
+            {
+                // determine the best size
+                // The buffer is always at least MIN_BUFFER_SIZE
+                if (newSize < MIN_BUFFER_SIZE)
+                    newSize = MIN_BUFFER_SIZE;
+
+                // If there is already a termText, then the size has to be at least that big
+                if (termText != null)
+                {
+                    int ttLength = termText.Length;
+                    if (newSize < ttLength)
+                        newSize = ttLength;
+                }
+
+                return new char[newSize];
+            }
+        }
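Per the note on ResizeTermBuffer above, the resize-preserving path is for in-place edits such as appending; a minimal sketch (token and the suffix are assumed):

    // Append a suffix in place: grow first (existing content is preserved), then write.
    string suffix = "_stem";
    int oldLen = token.TermLength();
    char[] buf = token.ResizeTermBuffer(oldLen + suffix.Length);
    suffix.CopyTo(0, buf, oldLen, suffix.Length);
    token.SetTermLength(oldLen + suffix.Length);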
+
 		// TODO: once we remove the deprecated termText() method
 		// and switch entirely to char[] termBuffer we don't need
 		// to use this method anymore
@@ -324,11 +564,16 @@
 		}
 		
 		/// <summary>Set number of valid characters (length of the term) in
-		/// the termBuffer array. 
+		/// the termBuffer array.  Use this to truncate the termBuffer
+        /// or to synchronize with external manipulation of the termBuffer.
+        /// Note: to grow the size of the array use {@link #ResizeTermBuffer(int)} first.
 		/// </summary>
+        /// <param name="length">the truncated length</param>
 		public void  SetTermLength(int length)
 		{
 			InitTermBuffer();
+            if (length > termBuffer.Length)
+                throw new ArgumentOutOfRangeException("length", "length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
 			termLength = length;
 		}
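For example, a filter that caps term length could truncate in place (the limit here is hypothetical):

    const int MaxTermLength = 255;  // hypothetical cap
    if (token.TermLength() > MaxTermLength)
        token.SetTermLength(MaxTermLength);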
 		
@@ -352,7 +597,8 @@
 		}
 		
 		/// <summary>Returns this Token's ending offset, one greater than the position of the
-		/// last character corresponding to this token in the source text. 
+		/// last character corresponding to this token in the source text.  The length of the
+        /// token in the source text is (endOffset - startOffset).
 		/// </summary>
 		public int EndOffset()
 		{
@@ -380,7 +626,27 @@
 		{
 			this.type = type;
 		}
-		
+
+        /// <summary>
+        /// EXPERIMENTAL:  While we think this is here to stay, we may want to change it to be a long.
+        /// Get the bitset for any bits that have been set.  This is completely distinct from {@link #type()}, although they do share similar purposes.
+        /// The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
+        /// </summary>
+        /// <returns>The bits</returns>
+        public int GetFlags()
+        {
+            return flags;
+        }
+
+        /// <seealso cref="GetFlags()"/>
+        public void SetFlags(int flags)
+        {
+            this.flags = flags;
+        }
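A sketch of how cooperating filters might use the bitset (the bit value is hypothetical; Lucene itself assigns no meaning to individual bits):

    const int KeywordFlag = 1;                               // hypothetical bit
    token.SetFlags(token.GetFlags() | KeywordFlag);          // set in one filter
    bool isKeyword = (token.GetFlags() & KeywordFlag) != 0;  // test in a later filter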
+
 		/// <summary> Returns this Token's payload.</summary>
 		public virtual Payload GetPayload()
 		{
@@ -422,19 +688,20 @@
 			termLength = 0;
 			termText = null;
 			positionIncrement = 1;
+            flags = 0;
 			// startOffset = endOffset = 0;
 			// type = DEFAULT_TYPE;
 		}
 		
-		public virtual System.Object Clone()
+		public virtual object Clone()
 		{
 			try
 			{
 				Token t = (Token) base.MemberwiseClone();
+                // Do a deep clone
 				if (termBuffer != null)
 				{
-					t.termBuffer = null;
-					t.SetTermBuffer(termBuffer, 0, termLength);
+					t.termBuffer = (char[]) termBuffer.Clone();
 				}
 				if (payload != null)
 				{
@@ -447,5 +714,231 @@
 				throw new System.SystemException("", e); // shouldn't happen
 			}
 		}
-	}
+
+        /// <summary>Makes a clone, but replaces the term buffer and
+        /// start/end offset in the process.  This is more
+        /// efficient than doing a full clone (and then calling
+        /// SetTermBuffer) because it saves a wasted copy of the old
+        /// termBuffer.</summary>
+        public Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+        {
+            Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
+            t.positionIncrement = positionIncrement;
+            t.flags = flags;
+            t.type = type;
+            if (payload != null)
+                t.payload = (Payload)payload.Clone();
+            return t;
+        }
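Illustrative use of this cheap clone, e.g. when injecting a variant of the current token (original is an assumed Token; StartOffset() is the usual counterpart of the EndOffset() accessor below):

    char[] synonym = "automobile".ToCharArray();
    Token injected = original.Clone(synonym, 0, synonym.Length,
                                    original.StartOffset(), original.EndOffset());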
+
+        public override bool Equals(object obj)
+        {
+            if (obj == this)
+                return true;
+
+            if (obj is Token)
+            {
+                Token other = (Token)obj;
+
+                InitTermBuffer();
+                other.InitTermBuffer();
+
+                if (termLength == other.termLength &&
+                    startOffset == other.startOffset &&
+                    endOffset == other.endOffset &&
+                    flags == other.flags &&
+                    positionIncrement == other.positionIncrement &&
+                    SubEqual(type, other.type) &&
+                    SubEqual(payload, other.payload))
+                {
+                    for (int i = 0; i < termLength; i++)
+                        if (termBuffer[i] != other.termBuffer[i])
+                            return false;
+                    return true;
+                }
+                else
+                    return false;
+            }
+            else
+                return false;
+        }
+
+        private bool SubEqual(object o1, object o2)
+        {
+            if (o1 == null)
+                return o2 == null;
+            else
+                return o1.Equals(o2);
+        }
+
+        public override int GetHashCode()
+        {
+            InitTermBuffer();
+            int code = termLength;
+            code = code * 31 + startOffset;
+            code = code * 31 + endOffset;
+            code = code * 31 + flags;
+            code = code * 31 + positionIncrement;
+            code = code * 31 + type.GetHashCode();
+            code = (payload == null ? code : code * 31 + payload.GetHashCode());
+            code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
+            return code;
+        }
+
+        // like clear() but doesn't clear termBuffer/text
+        private void ClearNoTermBuffer()
+        {
+            payload = null;
+            positionIncrement = 1;
+            flags = 0;
+        }
+
+        /// <summary>Shorthand for calling {@link #Clear},
+        /// {@link #SetTermBuffer(char[], int, int)},
+        /// {@link #SetStartOffset},
+        /// {@link #SetEndOffset}, and
+        /// {@link #SetType}.</summary>
+        /// <returns>this Token instance</returns>
+        public Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType)
+        {
+            ClearNoTermBuffer();
+            SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+            startOffset = newStartOffset;
+            endOffset = newEndOffset;
+            type = newType;
+            return this;
+        }
+
+        /// <summary>Shorthand for calling {@link #Clear},
+        /// {@link #SetTermBuffer(char[], int, int)},
+        /// {@link #SetStartOffset},
+        /// {@link #SetEndOffset}, and
+        /// {@link #SetType} with Token.DEFAULT_TYPE.</summary>
+        /// <returns>this Token instance</returns>
+        public Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+        {
+            ClearNoTermBuffer();
+            SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+            startOffset = newStartOffset;
+            endOffset = newEndOffset;
+            type = DEFAULT_TYPE;
+            return this;
+        }
+
+        /// <summary>Shorthand for calling {@link #Clear},
+        /// {@link #SetTermBuffer(String)},
+        /// {@link #SetStartOffset},
+        /// {@link #SetEndOffset}, and
+        /// {@link #SetType}.</summary>
+        /// <returns>this Token instance</returns>
+        public Token Reinit(String newTerm, int newStartOffset, int newEndOffset, String newType)
+        {
+            ClearNoTermBuffer();
+            SetTermBuffer(newTerm);
+            startOffset = newStartOffset;
+            endOffset = newEndOffset;
+            type = newType;
+            return this;
+        }
+
+        /// <summary>Shorthand for calling {@link #Clear},
+        /// {@link #SetTermBuffer(String, int, int)},
+        /// {@link #SetStartOffset},
+        /// {@link #SetEndOffset}, and
+        /// {@link #SetType}.</summary>
+        /// <returns>this Token instance</returns>
+        public Token Reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType)
+        {
+            ClearNoTermBuffer();
+            SetTermBuffer(newTerm, newTermOffset, newTermLength);
+            startOffset = newStartOffset;
+            endOffset = newEndOffset;
+            type = newType;
+            return this;
+        }
+
+        /// <summary>Shorthand for calling {@link #Clear},
+        /// {@link #SetTermBuffer(String)},
+        /// {@link #SetStartOffset},
+        /// {@link #SetEndOffset}, and
+        /// {@link #SetType} with Token.DEFAULT_TYPE.</summary>
+        /// <returns>this Token instance</returns>
+        public Token Reinit(String newTerm, int newStartOffset, int newEndOffset)
+        {
+            ClearNoTermBuffer();
+            SetTermBuffer(newTerm);
+            startOffset = newStartOffset;
+            endOffset = newEndOffset;
+            type = DEFAULT_TYPE;
+            return this;
+        }
+
+        /// <summary>Shorthand for calling {@link #Clear},
+        /// {@link #SetTermBuffer(String, int, int)},
+        /// {@link #SetStartOffset},
+        /// {@link #SetEndOffset}, and
+        /// {@link #SetType} with Token.DEFAULT_TYPE.</summary>
+        /// <returns>this Token instance</returns>
+        public Token Reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+        {
+            ClearNoTermBuffer();
+            SetTermBuffer(newTerm, newTermOffset, newTermLength);
+            startOffset = newStartOffset;
+            endOffset = newEndOffset;
+            type = DEFAULT_TYPE;
+            return this;
+        }
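The Reinit overloads collapse the clear-and-set sequence a reusing producer would otherwise spell out; a sketch (text is an assumed char[]; start and end are assumed offsets):

    // Without Reinit:
    reusableToken.Clear();
    reusableToken.SetTermBuffer(text, 0, text.Length);
    reusableToken.SetStartOffset(start);
    reusableToken.SetEndOffset(end);

    // With Reinit, equivalently:
    reusableToken.Reinit(text, 0, text.Length, start, end);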
+
+        /// <summary>Copy the prototype token's fields into this one. Note: Payloads are shared.</summary>
+        /// <param name="prototype">token to copy from</param>
+        public void Reinit(Token prototype)
+        {
+            prototype.InitTermBuffer();
+            SetTermBuffer(prototype.termBuffer, 0, prototype.termLength);
+            positionIncrement = prototype.positionIncrement;
+            flags = prototype.flags;
+            startOffset = prototype.startOffset;
+            endOffset = prototype.endOffset;
+            type = prototype.type;
+            payload = prototype.payload;
+        }
+
+        /// <summary>Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.</summary>
+        /// <param name="prototype">token to copy from</param>
+        /// <param name="newTerm">term text for this token</param>
+        public void Reinit(Token prototype, String newTerm)
+        {
+            SetTermBuffer(newTerm);
+            positionIncrement = prototype.positionIncrement;
+            flags = prototype.flags;
+            startOffset = prototype.startOffset;
+            endOffset = prototype.endOffset;
+            type = prototype.type;
+            payload = prototype.payload;
+        }
+
+        /// <summary>Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.</summary>
+        /// <param name="prototype">token to copy from</param>
+        /// <param name="newTermBuffer">buffer holding the new term text</param>
+        /// <param name="offset">starting index within newTermBuffer</param>
+        /// <param name="length">number of characters to copy</param>
+        public void Reinit(Token prototype, char[] newTermBuffer, int offset, int length)
+        {
+            SetTermBuffer(newTermBuffer, offset, length);
+            positionIncrement = prototype.positionIncrement;
+            flags = prototype.flags;
+            startOffset = prototype.startOffset;
+            endOffset = prototype.endOffset;
+            type = prototype.type;
+            payload = prototype.payload;
+        }
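Note the shared-payload caveat above; a minimal sketch (prototype is an assumed Token carrying a payload):

    Token copy = new Token();
    copy.Reinit(prototype);  // term, offsets, type, flags and payload copied
    // copy and prototype now reference the same Payload instance;
    // clone it first if either token will mutate the payload.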
+    }
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/TokenFilter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenFilter.cs Wed Jul 29 18:04:12 2009
@@ -23,8 +23,9 @@
     /// <summary>A TokenFilter is a TokenStream whose input is another token stream.
     /// <p>
     /// This is an abstract class.
-	/// NOTE: subclasses must override at least one of {@link
-	/// #Next()} or {@link #Next(Token)}.
+    /// NOTE: subclasses must override {@link #Next(Token)}.
+    /// It's also OK to instead override {@link #Next()}, but
+    /// that method is now deprecated in favor of {@link #Next(Token)}.
 	/// </summary>
     public abstract class TokenFilter : TokenStream
     {
@@ -42,5 +43,14 @@
         {
             input.Close();
         }
+
+        /// <summary>
+        /// Reset the filter as well as the input TokenStream.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            input.Reset();
+        }
     }
 }
\ No newline at end of file
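With Reset() now chaining to the wrapped stream, a stateful filter only has to clear its own state and call base; a hypothetical sketch:

    public class BufferingFilter : TokenFilter  // hypothetical filter
    {
        private readonly System.Collections.Queue pending = new System.Collections.Queue();

        public BufferingFilter(TokenStream input) : base(input) { }

        public override Token Next(Token reusableToken)
        {
            // ... buffering logic omitted ...
            return input.Next(reusableToken);
        }

        public override void Reset()
        {
            base.Reset();     // also resets the wrapped input TokenStream
            pending.Clear();  // plus this filter's own buffered state
        }
    }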

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/TokenStream.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenStream.cs Wed Jul 29 18:04:12 2009
@@ -32,32 +32,34 @@
 	/// <li>{@link TokenFilter}, a TokenStream
 	/// whose input is another TokenStream.
 	/// </ul>
-	/// NOTE: subclasses must override at least one of {@link
-	/// #Next()} or {@link #Next(Token)}.
-	/// </summary>
+    /// NOTE: subclasses must override {@link #Next(Token)}.  It's
+    /// also OK to instead override {@link #Next()} but that
+    /// method is now deprecated in favor of {@link #Next(Token)}.
+    /// </summary>
 	
 	public abstract class TokenStream
 	{
 		
 		/// <summary>Returns the next token in the stream, or null at EOS.
-		/// The returned Token is a "full private copy" (not
+		/// @deprecated The returned Token is a "full private copy" (not
 		/// re-used across calls to next()) but will be slower
 		/// than calling {@link #Next(Token)} instead.
 		/// </summary>
 		public virtual Token Next()
 		{
-			Token result = Next(new Token());
+            Token reusableToken = new Token();
+			Token nextToken = Next(reusableToken);
 			
-			if (result != null)
+			if (nextToken != null)
 			{
-				Payload p = result.GetPayload();
+				Payload p = nextToken.GetPayload();
 				if (p != null)
 				{
-					result.SetPayload((Payload) p.Clone());
+					nextToken.SetPayload((Payload) p.Clone());
 				}
 			}
 			
-			return result;
+			return nextToken;
 		}
 		
 		/// <summary>Returns the next token in the stream, or null at EOS.
@@ -77,14 +79,23 @@
 		/// <li>A producer must call {@link Token#Clear()}
 		/// before setting the fields in it & returning it</li>
 		/// </ul>
+        /// Also, the producer must make no assumptions about a
+        /// Token after it has been returned: the caller may
+        /// arbitrarily change it.  If the producer needs to hold
+        /// onto the token for subsequent calls, it must clone()
+        /// it before storing it.
 		/// Note that a {@link TokenFilter} is considered a consumer.
 		/// </summary>
-		/// <param name="result">a Token that may or may not be used to return
+		/// <param name="reusableToken">a Token that may or may not be used to
+        /// return; this parameter should never be null (the callee
+        /// is not required to check for null before using it, but it is a
+        /// good idea to assert that it is not null).
 		/// </param>
 		/// <returns> next token in the stream or null if end-of-stream was hit
 		/// </returns>
-		public virtual Token Next(Token result)
+		public virtual Token Next(/* in */ Token reusableToken)
 		{
+            // This default implementation ignores reusableToken and delegates to the deprecated Next()
 			return Next();
 		}
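A sketch of the reuse contract above from the consumer side (stream is an assumed TokenStream):

    Token reusableToken = new Token();
    for (Token t = stream.Next(reusableToken); t != null; t = stream.Next(reusableToken))
    {
        // t may or may not be reusableToken; consume its contents before the next call.
        System.Console.WriteLine(new System.String(t.TermBuffer(), 0, t.TermLength()));
    }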
 		
@@ -93,7 +104,13 @@
 		/// implement this method. Reset() is not needed for
 		/// the standard indexing process. However, if the Tokens 
 		/// of a TokenStream are intended to be consumed more than 
-		/// once, it is necessary to implement reset(). 
+		/// once, it is necessary to implement reset().  Note that
+		/// if your TokenStream caches tokens and feeds them back
+		/// again after a reset, it is imperative that you
+		/// clone the tokens when you store them away (on the
+		/// first pass) as well as when you return them (on future
+        /// passes after reset()).
 		/// </summary>
 		public virtual void  Reset()
 		{

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Tokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Tokenizer.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Tokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Tokenizer.cs Wed Jul 29 18:04:12 2009
@@ -24,9 +24,10 @@
 	/// <p>
 	/// This is an abstract class.
 	/// <p>
-	/// NOTE: subclasses must override at least one of {@link
-	/// #Next()} or {@link #Next(Token)}.
-	/// <p>
+    /// NOTE: subclasses must override {@link #Next(Token)}.
+    /// It's also OK to instead override {@link #Next()}, but
+    /// that method is now deprecated in favor of {@link #Next(Token)}.
+    /// <p>
 	/// NOTE: subclasses overriding {@link #Next(Token)} must  
 	/// call {@link Token#Clear()}.
 	/// </summary>

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/AssemblyInfo.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs Wed Jul 29 18:04:12 2009
@@ -28,12 +28,12 @@
 [assembly: AssemblyConfiguration("")]
 [assembly: AssemblyCompany("The Apache Software Foundation")]
 [assembly: AssemblyProduct("Lucene.Net")]
-[assembly: AssemblyCopyright("Copyright 2006 - 2008 The Apache Software Foundation")]
-[assembly: AssemblyTrademark("Copyright 2006 - 2008 The Apache Software Foundation")]
+[assembly: AssemblyCopyright("Copyright 2006 - 2009 The Apache Software Foundation")]
+[assembly: AssemblyTrademark("Copyright 2006 - 2009 The Apache Software Foundation")]
 [assembly: AssemblyDefaultAlias("Lucene.Net")]
 [assembly: AssemblyCulture("")]
 
-[assembly: AssemblyInformationalVersionAttribute("2.3.2")]
+[assembly: AssemblyInformationalVersionAttribute("2.4.0")]
 
 
 //
@@ -47,7 +47,7 @@
 // You can specify all the values or you can default the Revision and Build Numbers 
 // by using the '*' as shown below:
 
-[assembly: AssemblyVersion("2.3.2.001")]
+[assembly: AssemblyVersion("2.4.0.001")]
 
 
 //

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/AbstractField.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Document/AbstractField.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/AbstractField.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/AbstractField.cs Wed Jul 29 18:04:12 2009
@@ -38,9 +38,13 @@
 		protected internal bool isBinary = false;
 		protected internal bool isCompressed = false;
 		protected internal bool lazy = false;
-		protected internal float boost = 1.0f;
+        protected internal bool omitTf = false;
+        protected internal float boost = 1.0f;
 		// the one and only data object for all different kind of field values
-		protected internal System.Object fieldsData = null;
+		protected internal object fieldsData = null;
+        // length/offset for all primitive types
+        protected int binaryLength;
+        protected int binaryOffset;
 		
 		protected internal AbstractField()
 		{
@@ -77,23 +81,29 @@
 				this.isIndexed = false;
 				this.isTokenized = false;
 			}
-			else if (index == Field.Index.TOKENIZED)
+			else if (index == Field.Index.ANALYZED)
 			{
 				this.isIndexed = true;
 				this.isTokenized = true;
 			}
-			else if (index == Field.Index.UN_TOKENIZED)
+            else if (index == Field.Index.NOT_ANALYZED)
 			{
 				this.isIndexed = true;
 				this.isTokenized = false;
 			}
-			else if (index == Field.Index.NO_NORMS)
+            else if (index == Field.Index.NOT_ANALYZED_NO_NORMS)
 			{
 				this.isIndexed = true;
 				this.isTokenized = false;
 				this.omitNorms = true;
 			}
-			else
+            else if (index == Field.Index.ANALYZED_NO_NORMS)
+            {
+                this.isIndexed = true;
+                this.isTokenized = true;
+                this.omitNorms = true;
+            }
+            else
 			{
 				throw new System.ArgumentException("unknown index parameter " + index);
 			}
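For reference, the renamed Index options in use (field names and values are illustrative):

    Document doc = new Document();
    // Full-text field, run through the analyzer:
    doc.Add(new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED));
    // Exact-match key, indexed as a single term:
    doc.Add(new Field("isbn", "193239481X", Field.Store.YES, Field.Index.NOT_ANALYZED));
    // Exact-match key where boosts and length norms are irrelevant:
    doc.Add(new Field("status", "active", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));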
@@ -255,14 +265,71 @@
 		{
 			return isBinary;
 		}
-		
-		/// <summary>True if norms are omitted for this indexed field </summary>
-		public virtual bool GetOmitNorms()
+
+        /// <summary>
+        /// Return the raw byte[] for the binary field.  Note that
+        /// you must also call {@link #GetBinaryLength} and
+        /// {@link #GetBinaryOffset} to know which range of bytes in 
+        /// this returned array belong to the field.
+        /// </summary>
+        /// <returns>reference to the field value as byte[]</returns>
+        public byte[] GetBinaryValue()
+        {
+            return GetBinaryValue(null);
+        }
+
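+        /// <summary>
+        /// Return the raw byte[] for the binary field, or null if this field
+        /// holds no binary value.  The <code>result</code> parameter is ignored
+        /// by this base implementation; subclasses may use it as a buffer to reuse.
+        /// </summary>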
+        public virtual byte[] GetBinaryValue(byte[] result)
+        {
+            if (isBinary || fieldsData is byte[])
+                return (byte[])fieldsData;
+            else
+                return null;
+        }
+
+        /// <summary>
+        /// Returns the length of the byte[] segment that is used as the value.
+        /// If the Field is not binary the returned value is undefined.
+        /// </summary>
+        /// <returns>length of the byte[] segment that represents this Field value</returns>
+        public int GetBinaryLength()
+        {
+            if (isBinary)
+                if (!isCompressed)
+                    return binaryLength;
+                else
+                    return ((byte[])fieldsData).Length;
+            else if (fieldsData is byte[])
+                return ((byte[])fieldsData).Length;
+            else
+                return 0;
+        }
+
+        /// <summary>
+        /// Returns the offset into the byte[] segment that is used as the value.
+        /// If the Field is not binary the returned value is undefined.
+        /// </summary>
+        /// <returns>offset into the byte[] segment that represents this Field value</returns>
+        public int GetBinaryOffset()
+        {
+            return binaryOffset;
+        }
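Reading a binary value correctly therefore takes all three accessors; a sketch (field is an assumed binary Field):

    byte[] raw = field.GetBinaryValue();
    if (raw != null)
    {
        int offset = field.GetBinaryOffset();
        int length = field.GetBinaryLength();
        byte[] copy = new byte[length];
        System.Array.Copy(raw, offset, copy, 0, length);
    }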
+
+        /// <summary>True if norms are omitted for this indexed field </summary>
+        public virtual bool GetOmitNorms()
 		{
 			return omitNorms;
 		}
-		
-		/// <summary>Expert:
+
+        /// <summary>
+        /// Returns true if tf is omitted for this indexed field.
+        /// </summary>
+        /// <returns>true if tf is omitted for this indexed field</returns>
+        public virtual bool GetOmitTf()
+        {
+            return omitTf;
+        }
+
+        /// <summary>Expert:
 		/// 
 		/// If set, omit normalization factors associated with this indexed field.
 		/// This effectively disables indexing boosts and length normalization for this field.
@@ -271,8 +338,17 @@
 		{
 			this.omitNorms = omitNorms;
 		}
-		
-		public virtual bool IsLazy()
+
+        /// <summary>
+        /// Expert: If set, omit tf from postings of this indexed field.
+        /// </summary>
+        /// <param name="omitTf">true to omit term frequency information from this field's postings</param>
+        public void SetOmitTf(bool omitTf)
+        {
+            this.omitTf = omitTf;
+        }
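A sketch of the new flag: for pure boolean-match fields, dropping term frequencies shrinks the postings (field name and value are illustrative):

    Field tag = new Field("category", "archived", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
    tag.SetOmitTf(true);  // postings for this field carry no term frequency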
+
+        public virtual bool IsLazy()
 		{
 			return lazy;
 		}
@@ -329,7 +405,11 @@
 			{
 				result.Append(",omitNorms");
 			}
-			if (lazy)
+            if (omitTf)
+            {
+                result.Append(",omitTf");
+            }
+            if (lazy)
 			{
 				result.Append(",lazy");
 			}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/DateTools.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Document/DateTools.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/DateTools.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Document/DateTools.cs Wed Jul 29 18:04:12 2009
@@ -60,7 +60,7 @@
 		/// {@link #Round(Date, DateTools.Resolution)}
 		/// </param>
 		/// <returns> a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
-		/// depeding on <code>resolution</code>; using UTC as timezone 
+		/// depending on <code>resolution</code>; using GMT as timezone
 		/// </returns>
 		public static System.String DateToString(System.DateTime date, Resolution resolution)
 		{
@@ -76,7 +76,7 @@
 		/// {@link #Round(long, DateTools.Resolution)}
 		/// </param>
 		/// <returns> a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
-		/// depeding on <code>resolution</code>; using UTC as timezone
+		/// depending on <code>resolution</code>; using GMT as timezone
 		/// </returns>
 		public static System.String TimeToString(long time, Resolution resolution)
 		{
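Usage sketch for the resolution-based formatting these comments describe:

    // Yields "20090729" at DAY resolution; suitable as a sortable indexed term.
    string day = DateTools.DateToString(new System.DateTime(2009, 7, 29), DateTools.Resolution.DAY);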


