lucenenet-commits mailing list archives

From ccurr...@apache.org
Subject [28/51] [partial] Mass convert mixed tabs to spaces
Date Wed, 03 Apr 2013 17:40:11 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/Analyzer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/Analyzer.cs b/src/core/Analysis/Analyzer.cs
index cea0ee3..353ea24 100644
--- a/src/core/Analysis/Analyzer.cs
+++ b/src/core/Analysis/Analyzer.cs
@@ -22,77 +22,77 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis
 {
-	/// <summary>An Analyzer builds TokenStreams, which analyze text.  It thus represents a
-	/// policy for extracting index terms from text.
-	/// <p/>
-	/// Typical implementations first build a Tokenizer, which breaks the stream of
-	/// characters from the Reader into raw Tokens.  One or more TokenFilters may
-	/// then be applied to the output of the Tokenizer.
-	/// </summary>
-	public abstract class Analyzer : IDisposable
-	{
-		/// <summary>Creates a TokenStream which tokenizes all the text in the provided
-		/// Reader.  Must be able to handle null field name for
-		/// backward compatibility.
-		/// </summary>
-		public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);
-		
-		/// <summary>Creates a TokenStream that is allowed to be re-used
-		/// from the previous time that the same thread called
-		/// this method.  Callers that do not need to use more
-		/// than one TokenStream at the same time from this
-		/// analyzer should use this method for better
-		/// performance.
-		/// </summary>
-		public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
-		{
-			return TokenStream(fieldName, reader);
-		}
-		
-		private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
-	    private bool isDisposed;
+    /// <summary>An Analyzer builds TokenStreams, which analyze text.  It thus represents a
+    /// policy for extracting index terms from text.
+    /// <p/>
+    /// Typical implementations first build a Tokenizer, which breaks the stream of
+    /// characters from the Reader into raw Tokens.  One or more TokenFilters may
+    /// then be applied to the output of the Tokenizer.
+    /// </summary>
+    public abstract class Analyzer : IDisposable
+    {
+        /// <summary>Creates a TokenStream which tokenizes all the text in the provided
+        /// Reader.  Must be able to handle null field name for
+        /// backward compatibility.
+        /// </summary>
+        public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);
+        
+        /// <summary>Creates a TokenStream that is allowed to be re-used
+        /// from the previous time that the same thread called
+        /// this method.  Callers that do not need to use more
+        /// than one TokenStream at the same time from this
+        /// analyzer should use this method for better
+        /// performance.
+        /// </summary>
+        public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
+        {
+            return TokenStream(fieldName, reader);
+        }
+        
+        private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
+        private bool isDisposed;
 
-	    /// <summary>Used by Analyzers that implement reusableTokenStream
-	    /// to retrieve previously saved TokenStreams for re-use
-	    /// by the same thread. 
-	    /// </summary>
-	    protected internal virtual object PreviousTokenStream
-	    {
-	        get
-	        {
-	            if (tokenStreams == null)
-	            {
-	                throw new AlreadyClosedException("this Analyzer is closed");
-	            }
-	            return tokenStreams.Get();
-	        }
-	        set
-	        {
-	            if (tokenStreams == null)
-	            {
-	                throw new AlreadyClosedException("this Analyzer is closed");
-	            }
-	            tokenStreams.Set(value);
-	        }
-	    }
+        /// <summary>Used by Analyzers that implement reusableTokenStream
+        /// to retrieve previously saved TokenStreams for re-use
+        /// by the same thread. 
+        /// </summary>
+        protected internal virtual object PreviousTokenStream
+        {
+            get
+            {
+                if (tokenStreams == null)
+                {
+                    throw new AlreadyClosedException("this Analyzer is closed");
+                }
+                return tokenStreams.Get();
+            }
+            set
+            {
+                if (tokenStreams == null)
+                {
+                    throw new AlreadyClosedException("this Analyzer is closed");
+                }
+                tokenStreams.Set(value);
+            }
+        }
 
-	    [Obsolete()]
-		protected internal bool overridesTokenStreamMethod = false;
-		
-		/// <deprecated> This is only present to preserve
-		/// back-compat of classes that subclass a core analyzer
-		/// and override tokenStream but not reusableTokenStream 
-		/// </deprecated>
-		/// <summary>
+        [Obsolete()]
+        protected internal bool overridesTokenStreamMethod = false;
+        
+        /// <deprecated> This is only present to preserve
+        /// back-compat of classes that subclass a core analyzer
+        /// and override tokenStream but not reusableTokenStream 
+        /// </deprecated>
+        /// <summary>
         /// Java uses Class&lt;? extends Analyzer&gt; to constrain <typeparamref name="TClass"/> to
         /// only Types that inherit from Analyzer.  C# does not have a generic type class,
         /// i.e. Type&lt;T&gt;.  The method signature stays the same, and an exception may
         /// still be thrown if the method doesn't exist.
-		/// </summary>
+        /// </summary>
         [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")]
-		protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
+        protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
             where TClass : Analyzer
-		{
+        {
             try
             {
                 System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) });
@@ -103,50 +103,50 @@ namespace Lucene.Net.Analysis
                 // can't happen, as baseClass is subclass of Analyzer
                 overridesTokenStreamMethod = false;
             }
-		}
-		
-		
-		/// <summary> Invoked before indexing a Fieldable instance if
-		/// terms have already been added to that field.  This allows custom
-		/// analyzers to place an automatic position increment gap between
-		/// Fieldable instances using the same field name.  The default
-		/// position increment gap is 0.  With a 0 position increment gap and
-		/// the typical default token position increment of 1, all terms in a field,
-		/// including across Fieldable instances, are in successive positions, allowing
-		/// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
-		/// 
-		/// </summary>
-		/// <param name="fieldName">Fieldable name being indexed.
-		/// </param>
-		/// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
-		/// </returns>
-		public virtual int GetPositionIncrementGap(String fieldName)
-		{
-			return 0;
-		}
-		
-		/// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
-		/// Token offsets instead.  By default this returns 1 for
-		/// tokenized fields, as if the fields were joined
-		/// with an extra space character, and 0 for un-tokenized
-		/// fields.  This method is only called if the field
-		/// produced at least one token for indexing.
-		/// 
-		/// </summary>
-		/// <param name="field">the field just indexed
-		/// </param>
-		/// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
-		/// </returns>
-		public virtual int GetOffsetGap(IFieldable field)
-		{
-			return field.IsTokenized ? 1 : 0;
-		}
+        }
+        
+        
+        /// <summary> Invoked before indexing a Fieldable instance if
+        /// terms have already been added to that field.  This allows custom
+        /// analyzers to place an automatic position increment gap between
+        /// Fieldable instances using the same field name.  The default
+        /// position increment gap is 0.  With a 0 position increment gap and
+        /// the typical default token position increment of 1, all terms in a field,
+        /// including across Fieldable instances, are in successive positions, allowing
+        /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
+        /// 
+        /// </summary>
+        /// <param name="fieldName">Fieldable name being indexed.
+        /// </param>
+        /// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
+        /// </returns>
+        public virtual int GetPositionIncrementGap(String fieldName)
+        {
+            return 0;
+        }
+        
+        /// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
+        /// Token offsets instead.  By default this returns 1 for
+        /// tokenized fields, as if the fields were joined
+        /// with an extra space character, and 0 for un-tokenized
+        /// fields.  This method is only called if the field
+        /// produced at least one token for indexing.
+        /// 
+        /// </summary>
+        /// <param name="field">the field just indexed
+        /// </param>
+        /// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
+        /// </returns>
+        public virtual int GetOffsetGap(IFieldable field)
+        {
+            return field.IsTokenized ? 1 : 0;
+        }
 
-		/// <summary>Frees persistent resources used by this Analyzer </summary>
-		public void  Close()
-		{
-		    Dispose();
-		}
+        /// <summary>Frees persistent resources used by this Analyzer </summary>
+        public void  Close()
+        {
+            Dispose();
+        }
 
         public virtual void Dispose()
         {
@@ -167,5 +167,5 @@ namespace Lucene.Net.Analysis
             }
             isDisposed = true;
         }
-	}
+    }
 }
\ No newline at end of file
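
For reference, a minimal sketch of the reuse contract these doc-comments describe: TokenStream builds a fresh chain per call, while ReusableTokenStream caches one Tokenizer per thread through PreviousTokenStream. MyTokenizer is a hypothetical Tokenizer subclass; the same pattern appears verbatim in KeywordAnalyzer.cs later in this commit.

using Lucene.Net.Analysis;

public class SketchAnalyzer : Analyzer
{
    // A fresh chain per call: a Tokenizer, optionally wrapped in TokenFilters.
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        return new MyTokenizer(reader); // hypothetical Tokenizer subclass
    }

    // Reuse the per-thread instance saved in PreviousTokenStream.
    public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
    {
        var tokenizer = (Tokenizer) PreviousTokenStream;
        if (tokenizer == null)
        {
            tokenizer = new MyTokenizer(reader);
            PreviousTokenStream = tokenizer;
        }
        else
        {
            tokenizer.Reset(reader); // reposition the cached tokenizer on the new reader
        }
        return tokenizer;
    }
}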

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/BaseCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/BaseCharFilter.cs b/src/core/Analysis/BaseCharFilter.cs
index b84fce0..7c91e1c 100644
--- a/src/core/Analysis/BaseCharFilter.cs
+++ b/src/core/Analysis/BaseCharFilter.cs
@@ -68,7 +68,7 @@ namespace Lucene.Net.Analysis
 
             if (currentOff < offsets[mid])
                 return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
-        	return currentOff + diffs[mid];
+            return currentOff + diffs[mid];
         }
 
         protected int LastCumulativeDiff
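
The hunk above is the tail of BaseCharFilter's binary search over its offset-correction map. The underlying idea, sketched below with a linear scan and made-up data (the offsets/diffs values are illustrative, not from the source): offsets[i] records an output offset at which the cumulative input-minus-output difference becomes diffs[i], and correcting an offset means adding the diff of the last correction point at or before it.

using System;

static class OffsetCorrectionSketch
{
    static readonly int[] offsets = { 5, 12 }; // hypothetical correction points
    static readonly int[] diffs = { 2, 5 };    // hypothetical cumulative diffs

    static int Correct(int currentOff)
    {
        int diff = 0; // BaseCharFilter binary-searches; a scan is clearer here
        for (int i = 0; i < offsets.Length && offsets[i] <= currentOff; i++)
            diff = diffs[i];
        return currentOff + diff;
    }

    static void Main()
    {
        Console.WriteLine(Correct(3));  // 3: before any correction point
        Console.WriteLine(Correct(7));  // 9: 5 <= 7 < 12, so +2
        Console.WriteLine(Correct(20)); // 25: past 12, so +5
    }
}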

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/CachingTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CachingTokenFilter.cs b/src/core/Analysis/CachingTokenFilter.cs
index c5f7694..3661362 100644
--- a/src/core/Analysis/CachingTokenFilter.cs
+++ b/src/core/Analysis/CachingTokenFilter.cs
@@ -17,70 +17,70 @@
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> This class can be used if the token attributes of a TokenStream
-	/// are intended to be consumed more than once. It caches
-	/// all token attribute states locally in a List.
-	/// 
-	/// <p/>CachingTokenFilter implements the optional method
-	/// <see cref="TokenStream.Reset()" />, which repositions the
-	/// stream to the first Token. 
-	/// </summary>
-	public sealed class CachingTokenFilter : TokenFilter
-	{
+    
+    /// <summary> This class can be used if the token attributes of a TokenStream
+    /// are intended to be consumed more than once. It caches
+    /// all token attribute states locally in a List.
+    /// 
+    /// <p/>CachingTokenFilter implements the optional method
+    /// <see cref="TokenStream.Reset()" />, which repositions the
+    /// stream to the first Token. 
+    /// </summary>
+    public sealed class CachingTokenFilter : TokenFilter
+    {
         private System.Collections.Generic.LinkedList<State> cache = null;
-		private System.Collections.Generic.IEnumerator<State> iterator = null;
-		private State finalState;
-		
-		public CachingTokenFilter(TokenStream input):base(input)
-		{
-		}
+        private System.Collections.Generic.IEnumerator<State> iterator = null;
+        private State finalState;
+        
+        public CachingTokenFilter(TokenStream input):base(input)
+        {
+        }
 
-		public override bool IncrementToken()
-		{
-			if (cache == null)
-			{
-				// fill cache lazily
-				cache = new System.Collections.Generic.LinkedList<State>();
-				FillCache();
-				iterator = cache.GetEnumerator();
-			}
-			
-			if (!iterator.MoveNext())
-			{
-				// the cache is exhausted, return false
-				return false;
-			}
-			// Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
-			RestoreState(iterator.Current);
-			return true;
-		}
-		
-		public override void  End()
-		{
-			if (finalState != null)
-			{
-				RestoreState(finalState);
-			}
-		}
-		
-		public override void  Reset()
-		{
-			if (cache != null)
-			{
-				iterator = cache.GetEnumerator();
-			}
-		}
-		
-		private void  FillCache()
-		{
-			while (input.IncrementToken())
-			{
-				cache.AddLast(CaptureState());
-			}
-			// capture final state
-			input.End();
-			finalState = CaptureState();
-		}
-	}
+        public override bool IncrementToken()
+        {
+            if (cache == null)
+            {
+                // fill cache lazily
+                cache = new System.Collections.Generic.LinkedList<State>();
+                FillCache();
+                iterator = cache.GetEnumerator();
+            }
+            
+            if (!iterator.MoveNext())
+            {
+                // the cache is exhausted, return false
+                return false;
+            }
+            // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
+            RestoreState(iterator.Current);
+            return true;
+        }
+        
+        public override void  End()
+        {
+            if (finalState != null)
+            {
+                RestoreState(finalState);
+            }
+        }
+        
+        public override void  Reset()
+        {
+            if (cache != null)
+            {
+                iterator = cache.GetEnumerator();
+            }
+        }
+        
+        private void  FillCache()
+        {
+            while (input.IncrementToken())
+            {
+                cache.AddLast(CaptureState());
+            }
+            // capture final state
+            input.End();
+            finalState = CaptureState();
+        }
+    }
 }
\ No newline at end of file
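
A usage sketch for the class above, consuming the same token attributes twice; analyzer stands for any Analyzer instance and is assumed, not part of this commit.

TokenStream stream = analyzer.TokenStream("body", new System.IO.StringReader("some text"));
var cached = new CachingTokenFilter(stream);

while (cached.IncrementToken())
{
    // first pass: FillCache() runs lazily and records each attribute state
}
cached.Reset(); // repositions to the first cached token
while (cached.IncrementToken())
{
    // second pass: states are replayed from the cache, not from the input
}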

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CharArraySet.cs b/src/core/Analysis/CharArraySet.cs
index e7df0ba..5564f74 100644
--- a/src/core/Analysis/CharArraySet.cs
+++ b/src/core/Analysis/CharArraySet.cs
@@ -300,8 +300,8 @@ namespace Lucene.Net.Analysis
 
         public bool Contains(object item)
         {
-        	var text = item as char[];
-        	return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
+            var text = item as char[];
+            return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
         }
 
         public bool Add(object item)
@@ -454,7 +454,7 @@ namespace Lucene.Net.Analysis
         /// </summary>
         public class CharArraySetEnumerator : IEnumerator<string>
         {
-        	readonly CharArraySet _Creator;
+            readonly CharArraySet _Creator;
             int pos = -1;
             char[] cur;
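
For context, the Contains(object) body above keeps two lookup paths: a char[] is matched by range without allocating a string, while any other object falls back to its ToString() form. A small sketch (stopSet is a hypothetical, already-populated CharArraySet):

object word = "the";
object chars = new[] { 't', 'h', 'e' };
bool viaString = stopSet.Contains(word);  // falls back to Contains(item.ToString())
bool viaChars = stopSet.Contains(chars);  // Contains(text, 0, text.Length), no allocation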
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/CharFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CharFilter.cs b/src/core/Analysis/CharFilter.cs
index 039f841..d761e6f 100644
--- a/src/core/Analysis/CharFilter.cs
+++ b/src/core/Analysis/CharFilter.cs
@@ -17,42 +17,42 @@
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> Subclasses of CharFilter can be chained to filter CharStream.
-	/// They can be used as <see cref="System.IO.TextReader" /> with additional offset
-	/// correction. <see cref="Tokenizer" />s will automatically use <see cref="CorrectOffset" />
-	/// if a CharFilter/CharStream subclass is used.
-	/// 
-	/// </summary>
-	/// <version>  $Id$
-	/// 
-	/// </version>
-	public abstract class CharFilter : CharStream
-	{
+    
+    /// <summary> Subclasses of CharFilter can be chained to filter CharStream.
+    /// They can be used as <see cref="System.IO.TextReader" /> with additional offset
+    /// correction. <see cref="Tokenizer" />s will automatically use <see cref="CorrectOffset" />
+    /// if a CharFilter/CharStream subclass is used.
+    /// 
+    /// </summary>
+    /// <version>  $Id$
+    /// 
+    /// </version>
+    public abstract class CharFilter : CharStream
+    {
         private long currentPosition = -1;
-	    private bool isDisposed;
-		protected internal CharStream input;
-		
-		protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed)
-		{
-			input = in_Renamed;
-		}
-		
-		/// <summary>Subclass may want to override to correct the current offset.</summary>
-		/// <param name="currentOff">current offset</param>
-		/// <returns>corrected offset</returns>
-		protected internal virtual int Correct(int currentOff)
+        private bool isDisposed;
+        protected internal CharStream input;
+        
+        protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed)
         {
-			return currentOff;
-		}
-		
-		/// <summary> Chains the corrected offset through the input
-		/// CharFilter.
-		/// </summary>
-		public override int CorrectOffset(int currentOff)
-		{
-			return input.CorrectOffset(Correct(currentOff));
-		}
+            input = in_Renamed;
+        }
+        
+        /// <summary>Subclass may want to override to correct the current offset.</summary>
+        /// <param name="currentOff">current offset</param>
+        /// <returns>corrected offset</returns>
+        protected internal virtual int Correct(int currentOff)
+        {
+            return currentOff;
+        }
+        
+        /// <summary> Chains the corrected offset through the input
+        /// CharFilter.
+        /// </summary>
+        public override int CorrectOffset(int currentOff)
+        {
+            return input.CorrectOffset(Correct(currentOff));
+        }
 
         protected override void Dispose(bool disposing)
         {
@@ -70,26 +70,26 @@ namespace Lucene.Net.Analysis
             isDisposed = true;
             base.Dispose(disposing);
         }
-		
-		public override int Read(System.Char[] cbuf, int off, int len)
+        
+        public override int Read(System.Char[] cbuf, int off, int len)
         {
-			return input.Read(cbuf, off, len);
-		}
-		
-		public bool MarkSupported()
+            return input.Read(cbuf, off, len);
+        }
+        
+        public bool MarkSupported()
         {
             return input.BaseStream.CanSeek;
-		}
-		
-		public void Mark(int readAheadLimit)
+        }
+        
+        public void Mark(int readAheadLimit)
         {
             currentPosition = input.BaseStream.Position;
-			input.BaseStream.Position = readAheadLimit;
-		}
-		
-		public void Reset()
+            input.BaseStream.Position = readAheadLimit;
+        }
+        
+        public void Reset()
         {
-			input.BaseStream.Position = currentPosition;
-		}
-	}
+            input.BaseStream.Position = currentPosition;
+        }
+    }
 }
\ No newline at end of file
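
A minimal CharFilter sketch against the API above: a hypothetical filter that has dropped a known number of leading characters and therefore shifts offsets forward so they land in the original reader. Illustrative only; a real filter would also override Read to actually skip those characters.

public class SkipPrefixCharFilter : CharFilter
{
    private readonly int skipped;

    public SkipPrefixCharFilter(CharStream input, int skipped) : base(input)
    {
        this.skipped = skipped;
    }

    // Every output offset sits 'skipped' characters later in the input;
    // CorrectOffset then chains the result through the wrapped stream.
    protected internal override int Correct(int currentOff)
    {
        return currentOff + skipped;
    }
}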

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/CharReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CharReader.cs b/src/core/Analysis/CharReader.cs
index 2120bd4..7dc9f50 100644
--- a/src/core/Analysis/CharReader.cs
+++ b/src/core/Analysis/CharReader.cs
@@ -17,41 +17,41 @@
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> CharReader is a Reader wrapper. It reads chars from
-	/// Reader and outputs <see cref="CharStream" />, defining an
-	/// identity <see cref="CorrectOffset" /> method that
-	/// simply returns the provided offset.
-	/// </summary>
-	public sealed class CharReader:CharStream
-	{
+    
+    /// <summary> CharReader is a Reader wrapper. It reads chars from
+    /// Reader and outputs <see cref="CharStream" />, defining an
+    /// identity <see cref="CorrectOffset" /> method that
+    /// simply returns the provided offset.
+    /// </summary>
+    public sealed class CharReader:CharStream
+    {
         private long currentPosition = -1;
 
-	    private bool isDisposed;
+        private bool isDisposed;
 
-		internal System.IO.StreamReader input;
-		
-		public static CharStream Get(System.IO.TextReader input)
-		{
-			var charStream = input as CharStream;
-			if (charStream != null)
-				return charStream;
-			
-			// {{Aroush-2.9}} isn't there a better (faster) way to do this?
-			var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd()));
-			return new CharReader(new System.IO.StreamReader(theString));
-			//return input is CharStream?(CharStream) input:new CharReader(input);
-		}
-		
-		private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed)
-		{
-			input = in_Renamed;
-		}
-		
-		public override int CorrectOffset(int currentOff)
-		{
-			return currentOff;
-		}
+        internal System.IO.StreamReader input;
+        
+        public static CharStream Get(System.IO.TextReader input)
+        {
+            var charStream = input as CharStream;
+            if (charStream != null)
+                return charStream;
+            
+            // {{Aroush-2.9}} isn't there a better (faster) way to do this?
+            var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd()));
+            return new CharReader(new System.IO.StreamReader(theString));
+            //return input is CharStream?(CharStream) input:new CharReader(input);
+        }
+        
+        private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed)
+        {
+            input = in_Renamed;
+        }
+        
+        public override int CorrectOffset(int currentOff)
+        {
+            return currentOff;
+        }
 
         protected override void Dispose(bool disposing)
         {
@@ -69,26 +69,26 @@ namespace Lucene.Net.Analysis
             isDisposed = true;
             base.Dispose(disposing);
         }
-		
-		public  override int Read(System.Char[] cbuf, int off, int len)
-		{
-			return input.Read(cbuf, off, len);
-		}
-		
-		public bool MarkSupported()
-		{
-			return input.BaseStream.CanSeek;
-		}
-		
-		public void  Mark(int readAheadLimit)
-		{
-			currentPosition = input.BaseStream.Position;
-			input.BaseStream.Position = readAheadLimit;
+        
+        public  override int Read(System.Char[] cbuf, int off, int len)
+        {
+            return input.Read(cbuf, off, len);
+        }
+        
+        public bool MarkSupported()
+        {
+            return input.BaseStream.CanSeek;
         }
-		
-		public void  Reset()
-		{
-			input.BaseStream.Position = currentPosition;
+        
+        public void  Mark(int readAheadLimit)
+        {
+            currentPosition = input.BaseStream.Position;
+            input.BaseStream.Position = readAheadLimit;
+        }
+        
+        public void  Reset()
+        {
+            input.BaseStream.Position = currentPosition;
         }
-	}
+    }
 }
\ No newline at end of file
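
Usage sketch for the factory above: Get() passes an existing CharStream through unchanged and otherwise wraps the TextReader (currently by buffering it into a MemoryStream, per the {{Aroush-2.9}} note).

System.IO.TextReader reader = new System.IO.StringReader("to be, or not to be");
CharStream stream = CharReader.Get(reader); // wraps the plain TextReader
int corrected = stream.CorrectOffset(7);    // identity: returns 7
CharStream same = CharReader.Get(stream);   // already a CharStream: returned as-is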

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/CharStream.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CharStream.cs b/src/core/Analysis/CharStream.cs
index 0b36fe2..22aaaae 100644
--- a/src/core/Analysis/CharStream.cs
+++ b/src/core/Analysis/CharStream.cs
@@ -17,29 +17,29 @@
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> CharStream adds <see cref="CorrectOffset" />
-	/// functionality over <see cref="System.IO.TextReader" />.  All Tokenizers accept a
-	/// CharStream instead of <see cref="System.IO.TextReader" /> as input, which enables
-	/// arbitrary character based filtering before tokenization. 
-	/// The <see cref="CorrectOffset" /> method fixes offsets to account for
-	/// removal or insertion of characters, so that the offsets
-	/// reported in the tokens match the character offsets of the
-	/// original Reader.
+    
+    /// <summary> CharStream adds <see cref="CorrectOffset" />
+    /// functionality over <see cref="System.IO.TextReader" />.  All Tokenizers accept a
+    /// CharStream instead of <see cref="System.IO.TextReader" /> as input, which enables
+    /// arbitrary character based filtering before tokenization. 
+    /// The <see cref="CorrectOffset" /> method fixes offsets to account for
+    /// removal or insertion of characters, so that the offsets
+    /// reported in the tokens match the character offsets of the
+    /// original Reader.
     /// </summary>
-	public abstract class CharStream : System.IO.StreamReader
-	{
-	    protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream)
+    public abstract class CharStream : System.IO.StreamReader
+    {
+        protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream)
         {
         }
-		
-		/// <summary> Called by CharFilter(s) and Tokenizer to correct token offset.
-		/// 
-		/// </summary>
-		/// <param name="currentOff">offset as seen in the output
-		/// </param>
-		/// <returns> corrected offset based on the input
-		/// </returns>
-		public abstract int CorrectOffset(int currentOff);
-	}
+        
+        /// <summary> Called by CharFilter(s) and Tokenizer to correct token offset.
+        /// 
+        /// </summary>
+        /// <param name="currentOff">offset as seen in the output
+        /// </param>
+        /// <returns> corrected offset based on the input
+        /// </returns>
+        public abstract int CorrectOffset(int currentOff);
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/CharTokenizer.cs b/src/core/Analysis/CharTokenizer.cs
index 22423ec..3c34664 100644
--- a/src/core/Analysis/CharTokenizer.cs
+++ b/src/core/Analysis/CharTokenizer.cs
@@ -20,116 +20,116 @@ using AttributeSource = Lucene.Net.Util.AttributeSource;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary>An abstract base class for simple, character-oriented tokenizers.</summary>
-	public abstract class CharTokenizer:Tokenizer
-	{
-	    protected CharTokenizer(System.IO.TextReader input):base(input)
-		{
-			offsetAtt = AddAttribute<IOffsetAttribute>();
+    
+    /// <summary>An abstract base class for simple, character-oriented tokenizers.</summary>
+    public abstract class CharTokenizer:Tokenizer
+    {
+        protected CharTokenizer(System.IO.TextReader input):base(input)
+        {
+            offsetAtt = AddAttribute<IOffsetAttribute>();
             termAtt = AddAttribute<ITermAttribute>();
-		}
+        }
 
-	    protected CharTokenizer(AttributeSource source, System.IO.TextReader input):base(source, input)
-		{
+        protected CharTokenizer(AttributeSource source, System.IO.TextReader input):base(source, input)
+        {
             offsetAtt = AddAttribute<IOffsetAttribute>();
             termAtt = AddAttribute<ITermAttribute>();
-		}
+        }
 
-	    protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory, input)
-		{
+        protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory, input)
+        {
             offsetAtt = AddAttribute<IOffsetAttribute>();
             termAtt = AddAttribute<ITermAttribute>();
-		}
-		
-		private int offset = 0, bufferIndex = 0, dataLen = 0;
-		private const int MAX_WORD_LEN = 255;
-		private const int IO_BUFFER_SIZE = 4096;
-		private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
-		
-		private readonly ITermAttribute termAtt;
-		private readonly IOffsetAttribute offsetAtt;
-		
-		/// <summary>Returns true iff a character should be included in a token.  This
-		/// tokenizer generates as tokens adjacent sequences of characters which
-		/// satisfy this predicate.  Characters for which this is false are used to
-		/// define token boundaries and are not included in tokens. 
-		/// </summary>
-		protected internal abstract bool IsTokenChar(char c);
-		
-		/// <summary>Called on each token character to normalize it before it is added to the
-		/// token.  The default implementation does nothing.  Subclasses may use this
-		/// to, e.g., lowercase tokens. 
-		/// </summary>
-		protected internal virtual char Normalize(char c)
-		{
-			return c;
-		}
-		
-		public override bool IncrementToken()
-		{
-			ClearAttributes();
-			int length = 0;
-			int start = bufferIndex;
-			char[] buffer = termAtt.TermBuffer();
-			while (true)
-			{
-				
-				if (bufferIndex >= dataLen)
-				{
-					offset += dataLen;
-					dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
-					if (dataLen <= 0)
-					{
-						dataLen = 0; // so next offset += dataLen won't decrement offset
-						if (length > 0)
-							break;
-						return false;
-					}
-					bufferIndex = 0;
-				}
-				
-				char c = ioBuffer[bufferIndex++];
-				
-				if (IsTokenChar(c))
-				{
-					// if it's a token char
-					
-					if (length == 0)
-					// start of token
-						start = offset + bufferIndex - 1;
-					else if (length == buffer.Length)
-						buffer = termAtt.ResizeTermBuffer(1 + length);
-					
-					buffer[length++] = Normalize(c); // buffer it, normalized
-					
-					if (length == MAX_WORD_LEN)
-					// buffer overflow!
-						break;
-				}
-				else if (length > 0)
-				// at non-Letter w/ chars
-					break; // return 'em
-			}
-			
-			termAtt.SetTermLength(length);
-			offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
-			return true;
-		}
-		
-		public override void  End()
-		{
-			// set final offset
-			int finalOffset = CorrectOffset(offset);
-			offsetAtt.SetOffset(finalOffset, finalOffset);
-		}
-		
-		public override void  Reset(System.IO.TextReader input)
-		{
-			base.Reset(input);
-			bufferIndex = 0;
-			offset = 0;
-			dataLen = 0;
-		}
-	}
+        }
+        
+        private int offset = 0, bufferIndex = 0, dataLen = 0;
+        private const int MAX_WORD_LEN = 255;
+        private const int IO_BUFFER_SIZE = 4096;
+        private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
+        
+        private readonly ITermAttribute termAtt;
+        private readonly IOffsetAttribute offsetAtt;
+        
+        /// <summary>Returns true iff a character should be included in a token.  This
+        /// tokenizer generates as tokens adjacent sequences of characters which
+        /// satisfy this predicate.  Characters for which this is false are used to
+        /// define token boundaries and are not included in tokens. 
+        /// </summary>
+        protected internal abstract bool IsTokenChar(char c);
+        
+        /// <summary>Called on each token character to normalize it before it is added to the
+        /// token.  The default implementation does nothing.  Subclasses may use this
+        /// to, e.g., lowercase tokens. 
+        /// </summary>
+        protected internal virtual char Normalize(char c)
+        {
+            return c;
+        }
+        
+        public override bool IncrementToken()
+        {
+            ClearAttributes();
+            int length = 0;
+            int start = bufferIndex;
+            char[] buffer = termAtt.TermBuffer();
+            while (true)
+            {
+                
+                if (bufferIndex >= dataLen)
+                {
+                    offset += dataLen;
+                    dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
+                    if (dataLen <= 0)
+                    {
+                        dataLen = 0; // so next offset += dataLen won't decrement offset
+                        if (length > 0)
+                            break;
+                        return false;
+                    }
+                    bufferIndex = 0;
+                }
+                
+                char c = ioBuffer[bufferIndex++];
+                
+                if (IsTokenChar(c))
+                {
+                    // if it's a token char
+                    
+                    if (length == 0)
+                    // start of token
+                        start = offset + bufferIndex - 1;
+                    else if (length == buffer.Length)
+                        buffer = termAtt.ResizeTermBuffer(1 + length);
+                    
+                    buffer[length++] = Normalize(c); // buffer it, normalized
+                    
+                    if (length == MAX_WORD_LEN)
+                    // buffer overflow!
+                        break;
+                }
+                else if (length > 0)
+                // at non-Letter w/ chars
+                    break; // return 'em
+            }
+            
+            termAtt.SetTermLength(length);
+            offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
+            return true;
+        }
+        
+        public override void  End()
+        {
+            // set final offset
+            int finalOffset = CorrectOffset(offset);
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+        
+        public override void  Reset(System.IO.TextReader input)
+        {
+            base.Reset(input);
+            bufferIndex = 0;
+            offset = 0;
+            dataLen = 0;
+        }
+    }
 }
\ No newline at end of file
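
A sketch of the two extension points above, in the spirit of Lucene's LetterTokenizer and LowerCaseTokenizer (this class is not part of the commit): IsTokenChar decides token boundaries, and Normalize rewrites each character as it is buffered.

public class LowerCaseLetterTokenizer : CharTokenizer
{
    public LowerCaseLetterTokenizer(System.IO.TextReader input) : base(input)
    {
    }

    // Runs of letters become tokens; every other character is a boundary.
    protected internal override bool IsTokenChar(char c)
    {
        return char.IsLetter(c);
    }

    // Lower-case each character before it is added to the term buffer.
    protected internal override char Normalize(char c)
    {
        return char.ToLower(c);
    }
}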

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/ISOLatin1AccentFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/ISOLatin1AccentFilter.cs b/src/core/Analysis/ISOLatin1AccentFilter.cs
index 5fd839e..a6fde44 100644
--- a/src/core/Analysis/ISOLatin1AccentFilter.cs
+++ b/src/core/Analysis/ISOLatin1AccentFilter.cs
@@ -1,4 +1,4 @@
-/* 
+/* 
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,325 +20,325 @@ using Lucene.Net.Analysis.Tokenattributes;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> A filter that replaces accented characters in the ISO Latin 1 character set 
-	/// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
-	/// <p/>
-	/// For instance, '&#192;' will be replaced by 'a'.
-	/// <p/>
-	/// 
-	/// </summary>
-	/// <deprecated> If you build a new index, use <see cref="ASCIIFoldingFilter"/>
-	/// which covers a superset of Latin 1.
-	/// This class is included for use with existing indexes and will be removed
-	/// in a future release (possible Lucene 4.0)
-	/// </deprecated>
+    
+    /// <summary> A filter that replaces accented characters in the ISO Latin 1 character set 
+    /// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
+    /// <p/>
+    /// For instance, '&#192;' will be replaced by 'a'.
+    /// <p/>
+    /// 
+    /// </summary>
+    /// <deprecated> If you build a new index, use <see cref="ASCIIFoldingFilter"/>
+    /// which covers a superset of Latin 1.
+    /// This class is included for use with existing indexes and will be removed
+    /// in a future release (possible Lucene 4.0)
+    /// </deprecated>
     [Obsolete("If you build a new index, use ASCIIFoldingFilter which covers a superset of Latin 1.  This class is included for use with existing indexes and will be removed in a future release (possible Lucene 4.0).")]
-	public class ISOLatin1AccentFilter : TokenFilter
-	{
-		public ISOLatin1AccentFilter(TokenStream input):base(input)
-		{
+    public class ISOLatin1AccentFilter : TokenFilter
+    {
+        public ISOLatin1AccentFilter(TokenStream input):base(input)
+        {
             termAtt = AddAttribute<ITermAttribute>();
-		}
-		
-		private char[] output = new char[256];
-		private int outputPos;
-		private readonly ITermAttribute termAtt;
-		
-		public override bool IncrementToken()
-		{
-			if (input.IncrementToken())
-			{
-				char[] buffer = termAtt.TermBuffer();
-				int length = termAtt.TermLength();
-				// If no characters actually require rewriting then we
-				// just return token as-is:
-				for (int i = 0; i < length; i++)
-				{
-					char c = buffer[i];
-					if (c >= '\u00c0' && c <= '\uFB06')
-					{
-						RemoveAccents(buffer, length);
-						termAtt.SetTermBuffer(output, 0, outputPos);
-						break;
-					}
-				}
-				return true;
-			}
-			return false;
-		}
+        }
+        
+        private char[] output = new char[256];
+        private int outputPos;
+        private readonly ITermAttribute termAtt;
+        
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                char[] buffer = termAtt.TermBuffer();
+                int length = termAtt.TermLength();
+                // If no characters actually require rewriting then we
+                // just return token as-is:
+                for (int i = 0; i < length; i++)
+                {
+                    char c = buffer[i];
+                    if (c >= '\u00c0' && c <= '\uFB06')
+                    {
+                        RemoveAccents(buffer, length);
+                        termAtt.SetTermBuffer(output, 0, outputPos);
+                        break;
+                    }
+                }
+                return true;
+            }
+            return false;
+        }
 
-		/// <summary> To replace accented characters in a String by unaccented equivalents.</summary>
-		public void  RemoveAccents(char[] input, int length)
-		{
-			
-			// Worst-case length required:
-			int maxSizeNeeded = 2 * length;
-			
-			int size = output.Length;
-			while (size < maxSizeNeeded)
-				size *= 2;
-			
-			if (size != output.Length)
-				output = new char[size];
-			
-			outputPos = 0;
-			
-			int pos = 0;
-			
-			for (int i = 0; i < length; i++, pos++)
-			{
-				char c = input[pos];
-				
-				// Quick test: if it's not in range then just keep
-				// current character
-				if (c < '\u00c0' || c > '\uFB06')
-					output[outputPos++] = c;
-				else
-				{
-					switch (c)
-					{
-						
-						case '\u00C0': 
-						// À
-						case '\u00C1': 
-						// Á
-						case '\u00C2': 
-						// Â
-						case '\u00C3': 
-						// Ã
-						case '\u00C4': 
-						// Ä
-						case '\u00C5':  // Å
-							output[outputPos++] = 'A';
-							break;
-						
-						case '\u00C6':  // Æ
-							output[outputPos++] = 'A';
-							output[outputPos++] = 'E';
-							break;
-						
-						case '\u00C7':  // Ç
-							output[outputPos++] = 'C';
-							break;
-						
-						case '\u00C8': 
-						// È
-						case '\u00C9': 
-						// É
-						case '\u00CA': 
-						// Ê
-						case '\u00CB':  // Ë
-							output[outputPos++] = 'E';
-							break;
-						
-						case '\u00CC': 
-						// Ì
-						case '\u00CD': 
-						// Í
-						case '\u00CE': 
-						// Î
-						case '\u00CF':  // Ï
-							output[outputPos++] = 'I';
-							break;
-						
-						case '\u0132':  // IJ
-							output[outputPos++] = 'I';
-							output[outputPos++] = 'J';
-							break;
-						
-						case '\u00D0':  // Ð
-							output[outputPos++] = 'D';
-							break;
-						
-						case '\u00D1':  // Ñ
-							output[outputPos++] = 'N';
-							break;
-						
-						// Ò
-						// Ã’
-						case '\u00D3': 
-						// Ó
-						case '\u00D4': 
-						// Ô
-						case '\u00D5': 
-						// Õ
-						case '\u00D6': 
-						// Ö
-						case '\u00D8':  // Ø
-							output[outputPos++] = 'O';
-							break;
-						
-						case '\u0152':  // Œ
-							output[outputPos++] = 'O';
-							output[outputPos++] = 'E';
-							break;
-						
-						case '\u00DE':  // Þ
-							output[outputPos++] = 'T';
-							output[outputPos++] = 'H';
-							break;
-						
-						case '\u00D9': 
-						// Ù
-						case '\u00DA': 
-						// Ú
-						case '\u00DB': 
-						// Û
-						case '\u00DC':  // Ü
-							output[outputPos++] = 'U';
-							break;
-						
-						case '\u00DD': 
-						// Ý
-						case '\u0178':  // Ÿ
-							output[outputPos++] = 'Y';
-							break;
-						
-						case '\u00E0': 
-						// à
-						case '\u00E1': 
-						// á
-						case '\u00E2': 
-						// â
-						case '\u00E3': 
-						// ã
-						case '\u00E4': 
-						// ä
-						case '\u00E5':  // å
-							output[outputPos++] = 'a';
-							break;
-						
-						case '\u00E6':  // æ
-							output[outputPos++] = 'a';
-							output[outputPos++] = 'e';
-							break;
-						
-						case '\u00E7':  // ç
-							output[outputPos++] = 'c';
-							break;
-						
-						case '\u00E8': 
-						// è
-						case '\u00E9': 
-						// é
-						case '\u00EA': 
-						// ê
-						case '\u00EB':  // ë
-							output[outputPos++] = 'e';
-							break;
-						
-						case '\u00EC': 
-						// ì
-						case '\u00ED': 
-						// í
-						case '\u00EE': 
-						// î
-						case '\u00EF':  // ï
-							output[outputPos++] = 'i';
-							break;
-						
-						case '\u0133':  // ij
-							output[outputPos++] = 'i';
-							output[outputPos++] = 'j';
-							break;
-						
-						case '\u00F0':  // ð
-							output[outputPos++] = 'd';
-							break;
-						
-						case '\u00F1':  // ñ
-							output[outputPos++] = 'n';
-							break;
-						
-						case '\u00F2': 
-						// ò
-						case '\u00F3': 
-						// ó
-						case '\u00F4': 
-						// ô
-						case '\u00F5': 
-						// õ
-						case '\u00F6': 
-						// ö
-						case '\u00F8':  // ø
-							output[outputPos++] = 'o';
-							break;
-						
-						case '\u0153':  // œ
-							output[outputPos++] = 'o';
-							output[outputPos++] = 'e';
-							break;
-						
-						case '\u00DF':  // ß
-							output[outputPos++] = 's';
-							output[outputPos++] = 's';
-							break;
-						
-						case '\u00FE':  // þ
-							output[outputPos++] = 't';
-							output[outputPos++] = 'h';
-							break;
-						
-						case '\u00F9': 
-						// ù
-						case '\u00FA': 
-						// ú
-						case '\u00FB': 
-						// û
-						case '\u00FC':  // ü
-							output[outputPos++] = 'u';
-							break;
-						
-						case '\u00FD': 
-						// ý
-						case '\u00FF':  // ÿ
-							output[outputPos++] = 'y';
-							break;
-						
-						case '\uFB00':  // ff
-							output[outputPos++] = 'f';
-							output[outputPos++] = 'f';
-							break;
-						
-						case '\uFB01':  // ﬁ
-							output[outputPos++] = 'f';
-							output[outputPos++] = 'i';
-							break;
-						
-						case '\uFB02':  // fl
-							output[outputPos++] = 'f';
-							output[outputPos++] = 'l';
-							break;
-							// following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
-							//        case '\uFB03': // ffi
-							//            output[outputPos++] = 'f';
-							//            output[outputPos++] = 'f';
-							//            output[outputPos++] = 'i';
-							//            break;
-							//        case '\uFB04': // ffl
-							//            output[outputPos++] = 'f';
-							//            output[outputPos++] = 'f';
-							//            output[outputPos++] = 'l';
-							//            break;
-						
-						case '\uFB05':  // ſt
-							output[outputPos++] = 'f';
-							output[outputPos++] = 't';
-							break;
-						
-						case '\uFB06':  // st
-							output[outputPos++] = 's';
-							output[outputPos++] = 't';
-							break;
-						
-						default: 
-							output[outputPos++] = c;
-							break;
-						
-					}
-				}
-			}
-		}
-	}
+        /// <summary> To replace accented characters in a String by unaccented equivalents.</summary>
+        public void  RemoveAccents(char[] input, int length)
+        {
+            
+            // Worst-case length required:
+            int maxSizeNeeded = 2 * length;
+            
+            int size = output.Length;
+            while (size < maxSizeNeeded)
+                size *= 2;
+            
+            if (size != output.Length)
+                output = new char[size];
+            
+            outputPos = 0;
+            
+            int pos = 0;
+            
+            for (int i = 0; i < length; i++, pos++)
+            {
+                char c = input[pos];
+                
+                // Quick test: if it's not in range then just keep
+                // current character
+                if (c < '\u00c0' || c > '\uFB06')
+                    output[outputPos++] = c;
+                else
+                {
+                    switch (c)
+                    {
+                        
+                        case '\u00C0': 
+                        // À
+                        case '\u00C1': 
+                        // Á
+                        case '\u00C2': 
+                        // Â
+                        case '\u00C3': 
+                        // Ã
+                        case '\u00C4': 
+                        // Ä
+                        case '\u00C5':  // Å
+                            output[outputPos++] = 'A';
+                            break;
+                        
+                        case '\u00C6':  // Æ
+                            output[outputPos++] = 'A';
+                            output[outputPos++] = 'E';
+                            break;
+                        
+                        case '\u00C7':  // Ç
+                            output[outputPos++] = 'C';
+                            break;
+                        
+                        case '\u00C8': 
+                        // È
+                        case '\u00C9': 
+                        // É
+                        case '\u00CA': 
+                        // Ê
+                        case '\u00CB':  // Ë
+                            output[outputPos++] = 'E';
+                            break;
+                        
+                        case '\u00CC': 
+                        // Ì
+                        case '\u00CD': 
+                        // Í
+                        case '\u00CE': 
+                        // Î
+                        case '\u00CF':  // Ï
+                            output[outputPos++] = 'I';
+                            break;
+                        
+                        case '\u0132':  // IJ
+                            output[outputPos++] = 'I';
+                            output[outputPos++] = 'J';
+                            break;
+                        
+                        case '\u00D0':  // Ð
+                            output[outputPos++] = 'D';
+                            break;
+                        
+                        case '\u00D1':  // Ñ
+                            output[outputPos++] = 'N';
+                            break;
+                        
+                        case '\u00D2': 
+                        // Ò
+                        case '\u00D3': 
+                        // Ó
+                        case '\u00D4': 
+                        // Ô
+                        case '\u00D5': 
+                        // Õ
+                        case '\u00D6': 
+                        // Ö
+                        case '\u00D8':  // Ø
+                            output[outputPos++] = 'O';
+                            break;
+                        
+                        case '\u0152':  // Œ
+                            output[outputPos++] = 'O';
+                            output[outputPos++] = 'E';
+                            break;
+                        
+                        case '\u00DE':  // Þ
+                            output[outputPos++] = 'T';
+                            output[outputPos++] = 'H';
+                            break;
+                        
+                        case '\u00D9': 
+                        // Ù
+                        case '\u00DA': 
+                        // Ú
+                        case '\u00DB': 
+                        // Û
+                        case '\u00DC':  // Ü
+                            output[outputPos++] = 'U';
+                            break;
+                        
+                        case '\u00DD': 
+                        // Ý
+                        case '\u0178':  // Ÿ
+                            output[outputPos++] = 'Y';
+                            break;
+                        
+                        case '\u00E0': 
+                        // à
+                        case '\u00E1': 
+                        // á
+                        case '\u00E2': 
+                        // â
+                        case '\u00E3': 
+                        // ã
+                        case '\u00E4': 
+                        // ä
+                        case '\u00E5':  // å
+                            output[outputPos++] = 'a';
+                            break;
+                        
+                        case '\u00E6':  // æ
+                            output[outputPos++] = 'a';
+                            output[outputPos++] = 'e';
+                            break;
+                        
+                        case '\u00E7':  // ç
+                            output[outputPos++] = 'c';
+                            break;
+                        
+                        case '\u00E8': 
+                        // è
+                        case '\u00E9': 
+                        // é
+                        case '\u00EA': 
+                        // ê
+                        case '\u00EB':  // ë
+                            output[outputPos++] = 'e';
+                            break;
+                        
+                        case '\u00EC': 
+                        // ì
+                        case '\u00ED': 
+                        // í
+                        case '\u00EE': 
+                        // î
+                        case '\u00EF':  // ï
+                            output[outputPos++] = 'i';
+                            break;
+                        
+                        case '\u0133':  // ij
+                            output[outputPos++] = 'i';
+                            output[outputPos++] = 'j';
+                            break;
+                        
+                        case '\u00F0':  // ð
+                            output[outputPos++] = 'd';
+                            break;
+                        
+                        case '\u00F1':  // ñ
+                            output[outputPos++] = 'n';
+                            break;
+                        
+                        case '\u00F2': 
+                        // ò
+                        case '\u00F3': 
+                        // ó
+                        case '\u00F4': 
+                        // ô
+                        case '\u00F5': 
+                        // õ
+                        case '\u00F6': 
+                        // ö
+                        case '\u00F8':  // ø
+                            output[outputPos++] = 'o';
+                            break;
+                        
+                        case '\u0153':  // œ
+                            output[outputPos++] = 'o';
+                            output[outputPos++] = 'e';
+                            break;
+                        
+                        case '\u00DF':  // ß
+                            output[outputPos++] = 's';
+                            output[outputPos++] = 's';
+                            break;
+                        
+                        case '\u00FE':  // þ
+                            output[outputPos++] = 't';
+                            output[outputPos++] = 'h';
+                            break;
+                        
+                        case '\u00F9': 
+                        // ù
+                        case '\u00FA': 
+                        // ú
+                        case '\u00FB': 
+                        // û
+                        case '\u00FC':  // ü
+                            output[outputPos++] = 'u';
+                            break;
+                        
+                        case '\u00FD': 
+                        // ý
+                        case '\u00FF':  // ÿ
+                            output[outputPos++] = 'y';
+                            break;
+                        
+                        case '\uFB00':  // ff
+                            output[outputPos++] = 'f';
+                            output[outputPos++] = 'f';
+                            break;
+                        
+                        case '\uFB01':  // ﬁ
+                            output[outputPos++] = 'f';
+                            output[outputPos++] = 'i';
+                            break;
+                        
+                        case '\uFB02':  // fl
+                            output[outputPos++] = 'f';
+                            output[outputPos++] = 'l';
+                            break;
+                            // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
+                            //        case '\uFB03': // ffi
+                            //            output[outputPos++] = 'f';
+                            //            output[outputPos++] = 'f';
+                            //            output[outputPos++] = 'i';
+                            //            break;
+                            //        case '\uFB04': // ffl
+                            //            output[outputPos++] = 'f';
+                            //            output[outputPos++] = 'f';
+                            //            output[outputPos++] = 'l';
+                            //            break;
+                        
+                        case '\uFB05':  // ſt
+                            output[outputPos++] = 'f';
+                            output[outputPos++] = 't';
+                            break;
+                        
+                        case '\uFB06':  // st
+                            output[outputPos++] = 's';
+                            output[outputPos++] = 't';
+                            break;
+                        
+                        default: 
+                            output[outputPos++] = c;
+                            break;
+                        
+                    }
+                }
+            }
+        }
+    }
 }
\ No newline at end of file
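
A usage sketch for the filter above, reusing the hypothetical LowerCaseLetterTokenizer sketched earlier; the #pragma lines only silence the [Obsolete] warning, since ASCIIFoldingFilter is the documented replacement.

TokenStream stream = new LowerCaseLetterTokenizer(new System.IO.StringReader("déjà vu"));
#pragma warning disable 618
stream = new ISOLatin1AccentFilter(stream);
#pragma warning restore 618
while (stream.IncrementToken())
{
    // emits "deja" then "vu": characters in the \u00C0-\uFB06 range are folded
}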

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/KeywordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/KeywordAnalyzer.cs b/src/core/Analysis/KeywordAnalyzer.cs
index 116babb..9083816 100644
--- a/src/core/Analysis/KeywordAnalyzer.cs
+++ b/src/core/Analysis/KeywordAnalyzer.cs
@@ -17,38 +17,38 @@
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> "Tokenizes" the entire stream as a single token. This is useful
-	/// for data like zip codes, ids, and some product names.
-	/// </summary>
-	public class KeywordAnalyzer:Analyzer
-	{
-		public KeywordAnalyzer()
-		{
+    
+    /// <summary> "Tokenizes" the entire stream as a single token. This is useful
+    /// for data like zip codes, ids, and some product names.
+    /// </summary>
+    public class KeywordAnalyzer:Analyzer
+    {
+        public KeywordAnalyzer()
+        {
             SetOverridesTokenStreamMethod<KeywordAnalyzer>();
-		}
-		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-		{
-			return new KeywordTokenizer(reader);
-		}
-		public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
-		{
-			if (overridesTokenStreamMethod)
-			{
-				// LUCENE-1678: force fallback to tokenStream() if we
-				// have been subclassed and that subclass overrides
-				// tokenStream but not reusableTokenStream
-				return TokenStream(fieldName, reader);
-			}
-			var tokenizer = (Tokenizer) PreviousTokenStream;
-			if (tokenizer == null)
-			{
-				tokenizer = new KeywordTokenizer(reader);
-				PreviousTokenStream = tokenizer;
-			}
-			else
-				tokenizer.Reset(reader);
-			return tokenizer;
-		}
-	}
+        }
+        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+        {
+            return new KeywordTokenizer(reader);
+        }
+        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+        {
+            if (overridesTokenStreamMethod)
+            {
+                // LUCENE-1678: force fallback to tokenStream() if we
+                // have been subclassed and that subclass overrides
+                // tokenStream but not reusableTokenStream
+                return TokenStream(fieldName, reader);
+            }
+            var tokenizer = (Tokenizer) PreviousTokenStream;
+            if (tokenizer == null)
+            {
+                tokenizer = new KeywordTokenizer(reader);
+                PreviousTokenStream = tokenizer;
+            }
+            else
+                tokenizer.Reset(reader);
+            return tokenizer;
+        }
+    }
 }
\ No newline at end of file
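
An illustrative sketch (not from the patch) of the single-token behaviour the summary describes; the field name and input are arbitrary, and GetAttribute is assumed from AttributeSource.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class KeywordAnalyzerDemo
    {
        static void Main()
        {
            var analyzer = new KeywordAnalyzer();
            TokenStream ts = analyzer.TokenStream("id", new StringReader("ABC 123-XYZ"));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine("[" + new string(term.TermBuffer(), 0, term.TermLength()) + "]");
            // Prints exactly one line, whitespace included: [ABC 123-XYZ]
        }
    }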

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/KeywordTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/KeywordTokenizer.cs b/src/core/Analysis/KeywordTokenizer.cs
index f97ff95..38f6f8a 100644
--- a/src/core/Analysis/KeywordTokenizer.cs
+++ b/src/core/Analysis/KeywordTokenizer.cs
@@ -20,80 +20,80 @@ using AttributeSource = Lucene.Net.Util.AttributeSource;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> Emits the entire input as a single token.</summary>
-	public sealed class KeywordTokenizer:Tokenizer
-	{
-		
-		private const int DEFAULT_BUFFER_SIZE = 256;
-		
-		private bool done;
-		private int finalOffset;
-		private ITermAttribute termAtt;
-		private IOffsetAttribute offsetAtt;
-		
-		public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE)
-		{
-		}
-		
-		public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input)
-		{
-			Init(bufferSize);
-		}
-		
-		public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input)
-		{
-			Init(bufferSize);
-		}
-		
-		public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input)
-		{
-			Init(bufferSize);
-		}
-		
-		private void  Init(int bufferSize)
-		{
-			this.done = false;
+    
+    /// <summary> Emits the entire input as a single token.</summary>
+    public sealed class KeywordTokenizer:Tokenizer
+    {
+        
+        private const int DEFAULT_BUFFER_SIZE = 256;
+        
+        private bool done;
+        private int finalOffset;
+        private ITermAttribute termAtt;
+        private IOffsetAttribute offsetAtt;
+        
+        public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE)
+        {
+        }
+        
+        public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input)
+        {
+            Init(bufferSize);
+        }
+        
+        public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input)
+        {
+            Init(bufferSize);
+        }
+        
+        public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input)
+        {
+            Init(bufferSize);
+        }
+        
+        private void  Init(int bufferSize)
+        {
+            this.done = false;
             termAtt = AddAttribute<ITermAttribute>();
             offsetAtt = AddAttribute<IOffsetAttribute>();
-			termAtt.ResizeTermBuffer(bufferSize);
-		}
-		
-		public override bool IncrementToken()
-		{
-			if (!done)
-			{
-				ClearAttributes();
-				done = true;
-				int upto = 0;
-				char[] buffer = termAtt.TermBuffer();
-				while (true)
-				{
-					int length = input.Read(buffer, upto, buffer.Length - upto);
-					if (length == 0)
-						break;
-					upto += length;
-					if (upto == buffer.Length)
-						buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
-				}
-				termAtt.SetTermLength(upto);
-				finalOffset = CorrectOffset(upto);
-				offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
-				return true;
-			}
-			return false;
-		}
-		
-		public override void  End()
-		{
-			// set final offset 
-			offsetAtt.SetOffset(finalOffset, finalOffset);
-		}
-		
-		public override void  Reset(System.IO.TextReader input)
-		{
-			base.Reset(input);
-			this.done = false;
-		}
-	}
+            termAtt.ResizeTermBuffer(bufferSize);
+        }
+        
+        public override bool IncrementToken()
+        {
+            if (!done)
+            {
+                ClearAttributes();
+                done = true;
+                int upto = 0;
+                char[] buffer = termAtt.TermBuffer();
+                while (true)
+                {
+                    int length = input.Read(buffer, upto, buffer.Length - upto);
+                    if (length == 0)
+                        break;
+                    upto += length;
+                    if (upto == buffer.Length)
+                        buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
+                }
+                termAtt.SetTermLength(upto);
+                finalOffset = CorrectOffset(upto);
+                offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
+                return true;
+            }
+            return false;
+        }
+        
+        public override void  End()
+        {
+            // set final offset 
+            offsetAtt.SetOffset(finalOffset, finalOffset);
+        }
+        
+        public override void  Reset(System.IO.TextReader input)
+        {
+            base.Reset(input);
+            this.done = false;
+        }
+    }
 }
\ No newline at end of file
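
The Reset(TextReader) override above is what lets one instance be reused across inputs; an illustrative sketch (not from the patch; input strings are arbitrary):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class KeywordTokenizerReuse
    {
        static void Main()
        {
            var tok = new KeywordTokenizer(new StringReader("first input"));
            ITermAttribute term = tok.GetAttribute<ITermAttribute>();
            while (tok.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));

            // Reset swaps the reader and clears the done flag, so the same
            // instance emits "second input" as a fresh single token.
            tok.Reset(new StringReader("second input"));
            while (tok.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        }
    }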

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/LengthFilter.cs b/src/core/Analysis/LengthFilter.cs
index c4f60ad..1a9899f 100644
--- a/src/core/Analysis/LengthFilter.cs
+++ b/src/core/Analysis/LengthFilter.cs
@@ -19,42 +19,42 @@ using Lucene.Net.Analysis.Tokenattributes;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary>Removes words that are too long or too short from the stream.</summary>
-	public sealed class LengthFilter:TokenFilter
-	{
-		
-		internal int min;
-		internal int max;
-		
-		private readonly ITermAttribute termAtt;
-		
-		/// <summary> Build a filter that removes words that are too long or too
-		/// short from the text.
-		/// </summary>
-		public LengthFilter(TokenStream in_Renamed, int min, int max)
+    
+    /// <summary>Removes words that are too long or too short from the stream.</summary>
+    public sealed class LengthFilter:TokenFilter
+    {
+        
+        internal int min;
+        internal int max;
+        
+        private readonly ITermAttribute termAtt;
+        
+        /// <summary> Build a filter that removes words that are too long or too
+        /// short from the text.
+        /// </summary>
+        public LengthFilter(TokenStream in_Renamed, int min, int max)
             : base(in_Renamed)
-		{
-			this.min = min;
-			this.max = max;
+        {
+            this.min = min;
+            this.max = max;
             termAtt = AddAttribute<ITermAttribute>();
-		}
-		
-		/// <summary> Returns the next input Token whose term() is the right len</summary>
-		public override bool IncrementToken()
-		{
-			// return the first non-stop word found
-			while (input.IncrementToken())
-			{
-				var len = termAtt.TermLength();
-				if (len >= min && len <= max)
-				{
-					return true;
-				}
-				// note: else we ignore it but should we index each part of it?
-			}
-			// reached EOS -- return false
-			return false;
-		}
-	}
+        }
+        
+        /// <summary> Returns the next input Token whose term() is of the right length.</summary>
+        public override bool IncrementToken()
+        {
+            // keep the first token whose length is within [min, max]
+            while (input.IncrementToken())
+            {
+                var len = termAtt.TermLength();
+                if (len >= min && len <= max)
+                {
+                    return true;
+                }
+                // note: else we ignore it but should we index each part of it?
+            }
+            // reached EOS -- return false
+            return false;
+        }
+    }
 }
\ No newline at end of file
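
The [min, max] check above in action, sketched for illustration (not from the patch; WhitespaceTokenizer is assumed from the same namespace and the input is arbitrary):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class LengthFilterDemo
    {
        static void Main()
        {
            // min = 2, max = 3: "bb" and "ccc" pass; "a" and "dddd" are dropped.
            TokenStream ts = new LengthFilter(
                new WhitespaceTokenizer(new StringReader("a bb ccc dddd")), 2, 3);
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        }
    }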

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/LetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/LetterTokenizer.cs b/src/core/Analysis/LetterTokenizer.cs
index 77629a8..ecd0cae 100644
--- a/src/core/Analysis/LetterTokenizer.cs
+++ b/src/core/Analysis/LetterTokenizer.cs
@@ -19,39 +19,39 @@ using AttributeSource = Lucene.Net.Util.AttributeSource;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary>A LetterTokenizer is a tokenizer that divides text at non-letters.  That's
-	/// to say, it defines tokens as maximal strings of adjacent letters, as defined
-	/// by java.lang.Character.isLetter() predicate.
-	/// Note: this does a decent job for most European languages, but does a terrible
-	/// job for some Asian languages, where words are not separated by spaces. 
-	/// </summary>
-	
-	public class LetterTokenizer:CharTokenizer
-	{
-		/// <summary>Construct a new LetterTokenizer. </summary>
-		public LetterTokenizer(System.IO.TextReader @in):base(@in)
-		{
-		}
-		
-		/// <summary>Construct a new LetterTokenizer using a given <see cref="AttributeSource" />. </summary>
-		public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
-			: base(source, @in)
-		{
-		}
-		
-		/// <summary>Construct a new LetterTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
-		public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
-			: base(factory, @in)
-		{
-		}
-		
-		/// <summary>Collects only characters which satisfy
-		/// <see cref="char.IsLetter(char)" />.
-		/// </summary>
-		protected internal override bool IsTokenChar(char c)
-		{
-			return System.Char.IsLetter(c);
-		}
-	}
+    
+    /// <summary>A LetterTokenizer is a tokenizer that divides text at non-letters.  That is
+    /// to say, it defines tokens as maximal strings of adjacent letters, as defined
+    /// by the <see cref="char.IsLetter(char)" /> predicate.
+    /// Note: this does a decent job for most European languages, but does a terrible
+    /// job for some Asian languages, where words are not separated by spaces. 
+    /// </summary>
+    
+    public class LetterTokenizer:CharTokenizer
+    {
+        /// <summary>Construct a new LetterTokenizer. </summary>
+        public LetterTokenizer(System.IO.TextReader @in):base(@in)
+        {
+        }
+        
+        /// <summary>Construct a new LetterTokenizer using a given <see cref="AttributeSource" />. </summary>
+        public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
+            : base(source, @in)
+        {
+        }
+        
+        /// <summary>Construct a new LetterTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
+        public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+            : base(factory, @in)
+        {
+        }
+        
+        /// <summary>Collects only characters which satisfy
+        /// <see cref="char.IsLetter(char)" />.
+        /// </summary>
+        protected internal override bool IsTokenChar(char c)
+        {
+            return System.Char.IsLetter(c);
+        }
+    }
 }
\ No newline at end of file
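
A sketch of the maximal-letter-run behaviour (illustrative, not from the patch; the input is chosen so that digits and punctuation act as separators):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class LetterTokenizerDemo
    {
        static void Main()
        {
            // "R2-D2 c3po" yields the tokens: R, D, c, po
            TokenStream ts = new LetterTokenizer(new StringReader("R2-D2 c3po"));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        }
    }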

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/LowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/LowerCaseFilter.cs b/src/core/Analysis/LowerCaseFilter.cs
index cad0197..b6dcca6 100644
--- a/src/core/Analysis/LowerCaseFilter.cs
+++ b/src/core/Analysis/LowerCaseFilter.cs
@@ -19,31 +19,31 @@ using Lucene.Net.Analysis.Tokenattributes;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary>Normalizes token text to lower case.</summary>
-	public sealed class LowerCaseFilter:TokenFilter
-	{
-		public LowerCaseFilter(TokenStream @in)
-			: base(@in)
-		{
+    
+    /// <summary>Normalizes token text to lower case.</summary>
+    public sealed class LowerCaseFilter:TokenFilter
+    {
+        public LowerCaseFilter(TokenStream @in)
+            : base(@in)
+        {
             termAtt = AddAttribute<ITermAttribute>();
-		}
-		
-		private readonly ITermAttribute termAtt;
-		
-		public override bool IncrementToken()
-		{
-			if (input.IncrementToken())
-			{
-				
-				char[] buffer = termAtt.TermBuffer();
-				int length = termAtt.TermLength();
-				for (int i = 0; i < length; i++)
-					buffer[i] = System.Char.ToLower(buffer[i]);
-				
-				return true;
-			}
-			return false;
-		}
-	}
+        }
+        
+        private readonly ITermAttribute termAtt;
+        
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                
+                char[] buffer = termAtt.TermBuffer();
+                int length = termAtt.TermLength();
+                for (int i = 0; i < length; i++)
+                    buffer[i] = System.Char.ToLower(buffer[i]);
+                
+                return true;
+            }
+            return false;
+        }
+    }
 }
\ No newline at end of file
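
The filter rewrites each term buffer in place; an illustrative sketch (not from the patch; WhitespaceTokenizer assumed):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class LowerCaseFilterDemo
    {
        static void Main()
        {
            // "Hello WORLD" comes through as: hello, world
            TokenStream ts = new LowerCaseFilter(
                new WhitespaceTokenizer(new StringReader("Hello WORLD")));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        }
    }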

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/LowerCaseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/LowerCaseTokenizer.cs b/src/core/Analysis/LowerCaseTokenizer.cs
index 4cea217..530b37c 100644
--- a/src/core/Analysis/LowerCaseTokenizer.cs
+++ b/src/core/Analysis/LowerCaseTokenizer.cs
@@ -19,42 +19,42 @@ using AttributeSource = Lucene.Net.Util.AttributeSource;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> LowerCaseTokenizer performs the function of LetterTokenizer
-	/// and LowerCaseFilter together.  It divides text at non-letters and converts
-	/// them to lower case.  While it is functionally equivalent to the combination
-	/// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
-	/// to doing the two tasks at once, hence this (redundant) implementation.
-	/// <p/>
-	/// Note: this does a decent job for most European languages, but does a terrible
-	/// job for some Asian languages, where words are not separated by spaces.
-	/// </summary>
-	public sealed class LowerCaseTokenizer:LetterTokenizer
-	{
-		/// <summary>Construct a new LowerCaseTokenizer. </summary>
-		public LowerCaseTokenizer(System.IO.TextReader @in)
-			: base(@in)
-		{
-		}
-		
-		/// <summary>Construct a new LowerCaseTokenizer using a given <see cref="AttributeSource" />. </summary>
-		public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in)
-			: base(source, @in)
-		{
-		}
-		
-		/// <summary>Construct a new LowerCaseTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
-		public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in)
-			: base(factory, @in)
-		{
-		}
-		
-		/// <summary>Converts char to lower case
-		/// <see cref="char.ToLower(char)" />.
-		/// </summary>
-		protected internal override char Normalize(char c)
-		{
-			return System.Char.ToLower(c);
-		}
-	}
+    
+    /// <summary> LowerCaseTokenizer performs the function of LetterTokenizer
+    /// and LowerCaseFilter together.  It divides text at non-letters and converts
+    /// them to lower case.  While it is functionally equivalent to the combination
+    /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+    /// to doing the two tasks at once, hence this (redundant) implementation.
+    /// <p/>
+    /// Note: this does a decent job for most European languages, but does a terrible
+    /// job for some Asian languages, where words are not separated by spaces.
+    /// </summary>
+    public sealed class LowerCaseTokenizer:LetterTokenizer
+    {
+        /// <summary>Construct a new LowerCaseTokenizer. </summary>
+        public LowerCaseTokenizer(System.IO.TextReader @in)
+            : base(@in)
+        {
+        }
+        
+        /// <summary>Construct a new LowerCaseTokenizer using a given <see cref="AttributeSource" />. </summary>
+        public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in)
+            : base(source, @in)
+        {
+        }
+        
+        /// <summary>Construct a new LowerCaseTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
+        public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+            : base(factory, @in)
+        {
+        }
+        
+        /// <summary>Converts char to lower case
+        /// <see cref="char.ToLower(char)" />.
+        /// </summary>
+        protected internal override char Normalize(char c)
+        {
+            return System.Char.ToLower(c);
+        }
+    }
 }
\ No newline at end of file
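
The equivalence claimed in the summary, sketched for illustration (not from the patch; this single pass gives the same tokens as LowerCaseFilter over LetterTokenizer):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class LowerCaseTokenizerDemo
    {
        static void Main()
        {
            // "Hello World" yields: hello, world
            TokenStream ts = new LowerCaseTokenizer(new StringReader("Hello World"));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        }
    }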

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/MappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/MappingCharFilter.cs b/src/core/Analysis/MappingCharFilter.cs
index 9705719..9dd1c6d 100644
--- a/src/core/Analysis/MappingCharFilter.cs
+++ b/src/core/Analysis/MappingCharFilter.cs
@@ -19,148 +19,148 @@ using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> Simplistic <see cref="CharFilter" /> that applies the mappings
-	/// contained in a <see cref="NormalizeCharMap" /> to the character
-	/// stream, and correcting the resulting changes to the
-	/// offsets.
-	/// </summary>
-	public class MappingCharFilter : BaseCharFilter
-	{
-		private readonly NormalizeCharMap normMap;
-		private LinkedList<char> buffer;
-		private System.String replacement;
-		private int charPointer;
-		private int nextCharCounter;
-		
-		/// Default constructor that takes a <see cref="CharStream" />.
-		public MappingCharFilter(NormalizeCharMap normMap, CharStream @in)
-			: base(@in)
-		{
-			this.normMap = normMap;
-		}
-		
-		/// Easy-use constructor that takes a <see cref="System.IO.TextReader" />.
-		public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
-			: base(CharReader.Get(@in))
-		{
-			this.normMap = normMap;
-		}
-		
-		public  override int Read()
-		{
-			while (true)
-			{
-				if (replacement != null && charPointer < replacement.Length)
-				{
-					return replacement[charPointer++];
-				}
-				
-				int firstChar = NextChar();
-				if (firstChar == - 1)
-					return - 1;
-			    NormalizeCharMap nm = normMap.submap != null
-			                              ? normMap.submap[(char) firstChar]
-			                              : null;
-				if (nm == null)
-					return firstChar;
-				NormalizeCharMap result = Match(nm);
-				if (result == null)
-					return firstChar;
-				replacement = result.normStr;
-				charPointer = 0;
-				if (result.diff != 0)
-				{
-					int prevCumulativeDiff = LastCumulativeDiff;
-					if (result.diff < 0)
-					{
-						for (int i = 0; i < - result.diff; i++)
-							AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
-					}
-					else
-					{
-						AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
-					}
-				}
-			}
-		}
-		
-		private int NextChar()
-		{
-			nextCharCounter++;
-			if (buffer != null && buffer.Count != 0)
-			{
-				char tempObject = buffer.First.Value;
-				buffer.RemoveFirst();
-				return (tempObject);
-			}
-			return input.Read();
-		}
-		
-		private void  PushChar(int c)
-		{
-			nextCharCounter--;
-			if (buffer == null)
-			{
-				buffer = new LinkedList<char>();
-			}
-			buffer.AddFirst((char)c);
-		}
-		
-		private void  PushLastChar(int c)
-		{
-			if (buffer == null)
-			{
+    
+    /// <summary> Simplistic <see cref="CharFilter" /> that applies the mappings
+    /// contained in a <see cref="NormalizeCharMap" /> to the character
+    /// stream, correcting the resulting changes to the
+    /// offsets.
+    /// </summary>
+    public class MappingCharFilter : BaseCharFilter
+    {
+        private readonly NormalizeCharMap normMap;
+        private LinkedList<char> buffer;
+        private System.String replacement;
+        private int charPointer;
+        private int nextCharCounter;
+        
+        /// <summary>Constructor that takes a <see cref="CharStream" />.</summary>
+        public MappingCharFilter(NormalizeCharMap normMap, CharStream @in)
+            : base(@in)
+        {
+            this.normMap = normMap;
+        }
+        
+        /// <summary>Convenience constructor that takes a <see cref="System.IO.TextReader" />.</summary>
+        public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
+            : base(CharReader.Get(@in))
+        {
+            this.normMap = normMap;
+        }
+        
+        public  override int Read()
+        {
+            while (true)
+            {
+                if (replacement != null && charPointer < replacement.Length)
+                {
+                    return replacement[charPointer++];
+                }
+                
+                int firstChar = NextChar();
+                if (firstChar == - 1)
+                    return - 1;
+                NormalizeCharMap nm = normMap.submap != null
+                                          ? normMap.submap[(char) firstChar]
+                                          : null;
+                if (nm == null)
+                    return firstChar;
+                NormalizeCharMap result = Match(nm);
+                if (result == null)
+                    return firstChar;
+                replacement = result.normStr;
+                charPointer = 0;
+                if (result.diff != 0)
+                {
+                    int prevCumulativeDiff = LastCumulativeDiff;
+                    if (result.diff < 0)
+                    {
+                        for (int i = 0; i < - result.diff; i++)
+                            AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
+                    }
+                    else
+                    {
+                        AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
+                    }
+                }
+            }
+        }
+        
+        private int NextChar()
+        {
+            nextCharCounter++;
+            if (buffer != null && buffer.Count != 0)
+            {
+                char tempObject = buffer.First.Value;
+                buffer.RemoveFirst();
+                return (tempObject);
+            }
+            return input.Read();
+        }
+        
+        private void  PushChar(int c)
+        {
+            nextCharCounter--;
+            if (buffer == null)
+            {
                 buffer = new LinkedList<char>();
-			}
-			buffer.AddLast((char)c);
-		}
-		
-		private NormalizeCharMap Match(NormalizeCharMap map)
-		{
-			NormalizeCharMap result = null;
-			if (map.submap != null)
-			{
-				int chr = NextChar();
-				if (chr != - 1)
-				{
-					NormalizeCharMap subMap = map.submap[(char)chr];
-					if (subMap != null)
-					{
-						result = Match(subMap);
-					}
-					if (result == null)
-					{
-						PushChar(chr);
-					}
-				}
-			}
-			if (result == null && map.normStr != null)
-			{
-				result = map;
-			}
-			return result;
-		}
-		
-		public  override int Read(System.Char[] cbuf, int off, int len)
-		{
-			var tmp = new char[len];
-			int l = input.Read(tmp, 0, len);
-			if (l != 0)
-			{
-				for (int i = 0; i < l; i++)
-					PushLastChar(tmp[i]);
-			}
-			l = 0;
-			for (int i = off; i < off + len; i++)
-			{
-				int c = Read();
-				if (c == - 1)
-					break;
-				cbuf[i] = (char) c;
-				l++;
-			}
-			return l == 0?- 1:l;
-		}
-	}
+            }
+            buffer.AddFirst((char)c);
+        }
+        
+        private void  PushLastChar(int c)
+        {
+            if (buffer == null)
+            {
+                buffer = new LinkedList<char>();
+            }
+            buffer.AddLast((char)c);
+        }
+        
+        private NormalizeCharMap Match(NormalizeCharMap map)
+        {
+            NormalizeCharMap result = null;
+            if (map.submap != null)
+            {
+                int chr = NextChar();
+                if (chr != - 1)
+                {
+                    NormalizeCharMap subMap = map.submap[(char)chr];
+                    if (subMap != null)
+                    {
+                        result = Match(subMap);
+                    }
+                    if (result == null)
+                    {
+                        PushChar(chr);
+                    }
+                }
+            }
+            if (result == null && map.normStr != null)
+            {
+                result = map;
+            }
+            return result;
+        }
+        
+        public  override int Read(System.Char[] cbuf, int off, int len)
+        {
+            var tmp = new char[len];
+            int l = input.Read(tmp, 0, len);
+            if (l != 0)
+            {
+                for (int i = 0; i < l; i++)
+                    PushLastChar(tmp[i]);
+            }
+            l = 0;
+            for (int i = off; i < off + len; i++)
+            {
+                int c = Read();
+                if (c == - 1)
+                    break;
+                cbuf[i] = (char) c;
+                l++;
+            }
+            return l == 0?- 1:l;
+        }
+    }
 }
\ No newline at end of file
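
A sketch of pre-tokenization rewriting (illustrative, not from the patch; the mapping and input are arbitrary, and WhitespaceTokenizer is assumed):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    class MappingCharFilterDemo
    {
        static void Main()
        {
            var map = new NormalizeCharMap();
            map.Add("ph", "f");
            // The filter rewrites characters before tokenization and keeps
            // token offsets pointing back into the original text.
            TokenStream ts = new WhitespaceTokenizer(
                new MappingCharFilter(map, new StringReader("phase photo")));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
            // Prints: fase, then foto
        }
    }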

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Analysis/NormalizeCharMap.cs
----------------------------------------------------------------------
diff --git a/src/core/Analysis/NormalizeCharMap.cs b/src/core/Analysis/NormalizeCharMap.cs
index 7fd520c..5d6d558 100644
--- a/src/core/Analysis/NormalizeCharMap.cs
+++ b/src/core/Analysis/NormalizeCharMap.cs
@@ -19,50 +19,50 @@ using Lucene.Net.Support;
 
 namespace Lucene.Net.Analysis
 {
-	
-	/// <summary> Holds a map of String input to String output, to be used
-	/// with <see cref="MappingCharFilter" />.
-	/// </summary>
-	public class NormalizeCharMap
-	{
-		internal System.Collections.Generic.IDictionary<char, NormalizeCharMap> submap;
-		internal System.String normStr;
-		internal int diff;
-		
-		/// <summary>Records a replacement to be applied to the inputs
-		/// stream.  Whenever <c>singleMatch</c> occurs in
-		/// the input, it will be replaced with
-		/// <c>replacement</c>.
-		/// 
-		/// </summary>
-		/// <param name="singleMatch">input String to be replaced
-		/// </param>
-		/// <param name="replacement">output String
-		/// </param>
-		public virtual void  Add(System.String singleMatch, System.String replacement)
-		{
-			NormalizeCharMap currMap = this;
-			for (var i = 0; i < singleMatch.Length; i++)
-			{
-				char c = singleMatch[i];
-				if (currMap.submap == null)
-				{
-					currMap.submap = new HashMap<char, NormalizeCharMap>(1);
-				}
-				var map = currMap.submap[c];
-				if (map == null)
-				{
-					map = new NormalizeCharMap();
-					currMap.submap[c] = map;
-				}
-				currMap = map;
-			}
-			if (currMap.normStr != null)
-			{
-				throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch);
-			}
-			currMap.normStr = replacement;
-			currMap.diff = singleMatch.Length - replacement.Length;
-		}
-	}
+    
+    /// <summary> Holds a map of String input to String output, to be used
+    /// with <see cref="MappingCharFilter" />.
+    /// </summary>
+    public class NormalizeCharMap
+    {
+        internal System.Collections.Generic.IDictionary<char, NormalizeCharMap> submap;
+        internal System.String normStr;
+        internal int diff;
+        
+        /// <summary>Records a replacement to be applied to the input
+        /// stream.  Whenever <c>singleMatch</c> occurs in
+        /// the input, it will be replaced with
+        /// <c>replacement</c>.
+        /// 
+        /// </summary>
+        /// <param name="singleMatch">input String to be replaced
+        /// </param>
+        /// <param name="replacement">output String
+        /// </param>
+        public virtual void  Add(System.String singleMatch, System.String replacement)
+        {
+            NormalizeCharMap currMap = this;
+            for (var i = 0; i < singleMatch.Length; i++)
+            {
+                char c = singleMatch[i];
+                if (currMap.submap == null)
+                {
+                    currMap.submap = new HashMap<char, NormalizeCharMap>(1);
+                }
+                var map = currMap.submap[c];
+                if (map == null)
+                {
+                    map = new NormalizeCharMap();
+                    currMap.submap[c] = map;
+                }
+                currMap = map;
+            }
+            if (currMap.normStr != null)
+            {
+                throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch);
+            }
+            currMap.normStr = replacement;
+            currMap.diff = singleMatch.Length - replacement.Length;
+        }
+    }
 }
\ No newline at end of file
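
Add builds a per-character trie of submaps, and MappingCharFilter's Match walks it greedily, so the longest key wins; an illustrative sketch (not from the patch; keys and input are arbitrary):

    using System;
    using System.IO;
    using Lucene.Net.Analysis;

    class NormalizeCharMapDemo
    {
        static void Main()
        {
            var map = new NormalizeCharMap();
            map.Add("a", "1");
            map.Add("ab", "2");   // deeper trie node; preferred whenever "ab" is seen
            var reader = new MappingCharFilter(map, new StringReader("ab a b"));
            int c;
            while ((c = reader.Read()) != -1)
                Console.Write((char)c);   // prints: 2 1 b
            Console.WriteLine();
        }
    }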

