lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [09/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:13 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
new file mode 100644
index 0000000..06c5e10
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
@@ -0,0 +1,182 @@
+namespace org.apache.lucene.analysis.shingle
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A ShingleAnalyzerWrapper wraps a <seealso cref="ShingleFilter"/> around another <seealso cref="Analyzer"/>.
+	/// <para>
+	/// A shingle is another name for a token based n-gram.
+	/// </para>
+	/// </summary>
+	public sealed class ShingleAnalyzerWrapper : AnalyzerWrapper
+	{
+
+	  private readonly Analyzer @delegate;
+	  private readonly int maxShingleSize;
+	  private readonly int minShingleSize;
+	  private readonly string tokenSeparator;
+	  private readonly bool outputUnigrams;
+	  private readonly bool outputUnigramsIfNoShingles;
+	  private readonly string fillerToken;
+
+	  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer) : this(defaultAnalyzer, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+	  }
+
+	  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int maxShingleSize) : this(defaultAnalyzer, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
+	  {
+	  }
+
+	  public ShingleAnalyzerWrapper(Analyzer defaultAnalyzer, int minShingleSize, int maxShingleSize) : this(defaultAnalyzer, minShingleSize, maxShingleSize, ShingleFilter.DEFAULT_TOKEN_SEPARATOR, true, false, ShingleFilter.DEFAULT_FILLER_TOKEN)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds a wrapper that applies a <seealso cref="ShingleFilter"/> to the token stream of
+	  /// the given analyzer. The wrapped analyzer's reuse strategy is handed to the base class.
+	  /// </summary>
+	  /// <param name="delegate"> Analyzer whose TokenStream is to be filtered </param>
+	  /// <param name="minShingleSize"> Min shingle (token ngram) size; must be at least 2 and
+	  ///        not greater than maxShingleSize </param>
+	  /// <param name="maxShingleSize"> Max shingle size; must be at least 2 </param>
+	  /// <param name="tokenSeparator"> Used to separate input stream tokens in output shingles;
+	  ///        null is stored as the empty string </param>
+	  /// <param name="outputUnigrams"> Whether or not the filter shall pass the original
+	  ///        tokens to the output stream </param>
+	  /// <param name="outputUnigramsIfNoShingles"> Overrides the behavior of outputUnigrams==false for those
+	  ///        times when no shingles are available (because there are fewer than
+	  ///        minShingleSize tokens in the input stream).
+	  ///        Note that if outputUnigrams==true, then unigrams are always output,
+	  ///        regardless of whether any shingles are available. </param>
+	  /// <param name="fillerToken"> filler token to use when positionIncrement is more than 1 </param>
+	  /// <exception cref="System.ArgumentException"> if one of the size constraints is violated </exception>
+	  public ShingleAnalyzerWrapper(Analyzer @delegate, int minShingleSize, int maxShingleSize, string tokenSeparator, bool outputUnigrams, bool outputUnigramsIfNoShingles, string fillerToken) : base(@delegate.ReuseStrategy)
+	  {
+		// Size checks run in a fixed order: max alone, then min alone, then min against max.
+		if (maxShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Max shingle size must be >= 2");
+		}
+		if (minShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Min shingle size must be >= 2");
+		}
+		if (minShingleSize > maxShingleSize)
+		{
+		  throw new System.ArgumentException("Min shingle size must be <= max shingle size");
+		}
+
+		// All arguments are valid: record the configuration.
+		this.@delegate = @delegate;
+		this.minShingleSize = minShingleSize;
+		this.maxShingleSize = maxShingleSize;
+		this.tokenSeparator = tokenSeparator ?? "";
+		this.outputUnigrams = outputUnigrams;
+		this.outputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
+		this.fillerToken = fillerToken;
+	  }
+
+	  /// <summary>
+	  /// Wraps <seealso cref="StandardAnalyzer"/>. 
+	  /// </summary>
+	  public ShingleAnalyzerWrapper(Version matchVersion) : this(matchVersion, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Wraps <seealso cref="StandardAnalyzer"/>. 
+	  /// </summary>
+	  public ShingleAnalyzerWrapper(Version matchVersion, int minShingleSize, int maxShingleSize) : this(new StandardAnalyzer(matchVersion), minShingleSize, maxShingleSize)
+	  {
+	  }
+
+	  /// <summary>
+	  /// The max shingle (token ngram) size
+	  /// </summary>
+	  /// <returns> The max shingle (token ngram) size </returns>
+	  public int MaxShingleSize
+	  {
+		  get
+		  {
+			return maxShingleSize;
+		  }
+	  }
+
+	  /// <summary>
+	  /// The min shingle (token ngram) size
+	  /// </summary>
+	  /// <returns> The min shingle (token ngram) size </returns>
+	  public int MinShingleSize
+	  {
+		  get
+		  {
+			return minShingleSize;
+		  }
+	  }
+
+	  public string TokenSeparator
+	  {
+		  get
+		  {
+			return tokenSeparator;
+		  }
+	  }
+
+	  public bool OutputUnigrams
+	  {
+		  get
+		  {
+			return outputUnigrams;
+		  }
+	  }
+
+	  public bool OutputUnigramsIfNoShingles
+	  {
+		  get
+		  {
+			return outputUnigramsIfNoShingles;
+		  }
+	  }
+
+	  public string FillerToken
+	  {
+		  get
+		  {
+			return fillerToken;
+		  }
+	  }
+
+	  public override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		return @delegate;
+	  }
+
+	  /// <summary>
+	  /// Wraps the given components' token stream in a <seealso cref="ShingleFilter"/> configured
+	  /// from this wrapper's settings and pairs it with the original tokenizer.
+	  /// The fieldName argument is not consulted.
+	  /// </summary>
+	  protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
+	  {
+		// The initializer applies the setters in the order written.
+		ShingleFilter shingles = new ShingleFilter(components.TokenStream, minShingleSize, maxShingleSize)
+		{
+		  MinShingleSize = minShingleSize,
+		  MaxShingleSize = maxShingleSize,
+		  TokenSeparator = tokenSeparator,
+		  OutputUnigrams = outputUnigrams,
+		  OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles,
+		  FillerToken = fillerToken
+		};
+		return new TokenStreamComponents(components.Tokenizer, shingles);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
new file mode 100644
index 0000000..9bdc341
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilter.cs
@@ -0,0 +1,724 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.shingle
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+
+	/// <summary>
+	/// <para>A ShingleFilter constructs shingles (token n-grams) from a token stream.
+	/// In other words, it creates combinations of tokens as a single token.
+	/// 
+	/// </para>
+	/// <para>For example, the sentence "please divide this sentence into shingles"
+	/// might be tokenized into shingles "please divide", "divide this",
+	/// "this sentence", "sentence into", and "into shingles".
+	/// 
+	/// </para>
+	/// <para>This filter handles position increments > 1 by inserting filler tokens
+	/// (tokens with termtext "_"). It does not handle a position increment of 0.
+	/// </para>
+	/// </summary>
+	public sealed class ShingleFilter : TokenFilter
+	{
+
+	  /// <summary>
+	  /// filler token for when positionIncrement is more than 1
+	  /// </summary>
+	  public const string DEFAULT_FILLER_TOKEN = "_";
+
+	  /// <summary>
+	  /// default maximum shingle size is 2.
+	  /// </summary>
+	  public const int DEFAULT_MAX_SHINGLE_SIZE = 2;
+
+	  /// <summary>
+	  /// default minimum shingle size is 2.
+	  /// </summary>
+	  public const int DEFAULT_MIN_SHINGLE_SIZE = 2;
+
+	  /// <summary>
+	  /// default token type attribute value is "shingle" 
+	  /// </summary>
+	  public const string DEFAULT_TOKEN_TYPE = "shingle";
+
+	  /// <summary>
+	  /// The default string to use when joining adjacent tokens to form a shingle
+	  /// </summary>
+	  public const string DEFAULT_TOKEN_SEPARATOR = " ";
+
+	  /// <summary>
+	  /// The sequence of input stream tokens (or filler tokens, if necessary)
+	  /// that will be composed to form output shingles.
+	  /// </summary>
+	  private LinkedList<InputWindowToken> inputWindow = new LinkedList<InputWindowToken>();
+
+	  /// <summary>
+	  /// The number of input tokens in the next output token.  This is the "n" in
+	  /// "token n-grams".
+	  /// </summary>
+	  private CircularSequence gramSize;
+
+	  /// <summary>
+	  /// Shingle and unigram text is composed here.
+	  /// </summary>
+	  private StringBuilder gramBuilder = new StringBuilder();
+
+	  /// <summary>
+	  /// The token type attribute value to use - default is "shingle"
+	  /// </summary>
+	  private string tokenType = DEFAULT_TOKEN_TYPE;
+
+	  /// <summary>
+	  /// The string to use when joining adjacent tokens to form a shingle
+	  /// </summary>
+	  private string tokenSeparator = DEFAULT_TOKEN_SEPARATOR;
+
+	  /// <summary>
+	  /// The string to insert for each position at which there is no token
+	  /// (i.e., when position increment is greater than one).
+	  /// </summary>
+	  private char[] fillerToken = DEFAULT_FILLER_TOKEN.ToCharArray();
+
+	  /// <summary>
+	  /// By default, we output unigrams (individual tokens) as well as shingles
+	  /// (token n-grams).
+	  /// </summary>
+	  private bool outputUnigrams = true;
+
+	  /// <summary>
+	  /// By default, we don't override behavior of outputUnigrams.
+	  /// </summary>
+	  private bool outputUnigramsIfNoShingles = false;
+
+	  /// <summary>
+	  /// maximum shingle size (number of tokens)
+	  /// </summary>
+	  private int maxShingleSize;
+
+	  /// <summary>
+	  /// minimum shingle size (number of tokens)
+	  /// </summary>
+	  private int minShingleSize;
+
+	  /// <summary>
+	  /// The remaining number of filler tokens to be inserted into the input stream
+	  /// from which shingles are composed, to handle position increments greater
+	  /// than one.
+	  /// </summary>
+	  private int numFillerTokensToInsert;
+
+	  /// <summary>
+	  /// When the next input stream token has a position increment greater than
+	  /// one, it is stored in this field until sufficient filler tokens have been
+	  /// inserted to account for the position increment. 
+	  /// </summary>
+	  private AttributeSource nextInputStreamToken;
+
+	  /// <summary>
+	  /// Whether or not there is a next input stream token.
+	  /// </summary>
+	  private bool isNextInputStreamToken = false;
+
+	  /// <summary>
+	  /// Whether at least one unigram or shingle has been output at the current 
+	  /// position.
+	  /// </summary>
+	  private bool isOutputHere = false;
+
+	  /// <summary>
+	  /// true if no shingles have been output yet (for outputUnigramsIfNoShingles).
+	  /// </summary>
+	  internal bool noShingleOutput = true;
+
+	  /// <summary>
+	  /// Holds the State after input.end() was called, so we can
+	  /// restore it in our end() impl.
+	  /// </summary>
+	  private State endState;
+
+	  // Attributes of this filter's own output token. They are assigned in the primary
+	  // constructor below (every other constructor chains to it) because a C# field
+	  // initializer may not call the instance method addAttribute (compiler error CS0236).
+	  private readonly CharTermAttribute termAtt;
+	  private readonly OffsetAttribute offsetAtt;
+	  private readonly PositionIncrementAttribute posIncrAtt;
+	  private readonly PositionLengthAttribute posLenAtt;
+	  private readonly TypeAttribute typeAtt;
+
+
+	  /// <summary>
+	  /// Constructs a ShingleFilter with the specified shingle size from the
+	  /// <seealso cref="TokenStream"/> <code>input</code>
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  /// <param name="minShingleSize"> minimum shingle size produced by the filter. </param>
+	  /// <param name="maxShingleSize"> maximum shingle size produced by the filter. </param>
+	  /// <exception cref="System.ArgumentException"> if either size is below 2, or
+	  /// minShingleSize is greater than maxShingleSize </exception>
+	  public ShingleFilter(TokenStream input, int minShingleSize, int maxShingleSize) : base(input)
+	  {
+		// Register the attributes first, in the same order the field declarations used.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		offsetAtt = addAttribute(typeof(OffsetAttribute));
+		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+		posLenAtt = addAttribute(typeof(PositionLengthAttribute));
+		typeAtt = addAttribute(typeof(TypeAttribute));
+
+		// Max goes first: the MinShingleSize setter validates against maxShingleSize.
+		MaxShingleSize = maxShingleSize;
+		MinShingleSize = minShingleSize;
+	  }
+
+	  /// <summary>
+	  /// Constructs a ShingleFilter with the specified shingle size from the
+	  /// <seealso cref="TokenStream"/> <code>input</code>
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  /// <param name="maxShingleSize"> maximum shingle size produced by the filter. </param>
+	  public ShingleFilter(TokenStream input, int maxShingleSize) : this(input, DEFAULT_MIN_SHINGLE_SIZE, maxShingleSize)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a ShingleFilter with default shingle size: 2.
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  public ShingleFilter(TokenStream input) : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Construct a ShingleFilter with the specified token type for shingle tokens
+	  /// and the default shingle size: 2
+	  /// </summary>
+	  /// <param name="input"> input stream </param>
+	  /// <param name="tokenType"> token type for shingle tokens </param>
+	  public ShingleFilter(TokenStream input, string tokenType) : this(input, DEFAULT_MIN_SHINGLE_SIZE, DEFAULT_MAX_SHINGLE_SIZE)
+	  {
+		TokenType = tokenType;
+	  }
+
+	  /// <summary>
+	  /// Set the type of the shingle tokens produced by this filter.
+	  /// (default: "shingle")
+	  /// </summary>
+	  /// <param name="tokenType"> token tokenType </param>
+	  public string TokenType
+	  {
+		  set
+		  {
+			this.tokenType = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Shall the output stream contain the input tokens (unigrams) as well as
+	  /// shingles? (default: true.)
+	  /// </summary>
+	  /// <param name="outputUnigrams"> Whether or not the output stream shall contain
+	  /// the input tokens (unigrams) </param>
+	  public bool OutputUnigrams
+	  {
+		  set
+		  {
+			this.outputUnigrams = value;
+			gramSize = new CircularSequence(this);
+		  }
+	  }
+
+	  /// <summary>
+	  /// <para>Shall we override the behavior of outputUnigrams==false for those
+	  /// times when no shingles are available (because there are fewer than
+	  /// minShingleSize tokens in the input stream)? (default: false.)
+	  /// </para>
+	  /// <para>Note that if outputUnigrams==true, then unigrams are always output,
+	  /// regardless of whether any shingles are available.
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="outputUnigramsIfNoShingles"> Whether or not to output a single
+	  /// unigram when no shingles are available. </param>
+	  public bool OutputUnigramsIfNoShingles
+	  {
+		  set
+		  {
+			this.outputUnigramsIfNoShingles = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Set the max shingle size (default: 2)
+	  /// </summary>
+	  /// <param name="maxShingleSize"> max size of output shingles </param>
+	  public int MaxShingleSize
+	  {
+		  set
+		  {
+			if (value < 2)
+			{
+			  throw new System.ArgumentException("Max shingle size must be >= 2");
+			}
+			this.maxShingleSize = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// <para>Set the min shingle size (default: 2).
+	  /// </para>
+	  /// <para>This method requires that the passed in minShingleSize is not greater
+	  /// than maxShingleSize, so make sure that maxShingleSize is set before
+	  /// calling this method.
+	  /// </para>
+	  /// <para>The unigram output option is independent of the min shingle size.
+	  /// 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="minShingleSize"> min size of output shingles </param>
+	  public int MinShingleSize
+	  {
+		  set
+		  {
+			if (value < 2)
+			{
+			  throw new System.ArgumentException("Min shingle size must be >= 2");
+			}
+			if (value > maxShingleSize)
+			{
+			  throw new System.ArgumentException("Min shingle size must be <= max shingle size");
+			}
+			this.minShingleSize = value;
+			gramSize = new CircularSequence(this);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Sets the string to use when joining adjacent tokens to form a shingle </summary>
+	  /// <param name="tokenSeparator"> used to separate input stream tokens in output shingles </param>
+	  public string TokenSeparator
+	  {
+		  set
+		  {
+			this.tokenSeparator = null == value ? "" : value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Sets the string to insert for each position at which there is no token
+	  /// (i.e., when position increment is greater than one).
+	  /// </summary>
+	  /// <param name="fillerToken"> string to insert at each position where there is no token </param>
+	  public string FillerToken
+	  {
+		  set
+		  {
+			this.fillerToken = null == value ? new char[0] : value.ToCharArray();
+		  }
+	  }
+
+	  /// <summary>
+	  /// Emits the next unigram or shingle composed from the tokens held in the input
+	  /// window. The window is refilled first when the gram size is at its minimum value
+	  /// or the window holds fewer tokens than the current gram size.
+	  /// </summary>
+	  /// <returns> true if a token was written to this filter's attributes; false otherwise </returns>
+	  public override bool incrementToken()
+	  {
+		bool tokenAvailable = false;
+		int builtGramSize = 0;
+		if (gramSize.atMinValue() || inputWindow.Count < gramSize.Value)
+		{
+		  shiftInputWindow();
+		  gramBuilder.Length = 0;
+		}
+		else
+		{
+		  // Keep the text already composed for the previous gram size and extend it.
+		  builtGramSize = gramSize.PreviousValue;
+		}
+		if (inputWindow.Count >= gramSize.Value)
+		{
+		  bool isAllFiller = true;
+		  InputWindowToken nextToken = null;
+		  IEnumerator<InputWindowToken> iter = inputWindow.GetEnumerator();
+		  // .NET enumerators have no hasNext(): MoveNext() advances and reports whether an
+		  // element is available, after which Current reads that element.
+		  for (int gramNum = 1 ; iter.MoveNext() && builtGramSize < gramSize.Value ; ++gramNum)
+		  {
+			nextToken = iter.Current;
+			if (builtGramSize < gramNum)
+			{
+			  if (builtGramSize > 0)
+			  {
+				gramBuilder.Append(tokenSeparator);
+			  }
+			  gramBuilder.Append(nextToken.termAtt.buffer(), 0, nextToken.termAtt.length());
+			  ++builtGramSize;
+			}
+			if (isAllFiller && nextToken.isFiller)
+			{
+			  // A gram made only of filler tokens is not emitted: move on to the next size.
+			  if (gramNum == gramSize.Value)
+			  {
+				gramSize.advance();
+			  }
+			}
+			else
+			{
+			  isAllFiller = false;
+			}
+		  }
+		  if (!isAllFiller && builtGramSize == gramSize.Value)
+		  {
+			// Start from the first window token's attributes, then overwrite what differs.
+			inputWindow.First.Value.attSource.copyTo(this);
+			posIncrAtt.PositionIncrement = isOutputHere ? 0 : 1;
+			termAtt.setEmpty().append(gramBuilder);
+			if (gramSize.Value > 1)
+			{
+			  typeAtt.Type = tokenType;
+			  noShingleOutput = false;
+			}
+			offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset());
+			posLenAtt.PositionLength = builtGramSize;
+			isOutputHere = true;
+			gramSize.advance();
+			tokenAvailable = true;
+		  }
+		}
+		return tokenAvailable;
+	  }
+
+	  private bool exhausted;
+
+	  /// <summary>
+	  /// <para>Get the next token from the input stream.
+	  /// </para>
+	  /// <para>If the next token has <code>positionIncrement > 1</code>,
+	  /// <code>positionIncrement - 1</code> <seealso cref="#fillerToken"/>s are
+	  /// inserted first.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="target"> Where to put the new token; if null, a new instance is created. </param>
+	  /// <returns> On success, the populated token; null otherwise </returns>
+	  /// <exception cref="IOException"> if the input stream has a problem </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private InputWindowToken getNextToken(InputWindowToken target) throws java.io.IOException
+	  private InputWindowToken getNextToken(InputWindowToken target)
+	  {
+		InputWindowToken newTarget = target;
+		if (numFillerTokensToInsert > 0)
+		{
+		  if (null == target)
+		  {
+			newTarget = new InputWindowToken(this, nextInputStreamToken.cloneAttributes());
+		  }
+		  else
+		  {
+			nextInputStreamToken.copyTo(target.attSource);
+		  }
+		  // A filler token occupies no space
+		  newTarget.offsetAtt.setOffset(newTarget.offsetAtt.startOffset(), newTarget.offsetAtt.startOffset());
+		  newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.Length);
+		  newTarget.isFiller = true;
+		  --numFillerTokensToInsert;
+		}
+		else if (isNextInputStreamToken)
+		{
+		  if (null == target)
+		  {
+			newTarget = new InputWindowToken(this, nextInputStreamToken.cloneAttributes());
+		  }
+		  else
+		  {
+			nextInputStreamToken.copyTo(target.attSource);
+		  }
+		  isNextInputStreamToken = false;
+		  newTarget.isFiller = false;
+		}
+		else if (!exhausted)
+		{
+		  if (input.incrementToken())
+		  {
+			if (null == target)
+			{
+			  newTarget = new InputWindowToken(this, cloneAttributes());
+			}
+			else
+			{
+			  this.copyTo(target.attSource);
+			}
+			if (posIncrAtt.PositionIncrement > 1)
+			{
+			  // Each output shingle must contain at least one input token, 
+			  // so no more than (maxShingleSize - 1) filler tokens will be inserted.
+			  numFillerTokensToInsert = Math.Min(posIncrAtt.PositionIncrement - 1, maxShingleSize - 1);
+			  // Save the current token as the next input stream token
+			  if (null == nextInputStreamToken)
+			  {
+				nextInputStreamToken = cloneAttributes();
+			  }
+			  else
+			  {
+				this.copyTo(nextInputStreamToken);
+			  }
+			  isNextInputStreamToken = true;
+			  // A filler token occupies no space
+			  newTarget.offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
+			  newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.Length);
+			  newTarget.isFiller = true;
+			  --numFillerTokensToInsert;
+			}
+			else
+			{
+			  newTarget.isFiller = false;
+			}
+		  }
+		  else
+		  {
+			exhausted = true;
+			input.end();
+			endState = captureState();
+			numFillerTokensToInsert = Math.Min(posIncrAtt.PositionIncrement, maxShingleSize - 1);
+			if (numFillerTokensToInsert > 0)
+			{
+			  nextInputStreamToken = new AttributeSource(AttributeFactory);
+			  nextInputStreamToken.addAttribute(typeof(CharTermAttribute));
+			  OffsetAttribute newOffsetAtt = nextInputStreamToken.addAttribute(typeof(OffsetAttribute));
+			  newOffsetAtt.setOffset(offsetAtt.endOffset(), offsetAtt.endOffset());
+			  // Recurse/loop just once:
+			  return getNextToken(target);
+			}
+			else
+			{
+			  newTarget = null;
+			}
+		  }
+		}
+		else
+		{
+		  newTarget = null;
+		}
+		return newTarget;
+	  }
+
+	  /// <summary>
+	  /// Finishes the stream. Once the input has been drained, getNextToken has already
+	  /// called <code>input.end()</code> and captured the resulting state, so that state is
+	  /// restored here; otherwise the base implementation runs.
+	  /// </summary>
+	  public override void end()
+	  {
+		if (exhausted)
+		{
+		  restoreState(endState);
+		  return;
+		}
+		base.end();
+	  }
+
+	  /// <summary>
+	  /// <para>Refills the input window with input stream tokens, if available, after
+	  /// dropping the oldest token when the window is not empty. The dropped token
+	  /// object is recycled for the first newly read token.
+	  /// </para>
+	  /// <para>Resets the gram size to its minimum value and clears the
+	  /// "output at this position" flag.
+	  /// </para>
+	  /// </summary>
+	  private void shiftInputWindow()
+	  {
+		InputWindowToken firstToken = null;
+		if (inputWindow.Count > 0)
+		{
+		  // LinkedList<T>.RemoveFirst() returns void, so read the head before unlinking it.
+		  firstToken = inputWindow.First.Value;
+		  inputWindow.RemoveFirst();
+		}
+		while (inputWindow.Count < maxShingleSize)
+		{
+		  if (null != firstToken) // recycle the firstToken, if available
+		  {
+			if (null != getNextToken(firstToken))
+			{
+			  inputWindow.AddLast(firstToken); // the firstToken becomes the last
+			  firstToken = null;
+			}
+			else
+			{
+			  break; // end of input stream
+			}
+		  }
+		  else
+		  {
+			InputWindowToken nextToken = getNextToken(null);
+			if (null != nextToken)
+			{
+			  inputWindow.AddLast(nextToken);
+			}
+			else
+			{
+			  break; // end of input stream
+			}
+		  }
+		}
+		// Too few tokens remain for any shingle and none was output so far: allow unigrams
+		// by lowering the cycle's minimum to 1 (only when outputUnigramsIfNoShingles is set).
+		if (outputUnigramsIfNoShingles && noShingleOutput && gramSize.minValue > 1 && inputWindow.Count < minShingleSize)
+		{
+		  gramSize.minValue = 1;
+		}
+		gramSize.reset();
+		isOutputHere = false;
+	  }
+
+	  /// <summary>
+	  /// Resets the base filter and returns all per-stream state of this filter to its
+	  /// initial values so the instance can process another stream.
+	  /// </summary>
+	  public override void reset()
+	  {
+		base.reset();
+
+		// Rewind the gram-size cycle before anything else; its minValue is repaired last.
+		gramSize.reset();
+
+		inputWindow.Clear();
+		nextInputStreamToken = null;
+		endState = null;
+		numFillerTokensToInsert = 0;
+		exhausted = isNextInputStreamToken = isOutputHere = false;
+		noShingleOutput = true;
+
+		if (!outputUnigrams && outputUnigramsIfNoShingles)
+		{
+		  // shiftInputWindow may have lowered minValue to 1 for outputUnigramsIfNoShingles.
+		  gramSize.minValue = minShingleSize;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// <para>An instance of this class is used to maintain the number of input
+	  /// stream tokens that will be used to compose the next unigram or shingle:
+	  /// <seealso cref="#gramSize"/>.
+	  /// </para>
+	  /// <para><code>gramSize</code> will take on values from the circular sequence
+	  /// <b>{ [ 1, ] <seealso cref="#minShingleSize"/> [ , ... , <seealso cref="#maxShingleSize"/> ] }</b>.
+	  /// </para>
+	  /// <para>1 is included in the circular sequence only if 
+	  /// <seealso cref="#outputUnigrams"/> = true.
+	  /// </para>
+	  /// </summary>
+	  private class CircularSequence
+	  {
+		  private readonly ShingleFilter outerInstance;
+
+		internal int value;
+		internal int previousValue;
+		internal int minValue;
+
+		public CircularSequence(ShingleFilter outerInstance)
+		{
+			this.outerInstance = outerInstance;
+		  minValue = outerInstance.outputUnigrams ? 1 : outerInstance.minShingleSize;
+		  reset();
+		}
+
+		/// <returns> the current value. </returns>
+		/// <seealso cref= #advance() </seealso>
+		public virtual int Value
+		{
+			get
+			{
+			  return value;
+			}
+		}
+
+		/// <summary>
+		/// <para>Increments this circular number's value to the next member in the
+		/// circular sequence
+		/// <code>gramSize</code> will take on values from the circular sequence
+		/// <b>{ [ 1, ] <seealso cref="#minShingleSize"/> [ , ... , <seealso cref="#maxShingleSize"/> ] }</b>.
+		/// </para>
+		/// <para>1 is included in the circular sequence only if 
+		/// <seealso cref="#outputUnigrams"/> = true.
+		/// </para>
+		/// </summary>
+		public virtual void advance()
+		{
+		  previousValue = value;
+		  if (value == 1)
+		  {
+			value = outerInstance.minShingleSize;
+		  }
+		  else if (value == outerInstance.maxShingleSize)
+		  {
+			reset();
+		  }
+		  else
+		  {
+			++value;
+		  }
+		}
+
+		/// <summary>
+		/// <para>Sets this circular number's value to the first member of the 
+		/// circular sequence
+		/// </para>
+		/// <para><code>gramSize</code> will take on values from the circular sequence
+		/// <b>{ [ 1, ] <seealso cref="#minShingleSize"/> [ , ... , <seealso cref="#maxShingleSize"/> ] }</b>.
+		/// </para>
+		/// <para>1 is included in the circular sequence only if 
+		/// <seealso cref="#outputUnigrams"/> = true.
+		/// </para>
+		/// </summary>
+		public virtual void reset()
+		{
+		  previousValue = value = minValue;
+		}
+
+		/// <summary>
+		/// <para>Returns true if the current value is the first member of the circular
+		/// sequence.
+		/// </para>
+		/// <para>If <seealso cref="#outputUnigrams"/> = true, the first member of the circular
+		/// sequence will be 1; otherwise, it will be <seealso cref="#minShingleSize"/>.
+		/// 
+		/// </para>
+		/// </summary>
+		/// <returns> true if the current value is the first member of the circular
+		///  sequence; false otherwise </returns>
+		public virtual bool atMinValue()
+		{
+		  return value == minValue;
+		}
+
+		/// <returns> the value this instance had before the last advance() call </returns>
+		public virtual int PreviousValue
+		{
+			get
+			{
+			  return previousValue;
+			}
+		}
+	  }
+
+	  private class InputWindowToken
+	  {
+		  private readonly ShingleFilter outerInstance;
+
+		internal readonly AttributeSource attSource;
+		internal readonly CharTermAttribute termAtt;
+		internal readonly OffsetAttribute offsetAtt;
+		internal bool isFiller = false;
+
+		public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
+		{
+			this.outerInstance = outerInstance;
+		  this.attSource = attSource;
+		  this.termAtt = attSource.getAttribute(typeof(CharTermAttribute));
+		  this.offsetAtt = attSource.getAttribute(typeof(OffsetAttribute));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
new file mode 100644
index 0000000..429e9ce
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleFilterFactory.cs
@@ -0,0 +1,86 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.shingle
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ShingleFilter"/>.
+	/// <code>
+	/// &lt;fieldType name="text_shingle" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ShingleFilterFactory" minShingleSize="2" maxShingleSize="2"
+	///             outputUnigrams="true" outputUnigramsIfNoShingles="false" tokenSeparator=" " fillerToken="_"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</code>
+	/// </summary>
+	public class ShingleFilterFactory : TokenFilterFactory
+	{
+	  private readonly int minShingleSize;
+	  private readonly int maxShingleSize;
+	  private readonly bool outputUnigrams;
+	  private readonly bool outputUnigramsIfNoShingles;
+	  private readonly string tokenSeparator;
+	  private readonly string fillerToken;
+
+	  /// <summary>
+	  /// Creates a new ShingleFilterFactory from the given configuration arguments.
+	  /// </summary>
+	  /// <param name="args"> configuration; the keys read here are <c>maxShingleSize</c>,
+	  /// <c>minShingleSize</c>, <c>outputUnigrams</c>, <c>outputUnigramsIfNoShingles</c>,
+	  /// <c>tokenSeparator</c> and <c>fillerToken</c> </param>
+	  /// <exception cref="System.ArgumentException"> if a shingle size is below 2, if
+	  /// <c>minShingleSize</c> exceeds <c>maxShingleSize</c>, or if entries remain in
+	  /// <paramref name="args"/> after all known keys were read </exception>
+	  public ShingleFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		maxShingleSize = getInt(args, "maxShingleSize", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
+		if (maxShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Invalid maxShingleSize (" + maxShingleSize + ") - must be at least 2");
+		}
+		minShingleSize = getInt(args, "minShingleSize", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
+		if (minShingleSize < 2)
+		{
+		  throw new System.ArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be at least 2");
+		}
+		if (minShingleSize > maxShingleSize)
+		{
+		  throw new System.ArgumentException("Invalid minShingleSize (" + minShingleSize + ") - must be no greater than maxShingleSize (" + maxShingleSize + ")");
+		}
+		outputUnigrams = getBoolean(args, "outputUnigrams", true);
+		outputUnigramsIfNoShingles = getBoolean(args, "outputUnigramsIfNoShingles", false);
+		tokenSeparator = get(args, "tokenSeparator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
+		fillerToken = get(args, "fillerToken", ShingleFilter.DEFAULT_FILLER_TOKEN);
+		// NOTE(review): presumably the inherited getters remove each key they read, so
+		// whatever is left at this point is an unrecognised parameter - confirm in the base class.
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + describe(args));
+		}
+	  }
+
+	  /// <summary>
+	  /// Renders the entries of a dictionary as <c>{key=value, key=value}</c>. Concatenating
+	  /// the dictionary itself would only print its type name in .NET.
+	  /// </summary>
+	  private static string describe(IDictionary<string, string> args)
+	  {
+		List<string> pairs = new List<string>(args.Count);
+		foreach (KeyValuePair<string, string> entry in args)
+		{
+		  pairs.Add(entry.Key + "=" + entry.Value);
+		}
+		return "{" + string.Join(", ", pairs.ToArray()) + "}";
+	  }
+
+	  /// <summary>
+	  /// Wraps the input in a <seealso cref="ShingleFilter"/> configured with this factory's settings.
+	  /// </summary>
+	  /// <param name="input"> the token stream to wrap </param>
+	  /// <returns> the configured filter </returns>
+	  public override ShingleFilter create(TokenStream input)
+	  {
+		ShingleFilter r = new ShingleFilter(input, minShingleSize, maxShingleSize);
+		r.OutputUnigrams = outputUnigrams;
+		r.OutputUnigramsIfNoShingles = outputUnigramsIfNoShingles;
+		r.TokenSeparator = tokenSeparator;
+		r.FillerToken = fillerToken;
+		return r;
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
new file mode 100644
index 0000000..a04fd51
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
@@ -0,0 +1,79 @@
+using System;
+
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Attempts to parse the text of the <seealso cref="CharTermAttribute"/> as a date using a <c>DateFormat</c>.
+	/// If the text parses as a date, the state is accepted for the sink.
+	/// </summary>
+	public class DateRecognizerSinkFilter : TeeSinkTokenFilter.SinkFilter
+	{
+	  public const string DATE_TYPE = "date";
+
+	  protected internal DateFormat dateFormat;
+	  protected internal CharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Uses <c>DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT)</c> as
+	  /// the <c>DateFormat</c> object.
+	  /// </summary>
+	  public DateRecognizerSinkFilter() : this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Uses the supplied <c>DateFormat</c> to recognise dates.
+	  /// </summary>
+	  /// <param name="dateFormat"> the format used to parse term text </param>
+	  public DateRecognizerSinkFilter(DateFormat dateFormat)
+	  {
+		this.dateFormat = dateFormat;
+	  }
+
+	  /// <summary>
+	  /// Accepts the current state when the term text can be parsed as a date.
+	  /// </summary>
+	  /// <param name="source"> the attribute source the term attribute is obtained from on first use </param>
+	  /// <returns> true if parsing succeeds; false if it raises a <c>ParseException</c> </returns>
+	  public override bool accept(AttributeSource source)
+	  {
+		// Fetch the term attribute once and reuse it on later calls.
+		if (termAtt == null)
+		{
+		  termAtt = source.addAttribute(typeof(CharTermAttribute));
+		}
+		try
+		{
+		  // We don't care about the date, just that we can parse it as a date.
+		  // A DateTime is a value type and can never be null, so a parse that
+		  // returns normally is all the evidence needed.
+		  dateFormat.parse(termAtt.ToString());
+		  return true;
+		}
+		catch (ParseException)
+		{
+		  // Deliberately not rethrown: unparsable text simply is not a date.
+		  return false;
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
new file mode 100644
index 0000000..f6857d9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
@@ -0,0 +1,300 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AttributeImpl = org.apache.lucene.util.AttributeImpl;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// This TokenFilter provides the ability to set aside attribute states
+	/// that have already been analyzed.  This is useful in situations where multiple fields share
+	/// many common analysis steps and then go their separate ways.
+	/// <p/>
+	/// It is also useful for doing things like entity extraction or proper noun analysis as
+	/// part of the analysis workflow and saving off those tokens for use in another field.
+	/// 
+	/// <pre class="prettyprint">
+	/// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader1));
+	/// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+	/// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+	/// 
+	/// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader2));
+	/// source2.addSinkTokenStream(sink1);
+	/// source2.addSinkTokenStream(sink2);
+	/// 
+	/// TokenStream final1 = new LowerCaseFilter(version, source1);
+	/// TokenStream final2 = source2;
+	/// TokenStream final3 = new EntityDetect(sink1);
+	/// TokenStream final4 = new URLDetect(sink2);
+	/// 
+	/// d.add(new TextField("f1", final1, Field.Store.NO));
+	/// d.add(new TextField("f2", final2, Field.Store.NO));
+	/// d.add(new TextField("f3", final3, Field.Store.NO));
+	/// d.add(new TextField("f4", final4, Field.Store.NO));
+	/// </pre>
+	/// In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
+	/// <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
+	/// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
+	/// It is important that tees are consumed before sinks (in the above example, the tees' field names must be
+	/// less than the sinks' field names). If you are not sure which stream is consumed first, you can simply
+	/// add another sink and then pass all tokens to the sinks at once using <seealso cref="consumeAllTokens"/>.
+	/// This TokenFilter is exhausted after this. To do so, change
+	/// the example above to:
+	/// <pre class="prettyprint">
+	/// ...
+	/// TokenStream final1 = new LowerCaseFilter(version, source1.newSinkTokenStream());
+	/// TokenStream final2 = source2.newSinkTokenStream();
+	/// source1.consumeAllTokens();
+	/// source2.consumeAllTokens();
+	/// ...
+	/// </pre>
+	/// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+	/// <para>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+	/// </para>
+	/// </summary>
+	public sealed class TeeSinkTokenFilter : TokenFilter
+	{
+	  // Sinks fed by this tee, held through weak references. A List<T> backs the field
+	  // because LinkedList<T> does not implement IList<T> in .NET.
+	  private readonly IList<WeakReference<SinkTokenStream>> sinks = new List<WeakReference<SinkTokenStream>>();
+
+	  /// <summary>
+	  /// Instantiates a new TeeSinkTokenFilter.
+	  /// </summary>
+	  /// <param name="input"> the token stream this tee reads from </param>
+	  public TeeSinkTokenFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream.
+	  /// </summary>
+	  public SinkTokenStream newSinkTokenStream()
+	  {
+		return newSinkTokenStream(ACCEPT_ALL_FILTER);
+	  }
+
+	  /// <summary>
+	  /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream
+	  /// that pass the supplied filter.
+	  /// </summary>
+	  /// <param name="filter"> decides which states are stored in the new sink </param>
+	  /// <seealso cref="SinkFilter"/>
+	  public SinkTokenStream newSinkTokenStream(SinkFilter filter)
+	  {
+		SinkTokenStream sink = new SinkTokenStream(this.cloneAttributes(), filter);
+		// C# has no diamond operator: the type argument must be spelled out.
+		this.sinks.Add(new WeakReference<SinkTokenStream>(sink));
+		return sink;
+	  }
+
+	  /// <summary>
+	  /// Adds a <seealso cref="SinkTokenStream"/> created by another <code>TeeSinkTokenFilter</code>
+	  /// to this one. The supplied stream will also receive all consumed tokens.
+	  /// This method can be used to pass tokens from two different tees to one sink.
+	  /// </summary>
+	  /// <param name="sink"> the sink to attach to this tee </param>
+	  /// <exception cref="System.ArgumentException"> if the sink's attribute factory
+	  /// is not equal to this tee's attribute factory </exception>
+	  public void addSinkTokenStream(SinkTokenStream sink)
+	  {
+		// check that sink has correct factory
+		if (!this.AttributeFactory.Equals(sink.AttributeFactory))
+		{
+		  throw new System.ArgumentException("The supplied sink is not compatible to this tee");
+		}
+		// add eventually missing attribute impls to the existing sink
+		IEnumerator<AttributeImpl> impls = this.cloneAttributes().AttributeImplsIterator;
+		while (impls.MoveNext())
+		{
+		  sink.addAttributeImpl(impls.Current);
+		}
+		// C# has no diamond operator: the type argument must be spelled out.
+		this.sinks.Add(new WeakReference<SinkTokenStream>(sink));
+	  }
+
+	  /// <summary>
+	  /// <code>TeeSinkTokenFilter</code> passes tokens to the added sinks as it is
+	  /// consumed itself. Call this method to make sure every token of the input
+	  /// stream has been passed on: it keeps calling <code>incrementToken()</code>
+	  /// until that returns false, so this instance is exhausted afterwards.
+	  /// </summary>
+	  public void consumeAllTokens()
+	  {
+		bool hasMore;
+		do
+		{
+		  hasMore = incrementToken();
+		} while (hasMore);
+	  }
+
+	  /// <summary>
+	  /// Advances the input stream by one token and offers the resulting state to every
+	  /// sink that is still alive; each sink whose filter accepts it stores the state.
+	  /// </summary>
+	  /// <returns> true if the input produced a token; false once it is exhausted </returns>
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		// capture state lazily - maybe no SinkFilter accepts this state
+		AttributeSource.State state = null;
+		foreach (WeakReference<SinkTokenStream> @ref in sinks)
+		{
+		  SinkTokenStream sink;
+		  if (!@ref.TryGetTarget(out sink))
+		  {
+			// the weak reference no longer has a live target
+			continue;
+		  }
+		  if (sink.accept(this))
+		  {
+			if (state == null)
+			{
+			  state = this.captureState();
+			}
+			sink.addState(state);
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Ends this stream, then captures the resulting state and hands it to every sink
+	  /// that is still alive as that sink's final state.
+	  /// </summary>
+	  public override void end()
+	  {
+		base.end();
+		AttributeSource.State finalState = captureState();
+		foreach (WeakReference<SinkTokenStream> @ref in sinks)
+		{
+		  SinkTokenStream sink;
+		  // TryGetTarget yields false when the weak reference has no live target.
+		  if (@ref.TryGetTarget(out sink))
+		  {
+			sink.FinalState = finalState;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// A filter that decides which <seealso cref="AttributeSource"/> states to store in the sink.
+	  /// </summary>
+	  public abstract class SinkFilter
+	  {
+		/// <summary>
+		/// Decides whether the current state of the passed-in <seealso cref="AttributeSource"/>
+		/// shall be stored in the sink.
+		/// </summary>
+		/// <param name="source"> the attribute source whose current state is being considered </param>
+		/// <returns> true if the state shall be stored; false otherwise </returns>
+		public abstract bool accept(AttributeSource source);
+
+		/// <summary>
+		/// Reset hook. Does nothing by default and can optionally be overridden.
+		/// NOTE(review): the ported documentation says this is called by SinkTokenStream.reset(),
+		/// but no such call is visible in SinkTokenStream.reset() in this file - confirm.
+		/// </summary>
+		public virtual void reset()
+		{
+		}
+	  }
+
+	  /// <summary>
+	  /// TokenStream output from a tee with optional filtering. States accepted by the
+	  /// filter are cached as the tee is consumed and replayed when this stream is consumed.
+	  /// </summary>
+	  public sealed class SinkTokenStream : TokenStream
+	  {
+		// Cached states in arrival order. A List<T> backs the field because
+		// LinkedList<T> does not implement IList<T> in .NET.
+		internal readonly IList<AttributeSource.State> cachedStates = new List<AttributeSource.State>();
+		// State handed over by the tee's end(); restored by this stream's own end().
+		internal AttributeSource.State finalState;
+		// Replay cursor over cachedStates; null until the sink is first consumed or reset.
+		internal IEnumerator<AttributeSource.State> it = null;
+		internal SinkFilter filter;
+
+		internal SinkTokenStream(AttributeSource source, SinkFilter filter) : base(source)
+		{
+		  this.filter = filter;
+		}
+
+		/// <summary>
+		/// Asks this sink's filter whether the current state of <paramref name="source"/> should be stored.
+		/// </summary>
+		internal bool accept(AttributeSource source)
+		{
+		  return filter.accept(source);
+		}
+
+		/// <summary>
+		/// Appends a state to the cache.
+		/// </summary>
+		/// <exception cref="System.InvalidOperationException"> if replay of this sink has
+		/// already started (the cursor exists) </exception>
+		internal void addState(AttributeSource.State state)
+		{
+		  if (it != null)
+		  {
+			throw new System.InvalidOperationException("The tee must be consumed before sinks are consumed.");
+		  }
+		  cachedStates.Add(state);
+		}
+
+		/// <summary>
+		/// Sets the state that <c>end()</c> restores.
+		/// </summary>
+		internal AttributeSource.State FinalState
+		{
+			set
+			{
+			  this.finalState = value;
+			}
+		}
+
+		/// <summary>
+		/// Restores the next cached state, if there is one.
+		/// </summary>
+		/// <returns> true if a state was restored; false when the cache is exhausted </returns>
+		public override bool incrementToken()
+		{
+		  // lazy init the iterator
+		  if (it == null)
+		  {
+			it = cachedStates.GetEnumerator();
+		  }
+
+		  // MoveNext()/Current replace Java's hasNext()/next() pair.
+		  if (!it.MoveNext())
+		  {
+			return false;
+		  }
+
+		  restoreState(it.Current);
+		  return true;
+		}
+
+		/// <summary>
+		/// Restores the final state, when one has been set.
+		/// </summary>
+		public override void end()
+		{
+		  if (finalState != null)
+		  {
+			restoreState(finalState);
+		  }
+		}
+
+		/// <summary>
+		/// Starts a fresh replay by creating a new cursor over the cached states.
+		/// </summary>
+		public override void reset()
+		{
+		  it = cachedStates.GetEnumerator();
+		}
+	  }
+
+	  // Shared filter used by the parameterless newSinkTokenStream(): accepts every state.
+	  private static readonly SinkFilter ACCEPT_ALL_FILTER = new AcceptAllSinkFilter();
+
+	  /// <summary>
+	  /// <see cref="SinkFilter"/> whose <c>accept</c> always returns true.
+	  /// </summary>
+	  private sealed class AcceptAllSinkFilter : SinkFilter
+	  {
+		  public override bool accept(AttributeSource source)
+		  {
+			return true;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
new file mode 100644
index 0000000..568fea6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
@@ -0,0 +1,73 @@
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Counts the states offered to it and accepts those whose zero-based count lies in
+	/// the range from lower (inclusive) to upper (exclusive).
+	/// </summary>
+	public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
+	{
+	  private readonly int lower;
+	  private readonly int upper;
+	  // Number of accept() calls since construction or the last reset().
+	  private int count;
+
+	  /// <summary>
+	  /// Creates a filter for the range [lower, upper).
+	  /// </summary>
+	  /// <param name="lower"> first accepted count; must be at least 1 </param>
+	  /// <param name="upper"> first count that is no longer accepted; must not be less than lower </param>
+	  /// <exception cref="System.ArgumentException"> if lower is below 1 or greater than upper </exception>
+	  public TokenRangeSinkFilter(int lower, int upper)
+	  {
+		if (lower < 1)
+		{
+		  throw new System.ArgumentException("lower must be greater than zero");
+		}
+		if (lower > upper)
+		{
+		  throw new System.ArgumentException("lower must not be greater than upper");
+		}
+		this.lower = lower;
+		this.upper = upper;
+	  }
+
+
+	  /// <summary>
+	  /// Reports whether the count before this call falls inside the range, then increments the count.
+	  /// </summary>
+	  /// <param name="source"> not inspected by this filter </param>
+	  public override bool accept(AttributeSource source)
+	  {
+		int position = count;
+		count++;
+		return lower <= position && position < upper;
+	  }
+
+	  /// <summary>
+	  /// Sets the count back to zero.
+	  /// </summary>
+	  public override void reset()
+	  {
+		count = 0;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
new file mode 100644
index 0000000..f844a1c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
@@ -0,0 +1,50 @@
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using AttributeSource = org.apache.lucene.util.AttributeSource;
+
+	/// <summary>
+	/// Accepts a state for the sink when its type attribute equals a given type.
+	/// </summary>
+	public class TokenTypeSinkFilter : TeeSinkTokenFilter.SinkFilter
+	{
+	  private string typeToMatch;
+	  private TypeAttribute typeAtt;
+
+	  /// <summary>
+	  /// Creates a filter for the given type.
+	  /// </summary>
+	  /// <param name="typeToMatch"> the type value a token must have to be accepted </param>
+	  public TokenTypeSinkFilter(string typeToMatch)
+	  {
+		this.typeToMatch = typeToMatch;
+	  }
+
+	  /// <summary>
+	  /// Compares the configured type with the value of the type attribute.
+	  /// </summary>
+	  /// <param name="source"> the attribute source the type attribute is obtained from on first use </param>
+	  /// <returns> true if the two are equal; false otherwise </returns>
+	  public override bool accept(AttributeSource source)
+	  {
+		// Fetch the type attribute once and reuse it on later calls.
+		if (typeAtt == null)
+		{
+		  typeAtt = source.addAttribute(typeof(TypeAttribute));
+		}
+
+		bool matches = typeToMatch.Equals(typeAtt.type());
+		return matches;
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
new file mode 100644
index 0000000..1ce0ffd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballAnalyzer.cs
@@ -0,0 +1,102 @@
+using System;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using EnglishPossessiveFilter = org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+	using org.apache.lucene.analysis.standard;
+	using TurkishLowerCaseFilter = org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>,
+	/// <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/> and <seealso cref="SnowballFilter"/>.
+	/// <para>
+	/// Available stemmers are listed in org.tartarus.snowball.ext.  The name of a
+	/// stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+	/// org.tartarus.snowball.ext.EnglishStemmer is named "English".
+	/// </para>
+	/// <para><b>NOTE</b>: This class uses the same <seealso cref="Version"/>
+	/// dependent settings as <seealso cref="StandardAnalyzer"/>, with the following addition:
+	/// <ul>
+	///   <li> As of 3.1, uses <seealso cref="TurkishLowerCaseFilter"/> for Turkish language.</li>
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// Deprecated (3.1): use the language-specific analyzer in modules/analysis instead.
+	/// This analyzer will be removed in Lucene 5.0.
+	/// </para>
+	/// </summary>
+	[Obsolete("(3.1) Use the language-specific analyzer in modules/analysis instead.")]
+	public sealed class SnowballAnalyzer : Analyzer
+	{
+	  private string name;
+	  private CharArraySet stopSet;
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Builds the named analyzer with no stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> the version whose behaviour is matched </param>
+	  /// <param name="name"> the stemmer name, e.g. "English" </param>
+	  public SnowballAnalyzer(Version matchVersion, string name)
+	  {
+		this.matchVersion = matchVersion;
+		this.name = name;
+	  }
+
+	  /// <summary>
+	  /// Builds the named analyzer with the given stop words. The set is copied and
+	  /// wrapped as unmodifiable before it is stored.
+	  /// </summary>
+	  /// <param name="matchVersion"> the version whose behaviour is matched </param>
+	  /// <param name="name"> the stemmer name, e.g. "English" </param>
+	  /// <param name="stopWords"> the stop words to remove </param>
+	  public SnowballAnalyzer(Version matchVersion, string name, CharArraySet stopWords) : this(matchVersion, name)
+	  {
+		CharArraySet copy = CharArraySet.copy(matchVersion, stopWords);
+		stopSet = CharArraySet.unmodifiableSet(copy);
+	  }
+
+	  /// <summary>
+	  /// Constructs a <seealso cref="StandardTokenizer"/> filtered by a <seealso cref="StandardFilter"/>,
+	  /// a <seealso cref="LowerCaseFilter"/>, a <seealso cref="StopFilter"/> (only when stop words
+	  /// were supplied) and a <seealso cref="SnowballFilter"/>.
+	  /// </summary>
+	  public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream chain = new StandardFilter(matchVersion, source);
+
+		// remove the possessive 's for english stemmers
+		if (matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  if (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins"))
+		  {
+			chain = new EnglishPossessiveFilter(chain);
+		  }
+		}
+
+		// Use a special lowercase filter for turkish, the stemmer expects it.
+		bool turkish = matchVersion.onOrAfter(Version.LUCENE_31) && name.Equals("Turkish");
+		if (!turkish)
+		{
+		  chain = new LowerCaseFilter(matchVersion, chain);
+		}
+		else
+		{
+		  chain = new TurkishLowerCaseFilter(chain);
+		}
+
+		if (stopSet != null)
+		{
+		  chain = new StopFilter(matchVersion, chain, stopSet);
+		}
+		chain = new SnowballFilter(chain, name);
+		return new TokenStreamComponents(source, chain);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
new file mode 100644
index 0000000..58a8361
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballFilter.cs
@@ -0,0 +1,129 @@
+using System;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TurkishLowerCaseFilter = org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
+	using SnowballProgram = org.tartarus.snowball.SnowballProgram;
+
+	/// <summary>
+	/// A filter that stems words using a Snowball-generated stemmer.
+	/// 
+	/// Available stemmers are listed in org.tartarus.snowball.ext.
+	/// <para><b>NOTE</b>: SnowballFilter expects lowercased text.
+	/// <ul>
+	///  <li>For the Turkish language, see <seealso cref="TurkishLowerCaseFilter"/>.</li>
+	///  <li>For other languages, see <seealso cref="LowerCaseFilter"/>.</li>
+	/// </ul>
+	/// </para>
+	/// 
+	/// <para>
+	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	/// certain terms from being passed to the stemmer, the keyword flag of the
+	/// <seealso cref="KeywordAttribute"/> should be set to <code>true</code>
+	/// in a previous <seealso cref="TokenStream"/>.
+	/// 
+	/// Note: For including the original term as well as the stemmed version, see
+	/// org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory.
+	/// </para>
+	/// </summary>
+	public sealed class SnowballFilter : TokenFilter
+	{
+
+	  private readonly SnowballProgram stemmer;
+
+	  // Assigned in the constructors: C# field initializers cannot call instance methods.
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  /// <summary>
+	  /// Constructs a stemming filter around an already created stemmer.
+	  /// </summary>
+	  /// <param name="input"> the input tokens to stem </param>
+	  /// <param name="stemmer"> the stemmer to apply </param>
+	  public SnowballFilter(TokenStream input, SnowballProgram stemmer) : base(input)
+	  {
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+		this.stemmer = stemmer;
+	  }
+
+	  /// <summary>
+	  /// Construct the named stemming filter.
+	  /// 
+	  /// Available stemmers are listed in org.tartarus.snowball.ext.
+	  /// The name of a stemmer is the part of the class name before "Stemmer",
+	  /// e.g., the stemmer in org.tartarus.snowball.ext.EnglishStemmer is named "English".
+	  /// </summary>
+	  /// <param name="in"> the input tokens to stem </param>
+	  /// <param name="name"> the name of a stemmer </param>
+	  /// <exception cref="System.ArgumentException"> if the stemmer class cannot be
+	  /// found, is not a <seealso cref="SnowballProgram"/>, or cannot be instantiated </exception>
+	  public SnowballFilter(TokenStream @in, string name) : base(@in)
+	  {
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+		//Class.forName is frowned upon in place of the ResourceLoader but in this case,
+		// the factory will use the other constructor so that the program is already loaded.
+		stemmer = createStemmer(name);
+	  }
+
+	  /// <summary>
+	  /// Looks up the stemmer type by name and creates an instance through its
+	  /// parameterless constructor. Any failure is reported as an ArgumentException.
+	  /// </summary>
+	  private static SnowballProgram createStemmer(string name)
+	  {
+		try
+		{
+		  // throwOnError: an unknown type raises instead of yielding null.
+		  // NOTE(review): a name without an assembly qualifier is only resolved in the
+		  // calling assembly and the core library - confirm where the stemmers live.
+		  Type stemClass = Type.GetType("org.tartarus.snowball.ext." + name + "Stemmer", true);
+		  // Verify the type before instantiating it (the role of Java's asSubclass).
+		  if (!typeof(SnowballProgram).IsAssignableFrom(stemClass))
+		  {
+			throw new InvalidCastException(stemClass.FullName + " is not a SnowballProgram");
+		  }
+		  return (SnowballProgram) Activator.CreateInstance(stemClass);
+		}
+		catch (Exception e)
+		{
+		  throw new System.ArgumentException("Invalid stemmer class specified: " + name, e);
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns the next input Token, after being stemmed. Tokens flagged as keywords
+	  /// are passed through unchanged.
+	  /// </summary>
+	  /// <returns> true if a token is available; false once the input is exhausted </returns>
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+		if (keywordAttr.Keyword)
+		{
+		  // keyword tokens bypass the stemmer
+		  return true;
+		}
+
+		char[] termBuffer = termAtt.buffer();
+		int length = termAtt.length();
+		stemmer.setCurrent(termBuffer, length);
+		stemmer.stem();
+		char[] finalTerm = stemmer.CurrentBuffer;
+		int newLength = stemmer.CurrentBufferLength;
+		if (finalTerm != termBuffer)
+		{
+		  // the stemmer ended up with a different array: copy its content into the term
+		  termAtt.copyBuffer(finalTerm, 0, newLength);
+		}
+		else
+		{
+		  // same array as the term buffer: only the length needs updating
+		  termAtt.Length = newLength;
+		}
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
new file mode 100644
index 0000000..310391e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Snowball/SnowballPorterFilterFactory.cs
@@ -0,0 +1,101 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+	using SnowballProgram = org.tartarus.snowball.SnowballProgram;
+
+	/// <summary>
+	/// Factory for <seealso cref="SnowballFilter"/>, with configurable language
+	/// <para>
+	/// Note: Use of the "Lovins" stemmer is not recommended, as it is implemented with reflection.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </para>
+	/// </summary>
+	public class SnowballPorterFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  /// <summary>
+	  /// Name of the argument whose value is handed to <c>getWordSet</c> to load the protected words. </summary>
+	  public const string PROTECTED_TOKENS = "protected";
+
+	  private readonly string language;
+	  private readonly string wordFiles;
+	  private Type stemClass;
+	  private CharArraySet protectedWords = null;
+
+	  /// <summary>
+	  /// Creates a new SnowballPorterFilterFactory.
+	  /// Reads the "language" argument (defaulting to "English") and the optional
+	  /// <see cref="PROTECTED_TOKENS"/> argument. </summary>
+	  /// <param name="args"> factory arguments </param>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="args"/> still contains
+	  /// entries after the known arguments have been read </exception>
+	  public SnowballPorterFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// NOTE(review): presumably get(...) removes the entries it reads, so anything left is unknown - confirm against TokenFilterFactory.
+		language = get(args, "language", "English");
+		wordFiles = get(args, PROTECTED_TOKENS);
+		if (args.Count > 0)
+		{
+		  // A .NET dictionary's ToString() only yields its type name, so list the offending keys explicitly.
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+		}
+	  }
+
+	  /// <summary>
+	  /// Resolves the stemmer type for the configured language and, when a protected-words
+	  /// argument was given, loads the protected word set through the loader. </summary>
+	  /// <param name="loader"> resource loader used to look up the stemmer class and the word files </param>
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		string className = "org.tartarus.snowball.ext." + language + "Stemmer";
+		stemClass = loader.newInstance(className, typeof(SnowballProgram)).GetType();
+
+		if (wordFiles != null)
+		{
+		  protectedWords = getWordSet(loader, wordFiles, false);
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps <paramref name="input"/> in a <seealso cref="SnowballFilter"/> driven by a fresh stemmer instance.
+	  /// When protected words were loaded, the input is first wrapped in a
+	  /// <seealso cref="SetKeywordMarkerFilter"/>. </summary>
+	  /// <param name="input"> the token stream to stem </param>
+	  /// <returns> the stemming filter </returns>
+	  /// <exception cref="System.InvalidOperationException"> if the stemmer cannot be instantiated </exception>
+	  public override TokenFilter create(TokenStream input)
+	  {
+		SnowballProgram program;
+		try
+		{
+		  // System.Type has no newInstance(); Activator.CreateInstance invokes the parameterless constructor.
+		  program = (SnowballProgram)Activator.CreateInstance(stemClass);
+		}
+		catch (Exception e)
+		{
+		  throw new InvalidOperationException("Error instantiating stemmer for language " + language + " from class " + stemClass, e);
+		}
+
+		if (protectedWords != null)
+		{
+		  input = new SetKeywordMarkerFilter(input, protectedWords);
+		}
+		return new SnowballFilter(input, program);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
new file mode 100644
index 0000000..f2387f1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -0,0 +1,161 @@
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using LowerCaseFilter = LowerCaseFilter;
+	using StopAnalyzer = StopAnalyzer;
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/// <summary>
+	/// Filters <seealso cref="ClassicTokenizer"/> with <seealso cref="ClassicFilter"/>,
+	/// <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>, using a list of
+	/// English stop words.
+	/// 
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ClassicAnalyzer:
+	/// <ul>
+	///   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+	///         supplementary characters in stopwords</li>
+	///   <li> As of 2.9, StopFilter preserves position
+	///        increments</li>
+	///   <li> As of 2.4, Tokens incorrectly identified as acronyms
+	///        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</li>
+	/// </ul>
+	/// 
+	/// ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1. 
+	/// As of 3.1, <seealso cref="StandardAnalyzer"/> implements Unicode text segmentation,
+	/// as specified by UAX#29.
+	/// </para>
+	/// </summary>
+	public sealed class ClassicAnalyzer : StopwordAnalyzerBase
+	{
+
+	  /// <summary>
+	  /// Token length limit applied when none has been set explicitly. </summary>
+	  public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+	  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+	  /// <summary>
+	  /// Stop word set used by default; this is the same instance as
+	  /// <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c>.
+	  /// </summary>
+	  public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+	  /// <summary>
+	  /// Creates an analyzer that uses the supplied stop words. </summary>
+	  /// <param name="matchVersion"> Lucene version to match </param>
+	  /// <param name="stopWords"> stop words </param>
+	  public ClassicAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates an analyzer that uses <see cref="STOP_WORDS_SET"/>. </summary>
+	  /// <param name="matchVersion"> Lucene version to match </param>
+	  public ClassicAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates an analyzer whose stop words are obtained by passing the given reader
+	  /// to <c>loadStopwordSet</c>. </summary>
+	  /// <param name="matchVersion"> Lucene version to match </param>
+	  /// <param name="stopwords"> Reader to read stop words from </param>
+	  public ClassicAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Maximum allowed token length. The value is copied onto the tokenizer when the
+	  /// components are created and again each time those components are given a new
+	  /// reader, so a change only takes effect from that point on.
+	  /// </summary>
+	  public int MaxTokenLength
+	  {
+		get
+		{
+		  return maxTokenLength;
+		}
+		set
+		{
+		  maxTokenLength = value;
+		}
+	  }
+
+	  /// <summary>
+	  /// Assembles the analysis chain: ClassicTokenizer, then ClassicFilter,
+	  /// LowerCaseFilter and StopFilter, in that order. </summary>
+	  /// <param name="fieldName"> name of the field (not used by this method) </param>
+	  /// <param name="reader"> source of the text to tokenize </param>
+	  /// <returns> the components wrapping the tokenizer and the end of the filter chain </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		ClassicTokenizer tokenizer = new ClassicTokenizer(matchVersion, reader);
+		tokenizer.MaxTokenLength = maxTokenLength;
+
+		TokenStream normalized = new ClassicFilter(tokenizer);
+		TokenStream lowerCased = new LowerCaseFilter(matchVersion, normalized);
+		TokenStream filtered = new StopFilter(matchVersion, lowerCased, stopwords);
+
+		return new ClassicTokenStreamComponents(this, tokenizer, filtered, reader);
+	  }
+
+	  /// <summary>
+	  /// Components that re-apply the analyzer's current maximum token length to the
+	  /// tokenizer whenever a new reader is assigned. </summary>
+	  private class ClassicTokenStreamComponents : TokenStreamComponents
+	  {
+		private readonly ClassicAnalyzer owner;
+
+		// Stored by the constructor but not read anywhere inside this class.
+		private Reader reader;
+		private ClassicTokenizer tokenizer;
+
+		public ClassicTokenStreamComponents(ClassicAnalyzer owner, ClassicTokenizer tokenizer, TokenStream sink, Reader reader) : base(tokenizer, sink)
+		{
+		  this.owner = owner;
+		  this.reader = reader;
+		  this.tokenizer = tokenizer;
+		}
+
+		protected internal override Reader Reader
+		{
+		  set
+		  {
+			// Pick up any change made to the analyzer's limit before handing over the new reader.
+			tokenizer.MaxTokenLength = owner.maxTokenLength;
+			base.Reader = value;
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
new file mode 100644
index 0000000..9ee4b32
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilter.cs
@@ -0,0 +1,92 @@
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+	/// <summary>
+	/// Normalizes tokens extracted with <seealso cref="ClassicTokenizer"/>. </summary>
+
+	public class ClassicFilter : TokenFilter
+	{
+
+	  private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+	  private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+
+	  // this filter uses the type attribute to decide how to rewrite the term
+	  private readonly TypeAttribute typeAtt;
+	  private readonly CharTermAttribute termAtt;
+
+	  /// <summary>
+	  /// Construct filtering <i>in</i>. </summary>
+	  /// <param name="in"> the token stream to normalize </param>
+	  public ClassicFilter(TokenStream @in) : base(@in)
+	  {
+		// Attributes are registered here rather than in field initializers:
+		// a C# field initializer cannot call an instance method (compiler error CS0236).
+		typeAtt = addAttribute(typeof(TypeAttribute));
+		termAtt = addAttribute(typeof(CharTermAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token of the wrapped stream and normalizes it.
+	  /// <para>Removes <tt>'s</tt> (or <tt>'S</tt>) from the end of tokens typed as apostrophe.
+	  /// </para>
+	  /// <para>Removes dots from tokens typed as acronym.
+	  /// </para>
+	  /// </summary>
+	  /// <returns> <c>true</c> if a token is available, <c>false</c> once the wrapped stream is exhausted </returns>
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = termAtt.buffer();
+		int bufferLength = termAtt.length();
+		string type = typeAtt.type();
+
+		if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+		{
+		  // Strip last 2 characters off
+		  termAtt.Length = bufferLength - 2;
+		}
+		else if (type == ACRONYM_TYPE) // remove dots
+		{
+		  // Compact the buffer in place: upto trails i and marks where the next kept character goes.
+		  int upto = 0;
+		  for (int i = 0; i < bufferLength; i++)
+		  {
+			char c = buffer[i];
+			if (c != '.')
+			{
+			  buffer[upto++] = c;
+			}
+		  }
+		  termAtt.Length = upto;
+		}
+
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
new file mode 100644
index 0000000..2107ccc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ClassicFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.ClassicTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ClassicFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ClassicFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ClassicFilterFactory. This factory reads no arguments of its own. </summary>
+	  /// <param name="args"> factory arguments </param>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="args"/> is not empty
+	  /// after the base constructor has run </exception>
+	  public ClassicFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  // A .NET dictionary's ToString() only yields its type name, so list the offending keys explicitly.
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps <paramref name="input"/> in a new <seealso cref="ClassicFilter"/>. </summary>
+	  /// <param name="input"> the token stream to filter </param>
+	  /// <returns> the new filter </returns>
+	  public override TokenFilter create(TokenStream input)
+	  {
+		return new ClassicFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
new file mode 100644
index 0000000..a41f48d
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.standard
+{
+
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A grammar-based tokenizer constructed with JFlex
+	/// 
+	/// <para> This should be a good tokenizer for most European-language documents:
+	/// 
+	/// <ul>
+	///   <li>Splits words at punctuation characters, removing punctuation. However, a 
+	///     dot that's not followed by whitespace is considered part of a token.</li>
+	///   <li>Splits words at hyphens, unless there's a number in the token, in which case
+	///     the whole token is interpreted as a product number and is not split.</li>
+	///   <li>Recognizes email addresses and internet hostnames as one token.</li>
+	/// </ul>
+	/// 
+	/// </para>
+	/// <para>Many applications have specific tokenizer needs.  If this tokenizer does
+	/// not suit your application, please consider copying this source code
+	/// directory to your project and maintaining your own grammar-based tokenizer.
+	/// 
+	/// ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
+	/// As of 3.1, <seealso cref="StandardTokenizer"/> implements Unicode text segmentation,
+	/// as specified by UAX#29.
+	/// </para>
+	/// </summary>
+
+	public sealed class ClassicTokenizer : Tokenizer
+	{
+	  /// <summary>
+	  /// A private instance of the JFlex-constructed scanner </summary>
+	  private StandardTokenizerInterface scanner;
+
+	  public const int ALPHANUM = 0;
+	  public const int APOSTROPHE = 1;
+	  public const int ACRONYM = 2;
+	  public const int COMPANY = 3;
+	  public const int EMAIL = 4;
+	  public const int HOST = 5;
+	  public const int NUM = 6;
+	  public const int CJ = 7;
+
+	  public const int ACRONYM_DEP = 8;
+
+	  /// <summary>
+	  /// String token types, indexed by the token type int constants above </summary>
+	  public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
+
+	  // Number of too-long tokens skipped since the last emitted token (or since reset).
+	  private int skippedPositions;
+
+	  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+	  // this tokenizer generates four attributes:
+	  // term, offset, positionIncrement and type
+	  private readonly CharTermAttribute termAtt;
+	  private readonly OffsetAttribute offsetAtt;
+	  private readonly PositionIncrementAttribute posIncrAtt;
+	  private readonly TypeAttribute typeAtt;
+
+	  /// <summary>
+	  /// The max allowed token length.  Any token longer
+	  ///  than this is skipped. 
+	  /// </summary>
+	  /// <exception cref="System.ArgumentException"> if set to a value smaller than 1 </exception>
+	  public int MaxTokenLength
+	  {
+		set
+		{
+		  if (value < 1)
+		  {
+			throw new System.ArgumentException("maxTokenLength must be greater than zero");
+		  }
+		  this.maxTokenLength = value;
+		}
+		get
+		{
+		  return maxTokenLength;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Creates a new instance of the <seealso cref="ClassicTokenizer"/>.  Attaches
+	  /// the <code>input</code> to the newly created JFlex scanner.
+	  /// 
+	  /// See http://issues.apache.org/jira/browse/LUCENE-1068
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to match </param>
+	  /// <param name="input"> The input reader </param>
+	  public ClassicTokenizer(Version matchVersion, Reader input) : base(input)
+	  {
+		// Attributes are registered in the constructors rather than in field initializers:
+		// a C# field initializer cannot call an instance method (compiler error CS0236).
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		offsetAtt = addAttribute(typeof(OffsetAttribute));
+		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+		typeAtt = addAttribute(typeof(TypeAttribute));
+		init(matchVersion);
+	  }
+
+	  /// <summary>
+	  /// Creates a new ClassicTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> 
+	  /// </summary>
+	  /// <param name="matchVersion"> Lucene version to match </param>
+	  /// <param name="factory"> attribute factory passed on to the base tokenizer </param>
+	  /// <param name="input"> The input reader </param>
+	  public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
+	  {
+		// Same registration order as the other constructor; see the note there.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		offsetAtt = addAttribute(typeof(OffsetAttribute));
+		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+		typeAtt = addAttribute(typeof(TypeAttribute));
+		init(matchVersion);
+	  }
+
+	  // Creates the scanner over the current input. matchVersion is accepted but not used here.
+	  private void init(Version matchVersion)
+	  {
+		this.scanner = new ClassicTokenizerImpl(input);
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token whose length does not exceed the max token length,
+	  /// filling in the term, offset, position increment and type attributes. </summary>
+	  /// <returns> <c>true</c> if a token was produced, <c>false</c> once the scanner reports end of input </returns>
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		skippedPositions = 0;
+
+		while (true)
+		{
+		  int tokenType = scanner.NextToken;
+
+		  if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+		  {
+			return false;
+		  }
+
+		  if (scanner.yylength() > maxTokenLength)
+		  {
+			// When we skip a too-long term, we still increment the
+			// position increment
+			skippedPositions++;
+			continue;
+		  }
+
+		  posIncrAtt.PositionIncrement = skippedPositions + 1;
+		  scanner.getText(termAtt);
+		  int start = scanner.yychar();
+		  offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
+
+		  if (tokenType == ClassicTokenizer.ACRONYM_DEP)
+		  {
+			// Deprecated acronym tokens are reported as HOST, without their last character.
+			typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST];
+			termAtt.Length = termAtt.length() - 1; // remove extra '.'
+		  }
+		  else
+		  {
+			typeAtt.Type = ClassicTokenizer.TOKEN_TYPES[tokenType];
+		  }
+		  return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// Sets the final offset to the end of the last scanned text and adds any
+	  /// skipped positions to the position increment. </summary>
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+		offsetAtt.setOffset(finalOffset, finalOffset);
+		// adjust any skipped tokens
+		posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+	  }
+
+	  /// <summary>
+	  /// Closes the tokenizer, then re-points the scanner at the current input. </summary>
+	  public override void close()
+	  {
+		base.close();
+		scanner.yyreset(input);
+	  }
+
+	  /// <summary>
+	  /// Resets the tokenizer, re-points the scanner at the current input and clears
+	  /// the skipped-position counter. </summary>
+	  public override void reset()
+	  {
+		base.reset();
+		scanner.yyreset(input);
+		skippedPositions = 0;
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message