lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [15/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:19 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
new file mode 100644
index 0000000..fa5d5da
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -0,0 +1,625 @@
+using System;
+using System.Text;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using WhitespaceTokenizer = WhitespaceTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
+
+	/// <summary>
+	/// Old Broken version of <seealso cref="WordDelimiterFilter"/>
+	/// </summary>
+	[Obsolete]
+	public sealed class Lucene47WordDelimiterFilter : TokenFilter
+	{
+		private bool InstanceFieldsInitialized = false;
+
+		private void InitializeInstanceFields()
+		{
+			concat = new WordDelimiterConcatenation(this);
+			concatAll = new WordDelimiterConcatenation(this);
+		}
+
+
+	  public const int LOWER = 0x01;
+	  public const int UPPER = 0x02;
+	  public const int DIGIT = 0x04;
+	  public const int SUBWORD_DELIM = 0x08;
+
+	  // combinations: for testing, not for setting bits
+	  public const int ALPHA = 0x03;
+	  public const int ALPHANUM = 0x07;
+
+	  /// <summary>
+	  /// Causes parts of words to be generated:
+	  /// <p/>
+	  /// "PowerShot" => "Power" "Shot"
+	  /// </summary>
+	  public const int GENERATE_WORD_PARTS = 1;
+
+	  /// <summary>
+	  /// Causes number subwords to be generated:
+	  /// <p/>
+	  /// "500-42" => "500" "42"
+	  /// </summary>
+	  public const int GENERATE_NUMBER_PARTS = 2;
+
+	  /// <summary>
+	  /// Causes maximum runs of word parts to be catenated:
+	  /// <p/>
+	  /// "wi-fi" => "wifi"
+	  /// </summary>
+	  public const int CATENATE_WORDS = 4;
+
+	  /// <summary>
+	  /// Causes maximum runs of number parts to be catenated:
+	  /// <p/>
+	  /// "500-42" => "50042"
+	  /// </summary>
+	  public const int CATENATE_NUMBERS = 8;
+
+	  /// <summary>
+	  /// Causes all subword parts to be catenated:
+	  /// <p/>
+	  /// "wi-fi-4000" => "wifi4000"
+	  /// </summary>
+	  public const int CATENATE_ALL = 16;
+
+	  /// <summary>
+	  /// Causes original words to be preserved and added to the subword list (Defaults to false)
+	  /// <p/>
+	  /// "500-42" => "500" "42" "500-42"
+	  /// </summary>
+	  public const int PRESERVE_ORIGINAL = 32;
+
+	  /// <summary>
+	  /// If not set, causes case changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens)
+	  /// </summary>
+	  public const int SPLIT_ON_CASE_CHANGE = 64;
+
+	  /// <summary>
+	  /// If not set, causes numeric changes to be ignored (subwords will only be generated
+	  /// given SUBWORD_DELIM tokens).
+	  /// </summary>
+	  public const int SPLIT_ON_NUMERICS = 128;
+
+	  /// <summary>
+	  /// Causes trailing "'s" to be removed for each subword
+	  /// <p/>
+	  /// "O'Neil's" => "O", "Neil"
+	  /// </summary>
+	  public const int STEM_ENGLISH_POSSESSIVE = 256;
+
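+	  // Illustrative usage sketch: the constants above are bit flags and are
+	  // combined with bitwise OR, e.g.
+	  //
+	  //   int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS
+	  //             | SPLIT_ON_CASE_CHANGE | CATENATE_ALL;
+	  //   TokenStream ts = new Lucene47WordDelimiterFilter(input, flags, null);
+	  //
+	  // With these flags, "PowerShot500-42" yields "Power", "Shot", "500", "42",
+	  // plus the catenated "PowerShot50042".
+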
+	  /// <summary>
+	  /// If not null, the set of tokens to protect from being delimited.
+	  /// </summary>
+	  internal readonly CharArraySet protWords;
+
+	  private readonly int flags;
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
+
+	  // used for iterating word delimiter breaks
+	  private readonly WordDelimiterIterator iterator;
+
+	  // used for concatenating runs of similar typed subwords (word,number)
+	  private WordDelimiterConcatenation concat;
+	  // number of subwords last output by concat.
+	  private int lastConcatCount = 0;
+
+	  // used for catenate all
+	  private WordDelimiterConcatenation concatAll;
+
+	  // used for accumulating position increment gaps
+	  private int accumPosInc = 0;
+
+	  private char[] savedBuffer = new char[1024];
+	  private int savedStartOffset;
+	  private int savedEndOffset;
+	  private string savedType;
+	  private bool hasSavedState = false;
+	  // if length by start + end offsets doesn't match the term text then assume
+	  // this is a synonym and don't adjust the offsets.
+	  private bool hasIllegalOffsets = false;
+
+	  // for a run of the same subword type within a word, have we output anything?
+	  private bool hasOutputToken = false;
+	  // when preserve original is on, have we output any token following it?
+	  // this token must have posInc=0!
+	  private bool hasOutputFollowingOriginal = false;
+
+	  /// <summary>
+	  /// Creates a new WordDelimiterFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream to be filtered </param>
+	  /// <param name="charTypeTable"> table containing character types </param>
+	  /// <param name="configurationFlags"> Flags configuring the filter </param>
+	  /// <param name="protWords"> If not null, the set of tokens to protect from being delimited </param>
+	  public Lucene47WordDelimiterFilter(TokenStream @in, sbyte[] charTypeTable, int configurationFlags, CharArraySet protWords) : base(@in)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+		this.flags = configurationFlags;
+		this.protWords = protWords;
+		this.iterator = new WordDelimiterIterator(charTypeTable, has(SPLIT_ON_CASE_CHANGE), has(SPLIT_ON_NUMERICS), has(STEM_ENGLISH_POSSESSIVE));
+	  }
+
+	  /// <summary>
+	  /// Creates a new WordDelimiterFilter using <seealso cref="WordDelimiterIterator#DEFAULT_WORD_DELIM_TABLE"/>
+	  /// as its charTypeTable
+	  /// </summary>
+	  /// <param name="in"> TokenStream to be filtered </param>
+	  /// <param name="configurationFlags"> Flags configuring the filter </param>
+	  /// <param name="protWords"> If not null, the set of tokens to protect from being delimited </param>
+	  public Lucene47WordDelimiterFilter(TokenStream @in, int configurationFlags, CharArraySet protWords) : this(@in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, configurationFlags, protWords)
+	  {
+		  if (!InstanceFieldsInitialized)
+		  {
+			  InitializeInstanceFields();
+			  InstanceFieldsInitialized = true;
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (true)
+		{
+		  if (!hasSavedState)
+		  {
+			// process a new input word
+			if (!input.incrementToken())
+			{
+			  return false;
+			}
+
+			int termLength = termAttribute.length();
+			char[] termBuffer = termAttribute.buffer();
+
+			accumPosInc += posIncAttribute.PositionIncrement;
+
+			iterator.setText(termBuffer, termLength);
+			iterator.next();
+
+			// word with no delimiters, or protected word: just return it
+			if ((iterator.current == 0 && iterator.end == termLength) || (protWords != null && protWords.contains(termBuffer, 0, termLength)))
+			{
+			  posIncAttribute.PositionIncrement = accumPosInc;
+			  accumPosInc = 0;
+			  return true;
+			}
+
+			// word consisting only of delimiters
+			if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL))
+			{
+			  // if the posInc is 1, simply ignore it in the accumulation
+			  if (posIncAttribute.PositionIncrement == 1)
+			  {
+				accumPosInc--;
+			  }
+			  continue;
+			}
+
+			saveState();
+
+			hasOutputToken = false;
+			hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
+			lastConcatCount = 0;
+
+			if (has(PRESERVE_ORIGINAL))
+			{
+			  posIncAttribute.PositionIncrement = accumPosInc;
+			  accumPosInc = 0;
+			  return true;
+			}
+		  }
+
+		  // at the end of the string, output any concatenations
+		  if (iterator.end == WordDelimiterIterator.DONE)
+		  {
+			if (!concat.Empty)
+			{
+			  if (flushConcatenation(concat))
+			  {
+				return true;
+			  }
+			}
+
+			if (!concatAll.Empty)
+			{
+			  // only if we haven't output this same combo above!
+			  if (concatAll.subwordCount > lastConcatCount)
+			  {
+				concatAll.writeAndClear();
+				return true;
+			  }
+			  concatAll.clear();
+			}
+
+			// no saved concatenations, on to the next input word
+			hasSavedState = false;
+			continue;
+		  }
+
+		  // word surrounded by delimiters: always output
+		  if (iterator.SingleWord)
+		  {
+			generatePart(true);
+			iterator.next();
+			return true;
+		  }
+
+		  int wordType = iterator.type();
+
+		  // do we already have queued up incompatible concatenations?
+		  if (!concat.Empty && (concat.type & wordType) == 0)
+		  {
+			if (flushConcatenation(concat))
+			{
+			  hasOutputToken = false;
+			  return true;
+			}
+			hasOutputToken = false;
+		  }
+
+		  // add subwords depending upon options
+		  if (shouldConcatenate(wordType))
+		  {
+			if (concat.Empty)
+			{
+			  concat.type = wordType;
+			}
+			concatenate(concat);
+		  }
+
+		  // add all subwords (catenateAll)
+		  if (has(CATENATE_ALL))
+		  {
+			concatenate(concatAll);
+		  }
+
+		  // if we should output the word or number part
+		  if (shouldGenerateParts(wordType))
+		  {
+			generatePart(false);
+			iterator.next();
+			return true;
+		  }
+
+		  iterator.next();
+		}
+	  }
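+
+	  // Worked example for the loop above (illustrative): for the input token
+	  // "wi-fi" with GENERATE_WORD_PARTS | CATENATE_WORDS set, successive calls
+	  // emit "wi", then "fi", and finally the flushed concatenation "wifi" once
+	  // the iterator reaches DONE.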
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		hasSavedState = false;
+		concat.clear();
+		concatAll.clear();
+		accumPosInc = 0;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Saves the existing attribute states
+	  /// </summary>
+	  private void saveState()
+	  {
+		// otherwise, we have delimiters, save state
+		savedStartOffset = offsetAttribute.startOffset();
+		savedEndOffset = offsetAttribute.endOffset();
+		// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
+		hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
+		savedType = typeAttribute.type();
+
+		if (savedBuffer.Length < termAttribute.length())
+		{
+		  savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
+		}
+
+		Array.Copy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
+		iterator.text = savedBuffer;
+
+		hasSavedState = true;
+	  }
+
+	  /// <summary>
+	  /// Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing.
+	  /// </summary>
+	  /// <param name="concatenation"> WordDelimiterConcatenation that will be flushed </param>
+	  /// <returns> {@code true} if the concatenation was written before it was cleared, {@code false} otherwise </returns>
+	  private bool flushConcatenation(WordDelimiterConcatenation concatenation)
+	  {
+		lastConcatCount = concatenation.subwordCount;
+		if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type))
+		{
+		  concatenation.writeAndClear();
+		  return true;
+		}
+		concatenation.clear();
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Determines whether to concatenate a word or number if the current word is the given type
+	  /// </summary>
+	  /// <param name="wordType"> Type of the current word used to determine if it should be concatenated </param>
+	  /// <returns> {@code true} if concatenation should occur, {@code false} otherwise </returns>
+	  private bool shouldConcatenate(int wordType)
+	  {
+		return (has(CATENATE_WORDS) && isAlpha(wordType)) || (has(CATENATE_NUMBERS) && isDigit(wordType));
+	  }
+
+	  /// <summary>
+	  /// Determines whether a word/number part should be generated for a word of the given type
+	  /// </summary>
+	  /// <param name="wordType"> Type of the word used to determine if a word/number part should be generated </param>
+	  /// <returns> {@code true} if a word/number part should be generated, {@code false} otherwise </returns>
+	  private bool shouldGenerateParts(int wordType)
+	  {
+		return (has(GENERATE_WORD_PARTS) && isAlpha(wordType)) || (has(GENERATE_NUMBER_PARTS) && isDigit(wordType));
+	  }
+
+	  /// <summary>
+	  /// Concatenates the saved buffer to the given WordDelimiterConcatenation
+	  /// </summary>
+	  /// <param name="concatenation"> WordDelimiterConcatenation to concatenate the buffer to </param>
+	  private void concatenate(WordDelimiterConcatenation concatenation)
+	  {
+		if (concatenation.Empty)
+		{
+		  concatenation.startOffset = savedStartOffset + iterator.current;
+		}
+		concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current);
+		concatenation.endOffset = savedStartOffset + iterator.end;
+	  }
+
+	  /// <summary>
+	  /// Generates a word/number part, updating the appropriate attributes
+	  /// </summary>
+	  /// <param name="isSingleWord"> {@code true} if the generation is occurring from a single word, {@code false} otherwise </param>
+	  private void generatePart(bool isSingleWord)
+	  {
+		clearAttributes();
+		termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
+
+		int startOffset = savedStartOffset + iterator.current;
+		int endOffset = savedStartOffset + iterator.end;
+
+		if (hasIllegalOffsets)
+		{
+		  // historically this filter did this regardless for 'isSingleWord', 
+		  // but we must do a sanity check:
+		  if (isSingleWord && startOffset <= savedEndOffset)
+		  {
+			offsetAttribute.setOffset(startOffset, savedEndOffset);
+		  }
+		  else
+		  {
+			offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
+		  }
+		}
+		else
+		{
+		  offsetAttribute.setOffset(startOffset, endOffset);
+		}
+		posIncAttribute.PositionIncrement = position(false);
+		typeAttribute.Type = savedType;
+	  }
+
+	  /// <summary>
+	  /// Get the position increment gap for a subword or concatenation
+	  /// </summary>
+	  /// <param name="inject"> true if this token wants to be injected </param>
+	  /// <returns> position increment gap </returns>
+	  private int position(bool inject)
+	  {
+		int posInc = accumPosInc;
+
+		if (hasOutputToken)
+		{
+		  accumPosInc = 0;
+		  return inject ? 0 : Math.Max(1, posInc);
+		}
+
+		hasOutputToken = true;
+
+		if (!hasOutputFollowingOriginal)
+		{
+		  // the first token following the original is 0 regardless
+		  hasOutputFollowingOriginal = true;
+		  return 0;
+		}
+		// clear the accumulated position increment
+		accumPosInc = 0;
+		return Math.Max(1, posInc);
+	  }
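+
+	  // Illustrative trace: for "PowerShot" split into "Power" and "Shot" (no
+	  // original preserved), the first call returns the accumulated increment
+	  // (at least 1); once hasOutputToken is true, subsequent parts get 1 and
+	  // injected tokens (inject == true), such as concatenations, get 0 so they
+	  // stack on the previous position.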
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#ALPHA"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains ALPHA, {@code false} otherwise </returns>
+	  internal static bool isAlpha(int type)
+	  {
+		return (type & ALPHA) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#DIGIT"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains DIGIT, {@code false} otherwise </returns>
+	  internal static bool isDigit(int type)
+	  {
+		return (type & DIGIT) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#SUBWORD_DELIM"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise </returns>
+	  internal static bool isSubwordDelim(int type)
+	  {
+		return (type & SUBWORD_DELIM) != 0;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given word type includes <seealso cref="#UPPER"/>
+	  /// </summary>
+	  /// <param name="type"> Word type to check </param>
+	  /// <returns> {@code true} if the type contains UPPER, {@code false} otherwise </returns>
+	  internal static bool isUpper(int type)
+	  {
+		return (type & UPPER) != 0;
+	  }
+
+	  /// <summary>
+	  /// Determines whether the given flag is set
+	  /// </summary>
+	  /// <param name="flag"> Flag to see if set </param>
+	  /// <returns> {@code true} if flag is set </returns>
+	  private bool has(int flag)
+	  {
+		return (flags & flag) != 0;
+	  }
+
+	  // ================================================= Inner Classes =================================================
+
+	  /// <summary>
+	  /// A WDF concatenated 'run'
+	  /// </summary>
+	  internal sealed class WordDelimiterConcatenation
+	  {
+		  private readonly Lucene47WordDelimiterFilter outerInstance;
+
+		  public WordDelimiterConcatenation(Lucene47WordDelimiterFilter outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		internal readonly StringBuilder buffer = new StringBuilder();
+		internal int startOffset;
+		internal int endOffset;
+		internal int type;
+		internal int subwordCount;
+
+		/// <summary>
+		/// Appends the given text of the given length to the concatenation at the given offset
+		/// </summary>
+		/// <param name="text"> Text to append </param>
+		/// <param name="offset"> Offset in the concatenation to add the text </param>
+		/// <param name="length"> Length of the text to append </param>
+		internal void append(char[] text, int offset, int length)
+		{
+		  buffer.Append(text, offset, length);
+		  subwordCount++;
+		}
+
+		/// <summary>
+		/// Writes the concatenation to the attributes
+		/// </summary>
+		internal void write()
+		{
+		  clearAttributes();
+		  if (outerInstance.termAttribute.length() < buffer.Length)
+		  {
+			outerInstance.termAttribute.resizeBuffer(buffer.Length);
+		  }
+		  char[] termbuffer = outerInstance.termAttribute.buffer();
+
+		  buffer.getChars(0, buffer.Length, termbuffer, 0);
+		  outerInstance.termAttribute.Length = buffer.Length;
+
+		  if (outerInstance.hasIllegalOffsets)
+		  {
+			outerInstance.offsetAttribute.setOffset(outerInstance.savedStartOffset, outerInstance.savedEndOffset);
+		  }
+		  else
+		  {
+			outerInstance.offsetAttribute.setOffset(startOffset, endOffset);
+		  }
+		  outerInstance.posIncAttribute.PositionIncrement = outerInstance.position(true);
+		  outerInstance.typeAttribute.Type = outerInstance.savedType;
+		  outerInstance.accumPosInc = 0;
+		}
+
+		/// <summary>
+		/// Determines if the concatenation is empty
+		/// </summary>
+		/// <returns> {@code true} if the concatenation is empty, {@code false} otherwise </returns>
+		internal bool Empty
+		{
+			get
+			{
+			  return buffer.Length == 0;
+			}
+		}
+
+		/// <summary>
+		/// Clears the concatenation and resets its state
+		/// </summary>
+		internal void clear()
+		{
+		  buffer.Length = 0;
+		  startOffset = endOffset = type = subwordCount = 0;
+		}
+
+		/// <summary>
+		/// Convenience method for the common scenario of having to write the concatenation and then clearing its state
+		/// </summary>
+		internal void writeAndClear()
+		{
+		  write();
+		  clear();
+		}
+	  }
+	  // questions:
+	  // negative numbers?  -42 indexed as just 42?
+	  // dollar sign?  $42
+	  // percent sign?  33%
+	  // downsides:  if source text is "powershot" then a query of "PowerShot" won't match!
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
new file mode 100644
index 0000000..77cbe8e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -0,0 +1,566 @@
+using System;
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StopAnalyzer = StopAnalyzer;
+	using StopFilter = StopFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
+	/// <seealso cref="java.io.Reader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
+	/// (with behaviour identical to <seealso cref="String#split(String)"/>),
+	/// and that combines the functionality of
+	/// <seealso cref="LetterTokenizer"/>,
+	/// <seealso cref="LowerCaseTokenizer"/>,
+	/// <seealso cref="WhitespaceTokenizer"/>,
+	/// <seealso cref="StopFilter"/> into a single efficient
+	/// multi-purpose class.
+	/// <para>
+	/// If you are unsure what exactly a regular expression should look like, consider 
+	/// prototyping by simply trying various expressions on some test texts via
+	/// <seealso cref="String#split(String)"/>. Once you are satisfied, give that regex to 
+	/// PatternAnalyzer. Also see <a target="_blank" 
+	/// href="http://java.sun.com/docs/books/tutorial/extra/regex/">Java Regular Expression Tutorial</a>.
+	/// </para>
+	/// <para>
+	/// This class can be considerably faster than the "normal" Lucene tokenizers. 
+	/// It can also serve as a building block in a compound Lucene
+	/// <seealso cref="org.apache.lucene.analysis.TokenFilter"/> chain. For example as in this 
+	/// stemming example:
+	/// <pre>
+	/// PatternAnalyzer pat = ...
+	/// TokenStream tokenStream = new SnowballFilter(
+	///     pat.tokenStream("content", "James is running round in the woods"), 
+	///     "English");
+	/// </pre>
+	/// </para>
+	/// </summary>
+	/// @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead. 
+	[Obsolete("(4.0) use the pattern-based analysis in the analysis/pattern package instead.")]
+	public sealed class PatternAnalyzer : Analyzer
+	{
+
+	  /// <summary>
+	  /// <code>"\\W+"</code>; Divides text at non-letters (NOT Character.isLetter(c)) </summary>
+	  public static readonly Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
+
+	  /// <summary>
+	  /// <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) </summary>
+	  public static readonly Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+
+	  private static readonly CharArraySet EXTENDED_ENGLISH_STOP_WORDS = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("a", "about", "above", "across", "adj", "after", "afterwards", "again", "against", "albeit", "all", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anywhere", "are", "around", "as", "at", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "co", "could", "down", "during", "each", "eg", "either", "else", "elsewhere", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except", "few", "first", "for", "former", "formerly", "from", "further", "had", "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "him", "himself", "his", "how", "however", "i", "ie", "if", "in", "inc", "indeed", "into", "is", "it", "its", "itself", "last", "latter", "latterly", "least", "less", "ltd", "many", "may", "me", "meanwhile", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "namely", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "noone", "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own", "per", "perhaps", "rather", "s", "same", "seem", "seemed", "seeming", "seems", "several", "she", "should", "since", "so", "some", "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "t", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefor", "therein", "thereupon", "these", "they", "this", "those", "though", "through", "throughout", "thru", "thus", "to", "together", "too", "toward", "towards", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereafter", "whereas", "whereat", "whereby", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "with", "within", "without", "would", "xsubj", "xcal", "xauthor", "xother ", "xnote", "yet", "you", "your", "yours", "yourself", "yourselves"), true));
+
+	  /// <summary>
+	  /// A lower-casing word analyzer with English stop words (can be shared
+	  /// freely across threads without harm); global per class loader.
+	  /// </summary>
+	  public static readonly PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+
+	  /// <summary>
+	  /// A lower-casing word analyzer with <b>extended</b> English stop words
+	  /// (can be shared freely across threads without harm); global per class
+	  /// loader. The stop words are borrowed from
+	  /// http://thomas.loc.gov/home/stopwords.html, see
+	  /// http://thomas.loc.gov/home/all.about.inquery.html
+	  /// </summary>
+	  public static readonly PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
+
+	  private readonly Pattern pattern;
+	  private readonly bool toLowerCase;
+	  private readonly CharArraySet stopWords;
+
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Constructs a new instance with the given parameters.
+	  /// </summary>
+	  /// <param name="matchVersion"> currently does nothing </param>
+	  /// <param name="pattern">
+	  ///            a regular expression delimiting tokens </param>
+	  /// <param name="toLowerCase">
+	  ///            if <code>true</code> returns tokens after applying
+	  ///            String.toLowerCase() </param>
+	  /// <param name="stopWords">
+	  ///            if non-null, ignores all tokens that are contained in the
+	  ///            given stop set (after previously having applied toLowerCase()
+	  ///            if applicable). For example, created via
+	  ///            <seealso cref="StopFilter#makeStopSet(Version, String[])"/> and/or
+	  ///            <seealso cref="org.apache.lucene.analysis.util.WordlistLoader"/> as in
+	  ///            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt"))</code>
+	  ///            or <a href="http://www.unine.ch/info/clef/">other stop words
+	  ///            lists </a>. </param>
+	  public PatternAnalyzer(Version matchVersion, Pattern pattern, bool toLowerCase, CharArraySet stopWords)
+	  {
+		if (pattern == null)
+		{
+		  throw new System.ArgumentException("pattern must not be null");
+		}
+
+		if (eqPattern(NON_WORD_PATTERN, pattern))
+		{
+			pattern = NON_WORD_PATTERN;
+		}
+		else if (eqPattern(WHITESPACE_PATTERN, pattern))
+		{
+			pattern = WHITESPACE_PATTERN;
+		}
+
+		if (stopWords != null && stopWords.size() == 0)
+		{
+			stopWords = null;
+		}
+
+		this.pattern = pattern;
+		this.toLowerCase = toLowerCase;
+		this.stopWords = stopWords;
+		this.matchVersion = matchVersion;
+	  }
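+
+	  // Illustrative usage sketch, mirroring the class summary above:
+	  //   PatternAnalyzer analyzer = new PatternAnalyzer(Version.LUCENE_CURRENT,
+	  //       PatternAnalyzer.WHITESPACE_PATTERN, true,
+	  //       StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+	  // tokenizes on whitespace, lower-cases each token, and drops English
+	  // stop words.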
+
+	  /// <summary>
+	  /// Creates a token stream that tokenizes the given string into token terms
+	  /// (aka words).
+	  /// </summary>
+	  /// <param name="fieldName">
+	  ///            the name of the field to tokenize (currently ignored). </param>
+	  /// <param name="reader">
+	  ///            reader (e.g. charfilter) of the original text. can be null. </param>
+	  /// <param name="text">
+	  ///            the string to tokenize </param>
+	  /// <returns> a new token stream </returns>
+	  public TokenStreamComponents createComponents(string fieldName, Reader reader, string text)
+	  {
+		// Ideally the Analyzer superclass should have a method with the same signature, 
+		// with a default impl that simply delegates to the StringReader flavour. 
+		if (reader == null)
+		{
+		  reader = new FastStringReader(text);
+		}
+
+		if (pattern == NON_WORD_PATTERN) // fast path
+		{
+		  return new TokenStreamComponents(new FastStringTokenizer(reader, true, toLowerCase, stopWords));
+		} // fast path
+		else if (pattern == WHITESPACE_PATTERN)
+		{
+		  return new TokenStreamComponents(new FastStringTokenizer(reader, false, toLowerCase, stopWords));
+		}
+
+		Tokenizer tokenizer = new PatternTokenizer(reader, pattern, toLowerCase);
+		TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+		return new TokenStreamComponents(tokenizer, result);
+	  }
+
+	  /// <summary>
+	  /// Creates a token stream that tokenizes all the text in the given Reader;
+	  /// This implementation forwards to <code>createComponents(String, Reader, String)</code> and is
+	  /// less efficient than calling that overload directly.
+	  /// </summary>
+	  /// <param name="fieldName">
+	  ///            the name of the field to tokenize (currently ignored). </param>
+	  /// <param name="reader">
+	  ///            the reader delivering the text </param>
+	  /// <returns> a new token stream </returns>
+	  public override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		return createComponents(fieldName, reader, null);
+	  }
+
+	  /// <summary>
+	  /// Indicates whether some other object is "equal to" this one.
+	  /// </summary>
+	  /// <param name="other">
+	  ///            the reference object with which to compare. </param>
+	  /// <returns> true if equal, false otherwise </returns>
+	  public override bool Equals(object other)
+	  {
+		if (this == other)
+		{
+			return true;
+		}
+		if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER)
+		{
+			return false;
+		}
+		if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER)
+		{
+			return false;
+		}
+
+		if (other is PatternAnalyzer)
+		{
+		  PatternAnalyzer p2 = (PatternAnalyzer) other;
+		  return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords);
+		}
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Returns a hash code value for the object.
+	  /// </summary>
+	  /// <returns> the hash code. </returns>
+	  public override int GetHashCode()
+	  {
+		if (this == DEFAULT_ANALYZER) // fast path
+		{
+			return -1218418418;
+		}
+		if (this == EXTENDED_ANALYZER) // fast path
+		{
+			return 1303507063;
+		}
+
+		int h = 1;
+		h = 31 * h + pattern.pattern().GetHashCode();
+		h = 31 * h + pattern.flags();
+		h = 31 * h + (toLowerCase ? 1231 : 1237);
+		h = 31 * h + (stopWords != null ? stopWords.GetHashCode() : 0);
+		return h;
+	  }
+
+	  /// <summary>
+	  /// equality where o1 and/or o2 can be null </summary>
+	  private static bool eq(object o1, object o2)
+	  {
+		return (o1 == o2) || (o1 != null ? o1.Equals(o2) : false);
+	  }
+
+	  /// <summary>
+	  /// assumes p1 and p2 are not null </summary>
+	  private static bool eqPattern(Pattern p1, Pattern p2)
+	  {
+		return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().Equals(p2.pattern()));
+	  }
+
+	  /// <summary>
+	  /// Reads until end-of-stream and returns all read chars, finally closes the stream.
+	  /// </summary>
+	  /// <param name="input"> the input stream </param>
+	  /// <exception cref="IOException"> if an I/O error occurs while reading the stream </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static String toString(java.io.Reader input) throws java.io.IOException
+	  private static string ToString(Reader input)
+	  {
+		if (input is FastStringReader) // fast path
+		{
+		  return ((FastStringReader) input).String;
+		}
+
+		try
+		{
+		  int len = 256;
+		  char[] buffer = new char[len];
+		  char[] output = new char[len];
+
+		  len = 0;
+		  int n;
+		  while ((n = input.read(buffer)) >= 0)
+		  {
+			if (len + n > output.Length) // grow capacity
+			{
+			  char[] tmp = new char[Math.Max(output.Length << 1, len + n)];
+			  Array.Copy(output, 0, tmp, 0, len);
+			  Array.Copy(buffer, 0, tmp, len, n);
+			  buffer = output; // use larger buffer for future larger bulk reads
+			  output = tmp;
+			}
+			else
+			{
+			  Array.Copy(buffer, 0, output, len, n);
+			}
+			len += n;
+		  }
+
+		  return new string(output, 0, len);
+		}
+		finally
+		{
+		  input.close();
+		}
+	  }
+
+
+	  ///////////////////////////////////////////////////////////////////////////////
+	  // Nested classes:
+	  ///////////////////////////////////////////////////////////////////////////////
+	  /// <summary>
+	  /// The workhorse; performance isn't fantastic, but it's not nearly as bad
+	  /// as one might think - kudos to the Sun regex developers.
+	  /// </summary>
+	  private sealed class PatternTokenizer : Tokenizer
+	  {
+
+		internal readonly Pattern pattern;
+		internal string str;
+		internal readonly bool toLowerCase;
+		internal Matcher matcher;
+		internal int pos = 0;
+		internal bool initialized = false;
+		internal static readonly Locale locale = Locale.Default;
+		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+		public PatternTokenizer(Reader input, Pattern pattern, bool toLowerCase) : base(input)
+		{
+		  this.pattern = pattern;
+		  this.matcher = pattern.matcher("");
+		  this.toLowerCase = toLowerCase;
+		}
+
+		public override bool incrementToken()
+		{
+		  if (!initialized)
+		  {
+			throw new System.InvalidOperationException("Consumer did not call reset().");
+		  }
+		  if (matcher == null)
+		  {
+			  return false;
+		  }
+		  clearAttributes();
+		  while (true) // loop takes care of leading and trailing boundary cases
+		  {
+			int start = pos;
+			int end_Renamed;
+			bool isMatch = matcher.find();
+			if (isMatch)
+			{
+			  end_Renamed = matcher.start();
+			  pos = matcher.end();
+			}
+			else
+			{
+			  end_Renamed = str.Length;
+			  matcher = null; // we're finished
+			}
+
+			if (start != end_Renamed) // non-empty match (header/trailer)
+			{
+			  string text = str.Substring(start, end_Renamed - start);
+			  if (toLowerCase)
+			  {
+				  text = text.ToLower(locale);
+			  }
+			  termAtt.setEmpty().append(text);
+			  offsetAtt.setOffset(correctOffset(start), correctOffset(end_Renamed));
+			  return true;
+			}
+			if (!isMatch)
+			{
+				return false;
+			}
+		  }
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+		public override void end()
+		{
+		  base.end();
+		  // set final offset
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int finalOffset = correctOffset(str.length());
+		  int finalOffset = correctOffset(str.Length);
+		  this.offsetAtt.setOffset(finalOffset, finalOffset);
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+		public override void close()
+		{
+		  base.close();
+		  this.initialized = false;
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+		public override void reset()
+		{
+		  base.reset();
+		  this.str = PatternAnalyzer.ToString(input);
+		  this.matcher = pattern.matcher(this.str);
+		  this.pos = 0;
+		  this.initialized = true;
+		}
+	  }
+
+
+	  ///////////////////////////////////////////////////////////////////////////////
+	  // Nested classes:
+	  ///////////////////////////////////////////////////////////////////////////////
+	  /// <summary>
+	  /// Special-case class for best performance in common cases; this class is
+	  /// otherwise unnecessary.
+	  /// </summary>
+	  private sealed class FastStringTokenizer : Tokenizer
+	  {
+
+		internal string str;
+		internal int pos;
+		internal readonly bool isLetter;
+		internal readonly bool toLowerCase;
+		internal readonly CharArraySet stopWords;
+		internal static readonly Locale locale = Locale.Default;
+		internal readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+		internal readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+		public FastStringTokenizer(Reader input, bool isLetter, bool toLowerCase, CharArraySet stopWords) : base(input)
+		{
+		  this.isLetter = isLetter;
+		  this.toLowerCase = toLowerCase;
+		  this.stopWords = stopWords;
+		}
+
+		public override bool incrementToken()
+		{
+		  if (str == null)
+		  {
+			throw new System.InvalidOperationException("Consumer did not call reset().");
+		  }
+		  clearAttributes();
+		  // cache loop instance vars (performance)
+		  string s = str;
+		  int len = s.Length;
+		  int i = pos;
+		  bool letter = isLetter;
+
+		  int start = 0;
+		  string text;
+		  do
+		  {
+			// find beginning of token
+			text = null;
+			while (i < len && !isTokenChar(s[i], letter))
+			{
+			  i++;
+			}
+
+			if (i < len) // found beginning; now find end of token
+			{
+			  start = i;
+			  while (i < len && isTokenChar(s[i], letter))
+			  {
+				i++;
+			  }
+
+			  text = s.Substring(start, i - start);
+			  if (toLowerCase)
+			  {
+				  text = text.ToLower(locale);
+			  }
+	//          if (toLowerCase) {            
+	////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
+	////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809
+	//            text = s.substring(start, i).toLowerCase(); 
+	////            char[] chars = new char[i-start];
+	////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));
+	////            text = new String(chars);
+	//          } else {
+	//            text = s.substring(start, i);
+	//          }
+			}
+		  } while (text != null && isStopWord(text));
+
+		  pos = i;
+		  if (text == null)
+		  {
+			return false;
+		  }
+		  termAtt.setEmpty().append(text);
+		  offsetAtt.setOffset(correctOffset(start), correctOffset(i));
+		  return true;
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+		public override void end()
+		{
+		  base.end();
+		  // set final offset
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int finalOffset = str.length();
+		  int finalOffset = str.Length;
+		  this.offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
+		}
+
+		internal bool isTokenChar(char c, bool isLetter)
+		{
+		  return isLetter ? char.IsLetter(c) : !char.IsWhiteSpace(c);
+		}
+
+		internal bool isStopWord(string text)
+		{
+		  return stopWords != null && stopWords.contains(text);
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+		public override void close()
+		{
+		  base.close();
+		  this.str = null;
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+		public override void reset()
+		{
+		  base.reset();
+		  this.str = PatternAnalyzer.ToString(input);
+		  this.pos = 0;
+		}
+	  }
+
+
+	  ///////////////////////////////////////////////////////////////////////////////
+	  // Nested classes:
+	  ///////////////////////////////////////////////////////////////////////////////
+	  /// <summary>
+	  /// A StringReader that exposes its contained string for fast direct access.
+	  /// Might make sense to generalize this to CharSequence and make it public?
+	  /// </summary>
+	  internal sealed class FastStringReader : StringReader
+	  {
+
+		internal readonly string s;
+
+		internal FastStringReader(string s) : base(s)
+		{
+		  this.s = s;
+		}
+
+		internal string String
+		{
+			get
+			{
+			  return s;
+			}
+		}
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
new file mode 100644
index 0000000..4402d5a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -0,0 +1,60 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+	/// that matches the provided pattern is marked as a keyword by setting
+	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+	/// </summary>
+	public sealed class PatternKeywordMarkerFilter : KeywordMarkerFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly Matcher matcher;
+
+	  /// <summary>
+	  /// Create a new <seealso cref="PatternKeywordMarkerFilter"/>, that marks the current
+	  /// token as a keyword if the tokens term buffer matches the provided
+	  /// <seealso cref="Pattern"/> via the <seealso cref="KeywordAttribute"/>.
+	  /// </summary>
+	  /// <param name="in">
+	  ///          TokenStream to filter </param>
+	  /// <param name="pattern">
+	  ///          the pattern to apply to the incoming term buffer
+	  ///  </param>
+	  public PatternKeywordMarkerFilter(TokenStream @in, Pattern pattern) : base(@in)
+	  {
+		this.matcher = pattern.matcher("");
+	  }
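+
+	  // Illustrative usage sketch: mark terms matching a pattern as keywords so
+	  // that a downstream stemmer (e.g. PorterStemFilter) leaves them intact:
+	  //   TokenStream ts = new PatternKeywordMarkerFilter(tokenizer,
+	  //       Pattern.compile(".*ing"));
+	  //   ts = new PorterStemFilter(ts);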
+
+	  protected internal override bool Keyword
+	  {
+		  get
+		  {
+			matcher.reset(termAtt);
+			return matcher.matches();
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
new file mode 100644
index 0000000..f61b230
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PerFieldAnalyzerWrapper.cs
@@ -0,0 +1,93 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+
+	/// <summary>
+	/// This analyzer is used to facilitate scenarios where different
+	/// fields require different analysis techniques.  Use the Map
+	/// argument in <seealso cref="#PerFieldAnalyzerWrapper(Analyzer, java.util.Map)"/>
+	/// to add non-default analyzers for fields.
+	/// 
+	/// <para>Example usage:
+	/// 
+	/// <pre class="prettyprint">
+	/// {@code
+	/// Map<String,Analyzer> analyzerPerField = new HashMap<>();
+	/// analyzerPerField.put("firstname", new KeywordAnalyzer());
+	/// analyzerPerField.put("lastname", new KeywordAnalyzer());
+	/// 
+	/// PerFieldAnalyzerWrapper aWrapper =
+	///   new PerFieldAnalyzerWrapper(new StandardAnalyzer(version), analyzerPerField);
+	/// }
+	/// </pre>
+	/// 
+	/// </para>
+	/// <para>In this example, StandardAnalyzer will be used for all fields except "firstname"
+	/// and "lastname", for which KeywordAnalyzer will be used.
+	/// 
+	/// </para>
+	/// <para>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+	/// and query parsing.
+	/// </para>
+	/// </summary>
+	public sealed class PerFieldAnalyzerWrapper : AnalyzerWrapper
+	{
+	  private readonly Analyzer defaultAnalyzer;
+	  private readonly IDictionary<string, Analyzer> fieldAnalyzers;
+
+	  /// <summary>
+	  /// Constructs with default analyzer.
+	  /// </summary>
+	  /// <param name="defaultAnalyzer"> Any fields not specifically
+	  /// defined to use a different analyzer will use the one provided here. </param>
+	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) : this(defaultAnalyzer, null)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Constructs with default analyzer and a map of analyzers to use for 
+	  /// specific fields.
+	  /// </summary>
+	  /// <param name="defaultAnalyzer"> Any fields not specifically
+	  /// defined to use a different analyzer will use the one provided here. </param>
+	  /// <param name="fieldAnalyzers"> a Map (String field name to the Analyzer) to be 
+	  /// used for those fields  </param>
+	  public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IDictionary<string, Analyzer> fieldAnalyzers) : base(PER_FIELD_REUSE_STRATEGY)
+	  {
+		this.defaultAnalyzer = defaultAnalyzer;
+		this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : new Dictionary<string, Analyzer>();
+	  }
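+
+	  // Illustrative C# counterpart of the javadoc example above:
+	  //   var analyzerPerField = new Dictionary<string, Analyzer> {
+	  //     { "firstname", new KeywordAnalyzer() },
+	  //     { "lastname", new KeywordAnalyzer() }
+	  //   };
+	  //   var wrapper = new PerFieldAnalyzerWrapper(
+	  //       new StandardAnalyzer(version), analyzerPerField);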
+
+	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		// Java's Map.get returns null for a missing key, but the C# indexer
+		// throws, so probe with TryGetValue and fall back to the default
+		Analyzer analyzer;
+		fieldAnalyzers.TryGetValue(fieldName, out analyzer);
+		return (analyzer != null) ? analyzer : defaultAnalyzer;
+	  }
+
+	  public override string ToString()
+	  {
+		return "PerFieldAnalyzerWrapper(" + fieldAnalyzers + ", default=" + defaultAnalyzer + ")";
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
new file mode 100644
index 0000000..de8b8ba
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -0,0 +1,112 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// Links two <seealso cref="PrefixAwareTokenFilter"/> instances.
+	/// <p/>
+	/// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+	/// the ones located in org.apache.lucene.analysis.tokenattributes. 
+	/// </summary>
+	public class PrefixAndSuffixAwareTokenFilter : TokenStream
+	{
+
+	  private PrefixAwareTokenFilter suffix;
+
+	  public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) : base(suffix)
+	  {
+		prefix = new PrefixAwareTokenFilterAnonymousInnerClassHelper(this, prefix, input);
+		this.suffix = new PrefixAwareTokenFilterAnonymousInnerClassHelper2(this, prefix, suffix);
+	  }
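+
+	  // Illustrative usage sketch: bracket a stream with single-token prefix and
+	  // suffix streams (SingleTokenTokenStream lives in this same package;
+	  // makeToken is a hypothetical helper producing a Token with term text and
+	  // offsets):
+	  //   TokenStream ts = new PrefixAndSuffixAwareTokenFilter(
+	  //       new SingleTokenTokenStream(makeToken("^")), input,
+	  //       new SingleTokenTokenStream(makeToken("$")));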
+
+	  private class PrefixAwareTokenFilterAnonymousInnerClassHelper : PrefixAwareTokenFilter
+	  {
+		  private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
+
+		  public PrefixAwareTokenFilterAnonymousInnerClassHelper(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream input) : base(prefix, input)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override Token updateSuffixToken(Token suffixToken, Token lastInputToken)
+		  {
+			return outerInstance.updateInputToken(suffixToken, lastInputToken);
+		  }
+	  }
+
+	  private class PrefixAwareTokenFilterAnonymousInnerClassHelper2 : PrefixAwareTokenFilter
+	  {
+		  private readonly PrefixAndSuffixAwareTokenFilter outerInstance;
+
+		  public PrefixAwareTokenFilterAnonymousInnerClassHelper2(PrefixAndSuffixAwareTokenFilter outerInstance, TokenStream prefix, TokenStream suffix) : base(prefix, suffix)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  public override Token updateSuffixToken(Token suffixToken, Token lastInputToken)
+		  {
+			return outerInstance.updateSuffixToken(suffixToken, lastInputToken);
+		  }
+	  }
+
+	  public virtual Token updateInputToken(Token inputToken, Token lastPrefixToken)
+	  {
+		inputToken.setOffset(lastPrefixToken.endOffset() + inputToken.startOffset(), lastPrefixToken.endOffset() + inputToken.endOffset());
+		return inputToken;
+	  }
+
+	  public virtual Token updateSuffixToken(Token suffixToken, Token lastInputToken)
+	  {
+		suffixToken.setOffset(lastInputToken.endOffset() + suffixToken.startOffset(), lastInputToken.endOffset() + suffixToken.endOffset());
+		return suffixToken;
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		return suffix.incrementToken();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		suffix.reset();
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		suffix.close();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void end() throws java.io.IOException
+	  public override void end()
+	  {
+		suffix.end();
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
new file mode 100644
index 0000000..7835e7a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PrefixAwareTokenFilter.cs
@@ -0,0 +1,246 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+	/// <summary>
+	/// Joins two token streams and keeps the last token of the first stream available
+	/// so that it can be used when updating token values in the second stream.
+	/// 
+	/// The default implementation adds the last prefix token's end offset to the suffix token's start and end offsets.
+	/// <p/>
+	/// <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
+	/// the ones located in org.apache.lucene.analysis.tokenattributes. 
+	/// </summary>
+	public class PrefixAwareTokenFilter : TokenStream
+	{
+
+	  private TokenStream prefix;
+	  private TokenStream suffix;
+
+	  private CharTermAttribute termAtt;
+	  private PositionIncrementAttribute posIncrAtt;
+	  private PayloadAttribute payloadAtt;
+	  private OffsetAttribute offsetAtt;
+	  private TypeAttribute typeAtt;
+	  private FlagsAttribute flagsAtt;
+
+	  private CharTermAttribute p_termAtt;
+	  private PositionIncrementAttribute p_posIncrAtt;
+	  private PayloadAttribute p_payloadAtt;
+	  private OffsetAttribute p_offsetAtt;
+	  private TypeAttribute p_typeAtt;
+	  private FlagsAttribute p_flagsAtt;
+
+	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
+	  {
+		this.suffix = suffix;
+		this.prefix = prefix;
+		prefixExhausted = false;
+
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+		payloadAtt = addAttribute(typeof(PayloadAttribute));
+		offsetAtt = addAttribute(typeof(OffsetAttribute));
+		typeAtt = addAttribute(typeof(TypeAttribute));
+		flagsAtt = addAttribute(typeof(FlagsAttribute));
+
+		p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
+		p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
+		p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
+		p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
+		p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
+		p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
+	  }
+
+	  private Token previousPrefixToken = new Token();
+	  private Token reusableToken = new Token();
+
+	  private bool prefixExhausted;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!prefixExhausted)
+		{
+		  Token nextToken = getNextPrefixInputToken(reusableToken);
+		  if (nextToken == null)
+		  {
+			prefixExhausted = true;
+		  }
+		  else
+		  {
+			previousPrefixToken.reinit(nextToken);
+			// Make it a deep copy
+			BytesRef p = previousPrefixToken.Payload;
+			if (p != null)
+			{
+			  previousPrefixToken.Payload = p.clone();
+			}
+			CurrentToken = nextToken;
+			return true;
+		  }
+		}
+
+		Token nextToken = getNextSuffixInputToken(reusableToken);
+		if (nextToken == null)
+		{
+		  return false;
+		}
+
+		nextToken = updateSuffixToken(nextToken, previousPrefixToken);
+		CurrentToken = nextToken;
+		return true;
+	  }
+
+	  private Token CurrentToken
+	  {
+		  set
+		  {
+			if (value == null)
+			{
+				return;
+			}
+			clearAttributes();
+			termAtt.copyBuffer(value.buffer(), 0, value.length());
+			posIncrAtt.PositionIncrement = value.PositionIncrement;
+			flagsAtt.Flags = value.Flags;
+			offsetAtt.setOffset(value.startOffset(), value.endOffset());
+			typeAtt.Type = value.type();
+			payloadAtt.Payload = value.Payload;
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.analysis.Token getNextPrefixInputToken(org.apache.lucene.analysis.Token token) throws java.io.IOException
+	  private Token getNextPrefixInputToken(Token token)
+	  {
+		if (!prefix.incrementToken())
+		{
+			return null;
+		}
+		token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
+		token.PositionIncrement = p_posIncrAtt.PositionIncrement;
+		token.Flags = p_flagsAtt.Flags;
+		token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
+		token.Type = p_typeAtt.type();
+		token.Payload = p_payloadAtt.Payload;
+		return token;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.analysis.Token getNextSuffixInputToken(org.apache.lucene.analysis.Token token) throws java.io.IOException
+	  private Token getNextSuffixInputToken(Token token)
+	  {
+		if (!suffix.incrementToken())
+		{
+			return null;
+		}
+		token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
+		token.PositionIncrement = posIncrAtt.PositionIncrement;
+		token.Flags = flagsAtt.Flags;
+		token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
+		token.Type = typeAtt.type();
+		token.Payload = payloadAtt.Payload;
+		return token;
+	  }
+
+	  /// <summary>
+	  /// The default implementation adds the last prefix token's end offset to the suffix token's start and end offsets.
+	  /// </summary>
+	  /// <param name="suffixToken"> a token from the suffix stream </param>
+	  /// <param name="lastPrefixToken"> the last token from the prefix stream </param>
+	  /// <returns> the updated suffix token </returns>
+	  public virtual Token updateSuffixToken(Token suffixToken, Token lastPrefixToken)
+	  {
+		suffixToken.setOffset(lastPrefixToken.endOffset() + suffixToken.startOffset(), lastPrefixToken.endOffset() + suffixToken.endOffset());
+		return suffixToken;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void end() throws java.io.IOException
+	  public override void end()
+	  {
+		prefix.end();
+		suffix.end();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void close() throws java.io.IOException
+	  public override void close()
+	  {
+		prefix.close();
+		suffix.close();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		if (prefix != null)
+		{
+		  prefixExhausted = false;
+		  prefix.reset();
+		}
+		if (suffix != null)
+		{
+		  suffix.reset();
+		}
+	  }
+
+	  public virtual TokenStream Prefix
+	  {
+		  get
+		  {
+			return prefix;
+		  }
+		  set
+		  {
+			this.prefix = value;
+		  }
+	  }
+
+
+	  public virtual TokenStream Suffix
+	  {
+		  get
+		  {
+			return suffix;
+		  }
+		  set
+		  {
+			this.suffix = value;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file
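
A minimal usage sketch for this filter, assuming the Lucene 4.x WhitespaceTokenizer(Version, Reader) constructor survives the port; the stream wiring here is illustrative:

    TokenStream prefix = new WhitespaceTokenizer(Version.LUCENE_47, new StringReader("one two"));
    TokenStream suffix = new WhitespaceTokenizer(Version.LUCENE_47, new StringReader("three"));
    TokenStream joined = new PrefixAwareTokenFilter(prefix, suffix);

    joined.reset();
    while (joined.incrementToken())
    {
        // "one" and "two" pass through with their original offsets; "three"
        // is emitted with offsets shifted past the end of "two" by
        // updateSuffixToken.
    }
    joined.end();
    joined.close();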

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
new file mode 100644
index 0000000..9c2586f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
@@ -0,0 +1,99 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A TokenFilter which filters out Tokens that have the same position and term text as the previous token in the stream.
+	/// </summary>
+	public sealed class RemoveDuplicatesTokenFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+
+	  // use a fixed version, as we don't care about case sensitivity.
+	  private readonly CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
+
+	  /// <summary>
+	  /// Creates a new RemoveDuplicatesTokenFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream that will be filtered </param>
+	  public RemoveDuplicatesTokenFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		while (input.incrementToken())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char term[] = termAttribute.buffer();
+		  char[] term = termAttribute.buffer();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int length = termAttribute.length();
+		  int length = termAttribute.length();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int posIncrement = posIncAttribute.getPositionIncrement();
+		  int posIncrement = posIncAttribute.PositionIncrement;
+
+		  if (posIncrement > 0)
+		  {
+			previous.clear();
+		  }
+
+		  bool duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
+
+		  // clone the term, and add to the set of seen terms.
+		  char[] saved = new char[length];
+		  Array.Copy(term, 0, saved, 0, length);
+		  previous.add(saved);
+
+		  if (!duplicate)
+		  {
+			return true;
+		  }
+		}
+		return false;
+	  }
+
+	  /// <summary>
+	  /// {@inheritDoc}
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		previous.clear();
+	  }
+	}
+
+}
\ No newline at end of file
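
A sketch of the rule implemented above, with a hypothetical upstream stream (stackedSynonyms) that stacks tokens at one position the way a synonym filter would:

    // Upstream emits (term, positionIncrement):
    //   ("fast", 1), ("quick", 0), ("fast", 0), ("fast", 1)
    TokenStream deduped = new RemoveDuplicatesTokenFilter(stackedSynonyms);

    // Emitted: ("fast", 1), ("quick", 0), ("fast", 1)
    // The second "fast" is dropped because its increment is 0 and its term
    // is already in the seen-set for that position; the set is cleared on
    // every increment > 0, so the final "fast" at a new position survives.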

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
new file mode 100644
index 0000000..bae261e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="RemoveDuplicatesTokenFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class RemoveDuplicatesTokenFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new RemoveDuplicatesTokenFilterFactory </summary>
+	  public RemoveDuplicatesTokenFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override RemoveDuplicatesTokenFilter create(TokenStream input)
+	  {
+		return new RemoveDuplicatesTokenFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
new file mode 100644
index 0000000..06ecebc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
@@ -0,0 +1,135 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
+	/// It also discriminates against the use of double vowels aa, ae, ao, oe and oo, leaving just the first one.
+	/// <p/>
+	/// It is a semantically more destructive solution than <seealso cref="ScandinavianNormalizationFilter"/> but
+	/// can in addition help with matching raksmorgas as räksmörgås.
+	/// <p/>
+	/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej == blabarsyltetoj
+	/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas == raksmorgas
+	/// <p/>
+	/// Background:
+	/// Swedish åäö are in fact the same letters as Norwegian and Danish åæø and thus interchangeable
+	/// when used between these languages. They are however folded differently when people type
+	/// them on a keyboard lacking these characters.
+	/// <p/>
+	/// In that situation almost all Swedish people use a, a, o instead of å, ä, ö.
+	/// <p/>
+	/// Norwegians and Danes on the other hand usually type aa, ae and oe instead of å, æ and ø.
+	/// Some do however use a, a, o, oo, ao and sometimes permutations of everything above.
+	/// <p/>
+	/// This filter solves that mismatch problem, but might also cause new ones.
+	/// <p/> </summary>
+	/// <seealso cref= ScandinavianNormalizationFilter </seealso>
+	public sealed class ScandinavianFoldingFilter : TokenFilter
+	{
+
+	  public ScandinavianFoldingFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  private readonly CharTermAttribute charTermAttribute = addAttribute(typeof(CharTermAttribute));
+
+	  private const char AA = '\u00C5'; // Å
+	  private const char aa = '\u00E5'; // å
+	  private const char AE = '\u00C6'; // Æ
+	  private const char ae = '\u00E6'; // æ
+	  private const char AE_se = '\u00C4'; // Ä
+	  private const char ae_se = '\u00E4'; // ä
+	  private const char OE = '\u00D8'; // Ø
+	  private const char oe = '\u00F8'; // ø
+	  private const char OE_se = '\u00D6'; // Ö
+	  private const char oe_se = '\u00F6'; //ö
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = charTermAttribute.buffer();
+		int length = charTermAttribute.length();
+
+
+		int i;
+		for (i = 0; i < length; i++)
+		{
+
+		  if (buffer[i] == aa || buffer[i] == ae_se || buffer[i] == ae)
+		  {
+
+			buffer[i] = 'a';
+
+		  }
+		  else if (buffer[i] == AA || buffer[i] == AE_se || buffer[i] == AE)
+		  {
+
+			buffer[i] = 'A';
+
+		  }
+		  else if (buffer[i] == oe || buffer[i] == oe_se)
+		  {
+
+			buffer[i] = 'o';
+
+		  }
+		  else if (buffer[i] == OE || buffer[i] == OE_se)
+		  {
+
+			buffer[i] = 'O';
+
+		  }
+		  else if (length - 1 > i)
+		  {
+
+			if ((buffer[i] == 'a' || buffer[i] == 'A') && (buffer[i + 1] == 'a' || buffer[i + 1] == 'A' || buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+
+			}
+			else if ((buffer[i] == 'o' || buffer[i] == 'O') && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+
+			}
+		  }
+		}
+
+		charTermAttribute.Length = length;
+
+
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file
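
Two concrete traces of the loop above, assuming a whitespace-tokenized input (a sketch, not code from this commit):

    // "räksmörgås" -> "raksmorgas": ä and å fold to a, ö folds to o.
    TokenStream folded = new ScandinavianFoldingFilter(
        new WhitespaceTokenizer(Version.LUCENE_47, new StringReader("räksmörgås")));

    // "raeksmoergaas" -> "raksmorgas" as well: the ae, oe and aa pairs hit
    // the StemmerUtil.delete branches, leaving just the first vowel.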

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
new file mode 100644
index 0000000..5b16722
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilterFactory.cs
@@ -0,0 +1,53 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ScandinavianFoldingFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_scandfold" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ScandinavianFoldingFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ScandinavianFoldingFilterFactory : TokenFilterFactory
+	{
+
+	  public ScandinavianFoldingFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ScandinavianFoldingFilter create(TokenStream input)
+	  {
+		return new ScandinavianFoldingFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
new file mode 100644
index 0000000..3113949
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
@@ -0,0 +1,145 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+
+	/// <summary>
+	/// This filter normalizes use of the interchangeable Scandinavian characters æÆäÄöÖøØ
+	/// and folded variants (aa, ao, ae, oe and oo) by transforming them to åÅæÆøØ.
+	/// <p/>
+	/// It's a semantically less destructive solution than <seealso cref="ScandinavianFoldingFilter"/>,
+	/// most useful when a person with a Norwegian or Danish keyboard queries a Swedish index
+	/// and vice versa. This filter does <b>not</b> perform the common Swedish folds of å and ä to a nor ö to o.
+	/// <p/>
+	/// blåbærsyltetøj == blåbärsyltetöj == blaabaarsyltetoej but not blabarsyltetoj
+	/// räksmörgås == ræksmørgås == ræksmörgaos == raeksmoergaas but not raksmorgas
+	/// <p/> </summary>
+	/// <seealso cref= ScandinavianFoldingFilter </seealso>
+	public sealed class ScandinavianNormalizationFilter : TokenFilter
+	{
+
+	  public ScandinavianNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  private readonly CharTermAttribute charTermAttribute = addAttribute(typeof(CharTermAttribute));
+
+	  private const char AA = '\u00C5'; // Å
+	  private const char aa = '\u00E5'; // å
+	  private const char AE = '\u00C6'; // Æ
+	  private const char ae = '\u00E6'; // æ
+	  private const char AE_se = '\u00C4'; // Ä
+	  private const char ae_se = '\u00E4'; // ä
+	  private const char OE = '\u00D8'; // Ø
+	  private const char oe = '\u00F8'; // ø
+	  private const char OE_se = '\u00D6'; // Ö
+	  private const char oe_se = '\u00F6'; //ö
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = charTermAttribute.buffer();
+		int length = charTermAttribute.length();
+
+
+		int i;
+		for (i = 0; i < length; i++)
+		{
+
+		  if (buffer[i] == ae_se)
+		  {
+			buffer[i] = ae;
+
+		  }
+		  else if (buffer[i] == AE_se)
+		  {
+			buffer[i] = AE;
+
+		  }
+		  else if (buffer[i] == oe_se)
+		  {
+			buffer[i] = oe;
+
+		  }
+		  else if (buffer[i] == OE_se)
+		  {
+			buffer[i] = OE;
+
+		  }
+		  else if (length - 1 > i)
+		  {
+
+			if (buffer[i] == 'a' && (buffer[i + 1] == 'a' || buffer[i + 1] == 'o' || buffer[i + 1] == 'A' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = aa;
+
+			}
+			else if (buffer[i] == 'A' && (buffer[i + 1] == 'a' || buffer[i + 1] == 'A' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = AA;
+
+			}
+			else if (buffer[i] == 'a' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = ae;
+
+			}
+			else if (buffer[i] == 'A' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = AE;
+
+			}
+			else if (buffer[i] == 'o' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = oe;
+
+			}
+			else if (buffer[i] == 'O' && (buffer[i + 1] == 'e' || buffer[i + 1] == 'E' || buffer[i + 1] == 'o' || buffer[i + 1] == 'O'))
+			{
+			  length = StemmerUtil.delete(buffer, i + 1, length);
+			  buffer[i] = OE;
+
+			}
+
+		  }
+		}
+
+		charTermAttribute.Length = length;
+
+
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file
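
A matching sketch for the normalization direction (stream setup illustrative, as above):

    // "blaabaersyltetoej" -> "blåbærsyltetøj": aa -> å, ae -> æ, oe -> ø.
    TokenStream normalized = new ScandinavianNormalizationFilter(
        new WhitespaceTokenizer(Version.LUCENE_47, new StringReader("blaabaersyltetoej")));

    // "blåbärsyltetöj" normalizes to "blåbærsyltetøj" too (ä -> æ, ö -> ø),
    // but å, æ and ø themselves are never folded to plain a or o -- that is
    // the ScandinavianFoldingFilter's job.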

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
new file mode 100644
index 0000000..e3d3c7e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilterFactory.cs
@@ -0,0 +1,53 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_scandnorm" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ScandinavianNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ScandinavianNormalizationFilterFactory : TokenFilterFactory
+	{
+
+	  public ScandinavianNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override ScandinavianNormalizationFilter create(TokenStream input)
+	  {
+		return new ScandinavianNormalizationFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
new file mode 100644
index 0000000..b732319
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
@@ -0,0 +1,59 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+	/// contained in the provided set is marked as a keyword by setting
+	/// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+	/// </summary>
+	public sealed class SetKeywordMarkerFilter : KeywordMarkerFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly CharArraySet keywordSet;
+
+	  /// <summary>
+	  /// Create a new SetKeywordMarkerFilter that marks the current token as a
+	  /// keyword if the token's term buffer is contained in the given set, via the
+	  /// <seealso cref="KeywordAttribute"/>.
+	  /// </summary>
+	  /// <param name="in">
+	  ///          TokenStream to filter </param>
+	  /// <param name="keywordSet">
+	  ///          the keyword set used to look up the current term buffer </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public SetKeywordMarkerFilter(final org.apache.lucene.analysis.TokenStream in, final org.apache.lucene.analysis.util.CharArraySet keywordSet)
+	  public SetKeywordMarkerFilter(TokenStream @in, CharArraySet keywordSet) : base(@in)
+	  {
+		this.keywordSet = keywordSet;
+	  }
+
+	  protected internal override bool Keyword
+	  {
+		  get
+		  {
+			return keywordSet.contains(termAtt.buffer(), 0, termAtt.length());
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file
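
A minimal sketch of the usual pairing with a stemmer, assuming a downstream filter that honors KeywordAttribute (PorterStemFilter is used as an illustrative stand-in) and the CharArraySet(Version, int, bool) constructor seen elsewhere in this commit:

    CharArraySet protectedTerms = new CharArraySet(Version.LUCENE_47, 4, true); // ignoreCase = true
    protectedTerms.add("lucene");

    TokenStream ts = new SetKeywordMarkerFilter(tokenizer, protectedTerms); // tokenizer: hypothetical upstream
    ts = new PorterStemFilter(ts); // assumed to skip tokens whose KeywordAttribute is set

    // "lucene" passes through unstemmed; every other token is stemmed.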

