lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [16/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:20 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
new file mode 100644
index 0000000..17d787a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
@@ -0,0 +1,69 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ASCIIFoldingFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class ASCIIFoldingFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+	  private readonly bool preserveOriginal; // read from the "preserveOriginal" argument; false is passed as the fallback value
+
+	  /// <summary>
+	  /// Creates a new ASCIIFoldingFilterFactory; throws ArgumentException when <paramref name="args"/> is still non-empty after "preserveOriginal" was read. </summary>
+	  public ASCIIFoldingFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		preserveOriginal = getBoolean(args, "preserveOriginal", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args)); // Join lists the entries; "+ args" would only print the dictionary's type name
+		}
+	  }
+	  /// <summary> Wraps <paramref name="input"/> in an <seealso cref="ASCIIFoldingFilter"/> configured with the stored preserveOriginal flag. </summary>
+	  public override ASCIIFoldingFilter create(TokenStream input)
+	  {
+		return new ASCIIFoldingFilter(input, preserveOriginal);
+	  }
+	  /// <summary> Returns this factory itself as the multi-term component. </summary>
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
new file mode 100644
index 0000000..94c8d4b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -0,0 +1,208 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// A filter to apply normal capitalization rules to Tokens.  It will make the first letter
+	/// capital and the rest lower case.
+	/// <p/>
+	/// This filter is particularly useful to build nice looking facet parameters.  This filter
+	/// is not appropriate if you intend to use a prefix query.
+	/// </summary>
+	public sealed class CapitalizationFilter : TokenFilter
+	{
+	  public static readonly int DEFAULT_MAX_WORD_COUNT = int.MaxValue;
+	  public static readonly int DEFAULT_MAX_TOKEN_LENGTH = int.MaxValue;
+
+	  private readonly bool onlyFirstWord;
+	  private readonly CharArraySet keep;
+	  private readonly bool forceFirstLetter;
+	  private readonly ICollection<char[]> okPrefix;
+
+	  private readonly int minWordLength;
+	  private readonly int maxWordCount;
+	  private readonly int maxTokenLength;
+
+	  private readonly CharTermAttribute termAtt; // assigned in the constructor: a C# field initializer cannot call an instance method
+
+	  /// <summary>
+	  /// Creates a CapitalizationFilter with the default parameters.
+	  /// <para>
+	  /// Equivalent to calling the full constructor as
+	  ///   CapitalizationFilter(in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH).
+	  /// </para>
+	  /// </summary>
+	  public CapitalizationFilter(TokenStream @in) : this(@in, true, null, true, null, 0, DEFAULT_MAX_WORD_COUNT, DEFAULT_MAX_TOKEN_LENGTH)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a CapitalizationFilter with the specified parameters. </summary>
+	  /// <param name="in"> input tokenstream </param>
+	  /// <param name="onlyFirstWord"> if true, only the first word of a token is capitalized and every later word is lower-cased; if false, every word is capitalized. </param>
+	  /// <param name="keep"> a keep word list; words found in it are left as they are (may be null). </param>
+	  /// <param name="forceFirstLetter"> Force the first letter of the first word to be capitalized even if it is in the keep list. </param>
+	  /// <param name="okPrefix"> do not change word capitalization if a word begins with something in this list (may be null). </param>
+	  /// <param name="minWordLength"> how long the word needs to be to get capitalization applied.  If the
+	  ///                      minWordLength is 3, "and" > "And" but "or" stays "or". </param>
+	  /// <param name="maxWordCount"> if the token contains more than maxWordCount words, the capitalization is
+	  ///                     assumed to be correct and the original term text is restored. </param>
+	  /// <param name="maxTokenLength"> tokens whose length is not below this value are passed through unmodified. </param>
+	  public CapitalizationFilter(TokenStream @in, bool onlyFirstWord, CharArraySet keep, bool forceFirstLetter, ICollection<char[]> okPrefix, int minWordLength, int maxWordCount, int maxTokenLength) : base(@in)
+	  {
+		this.onlyFirstWord = onlyFirstWord;
+		this.keep = keep;
+		this.forceFirstLetter = forceFirstLetter;
+		this.okPrefix = okPrefix;
+		this.minWordLength = minWordLength;
+		this.maxWordCount = maxWordCount;
+		this.maxTokenLength = maxTokenLength;
+		this.termAtt = addAttribute(typeof(CharTermAttribute));
+	  }
+
+	  /// <summary> Advances the input and rewrites the capitalization of the current term in place; words are delimited by '.' and by characters up to ' '. </summary>
+	  /// <returns> false when the input has no further token, true otherwise </returns>
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] termBuffer = termAtt.buffer();
+		int termBufferLength = termAtt.length();
+		char[] backup = null;
+
+		if (maxWordCount < DEFAULT_MAX_WORD_COUNT)
+		{
+		  // keep a copy of the untouched term so it can be restored if the word count is exceeded
+		  backup = new char[termBufferLength];
+		  Array.Copy(termBuffer, 0, backup, 0, termBufferLength);
+		}
+
+		if (termBufferLength < maxTokenLength) // terms of maxTokenLength or more characters are left untouched
+		{
+		  int wordCount = 0;
+
+		  int lastWordStart = 0;
+		  for (int i = 0; i < termBufferLength; i++)
+		  {
+			char c = termBuffer[i];
+			if (c <= ' ' || c == '.')
+			{
+			  int len = i - lastWordStart;
+			  if (len > 0)
+			  {
+				processWord(termBuffer, lastWordStart, len, wordCount++);
+				lastWordStart = i + 1;
+				i++; // NOTE(review): with the loop increment this skips the character after the delimiter; presumably inherited from the Java original - confirm before changing
+			  }
+			}
+		  }
+
+		  // process the last word
+		  if (lastWordStart < termBufferLength)
+		  {
+			processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
+		  }
+
+		  if (wordCount > maxWordCount) // only reachable when maxWordCount < DEFAULT_MAX_WORD_COUNT, i.e. when backup was made
+		  {
+			termAtt.copyBuffer(backup, 0, termBufferLength);
+		  }
+		}
+
+		return true;
+	  }
+
+	  /// <summary> Applies the capitalization rules in place to the word at buffer[offset .. offset + length); wordCount is the zero-based index of that word within the token. </summary>
+	  private void processWord(char[] buffer, int offset, int length, int wordCount)
+	  {
+		if (length < 1)
+		{
+		  return;
+		}
+
+		if (onlyFirstWord && wordCount > 0)
+		{
+		  for (int i = 0; i < length; i++)
+		  {
+			buffer[offset + i] = char.ToLowerInvariant(buffer[offset + i]);
+		  }
+		  return;
+		}
+
+		if (keep != null && keep.contains(buffer, offset, length))
+		{
+		  if (wordCount == 0 && forceFirstLetter)
+		  {
+			buffer[offset] = char.ToUpperInvariant(buffer[offset]);
+		  }
+		  return;
+		}
+
+		if (length < minWordLength)
+		{
+		  return;
+		}
+
+		if (okPrefix != null)
+		{
+		  foreach (char[] prefix in okPrefix)
+		  {
+			if (length >= prefix.Length) //don't bother checking if the buffer length is less than the prefix
+			{
+			  bool match = true;
+			  for (int i = 0; i < prefix.Length; i++)
+			  {
+				if (prefix[i] != buffer[offset + i])
+				{
+				  match = false;
+				  break;
+				}
+			  }
+			  if (match)
+			  {
+				return;
+			  }
+			}
+		  }
+		}
+
+		// Default rule: upper-case the first character and lower-case the rest.
+		// The guard at the top of the method guarantees at least one character.
+		// Invariant-culture conversions are used so that the result does not depend on
+		// the current thread culture (for example the Turkish dotted / dotless i).
+		buffer[offset] = char.ToUpperInvariant(buffer[offset]);
+
+		for (int i = 1; i < length; i++)
+		{
+		  buffer[offset + i] = char.ToLowerInvariant(buffer[offset + i]);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
new file mode 100644
index 0000000..bd4f335
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
@@ -0,0 +1,117 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using TokenFilterFactory = TokenFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <seealso cref="CapitalizationFilter"/>.
+	/// <p/>
+	/// The factory takes parameters:<br/>
+	/// "onlyFirstWord" - if true, only the first word of a token is capitalized and later words are lower-cased; if false, every word is capitalized<br/>
+	/// "keep" - a keep word list.  Each word that should be kept separated by whitespace.<br/>
+	/// "keepIgnoreCase" - true or false.  If true, the keep list will be considered case-insensitive.<br/>
+	/// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
+	/// "okPrefix" - do not change word capitalization if a word begins with something in this list.
+	/// for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
+	/// "Mckinley"<br/>
+	/// "minWordLength" - how long the word needs to be to get capitalization applied.  If the
+	/// minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
+	/// "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
+	/// assumed to be correct.<br/>
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
+	///           keep="java solr lucene" keepIgnoreCase="false"
+	///           okPrefix="McK McD McA"/&gt;   
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since solr 1.3
+	/// </summary>
+	public class CapitalizationFilterFactory : TokenFilterFactory
+	{
+	  public const string KEEP = "keep";
+	  public const string KEEP_IGNORE_CASE = "keepIgnoreCase";
+	  public const string OK_PREFIX = "okPrefix";
+	  public const string MIN_WORD_LENGTH = "minWordLength";
+	  public const string MAX_WORD_COUNT = "maxWordCount";
+	  public const string MAX_TOKEN_LENGTH = "maxTokenLength";
+	  public const string ONLY_FIRST_WORD = "onlyFirstWord";
+	  public const string FORCE_FIRST_LETTER = "forceFirstLetter";
+
+	  internal CharArraySet keep; // stays null when no "keep" argument is supplied
+	  internal ICollection<char[]> okPrefix = new List<char[]>(); // for Example: McK; empty unless "okPrefix" is supplied
+
+	  internal readonly int minWordLength; // don't modify capitalization for words shorter than this
+	  internal readonly int maxWordCount;
+	  internal readonly int maxTokenLength;
+	  internal readonly bool onlyFirstWord;
+	  internal readonly bool forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
+
+	  /// <summary>
+	  /// Creates a new CapitalizationFilterFactory; throws ArgumentException when <paramref name="args"/> is still non-empty after all known keys were read. </summary>
+	  public CapitalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		bool ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
+		HashSet<string> k = getSet(args, KEEP);
+		if (k != null)
+		{
+		  keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+		  keep.addAll(k);
+		}
+
+		k = getSet(args, OK_PREFIX);
+		if (k != null)
+		{
+		  okPrefix = new List<char[]>(); // C# has no diamond operator: the element type must be spelled out
+		  foreach (string item in k)
+		  {
+			okPrefix.Add(item.ToCharArray());
+		  }
+		}
+
+		minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
+		maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
+		maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
+		onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
+		forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args)); // Join lists the entries; "+ args" would only print the dictionary's type name
+		}
+	  }
+
+	  /// <summary> Wraps <paramref name="input"/> in a <seealso cref="CapitalizationFilter"/> built from the settings read by the constructor. </summary>
+	  public override CapitalizationFilter create(TokenStream input)
+	  {
+		return new CapitalizationFilter(input, onlyFirstWord, keep, forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
new file mode 100644
index 0000000..b410fe9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilter.cs
@@ -0,0 +1,82 @@
+using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using FilteringTokenFilter = FilteringTokenFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Removes words that are too long or too short from the stream.
+	/// <para>
+	/// Note: Length is calculated as the number of Unicode codepoints.
+	/// </para>
+	/// </summary>
+	public sealed class CodepointCountFilter : FilteringTokenFilter
+	{
+	  private readonly int min;
+	  private readonly int max;
+
+	  private readonly CharTermAttribute termAtt; // assigned in the constructor: a C# field initializer cannot call an instance method
+
+	  /// <summary>
+	  /// Create a new <seealso cref="CodepointCountFilter"/>. This will filter out tokens whose
+	  /// <seealso cref="CharTermAttribute"/> is either too short (fewer than <paramref name="min"/> Unicode code points)
+	  /// or too long (more than <paramref name="max"/> Unicode code points). </summary>
+	  /// <param name="version"> the Lucene match version </param>
+	  /// <param name="in">      the <seealso cref="TokenStream"/> to consume </param>
+	  /// <param name="min">     the minimum length </param>
+	  /// <param name="max">     the maximum length </param>
+	  public CodepointCountFilter(Version version, TokenStream @in, int min, int max) : base(version, @in)
+	  {
+		this.min = min;
+		this.max = max;
+		this.termAtt = addAttribute(typeof(CharTermAttribute));
+	  }
+
+	  /// <summary> Returns true when the length of the current term, counted in Unicode code points, lies in [min, max]. </summary>
+	  public override bool accept()
+	  {
+		int max32 = termAtt.length(); // upper bound: every UTF-16 char is a code point of its own
+		int min32 = max32 >> 1; // lower bound: every code point is a two-char surrogate pair
+		if (min32 >= min && max32 <= max)
+		{
+		  return true; // definitely within range
+		}
+		if (min32 > max || max32 < min)
+		{
+		  return false; // definitely not
+		}
+		// we must count to be sure: each well-formed surrogate pair is one code point, unpaired surrogates count as one each
+		char[] buffer = termAtt.buffer();
+		int len = max32;
+		for (int i = 1; i < max32; i++)
+		{
+		  if (char.IsSurrogatePair(buffer[i - 1], buffer[i]))
+		  {
+			len--;
+		  }
+		}
+		return (len >= min && len <= max);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
new file mode 100644
index 0000000..e85fd1e
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="CodepointCountFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.CodepointCountFilterFactory" min="0" max="1" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class CodepointCountFilterFactory : TokenFilterFactory
+	{
+	  internal readonly int min; // read from the "min" argument
+	  internal readonly int max; // read from the "max" argument
+	  public const string MIN_KEY = "min";
+	  public const string MAX_KEY = "max";
+
+	  /// <summary>
+	  /// Creates a new CodepointCountFilterFactory; throws ArgumentException when <paramref name="args"/> is still non-empty after "min" and "max" were read. </summary>
+	  public CodepointCountFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		min = requireInt(args, MIN_KEY);
+		max = requireInt(args, MAX_KEY);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args)); // Join lists the entries; "+ args" would only print the dictionary's type name
+		}
+	  }
+	  /// <summary> Wraps <paramref name="input"/> in a <seealso cref="CodepointCountFilter"/> using the configured min and max. </summary>
+	  public override CodepointCountFilter create(TokenStream input)
+	  {
+		return new CodepointCountFilter(luceneMatchVersion, input, min, max);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
new file mode 100644
index 0000000..38af481
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
@@ -0,0 +1,34 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// An always exhausted token stream.
+	/// </summary>
+	public sealed class EmptyTokenStream : TokenStream
+	{
+	  /// <summary> Never produces a token: every call reports that the stream is exhausted. </summary>
+	  /// <returns> always false </returns>
+	  public override bool incrementToken()
+	  {
+		return false;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
new file mode 100644
index 0000000..96a2dfa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -0,0 +1,164 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// When the plain text is extracted from documents, we will often have many words hyphenated and broken into
+	/// two lines. This is often the case with documents where narrow text columns are used, such as newsletters.
+	/// In order to increase search efficiency, this filter puts hyphenated words broken into two lines back together.
+	/// This filter should be used on indexing time only.
+	/// Example field definition in schema.xml:
+	/// <pre class="prettyprint">
+	/// &lt;fieldtype name="text" class="solr.TextField" positionIncrementGap="100"&gt;
+	///  &lt;analyzer type="index"&gt;
+	///    &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///      &lt;filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/&gt;
+	///      &lt;filter class="solr.StopFilterFactory" ignoreCase="true"/&gt;
+	///      &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
+	///      &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/&gt;
+	///      &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///      &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///  &lt;/analyzer&gt;
+	///  &lt;analyzer type="query"&gt;
+	///      &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///      &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/&gt;
+	///      &lt;filter class="solr.StopFilterFactory" ignoreCase="true"/&gt;
+	///      &lt;filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/&gt;
+	///      &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///      &lt;filter class="solr.RemoveDuplicatesTokenFilterFactory"/&gt;
+	///  &lt;/analyzer&gt;
+	/// &lt;/fieldtype&gt;
+	/// </pre>
+	/// 
+	/// </summary>
+	public sealed class HyphenatedWordsFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAttribute; // both attributes are assigned in the constructor:
+	  private readonly OffsetAttribute offsetAttribute; // a C# field initializer cannot call an instance method
+
+	  private readonly StringBuilder hyphenated = new StringBuilder(); // text of the fragments joined so far, without their trailing hyphens
+	  private State savedState; // state captured at the first fragment of the word being joined; null when none is pending
+	  private bool exhausted = false;
+	  private int lastEndOffset = 0; // end offset of the most recently consumed input token
+
+	  /// <summary>
+	  /// Creates a new HyphenatedWordsFilter
+	  /// </summary>
+	  /// <param name="in"> TokenStream that will be filtered </param>
+	  public HyphenatedWordsFilter(TokenStream @in) : base(@in)
+	  {
+		termAttribute = addAttribute(typeof(CharTermAttribute));
+		offsetAttribute = addAttribute(typeof(OffsetAttribute));
+	  }
+
+	  /// <summary>
+	  /// Emits the next token, joining each run of input tokens that end in '-' with the token that follows the run.
+	  /// If the input ends while such a run is still pending, the joined fragments are emitted with one trailing '-'.
+	  /// </summary>
+	  /// <returns> true if a token was produced, false once the input is exhausted and nothing is pending </returns>
+	  public override bool incrementToken()
+	  {
+		while (!exhausted && input.incrementToken())
+		{
+		  char[] term = termAttribute.buffer();
+		  int termLength = termAttribute.length();
+		  lastEndOffset = offsetAttribute.endOffset();
+
+		  if (termLength > 0 && term[termLength - 1] == '-')
+		  {
+			// a hyphenated word
+			// capture the state of the first token only
+			if (savedState == null)
+			{
+			  savedState = captureState();
+			}
+			hyphenated.Append(term, 0, termLength - 1);
+		  }
+		  else if (savedState == null)
+		  {
+			// not part of a hyphenated word.
+			return true;
+		  }
+		  else
+		  {
+			// the final portion of a hyphenated word
+			hyphenated.Append(term, 0, termLength);
+			unhyphenate();
+			return true;
+		  }
+		}
+
+		exhausted = true;
+
+		if (savedState != null)
+		{
+		  // the final term ends with a hyphen
+		  // add back the hyphen, for backwards compatibility.
+		  hyphenated.Append('-');
+		  unhyphenate();
+		  return true;
+		}
+
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Resets the base stream and clears the pending hyphenated text, the saved state, the exhausted flag and the last end offset.
+	  /// </summary>
+	  public override void reset()
+	  {
+		base.reset();
+		hyphenated.Length = 0;
+		savedState = null;
+		exhausted = false;
+		lastEndOffset = 0;
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Restores the state saved at the first fragment, replaces its term text with the joined text and sets its end offset to that of the last consumed token.
+	  /// </summary>
+	  private void unhyphenate()
+	  {
+		restoreState(savedState);
+		savedState = null;
+
+		char[] term = termAttribute.buffer();
+		int length = hyphenated.Length;
+		if (length > termAttribute.length())
+		{
+		  term = termAttribute.resizeBuffer(length);
+		}
+
+		hyphenated.CopyTo(0, term, 0, length); // .NET counterpart of Java's StringBuilder.getChars(0, length, term, 0)
+		termAttribute.Length = length;
+		offsetAttribute.setOffset(offsetAttribute.startOffset(), lastEndOffset);
+		hyphenated.Length = 0;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
new file mode 100644
index 0000000..946cd57
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <see cref="HyphenatedWordsFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.HyphenatedWordsFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class HyphenatedWordsFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new HyphenatedWordsFilterFactory. It reads no arguments of its own, so any entry still present in <paramref name="args"/> after the base constructor is rejected with an ArgumentException naming the leftover keys. </summary>
+	  public HyphenatedWordsFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys)); // a dictionary's ToString() is only its type name in .NET
+		}
+	  }
+	  /// <summary> Wraps <paramref name="input"/> in a new <see cref="HyphenatedWordsFilter"/>. </summary>
+	  public override HyphenatedWordsFilter create(TokenStream input)
+	  {
+		return new HyphenatedWordsFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
new file mode 100644
index 0000000..f110d37
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -0,0 +1,67 @@
+using System;
+using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	using FilteringTokenFilter = FilteringTokenFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A TokenFilter that only keeps tokens with text contained in the
+	/// required words.  This filter behaves like the inverse of StopFilter.
+	///
+	/// Available since Solr 1.3.
+	/// </summary>
+	public sealed class KeepWordFilter : FilteringTokenFilter
+	{
+	  private readonly CharArraySet words;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute)); // NOTE(review): C# field initializers cannot call instance methods (CS0236); presumably this must move into the constructors
+
+	  /// <summary> Deprecated overload: enablePositionIncrements=false is not supported anymore as of Lucene 4.4. </summary>
+	  [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+	  public KeepWordFilter(Version version, bool enablePositionIncrements, TokenStream @in, CharArraySet words) : base(version, enablePositionIncrements, @in)
+	  {
+		this.words = words;
+	  }
+
+	  /// <summary>
+	  /// Create a new <see cref="KeepWordFilter"/>.
+	  /// <para><b>NOTE</b>: The words set passed to this constructor will be directly
+	  /// used by this filter and should not be modified.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="version"> the Lucene match version </param>
+	  /// <param name="in">      the <see cref="TokenStream"/> to consume </param>
+	  /// <param name="words">   the words to keep </param>
+	  public KeepWordFilter(Version version, TokenStream @in, CharArraySet words) : base(version, @in)
+	  {
+		this.words = words;
+	  }
+	  /// <summary> Accepts the current token only when its term text is contained in the words set. </summary>
+	  public override bool accept()
+	  {
+		return words.contains(termAtt.buffer(), 0, termAtt.length());
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
new file mode 100644
index 0000000..8aa687f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
@@ -0,0 +1,113 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+
+	/// <summary>
+	/// Factory for <see cref="KeepWordFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class KeepWordFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private readonly bool ignoreCase;
+	  private readonly bool enablePositionIncrements;
+	  private readonly string wordFiles;
+	  private CharArraySet words;
+
+	  /// <summary>
+	  /// Creates a new KeepWordFilterFactory from the "words", "ignoreCase" (default false) and "enablePositionIncrements" (default true) arguments; any entry still present in <paramref name="args"/> afterwards is rejected with an ArgumentException naming the leftover keys. </summary>
+	  public KeepWordFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		assureMatchVersion();
+		wordFiles = get(args, "words");
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys)); // a dictionary's ToString() is only its type name in .NET
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (wordFiles != null)
+		{
+		  words = getWordSet(loader, wordFiles, ignoreCase); // only loaded when a "words" argument was supplied
+		}
+	  }
+
+	  public virtual bool EnablePositionIncrements
+	  {
+		  get
+		  {
+			return enablePositionIncrements;
+		  }
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public virtual CharArraySet Words
+	  {
+		  get
+		  {
+			return words;
+		  }
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		// words is only assigned by inform() when a "words" argument was given; while it is null the input is returned unfiltered
+		if (words == null)
+		{
+		  return input;
+		}
+		else
+		{
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("deprecation") final org.apache.lucene.analysis.TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+		  TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+		  return filter;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
new file mode 100644
index 0000000..8918274
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
@@ -0,0 +1,61 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// Marks terms as keywords via the <see cref="KeywordAttribute"/>.
+	/// </summary>
+	/// <seealso cref="KeywordAttribute"/>
+	public abstract class KeywordMarkerFilter : TokenFilter
+	{
+
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute)); // NOTE(review): C# field initializers cannot call instance methods (CS0236); presumably this must move into the constructor
+
+	  /// <summary>
+	  /// Creates a new <see cref="KeywordMarkerFilter"/> </summary>
+	  /// <param name="in"> the input stream </param>
+	  protected internal KeywordMarkerFilter(TokenStream @in) : base(@in)
+	  {
+	  }
+	  /// <summary> Advances the input; when it yields a token and <see cref="Keyword"/> is true, sets the keyword flag on it. Returns whether the input produced a token. </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (Keyword)
+		  {
+			keywordAttr.Keyword = true; // the flag is only ever set here, never cleared
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	  /// <summary> Decides, for the token just produced by the input, whether it should be marked as a keyword; read once per token by incrementToken. </summary>
+	  protected internal abstract bool Keyword {get;}
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
new file mode 100644
index 0000000..14eeafa
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
@@ -0,0 +1,99 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <see cref="KeywordMarkerFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class KeywordMarkerFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  public const string PROTECTED_TOKENS = "protected";
+	  public const string PATTERN = "pattern";
+	  private readonly string wordFiles;
+	  private readonly string stringPattern;
+	  private readonly bool ignoreCase;
+	  private Pattern pattern;
+	  private CharArraySet protectedWords;
+
+	  /// <summary>
+	  /// Creates a new KeywordMarkerFilterFactory from the "protected", "pattern" and "ignoreCase" (default false) arguments; any entry still present in <paramref name="args"/> afterwards is rejected with an ArgumentException naming the leftover keys. </summary>
+	  public KeywordMarkerFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		wordFiles = get(args, PROTECTED_TOKENS);
+		stringPattern = get(args, PATTERN);
+		ignoreCase = getBoolean(args, "ignoreCase", false);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys)); // a dictionary's ToString() is only its type name in .NET
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		if (wordFiles != null)
+		{
+		  protectedWords = getWordSet(loader, wordFiles, ignoreCase);
+		}
+		if (stringPattern != null)
+		{
+		  pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern); // ignoreCase applies to the pattern as well as to the word set
+		}
+	  }
+
+	  public virtual bool IgnoreCase
+	  {
+		  get
+		  {
+			return ignoreCase;
+		  }
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		if (pattern != null)
+		{
+		  input = new PatternKeywordMarkerFilter(input, pattern);
+		}
+		if (protectedWords != null)
+		{
+		  input = new SetKeywordMarkerFilter(input, protectedWords); // wraps the pattern filter when both are configured
+		}
+		return input;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
new file mode 100644
index 0000000..f584199
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilter.cs
@@ -0,0 +1,75 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+
+	/// <summary>
+	/// This TokenFilter emits each incoming token twice: first as a keyword and then as a non-keyword, in other words once with
+	/// <see cref="KeywordAttribute.Keyword"/> set to <code>true</code> and once set to <code>false</code> (the repeat has a position increment of 0).
+	/// This is useful if used with a stem filter that respects the <see cref="KeywordAttribute"/> to index the stemmed and the
+	/// un-stemmed version of a term into the same field.
+	/// </summary>
+	public sealed class KeywordRepeatFilter : TokenFilter
+	{
+
+	  private readonly KeywordAttribute keywordAttribute = addAttribute(typeof(KeywordAttribute)); // NOTE(review): C# field initializers cannot call instance methods (CS0236); presumably these must move into the constructor
+	  private readonly PositionIncrementAttribute posIncAttr = addAttribute(typeof(PositionIncrementAttribute));
+	  private State state; // non-null while the repeat of the last emitted token is still pending
+
+	  /// <summary>
+	  /// Construct a token stream filtering the given input.
+	  /// </summary>
+	  public KeywordRepeatFilter(TokenStream input) : base(input)
+	  {
+	  }
+	  /// <summary> Emits the pending non-keyword repeat if there is one; otherwise pulls the next input token, captures it for the repeat and emits it as a keyword. </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (state != null)
+		{
+		  restoreState(state);
+		  posIncAttr.PositionIncrement = 0; // the repeat gets a position increment of 0
+		  keywordAttribute.Keyword = false;
+		  state = null;
+		  return true;
+		}
+		if (input.incrementToken())
+		{
+		  state = captureState(); // captured before the keyword flag is set below
+		  keywordAttribute.Keyword = true;
+		  return true;
+		}
+		return false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		state = null; // drop any repeat still pending from the previous stream
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
new file mode 100644
index 0000000..b6f7b86
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
@@ -0,0 +1,52 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <see cref="KeywordRepeatFilter"/>.
+	///
+	/// Since <see cref="KeywordRepeatFilter"/> emits two tokens for every input token, any tokens that aren't transformed
+	/// later in the analysis chain will be in the document twice. Therefore, consider adding
+	/// <see cref="RemoveDuplicatesTokenFilterFactory"/> later in the analysis chain.
+	/// </summary>
+	public sealed class KeywordRepeatFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new KeywordRepeatFilterFactory. It reads no arguments of its own, so any entry still present in <paramref name="args"/> after the base constructor is rejected with an ArgumentException naming the leftover keys. </summary>
+	  public KeywordRepeatFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys)); // a dictionary's ToString() is only its type name in .NET
+		}
+	  }
+	  /// <summary> Wraps <paramref name="input"/> in a new <see cref="KeywordRepeatFilter"/>. </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new KeywordRepeatFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
new file mode 100644
index 0000000..802ff26
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -0,0 +1,89 @@
+using System;
+using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using FilteringTokenFilter = FilteringTokenFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Removes words that are too long or too short from the stream.
+	/// <para>
+	/// Note: Length is calculated as the number of UTF-16 code units.
+	/// </para>
+	/// </summary>
+	public sealed class LengthFilter : FilteringTokenFilter
+	{
+
+	  private readonly int min;
+	  private readonly int max;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute)); // NOTE(review): C# field initializers cannot call instance methods (CS0236); presumably this must move into the constructors
+
+	  /// <summary> Deprecated overload: enablePositionIncrements=false is not supported anymore as of Lucene 4.4. Validates min and max exactly like the other constructor. </summary>
+	  [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+	  public LengthFilter(Version version, bool enablePositionIncrements, TokenStream @in, int min, int max) : base(version, enablePositionIncrements, @in)
+	  {
+		if (min < 0)
+		{
+		  throw new System.ArgumentException("minimum length must be greater than or equal to zero");
+		}
+		if (min > max)
+		{
+		  throw new System.ArgumentException("maximum length must not be greater than minimum length"); // NOTE(review): thrown when min > max, so the wording is inverted ("less than" is meant); callers or tests may match this text, confirm before rewording
+		}
+		this.min = min;
+		this.max = max;
+	  }
+
+	  /// <summary>
+	  /// Create a new <see cref="LengthFilter"/>. This will filter out tokens whose
+	  /// <see cref="CharTermAttribute"/> is either too short (<see cref="CharTermAttribute.length()"/>
+	  /// &lt; min) or too long (<see cref="CharTermAttribute.length()"/> &gt; max). Throws ArgumentException when min is negative or min is greater than max. </summary>
+	  /// <param name="version"> the Lucene match version </param>
+	  /// <param name="in">      the <see cref="TokenStream"/> to consume </param>
+	  /// <param name="min">     the minimum length </param>
+	  /// <param name="max">     the maximum length </param>
+	  public LengthFilter(Version version, TokenStream @in, int min, int max) : base(version, @in)
+	  {
+		if (min < 0)
+		{
+		  throw new System.ArgumentException("minimum length must be greater than or equal to zero");
+		}
+		if (min > max)
+		{
+		  throw new System.ArgumentException("maximum length must not be greater than minimum length"); // NOTE(review): thrown when min > max, so the wording is inverted ("less than" is meant); callers or tests may match this text, confirm before rewording
+		}
+		this.min = min;
+		this.max = max;
+	  }
+	  /// <summary> Accepts the current token when its term length lies within [min, max], both bounds inclusive. </summary>
+	  public override bool accept()
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int len = termAtt.length();
+		int len = termAtt.length();
+		return (len >= min && len <= max);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
new file mode 100644
index 0000000..6f0e4a3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <see cref="LengthFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LengthFilterFactory" min="0" max="1" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class LengthFilterFactory : TokenFilterFactory
+	{
+	  internal readonly int min;
+	  internal readonly int max;
+	  internal readonly bool enablePositionIncrements;
+	  public const string MIN_KEY = "min";
+	  public const string MAX_KEY = "max";
+
+	  /// <summary>
+	  /// Creates a new LengthFilterFactory from the required "min" and "max" arguments and the optional "enablePositionIncrements" (default true); any entry still present in <paramref name="args"/> afterwards is rejected with an ArgumentException naming the leftover keys. </summary>
+	  public LengthFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		min = requireInt(args, MIN_KEY);
+		max = requireInt(args, MAX_KEY);
+		enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys)); // a dictionary's ToString() is only its type name in .NET
+		}
+	  }
+	  /// <summary> Wraps <paramref name="input"/> in a <see cref="LengthFilter"/> built from the configured min, max and enablePositionIncrements. </summary>
+	  public override LengthFilter create(TokenStream input)
+	  {
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("deprecation") final LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input,min,max);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+		LengthFilter filter = new LengthFilter(luceneMatchVersion, enablePositionIncrements, input, min, max);
+		return filter;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
new file mode 100644
index 0000000..58e9d60
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountAnalyzer.cs
@@ -0,0 +1,68 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// This Analyzer limits the number of tokens while indexing. It is
+	/// a replacement for the maximum field length setting inside <see cref="org.apache.lucene.index.IndexWriter"/>. </summary>
+	/// <seealso cref="LimitTokenCountFilter"/>
+	public sealed class LimitTokenCountAnalyzer : AnalyzerWrapper
+	{
+	  private readonly Analyzer @delegate;
+	  private readonly int maxTokenCount;
+	  private readonly bool consumeAllTokens;
+
+	  /// <summary>
+	  /// Build an analyzer that limits the maximum number of tokens per field.
+	  /// This overload passes consumeAllTokens = false to the three-argument constructor.
+	  /// </summary>
+	  /// <seealso cref="LimitTokenCountAnalyzer(Analyzer,int,bool)"/>
+	  public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount) : this(@delegate, maxTokenCount, false)
+	  {
+	  }
+	  /// <summary>
+	  /// Build an analyzer that limits the maximum number of tokens per field. The delegate's reuse strategy is handed to the base class. </summary>
+	  /// <param name="delegate"> the analyzer to wrap </param>
+	  /// <param name="maxTokenCount"> max number of tokens to produce </param>
+	  /// <param name="consumeAllTokens"> whether all tokens from the delegate should be consumed even if maxTokenCount is reached. </param>
+	  public LimitTokenCountAnalyzer(Analyzer @delegate, int maxTokenCount, bool consumeAllTokens) : base(@delegate.ReuseStrategy)
+	  {
+		this.@delegate = @delegate;
+		this.maxTokenCount = maxTokenCount;
+		this.consumeAllTokens = consumeAllTokens;
+	  }
+	  /// <summary> Returns the wrapped delegate for every field; <paramref name="fieldName"/> is not consulted. </summary>
+	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		return @delegate;
+	  }
+	  /// <summary> Keeps the delegate's tokenizer and wraps its token stream in a <see cref="LimitTokenCountFilter"/> built with maxTokenCount and consumeAllTokens. </summary>
+	  protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
+	  {
+		return new TokenStreamComponents(components.Tokenizer, new LimitTokenCountFilter(components.TokenStream, maxTokenCount, consumeAllTokens));
+	  }
+	  /// <summary> Describes this analyzer as the delegate's own string form plus the two limit settings. </summary>
+	  public override string ToString()
+	  {
+		return "LimitTokenCountAnalyzer(" + @delegate.ToString() + ", maxTokenCount=" + maxTokenCount + ", consumeAllTokens=" + consumeAllTokens + ")";
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
new file mode 100644
index 0000000..4b60687
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilter.cs
@@ -0,0 +1,109 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// This TokenFilter limits the number of tokens while indexing. It is
+	/// a replacement for the maximum field length setting inside <see cref="org.apache.lucene.index.IndexWriter"/>.
+	/// <para>
+	/// By default, this filter ignores any tokens in the wrapped <c>TokenStream</c>
+	/// once the limit has been reached, which can result in <c>reset()</c> being
+	/// called prior to <c>incrementToken()</c> returning <c>false</c>.  For most
+	/// <c>TokenStream</c> implementations this should be acceptable, and faster
+	/// than consuming the full stream. If you are wrapping a <c>TokenStream</c>
+	/// which requires that the full stream of tokens be exhausted in order to
+	/// function properly, use the <c>consumeAllTokens</c> option of
+	/// <see cref="LimitTokenCountFilter.LimitTokenCountFilter(TokenStream, int, bool)"/>.
+	/// </para>
+	/// </summary>
+	public sealed class LimitTokenCountFilter : TokenFilter
+	{
+
+	  private readonly int maxTokenCount;
+	  private readonly bool consumeAllTokens;
+	  private int tokenCount = 0; // tokens emitted since the last reset()
+	  private bool exhausted = false; // true once incrementToken() has returned false
+
+	  /// <summary>
+	  /// Build a filter that only accepts tokens up to a maximum number.
+	  /// This filter will not consume any tokens beyond the maxTokenCount limit.
+	  /// </summary>
+	  /// <seealso cref="LimitTokenCountFilter.LimitTokenCountFilter(TokenStream, int, bool)"/>
+	  public LimitTokenCountFilter(TokenStream @in, int maxTokenCount) : this(@in, maxTokenCount, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Build a filter that limits the maximum number of tokens per field. </summary>
+	  /// <param name="in"> the stream to wrap </param>
+	  /// <param name="maxTokenCount"> max number of tokens to produce; must be at least 1 </param>
+	  /// <param name="consumeAllTokens"> whether all tokens from the input must be consumed even if maxTokenCount is reached. </param>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="maxTokenCount"/> is less than 1 </exception>
+	  public LimitTokenCountFilter(TokenStream @in, int maxTokenCount, bool consumeAllTokens) : base(@in)
+	  {
+		if (maxTokenCount < 1)
+		{
+		  throw new System.ArgumentException("maxTokenCount must be greater than zero");
+		}
+		this.maxTokenCount = maxTokenCount;
+		this.consumeAllTokens = consumeAllTokens;
+	  }
+
+	  /// <summary>
+	  /// Advances to the next token of the wrapped stream while fewer than maxTokenCount tokens have been emitted.
+	  /// </summary>
+	  /// <returns> true if a token was emitted; false once the limit is reached or the input has ended </returns>
+	  public override bool incrementToken()
+	  {
+		if (exhausted)
+		{
+		  return false;
+		}
+		if (tokenCount < maxTokenCount)
+		{
+		  if (input.incrementToken())
+		  {
+			tokenCount++;
+			return true;
+		  }
+		}
+		else
+		{
+		  // Limit reached: when requested, read the rest of the input without emitting it.
+		  while (consumeAllTokens && input.incrementToken())
+		  {
+		  }
+		}
+		// Input ended or limit hit: record it so the wrapped stream is not polled again before reset().
+		exhausted = true;
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Calls the base reset, then clears the emitted-token counter and the exhausted flag. </summary>
+	  public override void reset()
+	  {
+		base.reset();
+		tokenCount = 0;
+		exhausted = false;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
new file mode 100644
index 0000000..ac55037
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenCountFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <see cref="LimitTokenCountFilter"/>.
+	/// <code>
+	/// &lt;fieldType name="text_lngthcnt" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10" consumeAllTokens="false" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</code>
+	/// <para>
+	/// The <c>consumeAllTokens</c> property is optional and defaults to <c>false</c>.
+	/// See <see cref="LimitTokenCountFilter"/> for an explanation of its use. </para>
+	/// </summary>
+	public class LimitTokenCountFilterFactory : TokenFilterFactory
+	{
+	  public const string MAX_TOKEN_COUNT_KEY = "maxTokenCount";
+	  public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
+	  internal readonly int maxTokenCount;
+	  internal readonly bool consumeAllTokens;
+
+	  /// <summary> Creates a new LimitTokenCountFilterFactory. </summary>
+	  /// <param name="args"> settings map; <c>maxTokenCount</c> is read with requireInt, <c>consumeAllTokens</c> with getBoolean (default false) </param>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="args"/> still has entries after both keys were read </exception>
+	  public LimitTokenCountFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		maxTokenCount = requireInt(args, MAX_TOKEN_COUNT_KEY);
+		consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
+		if (args.Count > 0)
+		{
+		  // The standard .NET dictionaries' ToString() gives only the type name, so spell out the leftover entries.
+		  throw new System.ArgumentException("Unknown parameters: {" + string.Join(", ", System.Linq.Enumerable.Select(args, kv => kv.Key + "=" + kv.Value)) + "}");
+		}
+	  }
+
+	  /// <summary> Wraps <paramref name="input"/> in a <see cref="LimitTokenCountFilter"/> built from this factory's two settings. </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new LimitTokenCountFilter(input, maxTokenCount, consumeAllTokens);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
new file mode 100644
index 0000000..931e492
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilter.cs
@@ -0,0 +1,116 @@
+namespace org.apache.lucene.analysis.miscellaneous
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// This TokenFilter limits its emitted tokens to those with positions that
+	/// are not greater than the configured limit.
+	/// <para>
+	/// By default, this filter ignores any tokens in the wrapped <c>TokenStream</c>
+	/// once the limit has been exceeded, which can result in <c>reset()</c> being
+	/// called prior to <c>incrementToken()</c> returning <c>false</c>.  For most
+	/// <c>TokenStream</c> implementations this should be acceptable, and faster
+	/// than consuming the full stream. If you are wrapping a <c>TokenStream</c>
+	/// which requires that the full stream of tokens be exhausted in order to
+	/// function properly, use the <c>consumeAllTokens</c> option of
+	/// <see cref="LimitTokenPositionFilter.LimitTokenPositionFilter(TokenStream, int, bool)"/>
+	/// and set it to <c>true</c>.
+	/// </para>
+	/// </summary>
+	public sealed class LimitTokenPositionFilter : TokenFilter
+	{
+
+	  private readonly int maxTokenPosition;
+	  private readonly bool consumeAllTokens;
+	  private int tokenPosition = 0; // sum of position increments seen since the last reset()
+	  private bool exhausted = false; // true once incrementToken() has returned false
+	  private readonly PositionIncrementAttribute posIncAtt; // assigned in the constructor
+
+	  /// <summary>
+	  /// Build a filter that only accepts tokens up to and including the given maximum position.
+	  /// This filter will not consume any tokens with position greater than the maxTokenPosition limit.
+	  /// </summary>
+	  /// <param name="in"> the stream to wrap </param>
+	  /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1)
+	  /// </param>
+	  /// <seealso cref="LimitTokenPositionFilter.LimitTokenPositionFilter(TokenStream, int, bool)"/>
+	  public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition) : this(@in, maxTokenPosition, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Build a filter that limits the maximum position of tokens to emit.
+	  /// </summary>
+	  /// <param name="in"> the stream to wrap </param>
+	  /// <param name="maxTokenPosition"> max position of tokens to produce (1st token always has position 1) </param>
+	  /// <param name="consumeAllTokens"> whether all tokens from the wrapped input stream must be consumed
+	  ///                         even if maxTokenPosition is exceeded. </param>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="maxTokenPosition"/> is less than 1 </exception>
+	  public LimitTokenPositionFilter(TokenStream @in, int maxTokenPosition, bool consumeAllTokens) : base(@in)
+	  {
+		if (maxTokenPosition < 1)
+		{
+		  throw new System.ArgumentException("maxTokenPosition must be greater than zero");
+		}
+		this.maxTokenPosition = maxTokenPosition;
+		this.consumeAllTokens = consumeAllTokens;
+		// Done here, not in a field initializer: C# initializers may not call instance members (CS0236).
+		this.posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances the wrapped stream and emits its token while the accumulated position
+	  /// (the sum of position increments since the last reset) does not exceed maxTokenPosition.
+	  /// </summary>
+	  /// <returns> true if a token was emitted; false once the limit is exceeded or the input has ended </returns>
+	  public override bool incrementToken()
+	  {
+		if (exhausted)
+		{
+		  return false;
+		}
+		if (input.incrementToken())
+		{
+		  tokenPosition += posIncAtt.PositionIncrement;
+		  if (tokenPosition <= maxTokenPosition)
+		  {
+			return true;
+		  }
+		  // Limit exceeded: when requested, read the rest of the input without emitting it.
+		  while (consumeAllTokens && input.incrementToken())
+		  {
+		  }
+		}
+		exhausted = true;
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Calls the base reset, then clears the accumulated position and the exhausted flag.
+	  /// </summary>
+	  public override void reset()
+	  {
+		base.reset();
+		tokenPosition = 0;
+		exhausted = false;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
new file mode 100644
index 0000000..69877e8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LimitTokenPositionFilterFactory.cs
@@ -0,0 +1,66 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.miscellaneous
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <see cref="LimitTokenPositionFilter"/>.
+	/// <code>
+	/// &lt;fieldType name="text_limit_pos" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LimitTokenPositionFilterFactory" maxTokenPosition="3" consumeAllTokens="false" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</code>
+	/// <para>
+	/// The <c>consumeAllTokens</c> property is optional and defaults to <c>false</c>.
+	/// See <see cref="LimitTokenPositionFilter"/> for an explanation of its use. </para>
+	/// </summary>
+	public class LimitTokenPositionFilterFactory : TokenFilterFactory
+	{
+	  public const string MAX_TOKEN_POSITION_KEY = "maxTokenPosition";
+	  public const string CONSUME_ALL_TOKENS_KEY = "consumeAllTokens";
+	  internal readonly int maxTokenPosition;
+	  internal readonly bool consumeAllTokens;
+
+	  /// <summary> Creates a new LimitTokenPositionFilterFactory. </summary>
+	  /// <param name="args"> settings map; <c>maxTokenPosition</c> is read with requireInt, <c>consumeAllTokens</c> with getBoolean (default false) </param>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="args"/> still has entries after both keys were read </exception>
+	  public LimitTokenPositionFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		maxTokenPosition = requireInt(args, MAX_TOKEN_POSITION_KEY);
+		consumeAllTokens = getBoolean(args, CONSUME_ALL_TOKENS_KEY, false);
+		if (args.Count > 0)
+		{
+		  // The standard .NET dictionaries' ToString() gives only the type name, so spell out the leftover entries.
+		  throw new System.ArgumentException("Unknown parameters: {" + string.Join(", ", System.Linq.Enumerable.Select(args, kv => kv.Key + "=" + kv.Value)) + "}");
+		}
+	  }
+
+	  /// <summary> Wraps <paramref name="input"/> in a <see cref="LimitTokenPositionFilter"/> built from this factory's two settings. </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new LimitTokenPositionFilter(input, maxTokenPosition, consumeAllTokens);
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message