lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [12/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:16 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
new file mode 100644
index 0000000..c6506ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Nl/DutchStemmer.cs
@@ -0,0 +1,477 @@
+using System;
+using System.Collections;
+using System.Globalization;
+using System.Text;
+
+namespace org.apache.lucene.analysis.nl
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	/// <summary>
+	/// A stemmer for Dutch words. 
+	/// <para>
+	/// The algorithm is an implementation of
+	/// the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
+	/// algorithm in Martin Porter's snowball project.
+	/// </para> </summary>
+	/// @deprecated (3.1) Use <seealso cref="org.tartarus.snowball.ext.DutchStemmer"/> instead, 
+	/// which has the same functionality. This filter will be removed in Lucene 5.0 
+	[Obsolete("(3.1) Use org.tartarus.snowball.ext.DutchStemmer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
+	public class DutchStemmer
+	{
+	  private static readonly CultureInfo locale = new CultureInfo("nl-NL");
+
+	  /// <summary>
+	  /// Buffer for the terms while stemming them.
+	  /// </summary>
+	  private StringBuilder sb = new StringBuilder();
+	  private bool _removedE;
+	  private IDictionary _stemDict;
+
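+	  // R1 and R2 are the standard Snowball regions: R1 starts after the first
+	  // non-vowel that follows a vowel, and R2 is found by applying the same
+	  // rule again starting from R1. Suffixes are only removed inside these regions.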
+	  private int _R1;
+	  private int _R2;
+
+	  //TODO convert to internal
+	  /// <summary>
+	  /// Stems the given term to a unique <c>discriminator</c>.
+	  /// </summary>
+	  /// <param name="term"> The term that should be stemmed. </param>
+	  /// <returns> Discriminator for <paramref name="term"/> </returns>
+	  public virtual string stem(string term)
+	  {
+		term = term.ToLower(locale);
+		if (!isStemmable(term))
+		{
+		  return term;
+		}
+		if (_stemDict != null && _stemDict.Contains(term))
+		{
+		  if (_stemDict[term] is string)
+		  {
+			return (string) _stemDict[term];
+		  }
+		  else
+		  {
+			return null;
+		  }
+		}
+
+		// Reset the StringBuilder.
+		sb.Remove(0, sb.Length);
+		sb.Insert(0, term);
+		// Stemming starts here...
+		substitute(sb);
+		storeYandI(sb);
+		_R1 = getRIndex(sb, 0);
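+		// the Dutch Snowball algorithm requires R1 to start no earlier than
+		// position 3, so the first three letters are never treated as a suffix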
+		_R1 = Math.Max(3, _R1);
+		step1(sb);
+		step2(sb);
+		_R2 = getRIndex(sb, _R1);
+		step3a(sb);
+		step3b(sb);
+		step4(sb);
+		reStoreYandI(sb);
+		return sb.ToString();
+	  }
+
+	  private bool enEnding(StringBuilder sb)
+	  {
+		string[] enend = new string[]{"ene", "en"};
+		for (int i = 0; i < enend.Length; i++)
+		{
+		  string end = enend[i];
+		  string s = sb.ToString();
+		  int index = s.Length - end.Length;
+		  if (s.EndsWith(end, StringComparison.Ordinal) && index >= _R1 && isValidEnEnding(sb, index - 1))
+		  {
+			sb.Remove(index, end.Length);
+			unDouble(sb, index);
+			return true;
+		  }
+		}
+		return false;
+	  }
+
+
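+	  /// <summary>
+	  /// Removes the suffixes -heden (replaced by -heid), -en(e) and -s(e)
+	  /// when they lie inside R1 and the preceding character allows it.
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>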
+	  private void step1(StringBuilder sb)
+	  {
+		if (_R1 >= sb.Length)
+		{
+		  return;
+		}
+
+		string s = sb.ToString();
+		int lengthR1 = sb.Length - _R1;
+		int index;
+
+		if (s.EndsWith("heden", StringComparison.Ordinal))
+		{
+		  // map the -heden ending to -heid
+		  string suffix = s.Substring(_R1, lengthR1).Replace("heden", "heid");
+		  sb.Remove(_R1, lengthR1).Insert(_R1, suffix);
+		  return;
+		}
+
+		if (enEnding(sb))
+		{
+		  return;
+		}
+
+		if (s.EndsWith("se", StringComparison.Ordinal) && (index = s.Length - 2) >= _R1 && isValidSEnding(sb, index - 1))
+		{
+		  sb.Remove(index, 2);
+		  return;
+		}
+		if (s.EndsWith("s", StringComparison.Ordinal) && (index = s.Length - 1) >= _R1 && isValidSEnding(sb, index - 1))
+		{
+		  sb.Remove(index, 1);
+		}
+	  }
+
+	  /// <summary>
+	  /// Delete suffix e if in R1 and
+	  /// preceded by a non-vowel, and then undouble the ending
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step2(StringBuilder sb)
+	  {
+		_removedE = false;
+		if (_R1 >= sb.Length)
+		{
+		  return;
+		}
+		string s = sb.ToString();
+		int index = s.Length - 1;
+		if (index >= _R1 && s.EndsWith("e", StringComparison.Ordinal) && !isVowel(sb[index - 1]))
+		{
+		  sb.Remove(index, 1);
+		  unDouble(sb);
+		  _removedE = true;
+		}
+	  }
+
+	  /// <summary>
+	  /// Delete "heid"
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step3a(StringBuilder sb)
+	  {
+		if (_R2 >= sb.Length)
+		{
+		  return;
+		}
+		string s = sb.ToString();
+		int index = s.Length - 4;
+		if (s.EndsWith("heid", StringComparison.Ordinal) && index >= _R2 && sb[index - 1] != 'c')
+		{
+		  sb.Remove(index, 4); //remove heid
+		  enEnding(sb);
+		}
+	  }
+
+	  /// <summary>
+	  /// <para>A d-suffix, or derivational suffix, enables a new word,
+	  /// often with a different grammatical category, or with a different
+	  /// sense, to be built from another word. Whether a d-suffix can be
+	  /// attached is discovered not from the rules of grammar, but by
+	  /// referring to a dictionary. So in English, ness can be added to
+	  /// certain adjectives to form corresponding nouns (littleness,
+	  /// kindness, foolishness ...) but not to all adjectives
+	  /// (not, for example, to big, cruel, wise ...). d-suffixes can be
+	  /// used to change meaning, often in rather exotic ways.</para>
+	  /// Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step3b(StringBuilder sb)
+	  {
+		if (_R2 >= sb.Length)
+		{
+		  return;
+		}
+		string s = sb.ToString();
+		int index = 0;
+
+		if ((s.EndsWith("end", StringComparison.Ordinal) || s.EndsWith("ing", StringComparison.Ordinal)) && (index = s.Length - 3) >= _R2)
+		{
+		  sb.Remove(index, 3);
+		  if (sb[index - 2] == 'i' && sb[index - 1] == 'g')
+		  {
+			if (sb[index - 3] != 'e' && index - 2 >= _R2)
+			{
+			  index -= 2;
+			  sb.Remove(index, 2);
+			}
+		  }
+		  else
+		  {
+			unDouble(sb, index);
+		  }
+		  return;
+		}
+		if (s.EndsWith("ig", StringComparison.Ordinal) && (index = s.Length - 2) >= _R2)
+		{
+		  if (sb[index - 1] != 'e')
+		  {
+			sb.Remove(index, 2);
+		  }
+		  return;
+		}
+		if (s.EndsWith("lijk", StringComparison.Ordinal) && (index = s.Length - 4) >= _R2)
+		{
+		  sb.Remove(index, 4);
+		  step2(sb);
+		  return;
+		}
+		if (s.EndsWith("baar", StringComparison.Ordinal) && (index = s.Length - 4) >= _R2)
+		{
+		  sb.Remove(index, 4);
+		  return;
+		}
+		if (s.EndsWith("bar", StringComparison.Ordinal) && (index = s.Length - 3) >= _R2)
+		{
+		  if (_removedE)
+		  {
+			sb.Remove(index, 3);
+		  }
+		  return;
+		}
+	  }
+
+	  /// <summary>
+	  /// undouble vowel
+	  /// If the word ends in CVD, where C is a non-vowel, D is a non-vowel other than I, and V is a doubled a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
+	  /// </summary>
+	  /// <param name="sb"> String being stemmed </param>
+	  private void step4(StringBuilder sb)
+	  {
+		if (sb.Length < 4)
+		{
+		  return;
+		}
+		string end = sb.ToString(sb.Length - 4, 4);
+		char c = end[0];
+		char v1 = end[1];
+		char v2 = end[2];
+		char d = end[3];
+		if (v1 == v2 && d != 'I' && v1 != 'i' && isVowel(v1) && !isVowel(d) && !isVowel(c))
+		{
+		  sb.Remove(sb.Length - 2, 1);
+		}
+	  }
+
+	  /// <summary>
+	  /// Checks if a term could be stemmed.
+	  /// </summary>
+	  /// <returns> true if, and only if, the given term consists only of letters. </returns>
+	  private bool isStemmable(string term)
+	  {
+		for (int c = 0; c < term.Length; c++)
+		{
+		  if (!char.IsLetter(term[c]))
+		  {
+			  return false;
+		  }
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Substitute ä, ë, ï, ö, ü, á, é, í, ó and ú with their unaccented equivalents.
+	  /// </summary>
+	  private void substitute(StringBuilder buffer)
+	  {
+		for (int i = 0; i < buffer.Length; i++)
+		{
+		  switch (buffer[i])
+		  {
+			case 'ä':
+			case 'á':
+			{
+				buffer[i] = 'a';
+				break;
+			}
+			case 'ë':
+			case 'é':
+			{
+				buffer[i] = 'e';
+				break;
+			}
+			case 'ü':
+			case 'ú':
+			{
+				buffer[i] = 'u';
+				break;
+			}
+			case 'ï':
+			case 'í':
+			{
+				buffer[i] = 'i';
+				break;
+			}
+			case 'ö':
+			case 'ó':
+			{
+				buffer[i] = 'o';
+				break;
+			}
+		  }
+		}
+	  }
+
+	  /*private boolean isValidSEnding(StringBuilder sb) {
+	    return isValidSEnding(sb, sb.length() - 1);
+	  }*/
+
+	  private bool isValidSEnding(StringBuilder sb, int index)
+	  {
+		char c = sb[index];
+		if (isVowel(c) || c == 'j')
+		{
+		  return false;
+		}
+		return true;
+	  }
+
+	  /*private boolean isValidEnEnding(StringBuilder sb) {
+	    return isValidEnEnding(sb, sb.length() - 1);
+	  }*/
+
+	  private bool isValidEnEnding(StringBuilder sb, int index)
+	  {
+		char c = sb[index];
+		if (isVowel(c))
+		{
+		  return false;
+		}
+		if (c < 3)
+		{
+		  return false;
+		}
+		// ends with "gem"?
+		if (c == 'm' && sb[index - 2] == 'g' && sb[index - 1] == 'e')
+		{
+		  return false;
+		}
+		return true;
+	  }
+
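+	  /// <summary>
+	  /// Removes one consonant from a doubled kk, dd, tt, nn, mm or ff ending.
+	  /// </summary>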
+	  private void unDouble(StringBuilder sb)
+	  {
+		unDouble(sb, sb.Length);
+	  }
+
+	  private void unDouble(StringBuilder sb, int endIndex)
+	  {
+		string s = sb.Substring(0, endIndex);
+		if (s.EndsWith("kk", StringComparison.Ordinal) || s.EndsWith("tt", StringComparison.Ordinal) || s.EndsWith("dd", StringComparison.Ordinal) || s.EndsWith("nn", StringComparison.Ordinal) || s.EndsWith("mm", StringComparison.Ordinal) || s.EndsWith("ff", StringComparison.Ordinal))
+		{
+		  sb.Remove(endIndex - 1, 1);
+		}
+	  }
+
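+	  /// <summary>
+	  /// Returns the index directly after the first non-vowel that follows a vowel,
+	  /// scanning from <c>start</c>; used to compute the R1 and R2 regions.
+	  /// </summary>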
+	  private int getRIndex(StringBuilder sb, int start)
+	  {
+		if (start == 0)
+		{
+		  start = 1;
+		}
+		int i = start;
+		for (; i < sb.Length; i++)
+		{
+		  //first non-vowel preceded by a vowel
+		  if (!isVowel(sb[i]) && isVowel(sb[i - 1]))
+		  {
+			return i + 1;
+		  }
+		}
+		return i + 1;
+	  }
+
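+	  /// <summary>
+	  /// Upper-cases an initial y, a y after a vowel, and an i between vowels so
+	  /// they are not treated as vowels during stemming; <c>reStoreYandI</c>
+	  /// lower-cases them again afterwards.
+	  /// </summary>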
+	  private void storeYandI(StringBuilder sb)
+	  {
+		if (sb[0] == 'y')
+		{
+		  sb[0] = 'Y';
+		}
+
+		int last = sb.Length - 1;
+
+		for (int i = 1; i < last; i++)
+		{
+		  switch (sb[i])
+		  {
+			case 'i':
+			{
+				if (isVowel(sb[i - 1]) && isVowel(sb[i + 1]))
+				{
+				  sb[i] = 'I';
+				}
+				break;
+			}
+			case 'y':
+			{
+				if (isVowel(sb[i - 1]))
+				{
+				  sb[i] = 'Y';
+				}
+				break;
+			}
+		  }
+		}
+		if (last > 0 && sb[last] == 'y' && isVowel(sb[last - 1]))
+		{
+		  sb[last] = 'Y';
+		}
+	  }
+
+	  private void reStoreYandI(StringBuilder sb)
+	  {
+		string tmp = sb.ToString();
+		sb.Remove(0, sb.Length);
+		sb.Insert(0, tmp.Replace("I", "i").Replace("Y", "y"));
+	  }
+
+	  private bool isVowel(char c)
+	  {
+		switch (c)
+		{
+		  case 'e':
+		  case 'a':
+		  case 'o':
+		  case 'i':
+		  case 'u':
+		  case 'y':
+		  case 'è':
+		  {
+			  return true;
+		  }
+		}
+		return false;
+	  }
+
+	  internal virtual IDictionary StemDictionary
+	  {
+		  set
+		  {
+			_stemDict = value;
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
new file mode 100644
index 0000000..5cc9abf
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianAnalyzer.cs
@@ -0,0 +1,140 @@
+using System;
+
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using NorwegianStemmer = org.tartarus.snowball.ext.NorwegianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Norwegian.
+	/// </summary>
+	public sealed class NorwegianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Norwegian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "norwegian_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public NorwegianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public NorwegianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new NorwegianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
new file mode 100644
index 0000000..bf10424
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilter.cs
@@ -0,0 +1,79 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="NorwegianLightStemmer"/> to stem Norwegian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class NorwegianLightStemFilter : TokenFilter
+	{
+	  private readonly NorwegianLightStemmer stemmer;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  /// <summary>
+	  /// Calls <seealso cref="NorwegianLightStemFilter(TokenStream, int)"/> with
+	  /// <c>NorwegianLightStemFilter(input, BOKMAAL)</c>.
+	  /// </summary>
+	  public NorwegianLightStemFilter(TokenStream input) : this(input, NorwegianLightStemmer.BOKMAAL)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new NorwegianLightStemFilter </summary>
+	  /// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>, 
+	  ///                     <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+	  public NorwegianLightStemFilter(TokenStream input, int flags) : base(input)
+	  {
+		stemmer = new NorwegianLightStemmer(flags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
new file mode 100644
index 0000000..ad94de5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemFilterFactory.cs
@@ -0,0 +1,79 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
+	/// <summary>
+	/// Factory for <seealso cref="NorwegianLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NorwegianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  private readonly int flags;
+
+	  /// <summary>
+	  /// Creates a new NorwegianLightStemFilterFactory </summary>
+	  public NorwegianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		string variant = get(args, "variant");
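+		// "nb" selects Bokmål (the default), "nn" selects Nynorsk, and "no" enables both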
+		if (variant == null || "nb".Equals(variant))
+		{
+		  flags = NorwegianLightStemmer.BOKMAAL;
+		}
+		else if ("nn".Equals(variant))
+		{
+		  flags = NorwegianLightStemmer.NYNORSK;
+		}
+		else if ("no".Equals(variant))
+		{
+		  flags = NorwegianLightStemmer.BOKMAAL | NorwegianLightStemmer.NYNORSK;
+		}
+		else
+		{
+		  throw new System.ArgumentException("invalid variant: " + variant);
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new NorwegianLightStemFilter(input, flags);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
new file mode 100644
index 0000000..5efa799
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianLightStemmer.cs
@@ -0,0 +1,158 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Norwegian.
+	/// <para>
+	/// Parts of this stemmer are adapted from SwedishLightStemFilter, except
+	/// that the Swedish one has a pre-defined rule set and a corresponding
+	/// corpus to validate against, whereas the Norwegian one is hand crafted.
+	/// </para>
+	/// </summary>
+	public class NorwegianLightStemmer
+	{
+	  /// <summary>
+	  /// Constant to remove Bokmål-specific endings </summary>
+	  public const int BOKMAAL = 1;
+	  /// <summary>
+	  /// Constant to remove Nynorsk-specific endings </summary>
+	  public const int NYNORSK = 2;
+
+	  internal readonly bool useBokmaal;
+	  internal readonly bool useNynorsk;
+
+	  /// <summary>
+	  /// Creates a new NorwegianLightStemmer </summary>
+	  /// <param name="flags"> set to <seealso cref="#BOKMAAL"/>, <seealso cref="#NYNORSK"/>, or both. </param>
+	  public NorwegianLightStemmer(int flags)
+	  {
+		if (flags <= 0 || flags > BOKMAAL + NYNORSK)
+		{
+		  throw new System.ArgumentException("invalid flags");
+		}
+		useBokmaal = (flags & BOKMAAL) != 0;
+		useNynorsk = (flags & NYNORSK) != 0;
+	  }
+
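+	  /// <summary>
+	  /// Returns the stemmed length of the first <c>len</c> characters of <c>s</c>;
+	  /// callers truncate the term to the returned length.
+	  /// </summary>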
+	  public virtual int stem(char[] s, int len)
+	  {
+		// Remove possessive -s (bilens -> bilen) and continue checking 
+		if (len > 4 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		// Remove common endings, single-pass
+		if (len > 7 && ((StemmerUtil.EndsWith(s, len, "heter") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "heten") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "heita") && useNynorsk))) // general ending (hemmeleg-heita -> hemmeleg) -  general ending (hemmelig-heten -> hemmelig) -  general ending (hemmelig-heter -> hemmelig)
+		{
+		  return len - 5;
+		}
+
+		// Remove Nynorsk common endings, single-pass
+		if (len > 8 && useNynorsk && (StemmerUtil.EndsWith(s, len, "heiter") || StemmerUtil.EndsWith(s, len, "leiken") || StemmerUtil.EndsWith(s, len, "leikar"))) // general ending (trygg-leikar -> trygg) -  general ending (trygg-leiken -> trygg) -  general ending (hemmeleg-heiter -> hemmeleg)
+		{
+		  return len - 6;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "dom") || (StemmerUtil.EndsWith(s, len, "het") && useBokmaal))) // general ending (hemmelig-het -> hemmelig) -  general ending (kristen-dom -> kristen)
+		{
+		  return len - 3;
+		}
+
+		if (len > 6 && useNynorsk && (StemmerUtil.EndsWith(s, len, "heit") || StemmerUtil.EndsWith(s, len, "semd") || StemmerUtil.EndsWith(s, len, "leik"))) // general ending (trygg-leik -> trygg) -  general ending (verk-semd -> verk) -  general ending (hemmeleg-heit -> hemmeleg)
+		{
+		  return len - 4;
+		}
+
+		if (len > 7 && (StemmerUtil.EndsWith(s, len, "elser") || StemmerUtil.EndsWith(s, len, "elsen"))) // general ending (føl-elsen -> føl) -  general ending (føl-elser -> føl)
+		{
+		  return len - 5;
+		}
+
+		if (len > 6 && ((StemmerUtil.EndsWith(s, len, "ende") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "ande") && useNynorsk) || StemmerUtil.EndsWith(s, len, "else") || (StemmerUtil.EndsWith(s, len, "este") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "aste") && useNynorsk) || (StemmerUtil.EndsWith(s, len, "eren") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "aren") && useNynorsk))) // masc -  masc -  adj (fin-aste -> fin) -  adj (fin-este -> fin) -  general ending (føl-else -> føl) -  (sov-ande -> sov) -  (sov-ende -> sov)
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && ((StemmerUtil.EndsWith(s, len, "ere") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "are") && useNynorsk) || (StemmerUtil.EndsWith(s, len, "est") && useBokmaal) || (StemmerUtil.EndsWith(s, len, "ast") && useNynorsk) || StemmerUtil.EndsWith(s, len, "ene") || (StemmerUtil.EndsWith(s, len, "ane") && useNynorsk))) // masc pl definite (gut-ane) -  masc/fem/neutr pl definite (hus-ene) -  adj (fin-ast -> fin) -  adj (fin-est -> fin) -  adj (fin-are -> fin) -  adj (fin-ere -> fin)
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "er") || StemmerUtil.EndsWith(s, len, "en") || StemmerUtil.EndsWith(s, len, "et") || (StemmerUtil.EndsWith(s, len, "ar") && useNynorsk) || (StemmerUtil.EndsWith(s, len, "st") && useBokmaal) || StemmerUtil.EndsWith(s, len, "te"))) // adj (billig-st -> billig) -  masc pl indefinite -  neutr definite -  masc/fem definite -  masc/fem indefinite
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a': // fem definite
+			case 'e': // to get correct stem for nouns ending in -e (kake -> kak, kaker -> kak)
+			case 'n':
+			  return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
new file mode 100644
index 0000000..670446b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilter.cs
@@ -0,0 +1,79 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="NorwegianMinimalStemmer"/> to stem Norwegian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class NorwegianMinimalStemFilter : TokenFilter
+	{
+	  private readonly NorwegianMinimalStemmer stemmer;
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  /// <summary>
+	  /// Calls <seealso cref="NorwegianMinimalStemFilter(TokenStream, int)"/> with
+	  /// <c>NorwegianMinimalStemFilter(input, BOKMAAL)</c>.
+	  /// </summary>
+	  public NorwegianMinimalStemFilter(TokenStream input) : this(input, NorwegianLightStemmer.BOKMAAL)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new NorwegianMinimalStemFilter </summary>
+	  /// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>, 
+	  ///                     <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+	  public NorwegianMinimalStemFilter(TokenStream input, int flags) : base(input)
+	  {
+		this.stemmer = new NorwegianMinimalStemmer(flags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
new file mode 100644
index 0000000..1f629bc
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemFilterFactory.cs
@@ -0,0 +1,79 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
+	/// <summary>
+	/// Factory for <seealso cref="NorwegianMinimalStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class NorwegianMinimalStemFilterFactory : TokenFilterFactory
+	{
+
+	  private readonly int flags;
+
+	  /// <summary>
+	  /// Creates a new NorwegianMinimalStemFilterFactory </summary>
+	  public NorwegianMinimalStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		string variant = get(args, "variant");
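+		// "nb" selects Bokmål (the default), "nn" selects Nynorsk, and "no" enables both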
+		if (variant == null || "nb".Equals(variant))
+		{
+		  flags = NorwegianLightStemmer.BOKMAAL;
+		}
+		else if ("nn".Equals(variant))
+		{
+		  flags = NorwegianLightStemmer.NYNORSK;
+		}
+		else if ("no".Equals(variant))
+		{
+		  flags = NorwegianLightStemmer.BOKMAAL | NorwegianLightStemmer.NYNORSK;
+		}
+		else
+		{
+		  throw new System.ArgumentException("invalid variant: " + variant);
+		}
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new NorwegianMinimalStemFilter(input, flags);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
new file mode 100644
index 0000000..6a4a94a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/No/NorwegianMinimalStemmer.cs
@@ -0,0 +1,121 @@
+namespace org.apache.lucene.analysis.no
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
+	/// <summary>
+	/// Minimal Stemmer for Norwegian Bokmål (no-nb) and Nynorsk (no-nn).
+	/// <para>
+	/// Stems known plural forms for Norwegian nouns only, together with the genitive -s.
+	/// </para>
+	/// </summary>
+	public class NorwegianMinimalStemmer
+	{
+	  internal readonly bool useBokmaal;
+	  internal readonly bool useNynorsk;
+
+	  /// <summary>
+	  /// Creates a new NorwegianMinimalStemmer </summary>
+	  /// <param name="flags"> set to <seealso cref="NorwegianLightStemmer#BOKMAAL"/>, 
+	  ///                     <seealso cref="NorwegianLightStemmer#NYNORSK"/>, or both. </param>
+	  public NorwegianMinimalStemmer(int flags)
+	  {
+		if (flags <= 0 || flags > NorwegianLightStemmer.BOKMAAL + NorwegianLightStemmer.NYNORSK)
+		{
+		  throw new System.ArgumentException("invalid flags");
+		}
+		useBokmaal = (flags & NorwegianLightStemmer.BOKMAAL) != 0;
+		useNynorsk = (flags & NorwegianLightStemmer.NYNORSK) != 0;
+	  }
+
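+	  /// <summary>
+	  /// Returns the stemmed length of the first <c>len</c> characters of <c>s</c>;
+	  /// callers truncate the term to the returned length.
+	  /// </summary>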
+	  public virtual int stem(char[] s, int len)
+	  {
+		// Remove genitive -s
+		if (len > 4 && s[len - 1] == 's')
+		{
+		  len--;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "ene") || (StemmerUtil.EndsWith(s, len, "ane") && useNynorsk))) // masc pl definite (gut-ane) -  masc/fem/neutr pl definite (hus-ene)
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "er") || StemmerUtil.EndsWith(s, len, "en") || StemmerUtil.EndsWith(s, len, "et") || (StemmerUtil.EndsWith(s, len, "ar") && useNynorsk))) // masc pl indefinite -  neutr definite -  masc/fem definite -  masc/fem indefinite
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'a': // fem definite
+			case 'e': // to get correct stem for nouns ending in -e (kake -> kak, kaker -> kak)
+			  return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
new file mode 100644
index 0000000..b826cd6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
@@ -0,0 +1,242 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.path
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// Tokenizer for path-like hierarchies.
+	/// <para>
+	/// Take something like:
+	/// 
+	/// <pre>
+	///  /something/something/else
+	/// </pre>
+	/// 
+	/// and make:
+	/// 
+	/// <pre>
+	///  /something
+	///  /something/something
+	///  /something/something/else
+	/// </pre>
+	/// </para>
+	/// </summary>
+	public class PathHierarchyTokenizer : Tokenizer
+	{
+
+	  public PathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
+	  {
+		if (bufferSize < 0)
+		{
+		  throw new System.ArgumentException("bufferSize cannot be negative");
+		}
+		if (skip < 0)
+		{
+		  throw new System.ArgumentException("skip cannot be negative");
+		}
+		termAtt.resizeBuffer(bufferSize);
+
+		this.delimiter = delimiter;
+		this.replacement = replacement;
+		this.skip = skip;
+		resultToken = new StringBuilder(bufferSize);
+	  }
+
+	  private const int DEFAULT_BUFFER_SIZE = 1024;
+	  public const char DEFAULT_DELIMITER = '/';
+	  public const int DEFAULT_SKIP = 0;
+
+	  private readonly char delimiter;
+	  private readonly char replacement;
+	  private readonly int skip;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private int startPosition = 0;
+	  private int skipped = 0;
+	  private bool endDelimiter = false;
+	  private StringBuilder resultToken;
+
+	  private int charsRead = 0;
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		termAtt.append(resultToken);
+		if (resultToken.Length == 0)
+		{
+		  posAtt.PositionIncrement = 1;
+		}
+		else
+		{
+		  posAtt.PositionIncrement = 0;
+		}
+		int length = 0;
+		bool added = false;
+		if (endDelimiter)
+		{
+		  termAtt.append(replacement);
+		  length++;
+		  endDelimiter = false;
+		  added = true;
+		}
+
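+		// Read one character at a time: each call to incrementToken() emits the
+		// path seen so far extended by one more component, skipping the first
+		// 'skip' components and writing 'replacement' in place of the delimiter.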
+		while (true)
+		{
+		  int c = input.read();
+		  if (c >= 0)
+		  {
+			charsRead++;
+		  }
+		  else
+		  {
+			if (skipped > skip)
+			{
+			  length += resultToken.Length;
+			  termAtt.Length = length;
+			   offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
+			  if (added)
+			  {
+				resultToken.Length = 0;
+				resultToken.Append(termAtt.buffer(), 0, length);
+			  }
+			  return added;
+			}
+			else
+			{
+			  return false;
+			}
+		  }
+		  if (!added)
+		  {
+			added = true;
+			skipped++;
+			if (skipped > skip)
+			{
+			  termAtt.append(c == delimiter ? replacement : (char)c);
+			  length++;
+			}
+			else
+			{
+			  startPosition++;
+			}
+		  }
+		  else
+		  {
+			if (c == delimiter)
+			{
+			  if (skipped > skip)
+			  {
+				endDelimiter = true;
+				break;
+			  }
+			  skipped++;
+			  if (skipped > skip)
+			  {
+				termAtt.append(replacement);
+				length++;
+			  }
+			  else
+			  {
+				startPosition++;
+			  }
+			}
+			else
+			{
+			  if (skipped > skip)
+			  {
+				termAtt.append((char)c);
+				length++;
+			  }
+			  else
+			  {
+				startPosition++;
+			  }
+			}
+		  }
+		}
+		length += resultToken.Length;
+		termAtt.Length = length;
+		offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
+		resultToken.Length = 0;
+		resultToken.Append(termAtt.buffer(), 0, length);
+		return true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		int finalOffset = correctOffset(charsRead);
+		offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		resultToken.Length = 0;
+		charsRead = 0;
+		endDelimiter = false;
+		skipped = 0;
+		startPosition = 0;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
new file mode 100644
index 0000000..7dd1e62
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
@@ -0,0 +1,105 @@
+using System.Collections.Generic;
+using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+
+namespace org.apache.lucene.analysis.path
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PathHierarchyTokenizer"/>. 
+	/// <para>
+	/// This factory is typically configured for use only in the <code>index</code> 
+	/// Analyzer (or only in the <code>query</code> Analyzer, but never both).
+	/// </para>
+	/// <para>
+	/// For example, in the configuration below a query for 
+	/// <code>Books/NonFic</code> will match documents indexed with values like 
+	/// <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>, 
+	/// <code>Books/NonFic/Science/Physics</code>, etc. But it will not match 
+	/// documents indexed with values like <code>Books</code>, or 
+	/// <code>Books/Fic</code>...
+	/// </para>
+	/// 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
+	///   &lt;analyzer type="index"&gt;
+	///     &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
+	///   &lt;/analyzer&gt;
+	///   &lt;analyzer type="query"&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;
+	/// </pre>
+	/// <para>
+	/// In this example, however, we see the opposite configuration, so that a query 
+	/// for <code>Books/NonFic/Science/Physics</code> would match documents 
+	/// containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>, 
+	/// or <code>Books/NonFic/Science/Physics</code>, but not 
+	/// <code>Books/NonFic/Science/Physics/Theory</code> or 
+	/// <code>Books/NonFic/Law</code>.
+	/// </para>
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="descendent_path" class="solr.TextField"&gt;
+	///   &lt;analyzer type="index"&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory" /&gt;
+	///   &lt;/analyzer&gt;
+	///   &lt;analyzer type="query"&gt;
+	///     &lt;tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;
+	/// </pre>
+	/// </summary>
+	public class PathHierarchyTokenizerFactory : TokenizerFactory
+	{
+	  private readonly char delimiter;
+	  private readonly char replacement;
+	  private readonly bool reverse;
+	  private readonly int skip;
+
+	  /// <summary>
+	  /// Creates a new PathHierarchyTokenizerFactory </summary>
+	  public PathHierarchyTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER);
+		replacement = getChar(args, "replace", delimiter);
+		reverse = getBoolean(args, "reverse", false);
+		skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override Tokenizer create(AttributeFactory factory, Reader input)
+	  {
+		if (reverse)
+		{
+		  return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
+		}
+		return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
+	  }
+	}
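+
+	// A minimal usage sketch (illustrative only; it assumes the args-dictionary and lowercase
+	// create() API exactly as ported above, and a hypothetical Reader named "reader"):
+	//
+	//   var args = new Dictionary<string, string> { { "delimiter", "/" } };
+	//   var factory = new PathHierarchyTokenizerFactory(args);   // leftover keys would throw
+	//   Tokenizer tokenizer = factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader);
+	//   // For the input "Books/NonFic/Law" this emits "Books", "Books/NonFic", "Books/NonFic/Law".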
+
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
new file mode 100644
index 0000000..00b5880
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
@@ -0,0 +1,214 @@
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.path
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+	/// <summary>
+	/// Tokenizer for domain-like hierarchies.
+	/// <para>
+	/// Take something like:
+	/// 
+	/// <pre>
+	/// www.site.co.uk
+	/// </pre>
+	/// 
+	/// and make:
+	/// 
+	/// <pre>
+	/// www.site.co.uk
+	/// site.co.uk
+	/// co.uk
+	/// uk
+	/// </pre>
+	/// 
+	/// </para>
+	/// </summary>
+	public class ReversePathHierarchyTokenizer : Tokenizer
+	{
+
+	  public ReversePathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) : this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+	  {
+	  }
+
+	  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
+	  {
+	  }
+	  public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
+	  {
+		if (bufferSize < 0)
+		{
+		  throw new System.ArgumentException("bufferSize cannot be negative");
+		}
+		if (skip < 0)
+		{
+		  throw new System.ArgumentException("skip cannot be negative");
+		}
+		termAtt.resizeBuffer(bufferSize);
+		this.delimiter = delimiter;
+		this.replacement = replacement;
+		this.skip = skip;
+		resultToken = new StringBuilder(bufferSize);
+		resultTokenBuffer = new char[bufferSize];
+		delimiterPositions = new List<int?>(bufferSize / 10);
+	  }
+
+	  private const int DEFAULT_BUFFER_SIZE = 1024;
+	  public const char DEFAULT_DELIMITER = '/';
+	  public const int DEFAULT_SKIP = 0;
+
+	  private readonly char delimiter;
+	  private readonly char replacement;
+	  private readonly int skip;
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+	  private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
+
+	  private int endPosition = 0;
+	  private int finalOffset = 0;
+	  private int skipped = 0;
+	  private StringBuilder resultToken;
+
+	  private IList<int?> delimiterPositions;
+	  private int delimitersCount = -1;
+	  private char[] resultTokenBuffer;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		clearAttributes();
+		if (delimitersCount == -1)
+		{
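+		  // First call (or first call after reset()): read the entire input, recording the
+		  // offset just past each delimiter and buffering the text with each delimiter
+		  // replaced by 'replacement'.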
+		  int length = 0;
+		  delimiterPositions.Add(0);
+		  while (true)
+		  {
+			int c = input.read();
+			if (c < 0)
+			{
+			  break;
+			}
+			length++;
+			if (c == delimiter)
+			{
+			  delimiterPositions.Add(length);
+			  resultToken.Append(replacement);
+			}
+			else
+			{
+			  resultToken.Append((char)c);
+			}
+		  }
+		  delimitersCount = delimiterPositions.Count;
+		  if (delimiterPositions[delimitersCount - 1] < length)
+		  {
+			delimiterPositions.Add(length);
+			delimitersCount++;
+		  }
+		  if (resultTokenBuffer.Length < resultToken.Length)
+		  {
+			resultTokenBuffer = new char[resultToken.Length];
+		  }
+		  resultToken.getChars(0, resultToken.Length, resultTokenBuffer, 0);
+		  resultToken.Length = 0;
+		  int idx = delimitersCount - 1 - skip;
+		  if (idx >= 0)
+		  {
+			// otherwise it's ok, because we will skip and return false
+			endPosition = delimiterPositions[idx].Value;
+		  }
+		  finalOffset = correctOffset(length);
+		  posAtt.PositionIncrement = 1;
+		}
+		else
+		{
+		  posAtt.PositionIncrement = 0;
+		}
+
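+		// Emit one token per remaining entry, longest first: each starts at a recorded
+		// delimiter boundary (offset 0 for the first) and ends at endPosition, which
+		// already accounts for the configured skip.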
+		while (skipped < delimitersCount - skip - 1)
+		{
+		  int start = delimiterPositions[skipped].Value;
+		  termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
+		  offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
+		  skipped++;
+		  return true;
+		}
+
+		return false;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
+	  public override void end()
+	  {
+		base.end();
+		// set final offset
+		offsetAtt.setOffset(finalOffset, finalOffset);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		resultToken.Length = 0;
+		finalOffset = 0;
+		endPosition = 0;
+		skipped = 0;
+		delimitersCount = -1;
+		delimiterPositions.Clear();
+	  }
+	}
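+
+	// A minimal usage sketch (illustrative only; "reader" is a hypothetical Reader over the
+	// text "www.site.co.uk", and the constructor is the (Reader, delimiter, replacement)
+	// overload ported above):
+	//
+	//   var tokenizer = new ReversePathHierarchyTokenizer(reader, '.', '.');
+	//   // incrementToken() yields "www.site.co.uk", "site.co.uk", "co.uk", "uk" in turn,
+	//   // all at the same position (position increment 0 after the first token).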
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs
new file mode 100644
index 0000000..5b47526
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupFilterFactory.cs
@@ -0,0 +1,54 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PatternCaptureGroupTokenFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptncapturegroup" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.PatternCaptureGroupFilterFactory" pattern="([^a-z])" preserve_original="true"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	/// <seealso cref= PatternCaptureGroupTokenFilter </seealso>
+	public class PatternCaptureGroupFilterFactory : TokenFilterFactory
+	{
+	  private Pattern pattern;
+	  private bool preserveOriginal = true;
+
+	  public PatternCaptureGroupFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		pattern = getPattern(args, "pattern");
+		preserveOriginal = args.ContainsKey("preserve_original") ? bool.Parse(args["preserve_original"]) : true;
+	  }
+	  public override PatternCaptureGroupTokenFilter create(TokenStream input)
+	  {
+		return new PatternCaptureGroupTokenFilter(input, preserveOriginal, pattern);
+	  }
+	}
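+
+	// Sketch of the equivalent programmatic configuration (illustrative only; "input" is a
+	// hypothetical TokenStream, and the keys mirror the Solr attributes shown above):
+	//
+	//   var args = new Dictionary<string, string>
+	//       { { "pattern", "([^a-z])" }, { "preserve_original", "true" } };
+	//   TokenStream filtered = new PatternCaptureGroupFilterFactory(args).create(input);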
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
new file mode 100644
index 0000000..887b749
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternCaptureGroupTokenFilter.cs
@@ -0,0 +1,227 @@
+using System.Diagnostics;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// CaptureGroup uses Java regexes to emit multiple tokens - one for each capture
+	/// group in one or more patterns.
+	/// 
+	/// <para>
+	/// For example, a pattern like:
+	/// </para>
+	/// 
+	/// <para>
+	/// <code>"(https?://([a-zA-Z\-_0-9.]+))"</code>
+	/// </para>
+	/// 
+	/// <para>
+	/// when matched against the string "http://www.foo.com/index" would return the
+	/// tokens "http://www.foo.com" and "www.foo.com".
+	/// </para>
+	/// 
+	/// <para>
+	/// If none of the patterns match, or if preserveOriginal is true, the original
+	/// token will be preserved.
+	/// </para>
+	/// <para>
+	/// Each pattern is matched as often as it can be, so the pattern
+	/// <code> "(...)"</code>, when matched against <code>"abcdefghi"</code> would
+	/// produce <code>["abc","def","ghi"]</code>
+	/// </para>
+	/// <para>
+	/// A camelCaseFilter could be written as:
+	/// </para>
+	/// <para>
+	/// <code>
+	///   "([A-Z]{2,})",                                 <br />
+	///   "(?&lt;![A-Z])([A-Z][a-z]+)",                     <br />
+	///   "(?:^|\\b|(?&lt;=[0-9_])|(?&lt;=[A-Z]{2}))([a-z]+)", <br />
+	///   "([0-9]+)"
+	/// </code>
+	/// </para>
+	/// <para>
+	/// plus, if <seealso cref="#preserveOriginal"/> is true, it would also return
+	/// <code>"camelCaseFilter"</code>.
+	/// </para>
+	/// </summary>
+	public sealed class PatternCaptureGroupTokenFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute charTermAttr = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posAttr = addAttribute(typeof(PositionIncrementAttribute));
+	  private State state;
+	  private readonly Matcher[] matchers;
+	  private readonly CharsRef spare = new CharsRef();
+	  private readonly int[] groupCounts;
+	  private readonly bool preserveOriginal;
+	  private int[] currentGroup;
+	  private int currentMatcher;
+
+	  /// <param name="input">
+	  ///          the input <seealso cref="TokenStream"/> </param>
+	  /// <param name="preserveOriginal">
+	  ///          set to true to return the original token even if one of the
+	  ///          patterns matches </param>
+	  /// <param name="patterns">
+	  ///          an array of <seealso cref="Pattern"/> objects to match against each token </param>
+
+	  public PatternCaptureGroupTokenFilter(TokenStream input, bool preserveOriginal, params Pattern[] patterns) : base(input)
+	  {
+		this.preserveOriginal = preserveOriginal;
+		this.matchers = new Matcher[patterns.Length];
+		this.groupCounts = new int[patterns.Length];
+		this.currentGroup = new int[patterns.Length];
+		for (int i = 0; i < patterns.Length; i++)
+		{
+		  this.matchers[i] = patterns[i].matcher("");
+		  this.groupCounts[i] = this.matchers[i].groupCount();
+		  this.currentGroup[i] = -1;
+		}
+	  }
+
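+	  // Scans every matcher for its next usable capture (skipping empty captures and, when
+	  // preserveOriginal is set, a capture equal to the whole term) and picks the one that
+	  // starts earliest in the term; returns false once no captures remain.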
+	  private bool nextCapture()
+	  {
+		int min_offset = int.MaxValue;
+		currentMatcher = -1;
+		Matcher matcher;
+
+		for (int i = 0; i < matchers.Length; i++)
+		{
+		  matcher = matchers[i];
+		  if (currentGroup[i] == -1)
+		  {
+			currentGroup[i] = matcher.find() ? 1 : 0;
+		  }
+		  if (currentGroup[i] != 0)
+		  {
+			while (currentGroup[i] < groupCounts[i] + 1)
+			{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = matcher.start(currentGroup[i]);
+			  int start = matcher.start(currentGroup[i]);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = matcher.end(currentGroup[i]);
+			  int end = matcher.end(currentGroup[i]);
+			  if (start == end || preserveOriginal && start == 0 && spare.length == end)
+			  {
+				currentGroup[i]++;
+				continue;
+			  }
+			  if (start < min_offset)
+			  {
+				min_offset = start;
+				currentMatcher = i;
+			  }
+			  break;
+			}
+			if (currentGroup[i] == groupCounts[i] + 1)
+			{
+			  currentGroup[i] = -1;
+			  i--;
+			}
+		  }
+		}
+		return currentMatcher != -1;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+
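+		// Emit any remaining captures of the previously read term first, at position
+		// increment 0; only when they are exhausted is the underlying stream advanced.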
+		if (currentMatcher != -1 && nextCapture())
+		{
+		  Debug.Assert(state != null);
+		  clearAttributes();
+		  restoreState(state);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+		  int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+		  int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+
+		  posAttr.PositionIncrement = 0;
+		  charTermAttr.copyBuffer(spare.chars, start, end - start);
+		  currentGroup[currentMatcher]++;
+		  return true;
+		}
+
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		char[] buffer = charTermAttr.buffer();
+		int length = charTermAttr.length();
+		spare.copyChars(buffer, 0, length);
+		state = captureState();
+
+		for (int i = 0; i < matchers.Length; i++)
+		{
+		  matchers[i].reset(spare);
+		  currentGroup[i] = -1;
+		}
+
+		if (preserveOriginal)
+		{
+		  currentMatcher = 0;
+		}
+		else if (nextCapture())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+		  int start = matchers[currentMatcher].start(currentGroup[currentMatcher]);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+		  int end = matchers[currentMatcher].end(currentGroup[currentMatcher]);
+
+		  // if we start at 0 we can simply set the length and save the copy
+		  if (start == 0)
+		  {
+			charTermAttr.Length = end;
+		  }
+		  else
+		  {
+			charTermAttr.copyBuffer(spare.chars, start, end - start);
+		  }
+		  currentGroup[currentMatcher]++;
+		}
+		return true;
+
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		state = null;
+		currentMatcher = -1;
+	  }
+
+	}
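+
+	// A minimal usage sketch (illustrative only; "input" is a hypothetical TokenStream and
+	// Pattern is the regex type this raw port still assumes):
+	//
+	//   var filter = new PatternCaptureGroupTokenFilter(input, true,
+	//       Pattern.compile("(https?://([a-zA-Z\\-_0-9.]+))"));
+	//   // For the term "http://www.foo.com/index" this emits the original term plus
+	//   // "http://www.foo.com" and "www.foo.com", all at the same position.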
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs
new file mode 100644
index 0000000..886668a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilter.cs
@@ -0,0 +1,179 @@
+using System;
+using System.Text;
+using BaseCharFilter = Lucene.Net.Analysis.CharFilter.BaseCharFilter;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+
+	using BaseCharFilter = BaseCharFilter;
+
+	/// <summary>
+	/// CharFilter that uses a regular expression to select the target of the replacement string.
+	/// The pattern match is performed against each "block" of the char stream.
+	/// 
+	/// <para>
+	/// ex1) source="aa&nbsp;&nbsp;bb&nbsp;aa&nbsp;bb", pattern="(aa)\\s+(bb)" replacement="$1#$2"<br/>
+	/// output="aa#bb&nbsp;aa#bb"
+	/// </para>
+	/// 
+	/// NOTE: If the replacement produces text whose length differs from the source
+	/// string and the field is used for highlighting a term of that text, the
+	/// highlighted snippet can be misaligned, as in the second example below.
+	/// 
+	/// <para>
+	/// ex2) source="aa123bb", pattern="(aa)\\d+(bb)" replacement="$1&nbsp;$2"<br/>
+	/// output="aa&nbsp;bb"<br/>
+	/// and if you then search for bb and highlight it, you will get<br/>
+	/// highlight snippet="aa1&lt;em&gt;23bb&lt;/em&gt;"
+	/// </para>
+	/// 
+	/// @since Solr 1.5
+	/// </summary>
+	public class PatternReplaceCharFilter : BaseCharFilter
+	{
+	  [Obsolete]
+	  public const int DEFAULT_MAX_BLOCK_CHARS = 10000;
+
+	  private readonly Pattern pattern;
+	  private readonly string replacement;
+	  private Reader transformedInput;
+
+	  public PatternReplaceCharFilter(Pattern pattern, string replacement, Reader @in) : base(@in)
+	  {
+		this.pattern = pattern;
+		this.replacement = replacement;
+	  }
+
+	  [Obsolete]
+	  public PatternReplaceCharFilter(Pattern pattern, string replacement, int maxBlockChars, string blockDelimiter, Reader @in) : this(pattern, replacement, @in)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read(char[] cbuf, int off, int len) throws java.io.IOException
+	  public override int read(char[] cbuf, int off, int len)
+	  {
+		// Buffer all input on the first call.
+		if (transformedInput == null)
+		{
+		  fill();
+		}
+
+		return transformedInput.read(cbuf, off, len);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void fill() throws java.io.IOException
+	  private void fill()
+	  {
+		StringBuilder buffered = new StringBuilder();
+		char[] temp = new char [1024];
+		for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp))
+		{
+		  buffered.Append(temp, 0, cnt);
+		}
+		transformedInput = new StringReader(processPattern(buffered).ToString());
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int read() throws java.io.IOException
+	  public override int read()
+	  {
+		if (transformedInput == null)
+		{
+		  fill();
+		}
+
+		return transformedInput.read();
+	  }
+
+	  protected internal override int correct(int currentOff)
+	  {
+		return Math.Max(0, base.correct(currentOff));
+	  }
+
+	  /// <summary>
+	  /// Replace pattern in input and mark correction offsets. 
+	  /// </summary>
+	  internal virtual CharSequence processPattern(CharSequence input)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.regex.Matcher m = pattern.matcher(input);
+		Matcher m = pattern.matcher(input);
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final StringBuffer cumulativeOutput = new StringBuffer();
+		StringBuilder cumulativeOutput = new StringBuilder();
+		int cumulative = 0;
+		int lastMatchEnd = 0;
+		while (m.find())
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int groupSize = m.end() - m.start();
+		  int groupSize = m.end() - m.start();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int skippedSize = m.start() - lastMatchEnd;
+		  int skippedSize = m.start() - lastMatchEnd;
+		  lastMatchEnd = m.end();
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int lengthBeforeReplacement = cumulativeOutput.length() + skippedSize;
+		  int lengthBeforeReplacement = cumulativeOutput.Length + skippedSize;
+		  m.appendReplacement(cumulativeOutput, replacement);
+		  // Matcher doesn't tell us how many characters have been appended before the replacement.
+		  // So we need to calculate it. Skipped characters have been added as part of appendReplacement.
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int replacementSize = cumulativeOutput.length() - lengthBeforeReplacement;
+		  int replacementSize = cumulativeOutput.Length - lengthBeforeReplacement;
+
+		  if (groupSize != replacementSize)
+		  {
+			if (replacementSize < groupSize)
+			{
+			  // The replacement is smaller. 
+			  // Add the 'backskip' to the next index after the replacement (this is possibly 
+			  // after the end of string, but it's fine -- it just means the last character 
+			  // of the replaced block doesn't reach the end of the original string).
+			  cumulative += groupSize - replacementSize;
+			  int atIndex = lengthBeforeReplacement + replacementSize;
+			  // System.err.println(atIndex + "!" + cumulative);
+			  addOffCorrectMap(atIndex, cumulative);
+			}
+			else
+			{
+			  // The replacement is larger. Every new index needs to point to the last
+			  // element of the original group (if any).
+			  for (int i = groupSize; i < replacementSize; i++)
+			  {
+				addOffCorrectMap(lengthBeforeReplacement + i, --cumulative);
+				// System.err.println((lengthBeforeReplacement + i) + " " + cumulative);
+			  }
+			}
+		  }
+		}
+
+		// Append the remaining output, no further changes to indices.
+		m.appendTail(cumulativeOutput);
+		return cumulativeOutput;
+	  }
+	}
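+
+	// A minimal usage sketch (illustrative only; Pattern and the Reader "source" are carried
+	// over from the raw port):
+	//
+	//   var charFilter = new PatternReplaceCharFilter(
+	//       Pattern.compile("(aa)\\s+(bb)"), "$1#$2", source);
+	//   // Reading through charFilter turns "aa  bb aa bb" into "aa#bb aa#bb", while
+	//   // correct() maps offsets in the transformed text back to the original input.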
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs
new file mode 100644
index 0000000..f5aa3cb
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pattern/PatternReplaceCharFilterFactory.cs
@@ -0,0 +1,67 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pattern
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PatternReplaceCharFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;charFilter class="solr.PatternReplaceCharFilterFactory" 
+	///                    pattern="([^a-z])" replacement=""/&gt;
+	///     &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since Solr 3.1
+	/// </summary>
+	public class PatternReplaceCharFilterFactory : CharFilterFactory
+	{
+	  private readonly Pattern pattern;
+	  private readonly string replacement;
+	  private readonly int maxBlockChars;
+	  private readonly string blockDelimiters;
+
+	  /// <summary>
+	  /// Creates a new PatternReplaceCharFilterFactory </summary>
+	  public PatternReplaceCharFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		pattern = getPattern(args, "pattern");
+		replacement = get(args, "replacement", "");
+		// TODO: warn if you set maxBlockChars or blockDelimiters ?
+		maxBlockChars = getInt(args, "maxBlockChars", PatternReplaceCharFilter.DEFAULT_MAX_BLOCK_CHARS);
+		blockDelimiters = get(args, "blockDelimiters");
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override CharFilter create(Reader input)
+	  {
+		return new PatternReplaceCharFilter(pattern, replacement, maxBlockChars, blockDelimiters, input);
+	  }
+	}
+
+}
\ No newline at end of file

