lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [10/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:14 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs
new file mode 100644
index 0000000..bfd311a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseMinimalStemmer.cs
@@ -0,0 +1,44 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Minimal Stemmer for Portuguese
+	/// <para>
+	/// This follows the "RSLP-S" algorithm presented in:
+	/// <i>A study on the Use of Stemming for Monolingual Ad-Hoc Portuguese
+	/// Information Retrieval</i> (Orengo, et al)
+	/// which is just the plural reduction step of the RSLP
+	/// algorithm from <i>A Stemming Algorithm for the Portuguese Language</i>,
+	/// Orengo et al.
+	/// </para>
+	/// </summary>
+	/// <seealso cref= RSLPStemmerBase </seealso>
+	public class PortugueseMinimalStemmer : RSLPStemmerBase
+	{
+
+	  // Only the "Plural" step of the RSLP rule resource is retained, which is
+	  // exactly the plural-only (RSLP-S) variant of the algorithm.
+	  // NOTE(review): parse() loads "portuguese.rslp" as a resource; a missing
+	  // resource would surface here as a type-initialization failure — confirm.
+	  private static readonly Step pluralStep = parse(typeof(PortugueseMinimalStemmer), "portuguese.rslp")["Plural"];
+
+	  /// <summary>
+	  /// Stems the term in place by applying plural reduction only. </summary>
+	  /// <param name="s"> term buffer </param>
+	  /// <param name="len"> valid length of <paramref name="s"/> </param>
+	  /// <returns> new valid length after stemming </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		return pluralStep.apply(s, len);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs
new file mode 100644
index 0000000..1c046c0
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilter.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="PortugueseStemmer"/> to stem 
+	/// Portuguese words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class PortugueseStemFilter : TokenFilter
+	{
+	  private readonly PortugueseStemmer stemmer = new PortugueseStemmer();
+	  // C# (unlike Java) cannot call the inherited instance method addAttribute
+	  // from a field initializer (compiler error CS0236), so the attributes are
+	  // resolved in the constructor instead.
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  /// <summary>
+	  /// Creates a new PortugueseStemFilter over the given stream. </summary>
+	  public PortugueseStemFilter(TokenStream input) : base(input)
+	  {
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances the underlying stream and stems the current term in place,
+	  /// unless the <seealso cref="KeywordAttribute"/> marks it as a keyword. </summary>
+	  /// <returns> true if a token was produced; false at end of stream </returns>
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+		if (!keywordAttr.Keyword)
+		{
+		  // this stemmer increases word length by 1: worst case '*ã' -> '*ão',
+		  // so the term buffer is grown to len + 1 before stemming in place
+		  int len = termAtt.length();
+		  int newlen = stemmer.stem(termAtt.resizeBuffer(len + 1), len);
+		  termAtt.Length = newlen;
+		}
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs
new file mode 100644
index 0000000..b3895f5
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PortugueseStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.PortugueseStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PortugueseStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new PortugueseStemFilterFactory </summary>
+	  /// <param name="args"> factory arguments; this factory accepts none </param>
+	  /// <exception cref="System.ArgumentException"> if any arguments remain unconsumed </exception>
+	  public PortugueseStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  // Concatenating the dictionary directly would print its type name
+		  // rather than its contents (unlike Java's Map.toString), so the
+		  // entries are joined explicitly to produce a useful message.
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args));
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps the given stream in a <seealso cref="PortugueseStemFilter"/>. </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new PortugueseStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs
new file mode 100644
index 0000000..90bfbf2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/PortugueseStemmer.cs
@@ -0,0 +1,126 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Portuguese stemmer implementing the RSLP (Removedor de Sufixos da Lingua Portuguesa)
+	/// algorithm. This is sometimes also referred to as the Orengo stemmer.
+	/// </summary>
+	/// <seealso cref= RSLPStemmerBase </seealso>
+	public class PortugueseStemmer : RSLPStemmerBase
+	{
+	  // The seven RSLP steps, loaded once from the "portuguese.rslp" resource
+	  // by the static constructor below.
+	  private static readonly Step plural, feminine, adverb, augmentative, noun, verb, vowel;
+
+	  static PortugueseStemmer()
+	  {
+		IDictionary<string, Step> steps = parse(typeof(PortugueseStemmer), "portuguese.rslp");
+		plural = steps["Plural"];
+		feminine = steps["Feminine"];
+		adverb = steps["Adverb"];
+		augmentative = steps["Augmentative"];
+		noun = steps["Noun"];
+		verb = steps["Verb"];
+		vowel = steps["Vowel"];
+	  }
+
+	  /// <param name="s"> buffer, oversized to at least <code>len+1</code> </param>
+	  /// <param name="len"> initial valid length of buffer </param>
+	  /// <returns> new valid length, stemmed </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		Debug.Assert(s.Length >= len + 1, "this stemmer requires an oversized array of at least 1");
+
+		// unconditional reduction steps, applied in this fixed order
+		len = plural.apply(s, len);
+		len = adverb.apply(s, len);
+		len = feminine.apply(s, len);
+		len = augmentative.apply(s, len);
+
+		int oldlen = len;
+		len = noun.apply(s, len);
+
+		if (len == oldlen) // suffix not removed
+		{
+		  oldlen = len;
+
+		  len = verb.apply(s, len);
+
+		  if (len == oldlen) // suffix not removed
+		  {
+			// neither a noun nor a verb suffix matched: fall back to
+			// stripping a final vowel
+			len = vowel.apply(s, len);
+		  }
+		}
+
+		// rslp accent removal
+		// fold accented characters to their ASCII base letter, in place
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'à':
+			case 'á':
+			case 'â':
+			case 'ã':
+			case 'ä':
+			case 'å':
+				s[i] = 'a';
+				break;
+			case 'ç':
+				s[i] = 'c';
+				break;
+			case 'è':
+			case 'é':
+			case 'ê':
+			case 'ë':
+				s[i] = 'e';
+				break;
+			case 'ì':
+			case 'í':
+			case 'î':
+			case 'ï':
+				s[i] = 'i';
+				break;
+			case 'ñ':
+				s[i] = 'n';
+				break;
+			case 'ò':
+			case 'ó':
+			case 'ô':
+			case 'õ':
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'ù':
+			case 'ú':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+			case 'ý':
+			case 'ÿ':
+				s[i] = 'y';
+				break;
+		  }
+		}
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
new file mode 100644
index 0000000..252c795
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
@@ -0,0 +1,410 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.pt
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using Version = org.apache.lucene.util.Version;
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Base class for stemmers that use a set of RSLP-like stemming steps.
+	/// <para>
+	/// RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed
+	/// originally for stemming the Portuguese language, described in the paper
+	/// <i>A Stemming Algorithm for the Portuguese Language</i>, Orengo et. al.
+	/// </para>
+	/// <para>
+	/// Since this time a plural-only modification (RSLP-S) as well as a modification
+	/// for the Galician language have been implemented. This class parses a configuration
+	/// file that describes <seealso cref="Step"/>s, where each Step contains a set of <seealso cref="Rule"/>s.
+	/// </para>
+	/// <para>
+	/// The general rule format is: 
+	/// <blockquote>{ "suffix", N, "replacement", { "exception1", "exception2", ...}}</blockquote>
+	/// where:
+	/// <ul>
+	///   <li><code>suffix</code> is the suffix to be removed (such as "inho").
+	///   <li><code>N</code> is the min stem size, where stem is defined as the candidate stem 
+	///       after removing the suffix (but before appending the replacement!)
+	///   <li><code>replacement</code> is an optimal string to append after removing the suffix.
+	///       This can be the empty string.
+	///   <li><code>exceptions</code> is an optional list of exceptions, patterns that should 
+	///       not be stemmed. These patterns can be specified as whole word or suffix (ends-with) 
+	///       patterns, depending upon the exceptions format flag in the step header.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// A step is an ordered list of rules, with a structure in this format:
+	/// <blockquote>{ "name", N, B, { "cond1", "cond2", ... }
+	///               ... rules ... };
+	/// </blockquote>
+	/// where:
+	/// <ul>
+	///   <li><code>name</code> is a name for the step (such as "Plural").
+	///   <li><code>N</code> is the min word size. Words that are less than this length bypass
+	///       the step completely, as an optimization. Note: N can be zero, in this case this 
+	///       implementation will automatically calculate the appropriate value from the underlying 
+	///       rules.
+	///   <li><code>B</code> is a "boolean" flag specifying how exceptions in the rules are matched.
+	///       A value of 1 indicates whole-word pattern matching, a value of 0 indicates that 
+	///       exceptions are actually suffixes and should be matched with ends-with.
+	///   <li><code>conds</code> are an optional list of conditions to enter the step at all. If
+	///       the list is non-empty, then a word must end with one of these conditions or it will
+	///       bypass the step completely as an optimization.
+	/// </ul>
+	/// </para>
+	/// <para>
+	/// </para>
+	/// </summary>
+	/// <seealso cref= <a href="http://www.inf.ufrgs.br/~viviane/rslp/index.htm">RSLP description</a>
+	/// @lucene.internal </seealso>
+	public abstract class RSLPStemmerBase
+	{
+
+	  /// <summary>
+	  /// A basic rule, with no exceptions.
+	  /// </summary>
+	  protected internal class Rule
+	  {
+		protected internal readonly char[] suffix;
+		protected internal readonly char[] replacement;
+		protected internal readonly int min;
+
+		/// <summary>
+		/// Create a rule. </summary>
+		/// <param name="suffix"> suffix to remove </param>
+		/// <param name="min"> minimum stem length </param>
+		/// <param name="replacement"> replacement string </param>
+		public Rule(string suffix, int min, string replacement)
+		{
+		  this.suffix = suffix.ToCharArray();
+		  this.replacement = replacement.ToCharArray();
+		  this.min = min;
+		}
+
+		/// <returns> true if the word matches this rule. </returns>
+		public virtual bool matches(char[] s, int len)
+		{
+		  // the candidate stem (word minus suffix) must be at least 'min' chars
+		  return (len - suffix.Length >= min && StemmerUtil.EndsWith(s, len, suffix));
+		}
+
+		/// <returns> new valid length of the string after firing this rule. </returns>
+		public virtual int replace(char[] s, int len)
+		{
+		  if (replacement.Length > 0)
+		  {
+			// overwrite the removed suffix in place with the replacement text
+			Array.Copy(replacement, 0, s, len - suffix.Length, replacement.Length);
+		  }
+		  return len - suffix.Length + replacement.Length;
+		}
+	  }
+
+	  /// <summary>
+	  /// A rule with a set of whole-word exceptions.
+	  /// </summary>
+	  protected internal class RuleWithSetExceptions : Rule
+	  {
+		protected internal readonly CharArraySet exceptions;
+
+		public RuleWithSetExceptions(string suffix, int min, string replacement, string[] exceptions) : base(suffix, min, replacement)
+		{
+		  // sanity check: an exception that does not end with the suffix can
+		  // never be hit by this rule, so it must be a mistake in the rule file
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			if (!exceptions[i].EndsWith(suffix, StringComparison.Ordinal))
+			{
+			  throw new Exception("useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
+			}
+		  }
+		  // NOTE(review): Arrays.asList is a Java API; this converter artifact
+		  // will not compile in C# — the port needs to pass the array (or a
+		  // List<string>) directly, depending on the CharArraySet constructor.
+		  this.exceptions = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(exceptions), false);
+		}
+
+		public override bool matches(char[] s, int len)
+		{
+		  // match the base rule, then veto on an exact whole-word exception
+		  return base.matches(s, len) && !exceptions.contains(s, 0, len);
+		}
+	  }
+
+	  /// <summary>
+	  /// A rule with a set of exceptional suffixes.
+	  /// </summary>
+	  protected internal class RuleWithSuffixExceptions : Rule
+	  {
+		// TODO: use a more efficient datastructure: automaton?
+		protected internal readonly char[][] exceptions;
+
+		public RuleWithSuffixExceptions(string suffix, int min, string replacement, string[] exceptions) : base(suffix, min, replacement)
+		{
+		  // sanity check: see RuleWithSetExceptions — such an exception could
+		  // never fire, so the rule file must be wrong
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			if (!exceptions[i].EndsWith(suffix, StringComparison.Ordinal))
+			{
+			  throw new Exception("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
+			}
+		  }
+		  this.exceptions = new char[exceptions.Length][];
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			this.exceptions[i] = exceptions[i].ToCharArray();
+		  }
+		}
+
+		public override bool matches(char[] s, int len)
+		{
+		  if (!base.matches(s, len))
+		  {
+			return false;
+		  }
+
+		  // veto if the word ends with any exceptional suffix (ends-with
+		  // semantics, unlike the whole-word variant above)
+		  for (int i = 0; i < exceptions.Length; i++)
+		  {
+			if (StemmerUtil.EndsWith(s, len, exceptions[i]))
+			{
+			  return false;
+			}
+		  }
+
+		  return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// A step containing a list of rules.
+	  /// </summary>
+	  protected internal class Step
+	  {
+		protected internal readonly string name;
+		protected internal readonly Rule[] rules;
+		protected internal readonly int min;
+		protected internal readonly char[][] suffixes;
+
+		/// <summary>
+		/// Create a new step </summary>
+		/// <param name="name"> Step's name. </param>
+		/// <param name="rules"> an ordered list of rules. </param>
+		/// <param name="min"> minimum word size. if this is 0 it is automatically calculated. </param>
+		/// <param name="suffixes"> optional list of conditional suffixes. may be null. </param>
+		public Step(string name, Rule[] rules, int min, string[] suffixes)
+		{
+		  this.name = name;
+		  this.rules = rules;
+		  if (min == 0)
+		  {
+			// auto-calculate the minimum word size: the smallest word any
+			// underlying rule could possibly match
+			min = int.MaxValue;
+			foreach (Rule r in rules)
+			{
+			  min = Math.Min(min, r.min + r.suffix.Length);
+			}
+		  }
+		  this.min = min;
+
+		  if (suffixes == null || suffixes.Length == 0)
+		  {
+			this.suffixes = null;
+		  }
+		  else
+		  {
+			this.suffixes = new char[suffixes.Length][];
+			for (int i = 0; i < suffixes.Length; i++)
+			{
+			  this.suffixes[i] = suffixes[i].ToCharArray();
+			}
+		  }
+		}
+
+		/// <returns> new valid length of the string after applying the entire step. </returns>
+		public virtual int apply(char[] s, int len)
+		{
+		  // optimization: word is shorter than any rule in this step could match
+		  if (len < min)
+		  {
+			return len;
+		  }
+
+		  if (suffixes != null)
+		  {
+			// optimization: bypass the step entirely unless the word ends with
+			// one of the step's conditional suffixes
+			bool found = false;
+
+			for (int i = 0; i < suffixes.Length; i++)
+			{
+			  if (StemmerUtil.EndsWith(s, len, suffixes[i]))
+			  {
+				found = true;
+				break;
+			  }
+			}
+
+			if (!found)
+			{
+				return len;
+			}
+		  }
+
+		  // rules are ordered: the first matching rule fires and ends the step
+		  for (int i = 0; i < rules.Length; i++)
+		  {
+			if (rules[i].matches(s, len))
+			{
+			  return rules[i].replace(s, len);
+			}
+		  }
+
+		  return len;
+		}
+	  }
+
+	  /// <summary>
+	  /// Parse a resource file into an RSLP stemmer description. </summary>
+	  /// <returns> a Map containing the named Steps in this description. </returns>
+	  protected internal static IDictionary<string, Step> parse(Type clazz, string resource)
+	  {
+		// TODO: this parser is ugly, but works. use a jflex grammar instead.
+		// NOTE(review): InputStream/InputStreamReader/LineNumberReader and
+		// StandardCharsets are java.io/java.nio types; this raw port will not
+		// compile until they are replaced with System.IO equivalents (e.g. a
+		// StreamReader over a manifest resource stream).
+		try
+		{
+		  InputStream @is = clazz.getResourceAsStream(resource);
+		  LineNumberReader r = new LineNumberReader(new InputStreamReader(@is, StandardCharsets.UTF_8));
+		  IDictionary<string, Step> steps = new Dictionary<string, Step>();
+		  string step;
+		  while ((step = readLine(r)) != null)
+		  {
+			Step s = parseStep(r, step);
+			steps[s.name] = s;
+		  }
+		  r.close();
+		  return steps;
+		}
+		catch (IOException e)
+		{
+		  // NOTE(review): System.Exception has no (Exception) constructor;
+		  // this should become new Exception(e.ToString(), e) or similar.
+		  throw new Exception(e);
+		}
+	  }
+
+	  // NOTE(review): Pattern/Matcher are java.util.regex types; the .NET port
+	  // should use System.Text.RegularExpressions.Regex (static readonly,
+	  // ideally RegexOptions.Compiled). The four patterns recognize the rule
+	  // file's line shapes: step header, strip rule, strip+replace rule, and
+	  // rule with an exception list.
+	  private static readonly Pattern headerPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$");
+	  private static readonly Pattern stripPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$");
+	  private static readonly Pattern repPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$");
+	  private static readonly Pattern excPattern = Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$");
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static Step parseStep(java.io.LineNumberReader r, String header) throws java.io.IOException
+	  /// <summary>
+	  /// Parses one step: its already-read header line plus the rule lines
+	  /// that follow, up to the terminating ';'. </summary>
+	  private static Step parseStep(LineNumberReader r, string header)
+	  {
+		Matcher matcher = headerPattern.matcher(header);
+		if (!matcher.find())
+		{
+		  throw new Exception("Illegal Step header specified at line " + r.LineNumber);
+		}
+		Debug.Assert(matcher.groupCount() == 4);
+		// groups: 1=name, 2=min word size, 3=exception format flag, 4=suffix conditions
+		string name = matcher.group(1);
+		int min = int.Parse(matcher.group(2));
+		int type = int.Parse(matcher.group(3));
+		string[] suffixes = parseList(matcher.group(4));
+		Rule[] rules = parseRules(r, type);
+		return new Step(name, rules, min, suffixes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static Rule[] parseRules(java.io.LineNumberReader r, int type) throws java.io.IOException
+	  /// <summary>
+	  /// Parses a step's rule lines; <paramref name="type"/> selects whether
+	  /// exceptions are matched as suffixes (0) or whole words (1). </summary>
+	  private static Rule[] parseRules(LineNumberReader r, int type)
+	  {
+		IList<Rule> rules = new List<Rule>();
+		string line;
+		while ((line = readLine(r)) != null)
+		{
+		  // try the three rule shapes in order: strip-only, strip+replace,
+		  // then strip+replace with exception list
+		  Matcher matcher = stripPattern.matcher(line);
+		  if (matcher.matches())
+		  {
+			rules.Add(new Rule(matcher.group(1), int.Parse(matcher.group(2)), ""));
+		  }
+		  else
+		  {
+			matcher = repPattern.matcher(line);
+			if (matcher.matches())
+			{
+			  rules.Add(new Rule(matcher.group(1), int.Parse(matcher.group(2)), matcher.group(3)));
+			}
+			else
+			{
+			  matcher = excPattern.matcher(line);
+			  if (matcher.matches())
+			  {
+				if (type == 0)
+				{
+				  rules.Add(new RuleWithSuffixExceptions(matcher.group(1), int.Parse(matcher.group(2)), matcher.group(3), parseList(matcher.group(4))));
+				}
+				else
+				{
+				  rules.Add(new RuleWithSetExceptions(matcher.group(1), int.Parse(matcher.group(2)), matcher.group(3), parseList(matcher.group(4))));
+				}
+			  }
+			  else
+			  {
+				throw new Exception("Illegal Step rule specified at line " + r.LineNumber);
+			  }
+			}
+		  }
+		  // a trailing ';' terminates the step's rule list
+		  if (line.EndsWith(";", StringComparison.Ordinal))
+		  {
+			return rules.ToArray();
+		  }
+		}
+		return null;
+	  }
+
+	  /// <summary>
+	  /// Splits a comma-separated list of quoted strings; returns null when the
+	  /// input is empty. </summary>
+	  private static string[] parseList(string s)
+	  {
+		if (s.Length == 0)
+		{
+		  return null;
+		}
+		// NOTE(review): string.Split has no (string, bool) overload in .NET;
+		// this converter artifact needs to become s.Split(',') (or an
+		// equivalent char[]/StringSplitOptions overload).
+		string[] list = s.Split(",", true);
+		for (int i = 0; i < list.Length; i++)
+		{
+		  list[i] = parseString(list[i].Trim());
+		}
+		return list;
+	  }
+
+	  /// <summary>
+	  /// Strips the surrounding double quotes from a quoted token. </summary>
+	  private static string parseString(string s)
+	  {
+		return s.Substring(1, s.Length - 1 - 1);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static String readLine(java.io.LineNumberReader r) throws java.io.IOException
+	  /// <summary>
+	  /// Reads the next non-blank line that is not a '#' comment; returns null
+	  /// at end of stream. </summary>
+	  private static string readLine(LineNumberReader r)
+	  {
+		string line = null;
+		while ((line = r.readLine()) != null)
+		{
+		  line = line.Trim();
+		  if (line.Length > 0 && line[0] != '#')
+		  {
+			return line;
+		  }
+		}
+		return line;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
new file mode 100644
index 0000000..2daf790
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
@@ -0,0 +1,213 @@
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+
+namespace org.apache.lucene.analysis.query
+{
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using StopFilter = StopFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using IndexReader = org.apache.lucene.index.IndexReader;
+	using MultiFields = org.apache.lucene.index.MultiFields;
+	using Term = org.apache.lucene.index.Term;
+	using Terms = org.apache.lucene.index.Terms;
+	using TermsEnum = org.apache.lucene.index.TermsEnum;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// An <seealso cref="Analyzer"/> used primarily at query time to wrap another analyzer and provide a layer of protection
+	/// which prevents very common words from being passed into queries. 
+	/// <para>
+	/// For very large indexes the cost
+	/// of reading TermDocs for a very common word can be  high. This analyzer was created after experience with
+	/// a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for 
+	/// this term to take 2 seconds.
+	/// </para>
+	/// </summary>
+	public sealed class QueryAutoStopWordAnalyzer : AnalyzerWrapper
+	{
+
+	  // Analyzer whose TokenStreams are wrapped with a per-field StopFilter.
+	  private readonly Analyzer @delegate;
+	  // Field name -> set of terms considered stopwords for that field.
+	  private readonly IDictionary<string, HashSet<string>> stopWordsPerField = new Dictionary<string, HashSet<string>>();
+	  //The default maximum percentage (40%) of index documents which
+	  //can contain a term, after which the term is considered to be a stop word.
+	  public const float defaultMaxDocFreqPercent = 0.4f;
+	  private readonly Version matchVersion;
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+	  /// indexed fields from terms with a document frequency percentage greater than
+	  /// <seealso cref="#defaultMaxDocFreqPercent"/>
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader) : this(matchVersion, @delegate, indexReader, defaultMaxDocFreqPercent)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+	  /// indexed fields from terms with a document frequency greater than the given
+	  /// maxDocFreq
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, int maxDocFreq) : this(matchVersion, @delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+	  /// indexed fields from terms with a document frequency percentage greater than
+	  /// the given maxPercentDocs
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
+	  ///                      contain a term, after which the word is considered to be a stop word </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, float maxPercentDocs) : this(matchVersion, @delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+	  /// given selection of fields from terms with a document frequency percentage
+	  /// greater than the given maxPercentDocs
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="fields"> Selection of fields to calculate stopwords for </param>
+	  /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
+	  ///                      contain a term, after which the word is considered to be a stop word </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, float maxPercentDocs) : this(matchVersion, @delegate, indexReader, fields, (int)(indexReader.numDocs() * maxPercentDocs))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+	  /// given selection of fields from terms with a document frequency greater than
+	  /// the given maxDocFreq
+	  /// </summary>
+	  /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+	  /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+	  /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+	  /// <param name="fields"> Selection of fields to calculate stopwords for </param>
+	  /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+	  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, int maxDocFreq) : base(@delegate.ReuseStrategy)
+	  {
+		this.matchVersion = matchVersion;
+		this.@delegate = @delegate;
+
+		foreach (string field in fields)
+		{
+		  HashSet<string> stopWords = new HashSet<string>();
+		  Terms terms = MultiFields.getTerms(indexReader, field);
+		  CharsRef spare = new CharsRef();
+		  if (terms != null)
+		  {
+			TermsEnum te = terms.iterator(null);
+			BytesRef text;
+			// Any term whose document frequency exceeds maxDocFreq becomes a stopword.
+			while ((text = te.next()) != null)
+			{
+			  if (te.docFreq() > maxDocFreq)
+			  {
+				UnicodeUtil.UTF8toUTF16(text, spare);
+				stopWords.Add(spare.ToString());
+			  }
+			}
+		  }
+		  stopWordsPerField[field] = stopWords;
+		}
+	  }
+
+	  protected internal override Analyzer getWrappedAnalyzer(string fieldName)
+	  {
+		return @delegate;
+	  }
+
+	  protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
+	  {
+		// The Java original used Map.get(), which returns null for an unknown field;
+		// a C# Dictionary indexer throws KeyNotFoundException instead, so TryGetValue
+		// is required here to keep unknown fields passing through unfiltered.
+		HashSet<string> stopWords;
+		if (!stopWordsPerField.TryGetValue(fieldName, out stopWords))
+		{
+		  return components;
+		}
+		StopFilter stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false));
+		return new TokenStreamComponents(components.Tokenizer, stopFilter);
+	  }
+
+	  /// <summary>
+	  /// Provides information on which stop words have been identified for a field
+	  /// </summary>
+	  /// <param name="fieldName"> The field for which stop words identified in "addStopWords"
+	  ///                  method calls will be returned </param>
+	  /// <returns> the stop words identified for a field; empty if the field is unknown </returns>
+	  public string[] getStopWords(string fieldName)
+	  {
+		// TryGetValue (not the indexer) so an unknown field yields an empty array
+		// rather than a KeyNotFoundException, matching the Java Map.get() contract.
+		HashSet<string> stopWords;
+		if (!stopWordsPerField.TryGetValue(fieldName, out stopWords))
+		{
+		  return new string[0];
+		}
+		// HashSet<string> has no Java-style toArray(T[]); copy into a new array.
+		string[] result = new string[stopWords.Count];
+		stopWords.CopyTo(result);
+		return result;
+	  }
+
+	  /// <summary>
+	  /// Provides information on which stop words have been identified for all fields
+	  /// </summary>
+	  /// <returns> the stop words (as terms) </returns>
+	  public Term[] StopWords
+	  {
+		  get
+		  {
+			// Declared as List<Term> (not IList<Term>) so ToArray() is available
+			// without a LINQ dependency.
+			List<Term> allStopWords = new List<Term>();
+			foreach (KeyValuePair<string, HashSet<string>> entry in stopWordsPerField)
+			{
+			  foreach (string text in entry.Value)
+			  {
+				allStopWords.Add(new Term(entry.Key, text));
+			  }
+			}
+			return allStopWords.ToArray();
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs
new file mode 100644
index 0000000..9382516
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilter.cs
@@ -0,0 +1,281 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.reverse
+{
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Reverse token string, for example "country" => "yrtnuoc".
+	/// <para>
+	/// If <code>marker</code> is supplied, then tokens will be also prepended by
+	/// that character. For example, with a marker of &#x5C;u0001, "country" =>
+	/// "&#x5C;u0001yrtnuoc". This is useful when implementing efficient leading
+	/// wildcards search.
+	/// </para>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ReverseStringFilter, or when using any of
+	/// its static methods:
+	/// <ul>
+	///   <li> As of 3.1, supplementary characters are handled correctly
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class ReverseStringFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly char marker;
+	  private readonly Version matchVersion;
+	  // Sentinel value meaning "no marker character was requested".
+	  private const char NOMARKER = '\uFFFF';
+
+	  /// <summary>
+	  /// Example marker character: U+0001 (START OF HEADING) 
+	  /// </summary>
+	  public const char START_OF_HEADING_MARKER = '\u0001';
+
+	  /// <summary>
+	  /// Example marker character: U+001F (INFORMATION SEPARATOR ONE)
+	  /// </summary>
+	  public const char INFORMATION_SEPARATOR_MARKER = '\u001F';
+
+	  /// <summary>
+	  /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) 
+	  /// </summary>
+	  public const char PUA_EC00_MARKER = '\uEC00';
+
+	  /// <summary>
+	  /// Example marker character: U+200F (RIGHT-TO-LEFT MARK)
+	  /// </summary>
+	  public const char RTL_DIRECTION_MARKER = '\u200F';
+
+	  /// <summary>
+	  /// Create a new ReverseStringFilter that reverses all tokens in the 
+	  /// supplied <seealso cref="TokenStream"/>.
+	  /// <para>
+	  /// The reversed tokens will not be marked. 
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="in"> <seealso cref="TokenStream"/> to filter </param>
+	  public ReverseStringFilter(Version matchVersion, TokenStream @in) : this(matchVersion, @in, NOMARKER)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new ReverseStringFilter that reverses and marks all tokens in the
+	  /// supplied <seealso cref="TokenStream"/>.
+	  /// <para>
+	  /// The reversed tokens will be prepended (marked) by the <code>marker</code>
+	  /// character.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="in"> <seealso cref="TokenStream"/> to filter </param>
+	  /// <param name="marker"> A character used to mark reversed tokens </param>
+	  public ReverseStringFilter(Version matchVersion, TokenStream @in, char marker) : base(@in)
+	  {
+		this.matchVersion = matchVersion;
+		this.marker = marker;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  int len = termAtt.length();
+		  if (marker != NOMARKER)
+		  {
+			// Append the marker at the END of the buffer; the reverse below then
+			// moves it to the front of the token.
+			len++;
+			termAtt.resizeBuffer(len);
+			termAtt.buffer()[len - 1] = marker;
+		  }
+		  reverse(matchVersion, termAtt.buffer(), 0, len);
+		  termAtt.Length = len;
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Reverses the given input string
+	  /// </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="input"> the string to reverse </param>
+	  /// <returns> the given input string in reversed order </returns>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static String reverse(org.apache.lucene.util.Version matchVersion, final String input)
+	  public static string reverse(Version matchVersion, string input)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char[] charInput = input.toCharArray();
+		char[] charInput = input.ToCharArray();
+		reverse(matchVersion, charInput, 0, charInput.Length);
+		return new string(charInput);
+	  }
+
+	  /// <summary>
+	  /// Reverses the given input buffer in-place </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="buffer"> the input char array to reverse </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static void reverse(org.apache.lucene.util.Version matchVersion, final char[] buffer)
+	  public static void reverse(Version matchVersion, char[] buffer)
+	  {
+		reverse(matchVersion, buffer, 0, buffer.Length);
+	  }
+
+	  /// <summary>
+	  /// Partially reverses the given input buffer in-place from offset 0
+	  /// up to the given length. </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="buffer"> the input char array to reverse </param>
+	  /// <param name="len"> the length in the buffer up to where the
+	  ///        buffer should be reversed </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static void reverse(org.apache.lucene.util.Version matchVersion, final char[] buffer, final int len)
+	  public static void reverse(Version matchVersion, char[] buffer, int len)
+	  {
+		reverse(matchVersion, buffer, 0, len);
+	  }
+
+	  /// @deprecated (3.1) Remove this when support for 3.0 indexes is no longer needed. 
+	  [Obsolete("(3.1) Remove this when support for 3.0 indexes is no longer needed.")]
+	  private static void reverseUnicode3(char[] buffer, int start, int len)
+	  {
+		// Plain UTF-16 code-unit swap: no surrogate-pair handling (pre-3.1 behavior).
+		if (len <= 1)
+		{
+			return;
+		}
+		int num = len >> 1;
+		for (int i = start; i < (start + num); i++)
+		{
+		  char c = buffer[i];
+		  // start * 2 + len - i - 1 is the mirror position of i within [start, start + len).
+		  buffer[i] = buffer[start * 2 + len - i - 1];
+		  buffer[start * 2 + len - i - 1] = c;
+		}
+	  }
+
+	  /// <summary>
+	  /// Partially reverses the given input buffer in-place from the given offset
+	  /// up to the given length. </summary>
+	  /// <param name="matchVersion"> See <a href="#version">above</a> </param>
+	  /// <param name="buffer"> the input char array to reverse </param>
+	  /// <param name="start"> the offset from where to reverse the buffer </param>
+	  /// <param name="len"> the length in the buffer up to where the
+	  ///        buffer should be reversed </param>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: public static void reverse(org.apache.lucene.util.Version matchVersion, final char[] buffer, final int start, final int len)
+	  public static void reverse(Version matchVersion, char[] buffer, int start, int len)
+	  {
+		if (!matchVersion.onOrAfter(Version.LUCENE_31))
+		{
+		  // Pre-3.1 indexes: keep the old (surrogate-unaware) reversal.
+		  reverseUnicode3(buffer, start, len);
+		  return;
+		}
+		/* modified version of Apache Harmony AbstractStringBuilder reverse0() */
+		if (len < 2)
+		{
+		  return;
+		}
+		int end = (start + len) - 1;
+		char frontHigh = buffer[start];
+		char endLow = buffer[end];
+		// These flags track whether the code units carried over from the previous
+		// iteration may still begin/end a surrogate pair; they are cleared when a
+		// pair was moved at only one end so its units are not re-paired.
+		bool allowFrontSur = true, allowEndSur = true;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int mid = start + (len >> 1);
+		int mid = start + (len >> 1);
+		for (int i = start; i < mid; ++i, --end)
+		{
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char frontLow = buffer[i + 1];
+		  char frontLow = buffer[i + 1];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final char endHigh = buffer[end - 1];
+		  char endHigh = buffer[end - 1];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean surAtFront = allowFrontSur && Character.isSurrogatePair(frontHigh, frontLow);
+		  bool surAtFront = allowFrontSur && char.IsSurrogatePair(frontHigh, frontLow);
+		  if (surAtFront && (len < 3))
+		  {
+			// nothing to do since surAtFront is allowed and 1 char left
+			return;
+		  }
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final boolean surAtEnd = allowEndSur && Character.isSurrogatePair(endHigh, endLow);
+		  bool surAtEnd = allowEndSur && char.IsSurrogatePair(endHigh, endLow);
+		  allowFrontSur = allowEndSur = true;
+		  if (surAtFront == surAtEnd)
+		  {
+			if (surAtFront)
+			{
+			  // both surrogates
+			  buffer[end] = frontLow;
+			  buffer[--end] = frontHigh;
+			  buffer[i] = endHigh;
+			  buffer[++i] = endLow;
+			  frontHigh = buffer[i + 1];
+			  endLow = buffer[end - 1];
+			}
+			else
+			{
+			  // neither surrogates
+			  buffer[end] = frontHigh;
+			  buffer[i] = endLow;
+			  frontHigh = frontLow;
+			  endLow = endHigh;
+			}
+		  }
+		  else
+		  {
+			if (surAtFront)
+			{
+			  // surrogate only at the front
+			  buffer[end] = frontLow;
+			  buffer[i] = endLow;
+			  endLow = endHigh;
+			  allowFrontSur = false;
+			}
+			else
+			{
+			  // surrogate only at the end
+			  buffer[end] = frontHigh;
+			  buffer[i] = endHigh;
+			  frontHigh = frontLow;
+			  allowEndSur = false;
+			}
+		  }
+		}
+		if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur))
+		{
+		  // only if odd length
+		  buffer[end] = allowFrontSur ? endLow : frontHigh;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs
new file mode 100644
index 0000000..abc8d24
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Reverse/ReverseStringFilterFactory.cs
@@ -0,0 +1,59 @@
+using System.Collections.Generic;
+using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+
+namespace org.apache.lucene.analysis.reverse
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ReverseStringFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.ReverseStringFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// 
+	/// @since solr 1.4
+	/// </summary>
+	public class ReverseStringFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ReverseStringFilterFactory.
+	  /// Requires a luceneMatchVersion and accepts no other parameters. </summary>
+	  /// <param name="args"> factory arguments; must be empty after base processing </param>
+	  public ReverseStringFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// A match version is mandatory for this factory.
+		assureMatchVersion();
+		// Any arguments left unconsumed are unknown and therefore an error.
+		if (args.Count != 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds a <seealso cref="ReverseStringFilter"/> over the given stream. </summary>
+	  public override ReverseStringFilter create(TokenStream @in)
+	  {
+		return new ReverseStringFilter(luceneMatchVersion, @in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
new file mode 100644
index 0000000..a68928c
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ro/RomanianAnalyzer.cs
@@ -0,0 +1,142 @@
+using System;
+
+namespace org.apache.lucene.analysis.ro
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using RomanianStemmer = org.tartarus.snowball.ext.RomanianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Romanian.
+	/// </summary>
+	public sealed class RomanianAnalyzer : StopwordAnalyzerBase
+	{
+	  // Terms in this set are marked as keywords and bypass the Snowball stemmer.
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Romanian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+	  /// <summary>
+	  /// The comment character in the stopwords file.  
+	  /// All lines prefixed with this will be ignored.
+	  /// </summary>
+	  private const string STOPWORDS_COMMENT = "#";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.;
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(RomanianAnalyzer), DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+		  }
+		  // NOTE(review): IOException here is java.io.IOException in the raw port —
+		  // confirm it is mapped to System.IO.IOException when this file is cleaned up.
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public RomanianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public RomanianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		// Defensive copy so later mutation of the caller's set cannot affect this analyzer.
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		// Pipeline: StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
+		// StopFilter [-> SetKeywordMarkerFilter] -> SnowballFilter(RomanianStemmer).
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new RomanianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
new file mode 100644
index 0000000..955f021
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianAnalyzer.cs
@@ -0,0 +1,172 @@
+using System;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Russian language. 
+	/// <para>
+	/// Supports an external list of stopwords (words that
+	/// will not be indexed at all).
+	/// A default set of stopwords is used unless an alternative list is specified.
+	/// </para>
+	/// <a name="version"/>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating RussianAnalyzer:
+	/// <ul>
+	///   <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
+	///        SnowballFilter, and Snowball stopwords are used by default.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class RussianAnalyzer : StopwordAnalyzerBase
+	{
+		/// <summary>
+		/// List of typical Russian stopwords. (for backwards compatibility) </summary>
+		/// @deprecated (3.1) Remove this for LUCENE 5.0 
+		[Obsolete("(3.1) Remove this for LUCENE 5.0")]
+		private static readonly string[] RUSSIAN_STOP_WORDS_30 = new string[] {"а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в", "вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где", "да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть", "еще", "же", "за", "здесь", "и", "из", "или", "им", "их", "к", "как", "ко", "когда", "кто", "ли", "либо", "мне", "может", "мы", "на", "надо", "наш", "не", "него", "нее", "нет", "ни", "них", "но", "ну", "о", "об", "однако", "он", "она", "они", "оно", "от", "очень", "по", "под", "при", "с", "со", "так", "также", "такой", "там", "те", "тем", "то", "того", "тоже", "той", "только", "том", "ты", "у", "уже", "хотя", "чего", "чей", "чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"};
+
+		/// <summary>
+		/// File containing default Russian stopwords. </summary>
+		public const string DEFAULT_STOPWORD_FILE = "russian_stop.txt";
+
+		// Lazy holder idiom (carried over from Java): the stopword sets are only
+		// loaded the first time this nested type is touched.
+		private class DefaultSetHolder
+		{
+		  /// @deprecated (3.1) remove this for Lucene 5.0 
+		  [Obsolete("(3.1) remove this for Lucene 5.0")]
+		  internal static readonly CharArraySet DEFAULT_STOP_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
+		  internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		  static DefaultSetHolder()
+		  {
+			try
+			{
+			  DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+			}
+			catch (IOException ex)
+			{
+			  // default set should always be present as it is part of the
+			  // distribution (JAR)
+			  throw new Exception("Unable to load default stopword set", ex);
+			}
+		  }
+		}
+
+		// Terms in this set are never stemmed; immutable copy made in the constructor.
+		private readonly CharArraySet stemExclusionSet;
+
+		/// <summary>
+		/// Returns an unmodifiable instance of the default stop-words set.
+		/// </summary>
+		/// <returns> an unmodifiable instance of the default stop-words set. </returns>
+		public static CharArraySet DefaultStopSet
+		{
+			get
+			{
+			  return DefaultSetHolder.DEFAULT_STOP_SET;
+			}
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the default stop words. Before 3.1 the legacy
+		/// stopword list is used for backwards compatibility. </summary>
+		/// <param name="matchVersion"> lucene compatibility version </param>
+		public RussianAnalyzer(Version matchVersion) : this(matchVersion, matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET : DefaultSetHolder.DEFAULT_STOP_SET_30)
+		{
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words
+		/// </summary>
+		/// <param name="matchVersion">
+		///          lucene compatibility version </param>
+		/// <param name="stopwords">
+		///          a stopword set </param>
+		public RussianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+		{
+		}
+
+		/// <summary>
+		/// Builds an analyzer with the given stop words
+		/// </summary>
+		/// <param name="matchVersion">
+		///          lucene compatibility version </param>
+		/// <param name="stopwords">
+		///          a stopword set </param>
+		/// <param name="stemExclusionSet"> a set of words not to be stemmed </param>
+		public RussianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+		{
+		  // Defensive immutable copy: callers cannot mutate our exclusion set afterwards.
+		  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+		}
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, and <seealso cref="SnowballFilter"/> </returns>
+		protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+		  if (matchVersion.onOrAfter(Version.LUCENE_31))
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+			Tokenizer source = new StandardTokenizer(matchVersion, reader);
+			TokenStream result = new StandardFilter(matchVersion, source);
+			result = new LowerCaseFilter(matchVersion, result);
+			result = new StopFilter(matchVersion, result, stopwords);
+			if (!stemExclusionSet.Empty)
+			{
+				result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+			}
+			result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
+			return new TokenStreamComponents(source, result);
+		  }
+		  else
+		  {
+			// Pre-3.1 pipeline for index backwards compatibility: the deprecated
+			// RussianLetterTokenizer and no StandardFilter.
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
+			Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
+			TokenStream result = new LowerCaseFilter(matchVersion, source);
+			result = new StopFilter(matchVersion, result, stopwords);
+			if (!stemExclusionSet.Empty)
+			{
+				result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+			}
+			result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
+			return new TokenStreamComponents(source, result);
+		  }
+		}
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
new file mode 100644
index 0000000..5ef27f2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizer.cs
@@ -0,0 +1,83 @@
+using System;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTokenizer = org.apache.lucene.analysis.util.CharTokenizer;
+	using LetterTokenizer = org.apache.lucene.analysis.core.LetterTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A RussianLetterTokenizer is a <seealso cref="Tokenizer"/> that extends <seealso cref="LetterTokenizer"/>
+	/// by also allowing the basic Latin digits 0-9.
+	/// <para>
+	/// <a name="version"/>
+	/// You must specify the required <seealso cref="Version"/> compatibility when creating
+	/// <seealso cref="RussianLetterTokenizer"/>:
+	/// <ul>
+	/// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+	/// detect token characters. See <seealso cref="CharTokenizer#isTokenChar(int)"/> and
+	/// <seealso cref="CharTokenizer#normalize(int)"/> for details.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	/// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality.
+	/// This filter will be removed in Lucene 5.0  
+	[Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality. This filter will be removed in Lucene 5.0")]
+	public class RussianLetterTokenizer : CharTokenizer
+	{
+		// Inclusive code-point range of the basic Latin digits accepted in addition to letters.
+		private const int DIGIT_0 = '0';
+		private const int DIGIT_9 = '9';
+
+		/// <summary>
+		/// Construct a new RussianLetterTokenizer.
+		/// </summary>
+		/// <param name="matchVersion"> Lucene version to match; see the version note above </param>
+		/// <param name="in">
+		///          the input to split up into tokens </param>
+		public RussianLetterTokenizer(Version matchVersion, Reader @in) : base(matchVersion, @in)
+		{
+		}
+
+		/// <summary>
+		/// Construct a new RussianLetterTokenizer using a given
+		/// <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>.
+		/// </summary>
+		/// <param name="matchVersion"> Lucene version to match; see the version note above </param>
+		/// <param name="factory">
+		///          the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+		/// <param name="in">
+		///          the input to split up into tokens </param>
+		public RussianLetterTokenizer(Version matchVersion, AttributeFactory factory, Reader @in) : base(matchVersion, factory, @in)
+		{
+		}
+
+		 /// <summary>
+		 /// Collects only characters which are letters (as in Java's
+		 /// Character.isLetter(int)) plus the basic Latin digits 0-9.
+		 /// </summary>
+		protected internal override bool isTokenChar(int c)
+		{
+			// 'c' is a full Unicode code point (int-based CharTokenizer API).
+			// char.IsLetter(char) cannot represent supplementary code points,
+			// so convert the code point to a string and test its first position.
+			return char.IsLetter(char.ConvertFromUtf32(c), 0) || (c >= DIGIT_0 && c <= DIGIT_9);
+		}
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
new file mode 100644
index 0000000..b308426
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLetterTokenizerFactory.cs
@@ -0,0 +1,52 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// @deprecated Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead.
+	///  This tokenizer has no Russian-specific functionality. 
+	[Obsolete("Use org.apache.lucene.analysis.standard.StandardTokenizerFactory instead. This tokenizer has no Russian-specific functionality.")]
+	public class RussianLetterTokenizerFactory : TokenizerFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new RussianLetterTokenizerFactory </summary>
+	  /// <param name="args"> factory arguments; this factory accepts none </param>
+	  public RussianLetterTokenizerFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// The tokenizer's behavior is version-dependent, so a match version is mandatory.
+		assureMatchVersion();
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override RussianLetterTokenizer create(AttributeFactory factory, Reader @in)
+	  {
+		return new RussianLetterTokenizer(luceneMatchVersion, factory, @in);
+	  }
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
new file mode 100644
index 0000000..0573bd3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="RussianLightStemmer"/> to stem Russian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class RussianLightStemFilter : TokenFilter
+	{
+	  private readonly RussianLightStemmer stemmer = new RussianLightStemmer();
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  public RussianLightStemFilter(TokenStream input) : base(input)
+	  {
+		// addAttribute is an instance member, so these cannot be assigned in
+		// field initializers in C# (CS0236); initialize them here instead.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  /// <summary>
+	  /// Advances to the next token, stemming its term text in place unless the
+	  /// token is marked as a keyword. </summary>
+	  /// <returns> true if a token was produced, false at end of stream </returns>
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			// The stemmer shortens the term within the existing buffer; record the new length.
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
new file mode 100644
index 0000000..157a5df
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="RussianLightStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.RussianLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class RussianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new RussianLightStemFilterFactory </summary>
+	  /// <param name="args"> factory arguments; this factory accepts none </param>
+	  public RussianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		// Any leftover argument indicates a configuration typo, so fail loudly.
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps the given stream with a <seealso cref="RussianLightStemFilter"/>. </summary>
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new RussianLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
new file mode 100644
index 0000000..7550c4a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ru/RussianLightStemmer.cs
@@ -0,0 +1,134 @@
+namespace org.apache.lucene.analysis.ru
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Russian.
+	/// <para>
+	/// This stemmer implements the following algorithm:
+	/// <i>Indexing and Searching Strategies for the Russian Language.</i>
+	/// Ljiljana Dolamic and Jacques Savoy.
+	/// </para>
+	/// </summary>
+	public class RussianLightStemmer
+	{
+
+	  /// <summary>
+	  /// Stems the term held in <paramref name="s"/> in place. </summary>
+	  /// <param name="s"> term buffer; only the first <paramref name="len"/> chars are the term </param>
+	  /// <param name="len"> length of the term in the buffer </param>
+	  /// <returns> new length of the stemmed term </returns>
+	  public virtual int stem(char[] s, int len)
+	  {
+		// First strip case endings, then normalize the remaining stem.
+		len = removeCase(s, len);
+		return normalize(s, len);
+	  }
+
+	  /// <summary>
+	  /// Post-processing after case removal: drops a trailing soft sign or 'и',
+	  /// and reduces a trailing double 'н' to a single one. </summary>
+	  private int normalize(char[] s, int len)
+	  {
+		if (len > 3)
+		{
+		  switch (s[len - 1])
+		  {
+			case 'ь':
+			case 'и':
+				return len - 1;
+			case 'н':
+				if (s[len - 2] == 'н')
+				{
+					return len - 1;
+				}
+				// C# forbids implicit fall-through (CS0163); break to the
+				// common 'return len' below, matching the Java behavior.
+				break;
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Removes Russian case endings, trying the longest suffixes first; each
+	  /// removal is guarded by a minimum term length so stems do not get too short. </summary>
+	  private int removeCase(char[] s, int len)
+	  {
+		if (len > 6 && (StemmerUtil.EndsWith(s, len, "иями") || StemmerUtil.EndsWith(s, len, "оями")))
+		{
+		  return len - 4;
+		}
+
+		if (len > 5 && (StemmerUtil.EndsWith(s, len, "иям") || StemmerUtil.EndsWith(s, len, "иях") || StemmerUtil.EndsWith(s, len, "оях") || StemmerUtil.EndsWith(s, len, "ями") || StemmerUtil.EndsWith(s, len, "оям") || StemmerUtil.EndsWith(s, len, "оьв") || StemmerUtil.EndsWith(s, len, "ами") || StemmerUtil.EndsWith(s, len, "его") || StemmerUtil.EndsWith(s, len, "ему") || StemmerUtil.EndsWith(s, len, "ери") || StemmerUtil.EndsWith(s, len, "ими") || StemmerUtil.EndsWith(s, len, "ого") || StemmerUtil.EndsWith(s, len, "ому") || StemmerUtil.EndsWith(s, len, "ыми") || StemmerUtil.EndsWith(s, len, "оев")))
+		{
+		  return len - 3;
+		}
+
+		if (len > 4 && (StemmerUtil.EndsWith(s, len, "ая") || StemmerUtil.EndsWith(s, len, "яя") || StemmerUtil.EndsWith(s, len, "ях") || StemmerUtil.EndsWith(s, len, "юю") || StemmerUtil.EndsWith(s, len, "ах") || StemmerUtil.EndsWith(s, len, "ею") || StemmerUtil.EndsWith(s, len, "их") || StemmerUtil.EndsWith(s, len, "ия") || StemmerUtil.EndsWith(s, len, "ию") || StemmerUtil.EndsWith(s, len, "ьв") || StemmerUtil.EndsWith(s, len, "ою") || StemmerUtil.EndsWith(s, len, "ую") || StemmerUtil.EndsWith(s, len, "ям") || StemmerUtil.EndsWith(s, len, "ых") || StemmerUtil.EndsWith(s, len, "ея") || StemmerUtil.EndsWith(s, len, "ам") || StemmerUtil.EndsWith(s, len, "ем") || StemmerUtil.EndsWith(s, len, "ей") || StemmerUtil.EndsWith(s, len, "ём") || StemmerUtil.EndsWith(s, len, "ев") || StemmerUtil.EndsWith(s, len, "ий") || StemmerUtil.EndsWith(s, len, "им") || StemmerUtil.EndsWith(s, len, "ое") || StemmerUtil.EndsWith(s, len, "ой") || StemmerUtil.EndsWith(s, len, "ом") || StemmerUtil.EndsWith(s, len, "ов") || StemmerUtil.EndsWith(s, len, "ые") || StemmerUtil.EndsWith(s, len, "ый") || StemmerUtil.EndsWith(s, len, "ым") || StemmerUtil.EndsWith(s, len, "ми")))
+		{
+		  return len - 2;
+		}
+
+		if (len > 3)
+		{
+		  // Single-character vowel/soft-sign endings; all cases return directly.
+		  switch (s[len - 1])
+		  {
+			case 'а':
+			case 'е':
+			case 'и':
+			case 'о':
+			case 'у':
+			case 'й':
+			case 'ы':
+			case 'я':
+			case 'ь':
+				return len - 1;
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message