lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [19/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:23 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
new file mode 100644
index 0000000..70b4a94
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Stemmer.cs
@@ -0,0 +1,475 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ByteArrayDataInput = org.apache.lucene.store.ByteArrayDataInput;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using Version = org.apache.lucene.util.Version;
+	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+
+	/// <summary>
+	/// Stemmer uses the affix rules declared in the Dictionary to generate one or more stems for a word.  It
+	/// conforms to the algorithm of the original Hunspell implementation, including recursive suffix stripping.
+	/// </summary>
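+	/// <remarks>
+	/// A minimal usage sketch (hypothetical: assumes a Dictionary instance has already
+	/// been loaded elsewhere; in practice this class is driven by a Hunspell token
+	/// filter rather than called directly):
+	/// <code>
+	/// Stemmer stemmer = new Stemmer(dictionary);
+	/// foreach (CharsRef stem in stemmer.stem("drinkable"))
+	/// {
+	///     Console.WriteLine(stem.ToString());
+	/// }
+	/// </code>
+	/// </remarks>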
+	internal sealed class Stemmer
+	{
+	  private readonly Dictionary dictionary;
+	  private readonly BytesRef scratch = new BytesRef();
+	  private readonly StringBuilder segment = new StringBuilder();
+	  private readonly ByteArrayDataInput affixReader;
+
+	  // used for normalization
+	  private readonly StringBuilder scratchSegment = new StringBuilder();
+	  private char[] scratchBuffer = new char[32];
+
+	  /// <summary>
+	  /// Constructs a new Stemmer which will use the provided Dictionary to create its stems.
+	  /// </summary>
+	  /// <param name="dictionary"> Dictionary that will be used to create the stems </param>
+	  public Stemmer(Dictionary dictionary)
+	  {
+		this.dictionary = dictionary;
+		this.affixReader = new ByteArrayDataInput(dictionary.affixData);
+	  }
+
+	  /// <summary>
+	  /// Find the stem(s) of the provided word.
+	  /// </summary>
+	  /// <param name="word"> Word to find the stems for </param>
+	  /// <returns> List of stems for the word </returns>
+	  public IList<CharsRef> stem(string word)
+	  {
+		return stem(word.ToCharArray(), word.Length);
+	  }
+
+	  /// <summary>
+	  /// Find the stem(s) of the provided word
+	  /// </summary>
+	  /// <param name="word"> Word to find the stems for </param>
+	  /// <param name="length"> valid length of the word </param>
+	  /// <returns> List of stems for the word </returns>
+	  public IList<CharsRef> stem(char[] word, int length)
+	  {
+
+		if (dictionary.needsInputCleaning)
+		{
+		  scratchSegment.Length = 0;
+		  scratchSegment.Append(word, 0, length);
+		  // cleanInput writes the cleaned text into 'segment'
+		  dictionary.cleanInput(scratchSegment, segment);
+		  scratchBuffer = ArrayUtil.grow(scratchBuffer, segment.Length);
+		  length = segment.Length;
+		  segment.CopyTo(0, scratchBuffer, 0, length);
+		  word = scratchBuffer;
+		}
+
+		IList<CharsRef> stems = new List<CharsRef>();
+		IntsRef forms = dictionary.lookupWord(word, 0, length);
+		if (forms != null)
+		{
+		  // TODO: some forms should not be added, e.g. ONLYINCOMPOUND
+		  // just because it exists, does not make it valid...
+		  for (int i = 0; i < forms.length; i++)
+		  {
+			stems.Add(newStem(word, length));
+		  }
+		}
+		stems.AddRange(stem(word, length, -1, -1, -1, 0, true, true, false, false));
+		return stems;
+	  }
+
+	  /// <summary>
+	  /// Find the unique stem(s) of the provided word
+	  /// </summary>
+	  /// <param name="word"> Word to find the unique stems for </param>
+	  /// <param name="length"> valid length of the word </param>
+	  /// <returns> List of unique stems for the word </returns>
+	  public IList<CharsRef> uniqueStems(char[] word, int length)
+	  {
+		IList<CharsRef> stems = stem(word, length);
+		if (stems.Count < 2)
+		{
+		  return stems;
+		}
+		CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
+		IList<CharsRef> deduped = new List<CharsRef>();
+		foreach (CharsRef s in stems)
+		{
+		  if (!terms.contains(s))
+		  {
+			deduped.Add(s);
+			terms.add(s);
+		  }
+		}
+		return deduped;
+	  }
+
+	  private CharsRef newStem(char[] buffer, int length)
+	  {
+		if (dictionary.needsOutputCleaning)
+		{
+		  scratchSegment.Length = 0;
+		  scratchSegment.Append(buffer, 0, length);
+		  try
+		  {
+			Dictionary.applyMappings(dictionary.oconv, scratchSegment);
+		  }
+		  catch (IOException bogus)
+		  {
+			throw new Exception(bogus.Message, bogus);
+		  }
+		  char[] cleaned = new char[scratchSegment.Length];
+		  scratchSegment.CopyTo(0, cleaned, 0, cleaned.Length);
+		  return new CharsRef(cleaned, 0, cleaned.Length);
+		}
+		else
+		{
+		  return new CharsRef(buffer, 0, length);
+		}
+	  }
+
+	  // ================================================= Helper Methods ================================================
+
+	  /// <summary>
+	  /// Generates a list of stems for the provided word
+	  /// </summary>
+	  /// <param name="word"> Word to generate the stems for </param>
+	  /// <param name="previous"> previous affix that was removed (so we don't remove the same one twice) </param>
+	  /// <param name="prevFlag"> Flag from a previous stemming step that needs to be cross-checked with any affixes in this recursive step </param>
+	  /// <param name="prefixFlag"> flag of the innermost removed prefix, so that when removing a suffix, it is also checked against the word </param>
+	  /// <param name="recursionDepth"> current recursion depth </param>
+	  /// <param name="doPrefix"> true if we should remove prefixes </param>
+	  /// <param name="doSuffix"> true if we should remove suffixes </param>
+	  /// <param name="previousWasPrefix"> true if the previous removal was a prefix:
+	  ///        if we are removing a suffix and it has no continuation requirements, that is fine,
+	  ///        but two prefixes (COMPLEXPREFIXES) or two suffixes must have continuation requirements to recurse. </param>
+	  /// <param name="circumfix"> true if the previous prefix removal was flagged as a circumfix;
+	  ///        this means the innermost suffix must also carry the circumfix flag. </param>
+	  /// <returns> List of stems, or an empty list if no stems are found </returns>
+	  private IList<CharsRef> stem(char[] word, int length, int previous, int prevFlag, int prefixFlag, int recursionDepth, bool doPrefix, bool doSuffix, bool previousWasPrefix, bool circumfix)
+	  {
+
+		// TODO: allow this stuff to be reused by tokenfilter
+		IList<CharsRef> stems = new List<CharsRef>();
+
+		if (doPrefix && dictionary.prefixes != null)
+		{
+		  for (int i = length - 1; i >= 0; i--)
+		  {
+			IntsRef prefixes = dictionary.lookupPrefix(word, 0, i);
+			if (prefixes == null)
+			{
+			  continue;
+			}
+
+			for (int j = 0; j < prefixes.length; j++)
+			{
+			  int prefix = prefixes.ints[prefixes.offset + j];
+			  if (prefix == previous)
+			  {
+				continue;
+			  }
+			  affixReader.Position = 8 * prefix;
+			  char flag = (char)(affixReader.readShort() & 0xffff);
+			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
+			  int condition = (char)(affixReader.readShort() & 0xffff);
+			  bool crossProduct = (condition & 1) == 1;
+			  condition = (int)((uint)condition >> 1);
+			  char append = (char)(affixReader.readShort() & 0xffff);
+
+			  bool compatible;
+			  if (recursionDepth == 0)
+			  {
+				compatible = true;
+			  }
+			  else if (crossProduct)
+			  {
+				// cross check incoming continuation class (flag of previous affix) against list.
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				Debug.Assert(prevFlag >= 0);
+				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, false);
+			  }
+			  else
+			  {
+				compatible = false;
+			  }
+
+			  if (compatible)
+			  {
+				int deAffixedStart = i;
+				int deAffixedLength = length - deAffixedStart;
+
+				int stripStart = dictionary.stripOffsets[stripOrd];
+				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+				int stripLength = stripEnd - stripStart;
+
+				if (!checkCondition(condition, dictionary.stripData, stripStart, stripLength, word, deAffixedStart, deAffixedLength))
+				{
+				  continue;
+				}
+
+				char[] strippedWord = new char[stripLength + deAffixedLength];
+				Array.Copy(dictionary.stripData, stripStart, strippedWord, 0, stripLength);
+				Array.Copy(word, deAffixedStart, strippedWord, stripLength, deAffixedLength);
+
+				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, prefix, -1, recursionDepth, true, circumfix);
+
+				stems.AddRange(stemList);
+			  }
+			}
+		  }
+		}
+
+		if (doSuffix && dictionary.suffixes != null)
+		{
+		  for (int i = 0; i < length; i++)
+		  {
+			IntsRef suffixes = dictionary.lookupSuffix(word, i, length - i);
+			if (suffixes == null)
+			{
+			  continue;
+			}
+
+			for (int j = 0; j < suffixes.length; j++)
+			{
+			  int suffix = suffixes.ints[suffixes.offset + j];
+			  if (suffix == previous)
+			  {
+				continue;
+			  }
+			  affixReader.Position = 8 * suffix;
+			  char flag = (char)(affixReader.readShort() & 0xffff);
+			  char stripOrd = (char)(affixReader.readShort() & 0xffff);
+			  int condition = (char)(affixReader.readShort() & 0xffff);
+			  bool crossProduct = (condition & 1) == 1;
+			  condition = (int)((uint)condition >> 1);
+			  char append = (char)(affixReader.readShort() & 0xffff);
+
+			  bool compatible;
+			  if (recursionDepth == 0)
+			  {
+				compatible = true;
+			  }
+			  else if (crossProduct)
+			  {
+				// cross check incoming continuation class (flag of previous affix) against list.
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				Debug.Assert(prevFlag >= 0);
+				compatible = hasCrossCheckedFlag((char)prevFlag, appendFlags, previousWasPrefix);
+			  }
+			  else
+			  {
+				compatible = false;
+			  }
+
+			  if (compatible)
+			  {
+				int appendLength = length - i;
+				int deAffixedLength = length - appendLength;
+
+				int stripStart = dictionary.stripOffsets[stripOrd];
+				int stripEnd = dictionary.stripOffsets[stripOrd + 1];
+				int stripLength = stripEnd - stripStart;
+
+				if (!checkCondition(condition, word, 0, deAffixedLength, dictionary.stripData, stripStart, stripLength))
+				{
+				  continue;
+				}
+
+				char[] strippedWord = new char[stripLength + deAffixedLength];
+				Array.Copy(word, 0, strippedWord, 0, deAffixedLength);
+				Array.Copy(dictionary.stripData, stripStart, strippedWord, deAffixedLength, stripLength);
+
+				IList<CharsRef> stemList = applyAffix(strippedWord, strippedWord.Length, suffix, prefixFlag, recursionDepth, false, circumfix);
+
+				stems.AddRange(stemList);
+			  }
+			}
+		  }
+		}
+
+		return stems;
+	  }
+
+	  /// <summary>
+	  /// Checks the condition against the concatenation of two strings </summary>
+	  // note: this is pretty stupid; we really should subtract the strip from the condition up front and just check the stem,
+	  // but that is a little more complicated.
+	  private bool checkCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len)
+	  {
+		if (condition != 0)
+		{
+		  CharacterRunAutomaton pattern = dictionary.patterns[condition];
+		  int state = pattern.InitialState;
+		  for (int i = c1off; i < c1off + c1len; i++)
+		  {
+			state = pattern.step(state, c1[i]);
+			if (state == -1)
+			{
+			  return false;
+			}
+		  }
+		  for (int i = c2off; i < c2off + c2len; i++)
+		  {
+			state = pattern.step(state, c2[i]);
+			if (state == -1)
+			{
+			  return false;
+			}
+		  }
+		  return pattern.isAccept(state);
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Applies the affix rule to the given word, producing a list of stems if any are found
+	  /// </summary>
+	  /// <param name="strippedWord"> Word with the affix removed and the strip added </param>
+	  /// <param name="length"> valid length of the stripped word </param>
+	  /// <param name="affix"> HunspellAffix representing the affix rule itself </param>
+	  /// <param name="prefixFlag"> when we have already stripped a prefix, we can't simply recurse and check the suffix, unless both are compatible,
+	  ///                   so we must check the dictionary form against both to add it as a stem! </param>
+	  /// <param name="recursionDepth"> current recursion depth </param>
+	  /// <param name="prefix"> true if we are removing a prefix (false if it is a suffix) </param>
+	  /// <returns> List of stems for the word, or an empty list if none are found </returns>
+	  internal IList<CharsRef> applyAffix(char[] strippedWord, int length, int affix, int prefixFlag, int recursionDepth, bool prefix, bool circumfix)
+	  {
+		// TODO: just pass this in from before, no need to decode it twice
+		affixReader.Position = 8 * affix;
+		char flag = (char)(affixReader.readShort() & 0xffff);
+		affixReader.skipBytes(2); // strip
+		int condition = (char)(affixReader.readShort() & 0xffff);
+		bool crossProduct = (condition & 1) == 1;
+		condition = (int)((uint)condition >> 1);
+		char append = (char)(affixReader.readShort() & 0xffff);
+
+		IList<CharsRef> stems = new List<CharsRef>();
+
+		IntsRef forms = dictionary.lookupWord(strippedWord, 0, length);
+		if (forms != null)
+		{
+		  for (int i = 0; i < forms.length; i++)
+		  {
+			dictionary.flagLookup.get(forms.ints[forms.offset + i], scratch);
+			char[] wordFlags = Dictionary.decodeFlags(scratch);
+			if (Dictionary.hasFlag(wordFlags, flag))
+			{
+			  // confusing: in this one exception, we already chained the first prefix against the second,
+			  // so it doesn't need to be checked against the word
+			  bool chainedPrefix = dictionary.complexPrefixes && recursionDepth == 1 && prefix;
+			  if (chainedPrefix == false && prefixFlag >= 0 && !Dictionary.hasFlag(wordFlags, (char)prefixFlag))
+			  {
+				// see if we can chain prefix thru the suffix continuation class (only if it has any!)
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				if (!hasCrossCheckedFlag((char)prefixFlag, appendFlags, false))
+				{
+				  continue;
+				}
+			  }
+
+			  // if circumfix was previously set by a prefix, we must check this suffix,
+			  // to ensure it has it, and vice versa
+			  if (dictionary.circumfix != -1)
+			  {
+				dictionary.flagLookup.get(append, scratch);
+				char[] appendFlags = Dictionary.decodeFlags(scratch);
+				bool suffixCircumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
+				if (circumfix != suffixCircumfix)
+				{
+				  continue;
+				}
+			  }
+			  stems.Add(newStem(strippedWord, length));
+			}
+		  }
+		}
+
+		// if a circumfix flag is defined in the dictionary, and we are a prefix, we need to check if we have that flag
+		if (dictionary.circumfix != -1 && !circumfix && prefix)
+		{
+		  dictionary.flagLookup.get(append, scratch);
+		  char[] appendFlags = Dictionary.decodeFlags(scratch);
+		  circumfix = Dictionary.hasFlag(appendFlags, (char)dictionary.circumfix);
+		}
+
+		if (crossProduct)
+		{
+		  if (recursionDepth == 0)
+		  {
+			if (prefix)
+			{
+			  // we took away the first prefix.
+			  // COMPLEXPREFIXES = true:  combine with a second prefix and another suffix 
+			  // COMPLEXPREFIXES = false: combine with a suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, dictionary.complexPrefixes && dictionary.twoStageAffix, true, true, circumfix));
+			}
+			else if (dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+			{
+			  // we took away a suffix.
+			  // COMPLEXPREFIXES = true: we don't recurse! only one suffix allowed
+			  // COMPLEXPREFIXES = false: combine with another suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+			}
+		  }
+		  else if (recursionDepth == 1)
+		  {
+			if (prefix && dictionary.complexPrefixes)
+			{
+			  // we took away the second prefix: go look for another suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, flag, ++recursionDepth, false, true, true, circumfix));
+			}
+			else if (prefix == false && dictionary.complexPrefixes == false && dictionary.twoStageAffix)
+			{
+			  // we took away a prefix, then a suffix: go look for another suffix
+			  stems.AddRange(stem(strippedWord, length, affix, flag, prefixFlag, ++recursionDepth, false, true, false, circumfix));
+			}
+		  }
+		}
+
+		return stems;
+	  }
+
+	  /// <summary>
+	  /// Checks if the given flag cross-checks with the given array of flags
+	  /// </summary>
+	  /// <param name="flag"> Flag to cross-check with the array of flags </param>
+	  /// <param name="flags"> Array of flags to cross-check against; may be empty </param>
+	  /// <returns> <code>true</code> if the flag is found in the array, or if the array is empty and <code>matchEmpty</code> is set; <code>false</code> otherwise </returns>
+	  private bool hasCrossCheckedFlag(char flag, char[] flags, bool matchEmpty)
+	  {
+		return (flags.Length == 0 && matchEmpty) || Array.BinarySearch(flags, flag) >= 0;
+	  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs
new file mode 100644
index 0000000..06368d4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hy/ArmenianAnalyzer.cs
@@ -0,0 +1,137 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.hy
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+	using ArmenianStemmer = org.tartarus.snowball.ext.ArmenianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Armenian.
+	/// </summary>
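+	/// <remarks>
+	/// A minimal usage sketch (Version.LUCENE_CURRENT stands in for a concrete
+	/// compatibility version):
+	/// <code>
+	/// Analyzer analyzer = new ArmenianAnalyzer(Version.LUCENE_CURRENT);
+	/// </code>
+	/// </remarks>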
+	public sealed class ArmenianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Armenian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(ArmenianAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public ArmenianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public ArmenianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="TextReader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>,
+	  ///         <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new ArmenianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
new file mode 100644
index 0000000..df18160
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianAnalyzer.cs
@@ -0,0 +1,138 @@
+using System;
+using System.IO;
+
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Analyzer for Indonesian (Bahasa)
+	/// </summary>
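+	/// <remarks>
+	/// A minimal usage sketch (illustrative; the exclusion set keeps "indonesia"
+	/// from being stemmed):
+	/// <code>
+	/// CharArraySet exclusions = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+	/// exclusions.add("indonesia");
+	/// Analyzer analyzer = new IndonesianAnalyzer(Version.LUCENE_CURRENT, IndonesianAnalyzer.DefaultStopSet, exclusions);
+	/// </code>
+	/// </remarks>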
+	public sealed class IndonesianAnalyzer : StopwordAnalyzerBase
+	{
+	  /// <summary>
+	  /// File containing default Indonesian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop-words set. </summary>
+	  /// <returns> an unmodifiable instance of the default stop-words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static set for the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = loadStopwordSet(false, typeof(IndonesianAnalyzer), DEFAULT_STOPWORD_FILE, "#");
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public IndonesianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// <seealso cref="IndonesianStemFilter"/>.
+	  /// </summary>
+	  /// <param name="matchVersion">
+	  ///          lucene compatibility version </param>
+	  /// <param name="stopwords">
+	  ///          a stopword set </param>
+	  /// <param name="stemExclusionSet">
+	  ///          a set of terms not to be stemmed </param>
+	  public IndonesianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// used to tokenize all the text in the provided <seealso cref="TextReader"/>.
+	  /// </summary>
+	  /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/>
+	  ///         if a stem exclusion set is provided, and <seealso cref="IndonesianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, TextReader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		return new TokenStreamComponents(source, new IndonesianStemFilter(result));
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
new file mode 100644
index 0000000..38521a8
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilter.cs
@@ -0,0 +1,75 @@
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="IndonesianStemmer"/> to stem Indonesian words.
+	/// </summary>
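+	/// <remarks>
+	/// A minimal wiring sketch (hypothetical pipeline; matchVersion and reader are
+	/// supplied by the caller):
+	/// <code>
+	/// Tokenizer source = new StandardTokenizer(matchVersion, reader);
+	/// TokenStream result = new IndonesianStemFilter(source, false); // inflectional suffixes only
+	/// </code>
+	/// </remarks>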
+	public sealed class IndonesianStemFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+	  private readonly IndonesianStemmer stemmer = new IndonesianStemmer();
+	  private readonly bool stemDerivational;
+
+	  /// <summary>
+	  /// Calls <seealso cref="IndonesianStemFilter(TokenStream, bool)"/> as <code>IndonesianStemFilter(input, true)</code>
+	  /// </summary>
+	  public IndonesianStemFilter(TokenStream input) : this(input, true)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new IndonesianStemFilter.
+	  /// <para>
+	  /// If <code>stemDerivational</code> is false, 
+	  /// only inflectional suffixes (particles and possessive pronouns) are stemmed.
+	  /// </para>
+	  /// </summary>
+	  public IndonesianStemFilter(TokenStream input, bool stemDerivational) : base(input)
+	  {
+		this.stemDerivational = stemDerivational;
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAtt.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length(), stemDerivational);
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
new file mode 100644
index 0000000..a7bfe5b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemFilterFactory.cs
@@ -0,0 +1,57 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="IndonesianStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class IndonesianStemFilterFactory : TokenFilterFactory
+	{
+	  private readonly bool stemDerivational;
+
+	  /// <summary>
+	  /// Creates a new IndonesianStemFilterFactory </summary>
+	  public IndonesianStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		stemDerivational = getBoolean(args, "stemDerivational", true);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new IndonesianStemFilter(input, stemDerivational);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
new file mode 100644
index 0000000..6e339fe
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Id/IndonesianStemmer.cs
@@ -0,0 +1,334 @@
+namespace org.apache.lucene.analysis.id
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+
+	/// <summary>
+	/// Stemmer for Indonesian.
+	/// <para>
+	/// Stems Indonesian words with the algorithm presented in:
+	/// <i>A Study of Stemming Effects on Information Retrieval in 
+	/// Bahasa Indonesia</i>, Fadillah Z Tala.
+	/// http://www.illc.uva.nl/Publications/ResearchReports/MoL-2003-02.text.pdf
+	/// </para>
+	/// </summary>
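+	/// <remarks>
+	/// A minimal sketch of in-place stemming: the buffer is mutated and the new valid
+	/// length is returned, so "membukukan" should reduce to "buku" under these rules
+	/// (prefix "mem-" and suffix "-kan" are removed):
+	/// <code>
+	/// char[] term = "membukukan".ToCharArray();
+	/// int newLength = new IndonesianStemmer().stem(term, term.Length, true);
+	/// string stem = new string(term, 0, newLength); // "buku"
+	/// </code>
+	/// </remarks>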
+	public class IndonesianStemmer
+	{
+	  private int numSyllables;
+	  private int flags;
+	  private const int REMOVED_KE = 1;
+	  private const int REMOVED_PENG = 2;
+	  private const int REMOVED_DI = 4;
+	  private const int REMOVED_MENG = 8;
+	  private const int REMOVED_TER = 16;
+	  private const int REMOVED_BER = 32;
+	  private const int REMOVED_PE = 64;
+
+	  /// <summary>
+	  /// Stem a term (returning its new length).
+	  /// <para>
+	  /// Use <code>stemDerivational</code> to control whether full stemming
+	  /// or only light inflectional stemming is done.
+	  /// </para>
+	  /// </summary>
+	  public virtual int stem(char[] text, int length, bool stemDerivational)
+	  {
+		flags = 0;
+		numSyllables = 0;
+		for (int i = 0; i < length; i++)
+		{
+		  if (isVowel(text[i]))
+		  {
+			  numSyllables++;
+		  }
+		}
+
+		if (numSyllables > 2)
+		{
+			length = removeParticle(text, length);
+		}
+		if (numSyllables > 2)
+		{
+			length = removePossessivePronoun(text, length);
+		}
+
+		if (stemDerivational)
+		{
+		  length = this.stemDerivational(text, length);
+		}
+		return length;
+	  }
+
+	  private int stemDerivational(char[] text, int length)
+	  {
+		int oldLength = length;
+		if (numSyllables > 2)
+		{
+			length = removeFirstOrderPrefix(text, length);
+		}
+		if (oldLength != length) // a rule is fired
+		{
+		  oldLength = length;
+		  if (numSyllables > 2)
+		  {
+			  length = removeSuffix(text, length);
+		  }
+		  if (oldLength != length) // a rule is fired
+		  {
+			if (numSyllables > 2)
+			{
+				length = removeSecondOrderPrefix(text, length);
+			}
+		  }
+		} // fail
+		else
+		{
+		  if (numSyllables > 2)
+		  {
+			  length = removeSecondOrderPrefix(text, length);
+		  }
+		  if (numSyllables > 2)
+		  {
+			  length = removeSuffix(text, length);
+		  }
+		}
+		return length;
+	  }
+
+	  private bool isVowel(char ch)
+	  {
+		switch (ch)
+		{
+		  case 'a':
+		  case 'e':
+		  case 'i':
+		  case 'o':
+		  case 'u':
+			return true;
+		  default:
+			return false;
+		}
+	  }
+
+	  private int removeParticle(char[] text, int length)
+	  {
+		if (StemmerUtil.EndsWith(text, length, "kah") || StemmerUtil.EndsWith(text, length, "lah") || StemmerUtil.EndsWith(text, length, "pun"))
+		{
+			numSyllables--;
+			return length - 3;
+		}
+
+		return length;
+	  }
+
+	  private int removePossessivePronoun(char[] text, int length)
+	  {
+		if (StemmerUtil.EndsWith(text, length, "ku") || StemmerUtil.EndsWith(text, length, "mu"))
+		{
+		  numSyllables--;
+		  return length - 2;
+		}
+
+		if (StemmerUtil.EndsWith(text, length, "nya"))
+		{
+		  numSyllables--;
+		  return length - 3;
+		}
+
+		return length;
+	  }
+
+	  private int removeFirstOrderPrefix(char[] text, int length)
+	  {
+		if (StemmerUtil.StartsWith(text, length, "meng"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 4);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "meny") && length > 4 && isVowel(text[4]))
+		{
+		  flags |= REMOVED_MENG;
+		  text[3] = 's';
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "men"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "mem"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "me"))
+		{
+		  flags |= REMOVED_MENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "peng"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 4);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "peny") && length > 4 && isVowel(text[4]))
+		{
+		  flags |= REMOVED_PENG;
+		  text[3] = 's';
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "peny"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 4);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pen") && length > 3 && isVowel(text[3]))
+		{
+		  flags |= REMOVED_PENG;
+		  text[2] = 't';
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pen"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pem"))
+		{
+		  flags |= REMOVED_PENG;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "di"))
+		{
+		  flags |= REMOVED_DI;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "ter"))
+		{
+		  flags |= REMOVED_TER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "ke"))
+		{
+		  flags |= REMOVED_KE;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		return length;
+	  }
+
+	  private int removeSecondOrderPrefix(char[] text, int length)
+	  {
+		if (StemmerUtil.StartsWith(text, length, "ber"))
+		{
+		  flags |= REMOVED_BER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (length == 7 && StemmerUtil.StartsWith(text, length, "belajar"))
+		{
+		  flags |= REMOVED_BER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "be") && length > 4 && !isVowel(text[2]) && text[3] == 'e' && text[4] == 'r')
+		{
+		  flags |= REMOVED_BER;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "per"))
+		{
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (length == 7 && StemmerUtil.StartsWith(text, length, "pelajar"))
+		{
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 3);
+		}
+
+		if (StemmerUtil.StartsWith(text, length, "pe"))
+		{
+		  flags |= REMOVED_PE;
+		  numSyllables--;
+		  return StemmerUtil.deleteN(text, 0, length, 2);
+		}
+
+		return length;
+	  }
+
+	  private int removeSuffix(char[] text, int length)
+	  {
+		if (StemmerUtil.EndsWith(text, length, "kan") && (flags & REMOVED_KE) == 0 && (flags & REMOVED_PENG) == 0 && (flags & REMOVED_PE) == 0)
+		{
+		  numSyllables--;
+		  return length - 3;
+		}
+
+		if (StemmerUtil.EndsWith(text, length, "an") && (flags & REMOVED_DI) == 0 && (flags & REMOVED_MENG) == 0 && (flags & REMOVED_TER) == 0)
+		{
+		  numSyllables--;
+		  return length - 2;
+		}
+
+		if (StemmerUtil.EndsWith(text, length, "i") && !StemmerUtil.EndsWith(text, length, "si") && (flags & REMOVED_BER) == 0 && (flags & REMOVED_KE) == 0 && (flags & REMOVED_PENG) == 0)
+		{
+		  numSyllables--;
+		  return length - 1;
+		}
+		return length;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
new file mode 100644
index 0000000..ef3ee00
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilter.cs
@@ -0,0 +1,52 @@
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="IndicNormalizer"/> to normalize text
+	/// in Indian Languages.
+	/// </summary>
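+	/// <remarks>
+	/// A minimal wiring sketch (hypothetical pipeline; matchVersion and reader are
+	/// supplied by the caller):
+	/// <code>
+	/// Tokenizer source = new StandardTokenizer(matchVersion, reader);
+	/// TokenStream result = new IndicNormalizationFilter(source);
+	/// </code>
+	/// </remarks>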
+	public sealed class IndicNormalizationFilter : TokenFilter
+	{
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly IndicNormalizer normalizer = new IndicNormalizer();
+
+	  public IndicNormalizationFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  termAtt.Length = normalizer.normalize(termAtt.buffer(), termAtt.length());
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
new file mode 100644
index 0000000..f4112ad
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizationFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="IndicNormalizationFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.IndicNormalizationFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class IndicNormalizationFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+	{
+
+	  /// <summary>
+	  /// Creates a new IndicNormalizationFilterFactory </summary>
+	  public IndicNormalizationFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new IndicNormalizationFilter(input);
+	  }
+
+	  public virtual AbstractAnalysisFactory MultiTermComponent
+	  {
+		  get
+		  {
+			return this;
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
new file mode 100644
index 0000000..599e030
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
@@ -0,0 +1,194 @@
+using System.Collections;
+
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static Character.UnicodeBlock.*;
+	using org.apache.lucene.analysis.util;
+
+	/// <summary>
+	/// Normalizes the Unicode representation of text in Indian languages.
+	/// <para>
+	/// Follows guidelines from Unicode 5.2, chapter 6, South Asian Scripts I
+	/// and graphical decompositions from http://ldc.upenn.edu/myl/IndianScriptsUnicode.html
+	/// </para>
+	/// </summary>
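+	/// <remarks>
+	/// A minimal sketch of in-place normalization (the returned length is never
+	/// greater than the input length):
+	/// <code>
+	/// char[] text = input.ToCharArray(); // some Indic-script input string
+	/// int newLength = new IndicNormalizer().normalize(text, text.Length);
+	/// string normalized = new string(text, 0, newLength);
+	/// </code>
+	/// </remarks>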
+	public class IndicNormalizer
+	{
+
+	  private class ScriptData
+	  {
+		internal readonly int flag;
+		internal readonly int @base;
+		internal BitArray decompMask;
+
+		internal ScriptData(int flag, int @base)
+		{
+		  this.flag = flag;
+		  this.@base = @base;
+		}
+	  }
+
+	  private static readonly IdentityHashMap<char.UnicodeBlock, ScriptData> scripts = new IdentityHashMap<char.UnicodeBlock, ScriptData>(9);
+
+	  private static int flag(char.UnicodeBlock ub)
+	  {
+		return scripts.get(ub).flag;
+	  }
+
+	  static IndicNormalizer()
+	  {
+		scripts.put(DEVANAGARI, new ScriptData(1, 0x0900));
+		scripts.put(BENGALI, new ScriptData(2, 0x0980));
+		scripts.put(GURMUKHI, new ScriptData(4, 0x0A00));
+		scripts.put(GUJARATI, new ScriptData(8, 0x0A80));
+		scripts.put(ORIYA, new ScriptData(16, 0x0B00));
+		scripts.put(TAMIL, new ScriptData(32, 0x0B80));
+		scripts.put(TELUGU, new ScriptData(64, 0x0C00));
+		scripts.put(KANNADA, new ScriptData(128, 0x0C80));
+		scripts.put(MALAYALAM, new ScriptData(256, 0x0D00));
+		foreach (ScriptData sd in scripts.values())
+		{
+		  sd.decompMask = new BitArray(0x7F);
+		  for (int i = 0; i < decompositions.Length; i++)
+		  {
+			int ch = decompositions[i][0];
+			int flags = decompositions[i][4];
+			if ((flags & sd.flag) != 0)
+			{
+			  sd.decompMask.Set(ch, true);
+			}
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Decompositions according to Unicode 5.2, 
+	  /// and http://ldc.upenn.edu/myl/IndianScriptsUnicode.html
+	  /// 
+	  /// Most of these are not handled by Unicode normalization anyway.
+	  /// 
+	  /// The numbers here represent offsets into the respective codepages,
+	  /// with -1 representing null and 0xFF representing zero-width joiner.
+	  /// 
+	  /// The columns are: ch1, ch2, ch3, res, flags.
+	  /// ch1, ch2, and ch3 are the decomposition;
+	  /// res is the composition, and flags are the scripts to which it applies.
+	  /// </summary>
+	  private static readonly int[][] decompositions = {};
+
+
+	  /// <summary>
+	  /// Normalizes input text, and returns the new length.
+	  /// The length will always be less than or equal to the existing length.
+	  /// </summary>
+	  /// <param name="text"> input text </param>
+	  /// <param name="len"> valid length </param>
+	  /// <returns> normalized length </returns>
+	  public virtual int normalize(char[] text, int len)
+	  {
+		for (int i = 0; i < len; i++)
+		{
+		  char.UnicodeBlock block = char.UnicodeBlock.of(text[i]);
+		  ScriptData sd = scripts.get(block);
+		  if (sd != null)
+		  {
+			int ch = text[i] - sd.@base;
+			if (sd.decompMask.Get(ch))
+			{
+			  len = compose(ch, block, sd, text, i, len);
+			}
+		  }
+		}
+		return len;
+	  }
+
+	  /// <summary>
+	  /// Composes into standard form any decomposed sequence that matches a row
+	  /// in the decompositions table.
+	  /// </summary>
+	  private int compose(int ch0, char.UnicodeBlock block0, ScriptData sd, char[] text, int pos, int len)
+	  {
+		if (pos + 1 >= len) // need at least 2 chars!
+		{
+		  return len;
+		}
+
+		int ch1 = text[pos + 1] - sd.@base;
+		char.UnicodeBlock block1 = char.UnicodeBlock.of(text[pos + 1]);
+		if (block1 != block0) // needs to be the same writing system
+		{
+		  return len;
+		}
+
+		int ch2 = -1;
+
+		if (pos + 2 < len)
+		{
+		  ch2 = text[pos + 2] - sd.@base;
+		  char.UnicodeBlock block2 = char.UnicodeBlock.of(text[pos + 2]);
+		  if (text[pos + 2] == '\u200D') // ZWJ
+		  {
+			ch2 = 0xFF;
+		  }
+		  else if (block2 != block1) // still allow a 2-char match
+		  {
+			ch2 = -1;
+		  }
+		}
+
+		for (int i = 0; i < decompositions.Length; i++)
+		{
+		  if (decompositions[i][0] == ch0 && (decompositions[i][4] & sd.flag) != 0)
+		  {
+			if (decompositions[i][1] == ch1 && (decompositions[i][2] < 0 || decompositions[i][2] == ch2))
+			{
+			  text[pos] = (char)(sd.@base + decompositions[i][3]);
+			  len = StemmerUtil.delete(text, pos + 1, len);
+			  if (decompositions[i][2] >= 0)
+			  {
+				len = StemmerUtil.delete(text, pos + 1, len);
+			  }
+			  return len;
+			}
+		  }
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
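
The composition pass is easiest to follow against one concrete row. Below is a
minimal C# sketch -- the class name, the one-row table, and the simplified
in-place delete are illustrative, not part of this port -- of how a
{ ch1, ch2, ch3, res, flags } row rewrites a two-character sequence. The sample
row encodes a real composition: DEVANAGARI LETTER NA (offset 0x28) followed by
SIGN NUKTA (0x3C) composes to LETTER NNNA (0x29).

    using System;

    internal static class ComposeSketch
    {
        private const int Base = 0x0900;      // Devanagari block base, as in the static initializer
        private const int DevanagariFlag = 1; // flag value assigned to DEVANAGARI above

        // One row in the table's { ch1, ch2, ch3, res, flags } layout; -1 means "no third char".
        private static readonly int[][] Rows = { new[] { 0x28, 0x3C, -1, 0x29, DevanagariFlag } };

        internal static int Compose(char[] text, int len)
        {
            for (int pos = 0; pos + 1 < len; pos++)
            {
                int ch0 = text[pos] - Base;
                int ch1 = text[pos + 1] - Base;
                foreach (int[] row in Rows)
                {
                    if (row[0] == ch0 && row[1] == ch1 && row[2] < 0 && (row[4] & DevanagariFlag) != 0)
                    {
                        text[pos] = (char)(Base + row[3]);
                        // shift the tail left by one, as StemmerUtil.delete does in the port
                        Array.Copy(text, pos + 2, text, pos + 1, len - pos - 2);
                        len--;
                        break;
                    }
                }
            }
            return len;
        }

        internal static void Main()
        {
            char[] buf = "\u0928\u093C".ToCharArray();   // NA + NUKTA
            Console.WriteLine(Compose(buf, buf.Length)); // 1: composed to NNNA (U+0929)
        }
    }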

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
new file mode 100644
index 0000000..2355448
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicTokenizer.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Globalization;
+
+namespace org.apache.lucene.analysis.@in
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharTokenizer = org.apache.lucene.analysis.util.CharTokenizer;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer; // javadocs
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Simple Tokenizer for text in Indian Languages. </summary>
+	/// @deprecated (3.6) Use <seealso cref="StandardTokenizer"/> instead. 
+	[Obsolete("(3.6) Use StandardTokenizer instead.")]
+	public sealed class IndicTokenizer : CharTokenizer
+	{
+
+	  public IndicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(matchVersion, factory, input)
+	  {
+	  }
+
+	  public IndicTokenizer(Version matchVersion, Reader input) : base(matchVersion, input)
+	  {
+	  }
+
+	  protected internal override bool isTokenChar(int c)
+	  {
+		// The original Java tested int code points via Character.getType; this raw
+		// port assumes BMP input and tests the equivalent .NET categories.
+		UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory((char)c);
+		return char.IsLetter((char)c)
+			|| category == UnicodeCategory.NonSpacingMark
+			|| category == UnicodeCategory.Format
+			|| category == UnicodeCategory.SpacingCombiningMark;
+	  }
+	}
+
+}
\ No newline at end of file
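
Concretely, the categories tested above keep combining vowel signs and format
characters inside a token instead of splitting on them. A small standalone
check of the same predicate (illustrative names, not part of the port):

    using System;
    using System.Globalization;

    internal static class TokenCharSketch
    {
        private static bool IsTokenChar(char c)
        {
            UnicodeCategory cat = CharUnicodeInfo.GetUnicodeCategory(c);
            return char.IsLetter(c)
                || cat == UnicodeCategory.NonSpacingMark
                || cat == UnicodeCategory.Format
                || cat == UnicodeCategory.SpacingCombiningMark;
        }

        internal static void Main()
        {
            Console.WriteLine(IsTokenChar('\u0915')); // True:  DEVANAGARI LETTER KA (a letter)
            Console.WriteLine(IsTokenChar('\u093F')); // True:  DEVANAGARI VOWEL SIGN I (Mc)
            Console.WriteLine(IsTokenChar(' '));      // False: whitespace splits tokens
        }
    }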

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
new file mode 100644
index 0000000..053fe5b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianAnalyzer.cs
@@ -0,0 +1,164 @@
+using System;
+
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ElisionFilter = org.apache.lucene.analysis.util.ElisionFilter;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using ItalianStemmer = org.tartarus.snowball.ext.ItalianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Italian.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating ItalianAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, ItalianLightStemFilter is used for less aggressive stemming.
+	///   <li> As of 3.2, ElisionFilter with a set of Italian 
+	///        contractions is used by default.
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class ItalianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Italian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "italian_stop.txt";
+
+	  private static readonly CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList("c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"), true));
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public ItalianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public ItalianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="ElisionFilter"/>, <seealso cref="LowerCaseFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem
+	  ///         exclusion set is provided, and <seealso cref="ItalianLightStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		if (matchVersion.onOrAfter(Version.LUCENE_32))
+		{
+		  result = new ElisionFilter(result, DEFAULT_ARTICLES);
+		}
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new ItalianLightStemFilter(result);
+		}
+		else
+		{
+		  result = new SnowballFilter(result, new ItalianStemmer());
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file
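
For versions 3.2 and later, the chain above inserts an ElisionFilter seeded
with DEFAULT_ARTICLES, so contracted Italian articles are stripped before
stemming. A hedged sketch of that behavior in plain C# (this is not the port's
ElisionFilter, just the same idea applied to a single token):

    using System;
    using System.Collections.Generic;

    internal static class ElisionSketch
    {
        private static readonly HashSet<string> Articles = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
            "gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
        };

        internal static string Elide(string token)
        {
            int apos = token.IndexOf('\'');
            // strip a leading article contraction ("dell'arte" -> "arte")
            if (apos >= 0 && Articles.Contains(token.Substring(0, apos)))
            {
                return token.Substring(apos + 1);
            }
            return token;
        }

        internal static void Main()
        {
            Console.WriteLine(Elide("dell'arte")); // arte
            Console.WriteLine(Elide("arte"));      // unchanged
        }
    }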

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
new file mode 100644
index 0000000..1b1023a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="ItalianLightStemmer"/> to stem Italian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class ItalianLightStemFilter : TokenFilter
+	{
+	  private readonly ItalianLightStemmer stemmer = new ItalianLightStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public ItalianLightStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file
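
The keyword-protection contract described in the summary can be sketched in
plain C#; the protected-term set and class name below are hypothetical, and the
real chain uses SetKeywordMarkerFilter to set KeywordAttribute on the stream:

    using System;
    using System.Collections.Generic;
    using org.apache.lucene.analysis.it; // ItalianLightStemmer, ported later in this commit

    internal static class KeywordProtectionSketch
    {
        internal static void Main()
        {
            var protectedTerms = new HashSet<string> { "spaghetti" }; // hypothetical keyword set
            foreach (string term in new[] { "pietre", "spaghetti" })
            {
                char[] buf = term.ToCharArray();
                int len = buf.Length;
                // SetKeywordMarkerFilter would mark protected terms; the filter
                // above then skips the stemmer exactly like this.
                if (!protectedTerms.Contains(term))
                {
                    len = new ItalianLightStemmer().stem(buf, len);
                }
                Console.WriteLine(new string(buf, 0, len)); // "pietr", then "spaghetti" unchanged
            }
        }
    }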

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
new file mode 100644
index 0000000..8377b02
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="ItalianLightStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.ItalianLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre> 
+	/// </summary>
+	public class ItalianLightStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new ItalianLightStemFilterFactory </summary>
+	  public ItalianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream input)
+	  {
+		return new ItalianLightStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file
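
As a usage note (a sketch, not code from this commit): the base factory class
removes the parameters it understands from args, so anything left over trips
the "Unknown parameters" check above.

    using System.Collections.Generic;
    using org.apache.lucene.analysis.it;

    internal static class FactorySketch
    {
        internal static void Main()
        {
            // An empty args dictionary passes the check...
            var factory = new ItalianLightStemFilterFactory(new Dictionary<string, string>());

            // ...while an unconsumed key would throw ArgumentException:
            // new ItalianLightStemFilterFactory(new Dictionary<string, string> { { "bogus", "1" } });
        }
    }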

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
new file mode 100644
index 0000000..c125272
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/It/ItalianLightStemmer.cs
@@ -0,0 +1,155 @@
+namespace org.apache.lucene.analysis.it
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	/// <summary>
+	/// Light Stemmer for Italian.
+	/// <para>
+	/// This stemmer implements the algorithm described in:
+	/// <i>Report on CLEF-2001 Experiments</i> by Jacques Savoy.
+	/// </para>
+	/// </summary>
+	public class ItalianLightStemmer
+	{
+
+	  public virtual int stem(char[] s, int len)
+	  {
+		if (len < 6)
+		{
+		  return len;
+		}
+
+		for (int i = 0; i < len; i++)
+		{
+		  switch (s[i])
+		  {
+			case 'à':
+			case 'á':
+			case 'â':
+			case 'ä':
+				s[i] = 'a';
+				break;
+			case 'ò':
+			case 'ó':
+			case 'ô':
+			case 'ö':
+				s[i] = 'o';
+				break;
+			case 'è':
+			case 'é':
+			case 'ê':
+			case 'ë':
+				s[i] = 'e';
+				break;
+			case 'ù':
+			case 'ú':
+			case 'û':
+			case 'ü':
+				s[i] = 'u';
+				break;
+			case 'ì':
+			case 'í':
+			case 'î':
+			case 'ï':
+				s[i] = 'i';
+				break;
+		  }
+		}
+
+		switch (s[len - 1])
+		{
+		  case 'e':
+			if (s[len - 2] == 'i' || s[len - 2] == 'h')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		  case 'i':
+			if (s[len - 2] == 'h' || s[len - 2] == 'i')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		  case 'a':
+			if (s[len - 2] == 'i')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		  case 'o':
+			if (s[len - 2] == 'i')
+			{
+			  return len - 2;
+			}
+			else
+			{
+			  return len - 1;
+			}
+		}
+
+		return len;
+	  }
+	}
+
+}
\ No newline at end of file
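
A quick worked example of the two phases above (accent folding, then suffix
removal), calling the ported stemmer directly:

    using System;
    using org.apache.lucene.analysis.it;

    internal static class StemSketch
    {
        internal static void Main()
        {
            // "perché": folding turns 'é' into 'e', then the final 'e'
            // preceded by 'h' drops two characters, leaving "perc".
            char[] buf = "perché".ToCharArray();
            int len = new ItalianLightStemmer().stem(buf, buf.Length);
            Console.WriteLine(new string(buf, 0, len)); // perc

            // Words shorter than six characters are returned untouched.
            char[] shortBuf = "città".ToCharArray();
            int shortLen = new ItalianLightStemmer().stem(shortBuf, shortBuf.Length);
            Console.WriteLine(new string(shortBuf, 0, shortLen)); // città
        }
    }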

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
new file mode 100644
index 0000000..1a65410
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianAnalyzer.cs
@@ -0,0 +1,137 @@
+using System;
+
+namespace org.apache.lucene.analysis.lv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Latvian.
+	/// </summary>
+	public sealed class LatvianAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Latvian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(typeof(LatvianAnalyzer), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public LatvianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public LatvianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from a <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>,
+	  ///         <seealso cref="StopFilter"/>, <seealso cref="SetKeywordMarkerFilter"/> if a stem
+	  ///         exclusion set is provided, and <seealso cref="LatvianStemFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new LatvianStemFilter(result);
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
new file mode 100644
index 0000000..3c18f24
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Lv/LatvianStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.lv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="LatvianStemmer"/> to stem Latvian
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class LatvianStemFilter : TokenFilter
+	{
+	  private readonly LatvianStemmer stemmer = new LatvianStemmer();
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+
+	  public LatvianStemFilter(TokenStream input) : base(input)
+	  {
+	  }
+
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

