lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [20/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:24 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs
new file mode 100644
index 0000000..4f068b3
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemFilterFactory.cs
@@ -0,0 +1,54 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="HindiStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.HindiStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
public class HindiStemFilterFactory : TokenFilterFactory
{
	/// <summary>
	/// Creates a new HindiStemFilterFactory. The base class consumes all known
	/// options; any entry left in <paramref name="args"/> is unsupported and
	/// rejected. </summary>
	public HindiStemFilterFactory(IDictionary<string, string> args) : base(args)
	{
		if (args.Count != 0)
		{
			throw new System.ArgumentException("Unknown parameters: " + args);
		}
	}

	/// <summary>
	/// Wraps the given stream with a <seealso cref="HindiStemFilter"/>. </summary>
	public override TokenStream create(TokenStream input)
	{
		return new HindiStemFilter(input);
	}
}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs
new file mode 100644
index 0000000..b177a31
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hi/HindiStemmer.cs
@@ -0,0 +1,71 @@
+namespace org.apache.lucene.analysis.hi
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Hindi.
+	/// <para>
+	/// Implements the algorithm specified in:
+	/// <i>A Lightweight Stemmer for Hindi</i>
+	/// Ananthakrishnan Ramanathan and Durgesh D Rao.
+	/// http://computing.open.ac.uk/Sites/EACLSouthAsia/Papers/p6-Ramanathan.pdf
+	/// </para>
+	/// </summary>
public class HindiStemmer
{
	// Inflectional endings grouped by length. Within a group the order of the
	// entries is irrelevant: any match strips the same number of characters.
	private static readonly string[] len5Suffixes = { "ाएंगी", "ाएंगे", "ाऊंगी", "ाऊंगा", "ाइयाँ", "ाइयों", "ाइयां" };
	private static readonly string[] len4Suffixes = { "ाएगी", "ाएगा", "ाओगी", "ाओगे", "एंगी", "ेंगी", "एंगे", "ेंगे", "ूंगी", "ूंगा", "ातीं", "नाओं", "नाएं", "ताओं", "ताएं", "ियाँ", "ियों", "ियां" };
	private static readonly string[] len3Suffixes = { "ाकर", "ाइए", "ाईं", "ाया", "ेगी", "ेगा", "ोगी", "ोगे", "ाने", "ाना", "ाते", "ाती", "ाता", "तीं", "ाओं", "ाएं", "ुओं", "ुएं", "ुआं" };
	private static readonly string[] len2Suffixes = { "कर", "ाओ", "िए", "ाई", "ाए", "ने", "नी", "ना", "ते", "ीं", "ती", "ता", "ाँ", "ां", "ों", "ें" };
	private static readonly string[] len1Suffixes = { "ो", "े", "ू", "ु", "ी", "ि", "ा" };

	/// <summary>
	/// Stems the first <paramref name="len"/> chars of <paramref name="buffer"/>
	/// and returns the stemmed length. Tries suffix groups from longest (5) to
	/// shortest (1); the first matching group wins. A suffix of length k is only
	/// stripped when len > k + 1, so at least two characters always remain.
	/// </summary>
	public virtual int stem(char[] buffer, int len)
	{
		if (len > 6 && endsWithAny(buffer, len, len5Suffixes))
		{
			return len - 5;
		}
		if (len > 5 && endsWithAny(buffer, len, len4Suffixes))
		{
			return len - 4;
		}
		if (len > 4 && endsWithAny(buffer, len, len3Suffixes))
		{
			return len - 3;
		}
		if (len > 3 && endsWithAny(buffer, len, len2Suffixes))
		{
			return len - 2;
		}
		if (len > 2 && endsWithAny(buffer, len, len1Suffixes))
		{
			return len - 1;
		}
		return len;
	}

	// True when the buffer[0..len) ends with any suffix in the group.
	private static bool endsWithAny(char[] buffer, int len, string[] suffixes)
	{
		foreach (string suffix in suffixes)
		{
			if (StemmerUtil.EndsWith(buffer, len, suffix))
			{
				return true;
			}
		}
		return false;
	}
}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
new file mode 100644
index 0000000..a47d86a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianAnalyzer.cs
@@ -0,0 +1,139 @@
+using System;
+
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using HungarianStemmer = org.tartarus.snowball.ext.HungarianStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Hungarian.
+	/// </summary>
+	public sealed class HungarianAnalyzer : StopwordAnalyzerBase
+	{
+	  // Terms in this set are flagged via SetKeywordMarkerFilter in
+	  // createComponents and therefore bypass the Snowball stemmer.
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Hungarian stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "hungarian_stop.txt";
+
+	  /// <summary>
+	  /// Returns an unmodifiable instance of the default stop words set. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+	  /// accesses the static final set the first time.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			// NOTE(review): raw Java port — 'StandardCharsets' is a Java type with no
+			// using directive or .NET equivalent in scope here; presumably this becomes
+			// Encoding.UTF8 (System.Text) when the port is completed. TODO confirm
+			// against the final Lucene.Net util APIs.
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  // NOTE(review): IOException has no using directive in this file — presumably
+		  // System.IO.IOException is intended. TODO confirm.
+		  catch (IOException)
+		  {
+			// default set should always be present as it is part of the
+			// distribution (JAR)
+			throw new Exception("Unable to load default stopword set");
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="#DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  public HungarianAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed </param>
+	  public HungarianAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		// Defensive copy: later mutation of the caller's set cannot affect this analyzer.
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided and <seealso cref="SnowballFilter"/>. </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.analysis.Tokenizer source = new org.apache.lucene.analysis.standard.StandardTokenizer(matchVersion, reader);
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		// Pipeline: StandardFilter -> LowerCaseFilter -> StopFilter
+		// [-> SetKeywordMarkerFilter] -> SnowballFilter(HungarianStemmer).
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		result = new SnowballFilter(result, new HungarianStemmer());
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs
new file mode 100644
index 0000000..c51040b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="HungarianLightStemmer"/> to stem
+	/// Hungarian words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
public sealed class HungarianLightStemFilter : TokenFilter
{
	private readonly HungarianLightStemmer stemmer = new HungarianLightStemmer();
	private readonly CharTermAttribute termAtt;
	private readonly KeywordAttribute keywordAttr;

	/// <summary>
	/// Creates a filter that stems Hungarian terms produced by
	/// <paramref name="input"/>. </summary>
	public HungarianLightStemFilter(TokenStream input) : base(input)
	{
		// FIX: a C# field initializer cannot reference the non-static member
		// addAttribute (compiler error CS0236), unlike the Java original, so the
		// attributes are acquired in the constructor instead of at the declarations.
		termAtt = addAttribute(typeof(CharTermAttribute));
		keywordAttr = addAttribute(typeof(KeywordAttribute));
	}

	/// <summary>
	/// Advances to the next token and, unless the token is marked as a keyword
	/// (see <seealso cref="KeywordAttribute"/>), stems it in place by truncating
	/// the term buffer to the stemmer's returned length. </summary>
	/// <returns> false when the underlying stream is exhausted. </returns>
	public override bool incrementToken()
	{
		if (!input.incrementToken())
		{
			return false;
		}
		if (!keywordAttr.Keyword)
		{
			termAtt.Length = stemmer.stem(termAtt.buffer(), termAtt.length());
		}
		return true;
	}
}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs
new file mode 100644
index 0000000..090c64b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="HungarianLightStemFilter"/>. 
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.HungarianLightStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
public class HungarianLightStemFilterFactory : TokenFilterFactory
{
	/// <summary>
	/// Creates a new HungarianLightStemFilterFactory. The base class consumes
	/// all known options; any entry left in <paramref name="args"/> is
	/// unsupported and rejected. </summary>
	public HungarianLightStemFilterFactory(IDictionary<string, string> args) : base(args)
	{
		if (args.Count != 0)
		{
			throw new System.ArgumentException("Unknown parameters: " + args);
		}
	}

	/// <summary>
	/// Wraps the given stream with a <seealso cref="HungarianLightStemFilter"/>. </summary>
	public override TokenStream create(TokenStream input)
	{
		return new HungarianLightStemFilter(input);
	}
}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs
new file mode 100644
index 0000000..eb29272
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hu/HungarianLightStemmer.cs
@@ -0,0 +1,292 @@
+namespace org.apache.lucene.analysis.hu
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/* 
+	 * This algorithm is updated based on code located at:
+	 * http://members.unine.ch/jacques.savoy/clef/
+	 * 
+	 * Full copyright for that code follows:
+	 */
+
+	/*
+	 * Copyright (c) 2005, Jacques Savoy
+	 * All rights reserved.
+	 *
+	 * Redistribution and use in source and binary forms, with or without 
+	 * modification, are permitted provided that the following conditions are met:
+	 *
+	 * Redistributions of source code must retain the above copyright notice, this 
+	 * list of conditions and the following disclaimer. Redistributions in binary 
+	 * form must reproduce the above copyright notice, this list of conditions and
+	 * the following disclaimer in the documentation and/or other materials 
+	 * provided with the distribution. Neither the name of the author nor the names 
+	 * of its contributors may be used to endorse or promote products derived from 
+	 * this software without specific prior written permission.
+	 * 
+	 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+	 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+	 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+	 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+	 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+	 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+	 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+	 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+	 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+	 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+	 * POSSIBILITY OF SUCH DAMAGE.
+	 */
+
+	using org.apache.lucene.analysis.util;
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+	/// <summary>
+	/// Light Stemmer for Hungarian.
+	/// <para>
+	/// This stemmer implements the "UniNE" algorithm in:
+	/// <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+	/// Jacques Savoy
+	/// </para>
+	/// </summary>
public class HungarianLightStemmer
{
	/// <summary>
	/// Stems the first <paramref name="len"/> characters of <paramref name="s"/>
	/// in place and returns the new length: accented vowels are folded to their
	/// base forms, then case, possessive and plural endings are stripped and the
	/// result normalized.
	/// </summary>
	public virtual int stem(char[] s, int len)
	{
		// Fold accented vowels to their unaccented forms before suffix matching.
		for (int i = 0; i < len; i++)
		{
			switch (s[i])
			{
				case 'á':
					s[i] = 'a';
					break;
				case 'ë':
				case 'é':
					s[i] = 'e';
					break;
				case 'í':
					s[i] = 'i';
					break;
				case 'ó':
				case 'ő':
				case 'õ':
				case 'ö':
					s[i] = 'o';
					break;
				case 'ú':
				case 'ű':
				case 'ũ':
				case 'û':
				case 'ü':
					s[i] = 'u';
					break;
			}
		}

		len = removeCase(s, len);
		len = removePossessive(s, len);
		len = removePlural(s, len);
		return normalize(s, len);
	}

	// Strips case endings (e.g. -kent, -nak/-nek, -ban/-ben, -t/-n).
	private int removeCase(char[] s, int len)
	{
		if (len > 6 && StemmerUtil.EndsWith(s, len, "kent"))
		{
			return len - 4;
		}

		if (len > 5)
		{
			if (StemmerUtil.EndsWith(s, len, "nak") || StemmerUtil.EndsWith(s, len, "nek") || StemmerUtil.EndsWith(s, len, "val") || StemmerUtil.EndsWith(s, len, "vel") || StemmerUtil.EndsWith(s, len, "ert") || StemmerUtil.EndsWith(s, len, "rol") || StemmerUtil.EndsWith(s, len, "ban") || StemmerUtil.EndsWith(s, len, "ben") || StemmerUtil.EndsWith(s, len, "bol") || StemmerUtil.EndsWith(s, len, "nal") || StemmerUtil.EndsWith(s, len, "nel") || StemmerUtil.EndsWith(s, len, "hoz") || StemmerUtil.EndsWith(s, len, "hez") || StemmerUtil.EndsWith(s, len, "tol"))
			{
				return len - 3;
			}

			// -al/-el strips three chars, but only after a doubled non-vowel
			// (e.g. "...bbal").
			if (StemmerUtil.EndsWith(s, len, "al") || StemmerUtil.EndsWith(s, len, "el"))
			{
				if (!isVowel(s[len - 3]) && s[len - 3] == s[len - 4])
				{
					return len - 3;
				}
			}
		}

		if (len > 4)
		{
			if (StemmerUtil.EndsWith(s, len, "at") || StemmerUtil.EndsWith(s, len, "et") || StemmerUtil.EndsWith(s, len, "ot") || StemmerUtil.EndsWith(s, len, "va") || StemmerUtil.EndsWith(s, len, "ve") || StemmerUtil.EndsWith(s, len, "ra") || StemmerUtil.EndsWith(s, len, "re") || StemmerUtil.EndsWith(s, len, "ba") || StemmerUtil.EndsWith(s, len, "be") || StemmerUtil.EndsWith(s, len, "ul") || StemmerUtil.EndsWith(s, len, "ig"))
			{
				return len - 2;
			}

			if ((StemmerUtil.EndsWith(s, len, "on") || StemmerUtil.EndsWith(s, len, "en")) && !isVowel(s[len - 3]))
			{
				return len - 2;
			}

			switch (s[len - 1])
			{
				case 't':
				case 'n':
					return len - 1;
				case 'a':
				case 'e':
					if (s[len - 2] == s[len - 3] && !isVowel(s[len - 2]))
					{
						return len - 2;
					}
					// FIX: the raw port let control fall out of this case section
					// when the condition above was false, which is a C# compile
					// error (CS8070); an explicit break preserves the Java
					// behavior of falling through to 'return len'.
					break;
			}
		}

		return len;
	}

	// Strips possessive endings (e.g. -unk, -juk, -am/-em, -ja/-je, -i).
	private int removePossessive(char[] s, int len)
	{
		if (len > 6)
		{
			if (!isVowel(s[len - 5]) && (StemmerUtil.EndsWith(s, len, "atok") || StemmerUtil.EndsWith(s, len, "otok") || StemmerUtil.EndsWith(s, len, "etek")))
			{
				return len - 4;
			}

			if (StemmerUtil.EndsWith(s, len, "itek") || StemmerUtil.EndsWith(s, len, "itok"))
			{
				return len - 4;
			}
		}

		if (len > 5)
		{
			if (!isVowel(s[len - 4]) && (StemmerUtil.EndsWith(s, len, "unk") || StemmerUtil.EndsWith(s, len, "tok") || StemmerUtil.EndsWith(s, len, "tek")))
			{
				return len - 3;
			}

			if (isVowel(s[len - 4]) && StemmerUtil.EndsWith(s, len, "juk"))
			{
				return len - 3;
			}

			if (StemmerUtil.EndsWith(s, len, "ink"))
			{
				return len - 3;
			}
		}

		if (len > 4)
		{
			if (!isVowel(s[len - 3]) && (StemmerUtil.EndsWith(s, len, "am") || StemmerUtil.EndsWith(s, len, "em") || StemmerUtil.EndsWith(s, len, "om") || StemmerUtil.EndsWith(s, len, "ad") || StemmerUtil.EndsWith(s, len, "ed") || StemmerUtil.EndsWith(s, len, "od") || StemmerUtil.EndsWith(s, len, "uk")))
			{
				return len - 2;
			}

			if (isVowel(s[len - 3]) && (StemmerUtil.EndsWith(s, len, "nk") || StemmerUtil.EndsWith(s, len, "ja") || StemmerUtil.EndsWith(s, len, "je")))
			{
				return len - 2;
			}

			if (StemmerUtil.EndsWith(s, len, "im") || StemmerUtil.EndsWith(s, len, "id") || StemmerUtil.EndsWith(s, len, "ik"))
			{
				return len - 2;
			}
		}

		if (len > 3)
		{
			switch (s[len - 1])
			{
				case 'a':
				case 'e':
					if (!isVowel(s[len - 2]))
					{
						return len - 1;
					}
					break;
				case 'm':
				case 'd':
					if (isVowel(s[len - 2]))
					{
						return len - 1;
					}
					break;
				case 'i':
					return len - 1;
			}
		}

		return len;
	}

	// Strips the plural marker -k, together with a preceding linking vowel
	// a/o/e when enough of the word remains (len > 4).
	//
	// FIX: the Java original used an intentional switch fallthrough
	// (@SuppressWarnings("fallthrough")) from the 'a'/'o'/'e' section into
	// 'default' when len <= 4; that is a compile error in C# (CS0163/CS8070),
	// so the same logic is expressed here with explicit conditional returns.
	private int removePlural(char[] s, int len)
	{
		if (len > 3 && s[len - 1] == 'k')
		{
			switch (s[len - 2])
			{
				case 'a':
				case 'o':
				case 'e':
					return len > 4 ? len - 2 : len - 1;
				default:
					return len - 1;
			}
		}
		return len;
	}

	// Drops a single trailing a/e/i/o left over after suffix stripping.
	private int normalize(char[] s, int len)
	{
		if (len > 3)
		{
			switch (s[len - 1])
			{
				case 'a':
				case 'e':
				case 'i':
				case 'o':
					return len - 1;
			}
		}
		return len;
	}

	// Vowel test over the already-folded (unaccented) alphabet; 'y' counts.
	private bool isVowel(char ch)
	{
		switch (ch)
		{
			case 'a':
			case 'e':
			case 'i':
			case 'o':
			case 'u':
			case 'y':
				return true;
			default:
				return false;
		}
	}
}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
new file mode 100644
index 0000000..ff6f4e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -0,0 +1,1235 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using ByteArrayDataOutput = org.apache.lucene.store.ByteArrayDataOutput;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using BytesRefHash = org.apache.lucene.util.BytesRefHash;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using IntsRef = org.apache.lucene.util.IntsRef;
+	using OfflineSorter = org.apache.lucene.util.OfflineSorter;
+	using ByteSequencesReader = org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
+	using ByteSequencesWriter = org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
+	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+	using RegExp = org.apache.lucene.util.automaton.RegExp;
+	using Builder = org.apache.lucene.util.fst.Builder;
+	using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
+	using FST = org.apache.lucene.util.fst.FST;
+	using IntSequenceOutputs = org.apache.lucene.util.fst.IntSequenceOutputs;
+	using Outputs = org.apache.lucene.util.fst.Outputs;
+	using Util = org.apache.lucene.util.fst.Util;
+
+
+	/// <summary>
+	/// In-memory structure for the dictionary (.dic) and affix (.aff)
+	/// data of a hunspell dictionary.
+	/// </summary>
+	public class Dictionary
+	{
+
+	  // Shared empty flag set used for dictionary entries that carry no flags.
+	  internal static readonly char[] NOFLAGS = new char[0];
+
+	  // Directive keywords recognized while parsing the .aff file.
+	  private const string ALIAS_KEY = "AF";
+	  private const string PREFIX_KEY = "PFX";
+	  private const string SUFFIX_KEY = "SFX";
+	  private const string FLAG_KEY = "FLAG";
+	  private const string COMPLEXPREFIXES_KEY = "COMPLEXPREFIXES";
+	  private const string CIRCUMFIX_KEY = "CIRCUMFIX";
+	  private const string IGNORE_KEY = "IGNORE";
+	  private const string ICONV_KEY = "ICONV";
+	  private const string OCONV_KEY = "OCONV";
+
+	  // Legal values of the FLAG directive, each selecting a FlagParsingStrategy.
+	  private const string NUM_FLAG_TYPE = "num";
+	  private const string UTF8_FLAG_TYPE = "UTF-8";
+	  private const string LONG_FLAG_TYPE = "long";
+
+	  // TODO: really for suffixes we should reverse the automaton and run them backwards
+	  private const string PREFIX_CONDITION_REGEX_PATTERN = "%s.*";
+	  private const string SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";
+
+	  internal FST<IntsRef> prefixes;
+	  internal FST<IntsRef> suffixes;
+
+	  // all condition checks used by prefixes and suffixes. these are typically re-used across
+	  // many affix stripping rules. so these are deduplicated, to save RAM.
+	  internal List<CharacterRunAutomaton> patterns = new List<CharacterRunAutomaton>();
+
+	  // the entries in the .dic file, mapping to their set of flags.
+	  // the fst output is the ordinal list for flagLookup
+	  internal FST<IntsRef> words;
+	  // the list of unique flagsets (wordforms). theoretically huge, but practically
+	  // small (e.g. for polish this is 756), otherwise humans wouldn't be able to deal with it either.
+	  internal BytesRefHash flagLookup = new BytesRefHash();
+
+	  // the list of unique strip affixes.
+	  // stripData holds all strips concatenated; stripOffsets[i]..stripOffsets[i+1]
+	  // delimits strip ordinal i (see readAffixFile).
+	  internal char[] stripData;
+	  internal int[] stripOffsets;
+
+	  // 8 bytes per affix
+	  // (flag, stripOrd, patternOrd|crossProduct, appendFlagsOrd — four shorts,
+	  // written by parseAffix via ByteArrayDataOutput)
+	  internal sbyte[] affixData = new sbyte[64];
+	  private int currentAffix = 0;
+
+	  private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
+
+	  // AF alias table (see parseAlias/getAliasValue); released after construction.
+	  private string[] aliases;
+	  private int aliasCount = 0;
+
+	  // NOTE(review): java.io.File — raw mechanical port; replace with a
+	  // System.IO temp-directory mechanism before this can compile on .NET.
+	  private readonly File tempDir = OfflineSorter.defaultTempDir(); // TODO: make this configurable?
+
+	  internal bool ignoreCase;
+	  internal bool complexPrefixes;
+	  internal bool twoStageAffix; // if no affixes have continuation classes, no need to do 2-level affix stripping
+
+	  internal int circumfix = -1; // circumfix flag, or -1 if one is not defined
+
+	  // ignored characters (dictionary, affix, inputs)
+	  private char[] ignore;
+
+	  // FSTs used for ICONV/OCONV, output ord pointing to replacement text
+	  internal FST<CharsRef> iconv;
+	  internal FST<CharsRef> oconv;
+
+	  internal bool needsInputCleaning;
+	  internal bool needsOutputCleaning;
+
+	  /// <summary>
+	  /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
+	  /// and dictionary files.
+	  /// You have to close the provided InputStreams yourself.
+	  /// </summary>
+	  /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
+	  /// <param name="dictionary"> InputStream for reading the hunspell dictionary file (won't be closed). </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
+	  /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public Dictionary(java.io.InputStream affix, java.io.InputStream dictionary) throws java.io.IOException, java.text.ParseException
+	  // Convenience overload: delegates to the main constructor with a single
+	  // dictionary stream and case-sensitive matching (ignoreCase = false).
+	  public Dictionary(InputStream affix, InputStream dictionary) : this(affix, Collections.singletonList(dictionary), false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
+	  /// and dictionary files.
+	  /// You have to close the provided InputStreams yourself.
+	  /// </summary>
+	  /// <param name="affix"> InputStream for reading the hunspell affix file (won't be closed). </param>
+	  /// <param name="dictionaries"> InputStream for reading the hunspell dictionary files (won't be closed). </param>
+	  /// <param name="ignoreCase"> true to fold case when cleaning input (see needsInputCleaning). </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStreams </exception>
+	  /// <exception cref="ParseException"> Can be thrown if the content of the files does not meet expected formats </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public Dictionary(java.io.InputStream affix, java.util.List<java.io.InputStream> dictionaries, boolean ignoreCase) throws java.io.IOException, java.text.ParseException
+	  // NOTE(review): File/InputStream/OutputStream/Buffered*Stream below are Java
+	  // types with no .NET equivalent in scope; this is a raw mechanical port and
+	  // needs rewriting on System.IO streams before it can compile.
+	  public Dictionary(InputStream affix, IList<InputStream> dictionaries, bool ignoreCase)
+	  {
+		this.ignoreCase = ignoreCase;
+		this.needsInputCleaning = ignoreCase;
+		this.needsOutputCleaning = false; // set if we have an OCONV
+		flagLookup.add(new BytesRef()); // no flags -> ord 0
+
+		// The affix stream must be read twice (encoding pass + parse pass), so
+		// buffer it into a temp file first; the temp file is deleted in finally.
+		File aff = File.createTempFile("affix", "aff", tempDir);
+		OutputStream @out = new BufferedOutputStream(new FileOutputStream(aff));
+		InputStream aff1 = null;
+		InputStream aff2 = null;
+		try
+		{
+		  // copy contents of affix stream to temp file
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final byte [] buffer = new byte [1024 * 8];
+		  sbyte[] buffer = new sbyte [1024 * 8];
+		  int len;
+		  while ((len = affix.read(buffer)) > 0)
+		  {
+			@out.write(buffer, 0, len);
+		  }
+		  @out.close();
+
+		  // pass 1: get encoding
+		  aff1 = new BufferedInputStream(new FileInputStream(aff));
+		  string encoding = getDictionaryEncoding(aff1);
+
+		  // pass 2: parse affixes
+		  CharsetDecoder decoder = getJavaEncoding(encoding);
+		  aff2 = new BufferedInputStream(new FileInputStream(aff));
+		  readAffixFile(aff2, decoder);
+
+		  // read dictionary entries
+		  IntSequenceOutputs o = IntSequenceOutputs.Singleton;
+		  Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
+		  readDictionaryFiles(dictionaries, decoder, b);
+		  words = b.finish();
+		  aliases = null; // no longer needed
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(@out, aff1, aff2);
+		  aff.delete();
+		}
+	  }
+
+	  /// <summary>
+	  /// Looks up Hunspell word forms from the dictionary
+	  /// </summary>
+	  /// <param name="word"> Char array holding the word to look up </param>
+	  /// <param name="offset"> Offset in the char array where the word starts </param>
+	  /// <param name="length"> Length of the word from the offset </param>
+	  /// <returns> Ordinal list (into flagLookup) stored in the words FST, or null if the word is absent </returns>
+	  internal virtual IntsRef lookupWord(char[] word, int offset, int length)
+	  {
+		// Delegates to the generic FST lookup over the words FST.
+		return lookup(words, word, offset, length);
+	  }
+
+	  /// <summary>
+	  /// Looks up HunspellAffix prefixes that have an append that matches the String created from the given char array, offset and length
+	  /// </summary>
+	  /// <param name="word"> Char array to generate the String from </param>
+	  /// <param name="offset"> Offset in the char array that the String starts at </param>
+	  /// <param name="length"> Length from the offset that the String is </param>
+	  /// <returns> List of HunspellAffix prefixes with an append that matches the String, or {@code null} if none are found </returns>
+	  internal virtual IntsRef lookupPrefix(char[] word, int offset, int length)
+	  {
+		// Delegates to the generic FST lookup over the prefixes FST.
+		return lookup(prefixes, word, offset, length);
+	  }
+
+	  /// <summary>
+	  /// Looks up HunspellAffix suffixes that have an append that matches the String created from the given char array, offset and length
+	  /// </summary>
+	  /// <param name="word"> Char array to generate the String from </param>
+	  /// <param name="offset"> Offset in the char array that the String starts at </param>
+	  /// <param name="length"> Length from the offset that the String is </param>
+	  /// <returns> List of HunspellAffix suffixes with an append that matches the String, or {@code null} if none are found </returns>
+	  internal virtual IntsRef lookupSuffix(char[] word, int offset, int length)
+	  {
+		// Delegates to the generic FST lookup over the suffixes FST.
+		return lookup(suffixes, word, offset, length);
+	  }
+
+	  // TODO: this is pretty stupid, considering how the stemming algorithm works
+	  // we can speed it up to be significantly faster!
+	  /// <summary>
+	  /// Walks <paramref name="fst"/> over the code points of
+	  /// word[offset..offset+length) and returns the accumulated output,
+	  /// or null when the FST is null or does not accept the word.
+	  /// </summary>
+	  internal virtual IntsRef lookup(FST<IntsRef> fst, char[] word, int offset, int length)
+	  {
+		if (fst == null)
+		{
+		  return null;
+		}
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.BytesReader bytesReader = fst.getBytesReader();
+		FST.BytesReader bytesReader = fst.BytesReader;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.IntsRef> arc = fst.getFirstArc(new org.apache.lucene.util.fst.FST.Arc<org.apache.lucene.util.IntsRef>());
+		FST.Arc<IntsRef> arc = fst.getFirstArc(new FST.Arc<IntsRef>());
+		// Accumulate output as we go
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.IntsRef NO_OUTPUT = fst.outputs.getNoOutput();
+		IntsRef NO_OUTPUT = fst.outputs.NoOutput;
+		IntsRef output = NO_OUTPUT;
+
+		int l = offset + length;
+		try
+		{
+		  // NOTE(review): char.charCount/char.codePointAt are Java Character
+		  // methods; the .NET port needs explicit surrogate-pair handling here
+		  // (e.g. System.Char.IsHighSurrogate/ConvertToUtf32) — confirm during port.
+		  for (int i = offset, cp = 0; i < l; i += char.charCount(cp))
+		  {
+			cp = char.codePointAt(word, i, l);
+			if (fst.findTargetArc(cp, arc, arc, bytesReader) == null)
+			{
+			  return null;
+			}
+			else if (arc.output != NO_OUTPUT)
+			{
+			  output = fst.outputs.add(output, arc.output);
+			}
+		  }
+		  // Consume the end-of-input label to confirm the word is accepted.
+		  if (fst.findTargetArc(FST.END_LABEL, arc, arc, bytesReader) == null)
+		  {
+			return null;
+		  }
+		  else if (arc.output != NO_OUTPUT)
+		  {
+			return fst.outputs.add(output, arc.output);
+		  }
+		  else
+		  {
+			return output;
+		  }
+		}
+		catch (IOException bogus)
+		{
+		  // The FST is entirely in memory, so an IOException here is unexpected.
+		  // System.Exception has no (Exception) constructor (a Java
+		  // RuntimeException(Throwable) idiom the raw port copied), so wrap
+		  // explicitly while preserving the inner exception.
+		  throw new Exception(bogus.ToString(), bogus);
+		}
+	  }
+
+	  /// <summary>
+	  /// Reads the affix file through the provided InputStream, building up the prefix and suffix maps
+	  /// </summary>
+	  /// <param name="affixStream"> InputStream to read the content of the affix file from </param>
+	  /// <param name="decoder"> CharsetDecoder to decode the content of the file </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void readAffixFile(java.io.InputStream affixStream, java.nio.charset.CharsetDecoder decoder) throws java.io.IOException, java.text.ParseException
+	  // Parses the .aff file line by line, dispatching on the directive keyword
+	  // (AF/PFX/SFX/FLAG/COMPLEXPREFIXES/CIRCUMFIX/IGNORE/ICONV/OCONV), then
+	  // freezes the affix maps into FSTs and packs the unique strip strings.
+	  private void readAffixFile(InputStream affixStream, CharsetDecoder decoder)
+	  {
+		SortedDictionary<string, IList<char?>> prefixes = new SortedDictionary<string, IList<char?>>();
+		SortedDictionary<string, IList<char?>> suffixes = new SortedDictionary<string, IList<char?>>();
+		IDictionary<string, int?> seenPatterns = new Dictionary<string, int?>();
+
+		// zero condition -> 0 ord
+		seenPatterns[".*"] = 0;
+		patterns.Add(null);
+
+		// zero strip -> 0 ord
+		IDictionary<string, int?> seenStrips = new LinkedHashMap<string, int?>();
+		seenStrips[""] = 0;
+
+		// NOTE(review): LineNumberReader/InputStreamReader are Java types; the
+		// port needs a .NET reader that tracks line numbers for ParseException.
+		LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
+		string line = null;
+		while ((line = reader.readLine()) != null)
+		{
+		  // ignore any BOM marker on first line
+		  if (reader.LineNumber == 1 && line.StartsWith("\uFEFF", StringComparison.Ordinal))
+		  {
+			line = line.Substring(1);
+		  }
+		  if (line.StartsWith(ALIAS_KEY, StringComparison.Ordinal))
+		  {
+			parseAlias(line);
+		  }
+		  else if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal))
+		  {
+			parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+		  }
+		  else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal))
+		  {
+			parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, seenPatterns, seenStrips);
+		  }
+		  else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal))
+		  {
+			// Assume that the FLAG line comes before any prefix or suffixes
+			// Store the strategy so it can be used when parsing the dic file
+			flagParsingStrategy = getFlagParsingStrategy(line);
+		  }
+		  else if (line.Equals(COMPLEXPREFIXES_KEY))
+		  {
+			complexPrefixes = true; // 2-stage prefix+1-stage suffix instead of 2-stage suffix+1-stage prefix
+		  }
+		  else if (line.StartsWith(CIRCUMFIX_KEY, StringComparison.Ordinal))
+		  {
+			// NOTE(review): Split("\\s+", true) is a Java regex-split idiom; .NET
+			// string.Split has no such overload — presumably Regex.Split was
+			// intended. The same pattern recurs below; confirm during the port.
+			string[] parts = line.Split("\\s+", true);
+			if (parts.Length != 2)
+			{
+			  throw new ParseException("Illegal CIRCUMFIX declaration", reader.LineNumber);
+			}
+			circumfix = flagParsingStrategy.parseFlag(parts[1]);
+		  }
+		  else if (line.StartsWith(IGNORE_KEY, StringComparison.Ordinal))
+		  {
+			string[] parts = line.Split("\\s+", true);
+			if (parts.Length != 2)
+			{
+			  throw new ParseException("Illegal IGNORE declaration", reader.LineNumber);
+			}
+			// sorted so cleanInput can binary-search the ignored characters
+			ignore = parts[1].ToCharArray();
+			Arrays.sort(ignore);
+			needsInputCleaning = true;
+		  }
+		  else if (line.StartsWith(ICONV_KEY, StringComparison.Ordinal) || line.StartsWith(OCONV_KEY, StringComparison.Ordinal))
+		  {
+			string[] parts = line.Split("\\s+", true);
+			string type = parts[0];
+			if (parts.Length != 2)
+			{
+			  throw new ParseException("Illegal " + type + " declaration", reader.LineNumber);
+			}
+			int num = int.Parse(parts[1]);
+			FST<CharsRef> res = parseConversions(reader, num);
+			if (type.Equals("ICONV"))
+			{
+			  iconv = res;
+			  needsInputCleaning |= iconv != null;
+			}
+			else
+			{
+			  oconv = res;
+			  needsOutputCleaning |= oconv != null;
+			}
+		  }
+		}
+
+		// freeze the accumulated affix maps into FSTs
+		this.prefixes = affixFST(prefixes);
+		this.suffixes = affixFST(suffixes);
+
+		// concatenate all unique strip strings into one shared char[] with an
+		// offsets table (stripOffsets has one trailing sentinel entry).
+		int totalChars = 0;
+		foreach (string strip in seenStrips.Keys)
+		{
+		  totalChars += strip.Length;
+		}
+		stripData = new char[totalChars];
+		stripOffsets = new int[seenStrips.Count + 1];
+		int currentOffset = 0;
+		int currentIndex = 0;
+		foreach (string strip in seenStrips.Keys)
+		{
+		  stripOffsets[currentIndex++] = currentOffset;
+		  strip.CopyTo(0, stripData, currentOffset, strip.Length - 0);
+		  currentOffset += strip.Length;
+		}
+		Debug.Assert(currentIndex == seenStrips.Count);
+		stripOffsets[currentIndex] = currentOffset;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.util.fst.FST<org.apache.lucene.util.IntsRef> affixFST(java.util.TreeMap<String,java.util.List<Character>> affixes) throws java.io.IOException
+	  /// <summary>
+	  /// Builds an FST that maps each affix append string (as UTF-32 keys) to
+	  /// the list of affix ids sharing that append. Keys must be added in
+	  /// sorted order, which the SortedDictionary guarantees.
+	  /// </summary>
+	  private FST<IntsRef> affixFST(SortedDictionary<string, IList<char?>> affixes)
+	  {
+		IntSequenceOutputs outputs = IntSequenceOutputs.Singleton;
+		Builder<IntsRef> builder = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, outputs);
+
+		IntsRef scratch = new IntsRef();
+		foreach (KeyValuePair<string, IList<char?>> entry in affixes.SetOfKeyValuePairs())
+		{
+		  Util.toUTF32(entry.Key, scratch);
+		  IList<char?> entries = entry.Value;
+		  IntsRef output = new IntsRef(entries.Count);
+		  foreach (char? c in entries)
+		  {
+			output.ints[output.length++] = c;
+		  }
+		  builder.add(scratch, output);
+		}
+		return builder.finish();
+	  }
+
+	  /// <summary>
+	  /// Parses a specific affix rule putting the result into the provided affix map
+	  /// </summary>
+	  /// <param name="affixes"> Map where the result of the parsing will be put </param>
+	  /// <param name="header"> Header line of the affix rule </param>
+	  /// <param name="reader"> BufferedReader to read the content of the rule from </param>
+	  /// <param name="conditionPattern"> <seealso cref="String#format(String, Object...)"/> pattern to be used to generate the condition regex
+	  ///                         pattern </param>
+	  /// <param name="seenPatterns"> map from condition -> index of patterns, for deduplication. </param>
+	  /// <param name="seenStrips"> map from strip string -> strip ordinal, for deduplication. </param>
+	  /// <exception cref="IOException"> Can be thrown while reading the rule </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void parseAffix(java.util.TreeMap<String,java.util.List<Character>> affixes, String header, java.io.LineNumberReader reader, String conditionPattern, java.util.Map<String,Integer> seenPatterns, java.util.Map<String,Integer> seenStrips) throws java.io.IOException, java.text.ParseException
+	  private void parseAffix(SortedDictionary<string, IList<char?>> affixes, string header, LineNumberReader reader, string conditionPattern, IDictionary<string, int?> seenPatterns, IDictionary<string, int?> seenStrips)
+	  {
+
+		BytesRef scratch = new BytesRef();
+		StringBuilder sb = new StringBuilder();
+		// NOTE(review): Split("\\s+", true) is a Java regex-split idiom carried
+		// over by the converter; .NET needs Regex.Split (recurs below).
+		string[] args = header.Split("\\s+", true);
+
+		bool crossProduct = args[2].Equals("Y");
+
+		// each rule is encoded as four shorts (8 bytes) appended to affixData
+		int numLines = int.Parse(args[3]);
+		affixData = ArrayUtil.grow(affixData, (currentAffix << 3) + (numLines << 3));
+		ByteArrayDataOutput affixWriter = new ByteArrayDataOutput(affixData, currentAffix << 3, numLines << 3);
+
+		for (int i = 0; i < numLines; i++)
+		{
+		  Debug.Assert(affixWriter.Position == currentAffix << 3);
+		  string line = reader.readLine();
+		  string[] ruleArgs = line.Split("\\s+", true);
+
+		  // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
+		  // condition is optional
+		  if (ruleArgs.Length < 4)
+		  {
+			  throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.LineNumber);
+		  }
+
+		  char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
+		  string strip = ruleArgs[2].Equals("0") ? "" : ruleArgs[2];
+		  string affixArg = ruleArgs[3];
+		  char[] appendFlags = null;
+
+		  // "append/FLAGS" syntax: everything after the last '/' is a
+		  // continuation-class flag list (or an AF alias number).
+		  int flagSep = affixArg.LastIndexOf('/');
+		  if (flagSep != -1)
+		  {
+			string flagPart = affixArg.Substring(flagSep + 1);
+			affixArg = affixArg.Substring(0, flagSep);
+
+			if (aliasCount > 0)
+			{
+			  flagPart = getAliasValue(int.Parse(flagPart));
+			}
+
+			appendFlags = flagParsingStrategy.parseFlags(flagPart);
+			Arrays.sort(appendFlags);
+			twoStageAffix = true;
+		  }
+
+		  // TODO: add test and fix zero-affix handling!
+
+		  string condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
+		  // at least the gascon affix file has this issue
+		  if (condition.StartsWith("[", StringComparison.Ordinal) && !condition.EndsWith("]", StringComparison.Ordinal))
+		  {
+			condition = condition + "]";
+		  }
+		  // "dash hasn't got special meaning" (we must escape it)
+		  if (condition.IndexOf('-') >= 0)
+		  {
+			condition = condition.Replace("-", "\\-");
+		  }
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final String regex;
+		  string regex;
+		  if (".".Equals(condition))
+		  {
+			regex = ".*"; // Zero condition is indicated by dot
+		  }
+		  else if (condition.Equals(strip))
+		  {
+			regex = ".*"; // TODO: optimize this better:
+						  // if we remove 'strip' from condition, we don't have to append 'strip' to check it...!
+						  // but this is complicated...
+		  }
+		  else
+		  {
+			// NOTE(review): string.format/Locale.ROOT are Java APIs; .NET needs
+			// string.Format(CultureInfo.InvariantCulture, ...).
+			regex = string.format(Locale.ROOT, conditionPattern, condition);
+		  }
+
+		  // deduplicate patterns
+		  // NOTE(review): a .NET Dictionary indexer throws KeyNotFoundException
+		  // for a missing key, unlike Java's Map.get which returned null; this
+		  // null-check pattern (also used for stripOrd and affixes[affixArg]
+		  // below) needs TryGetValue in the real port.
+		  int? patternIndex = seenPatterns[regex];
+		  if (patternIndex == null)
+		  {
+			patternIndex = patterns.Count;
+			if (patternIndex > short.MaxValue)
+			{
+			  throw new System.NotSupportedException("Too many patterns, please report this to dev@lucene.apache.org");
+			}
+			seenPatterns[regex] = patternIndex;
+			CharacterRunAutomaton pattern = new CharacterRunAutomaton((new RegExp(regex, RegExp.NONE)).toAutomaton());
+			patterns.Add(pattern);
+		  }
+
+		  int? stripOrd = seenStrips[strip];
+		  if (stripOrd == null)
+		  {
+			stripOrd = seenStrips.Count;
+			seenStrips[strip] = stripOrd;
+			if (stripOrd > Char.MaxValue)
+			{
+			  throw new System.NotSupportedException("Too many unique strips, please report this to dev@lucene.apache.org");
+			}
+		  }
+
+		  if (appendFlags == null)
+		  {
+			appendFlags = NOFLAGS;
+		  }
+
+		  encodeFlags(scratch, appendFlags);
+		  int appendFlagsOrd = flagLookup.add(scratch);
+		  if (appendFlagsOrd < 0)
+		  {
+			// already exists in our hash
+			appendFlagsOrd = (-appendFlagsOrd) - 1;
+		  }
+		  else if (appendFlagsOrd > short.MaxValue)
+		  {
+			// this limit is probably flexible, but its a good sanity check too
+			throw new System.NotSupportedException("Too many unique append flags, please report this to dev@lucene.apache.org");
+		  }
+
+		  // serialize the rule as four shorts: flag, strip ordinal,
+		  // (pattern ordinal << 1 | crossProduct), append-flags ordinal
+		  affixWriter.writeShort((short)flag);
+		  affixWriter.writeShort((int)(short)stripOrd);
+		  // encode crossProduct into patternIndex
+		  int patternOrd = (int)patternIndex << 1 | (crossProduct ? 1 : 0);
+		  affixWriter.writeShort((short)patternOrd);
+		  affixWriter.writeShort((short)appendFlagsOrd);
+
+		  if (needsInputCleaning)
+		  {
+			CharSequence cleaned = cleanInput(affixArg, sb);
+			affixArg = cleaned.ToString();
+		  }
+
+		  IList<char?> list = affixes[affixArg];
+		  if (list == null)
+		  {
+			// NOTE(review): new List<>() is Java diamond syntax, not valid C#;
+			// must be new List<char?>() in the real port.
+			list = new List<>();
+			affixes[affixArg] = list;
+		  }
+
+		  list.Add((char)currentAffix);
+		  currentAffix++;
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.util.fst.FST<org.apache.lucene.util.CharsRef> parseConversions(java.io.LineNumberReader reader, int num) throws java.io.IOException, java.text.ParseException
+	  /// <summary>
+	  /// Reads <paramref name="num"/> ICONV/OCONV mapping lines ("KEY from to")
+	  /// and builds an FST from each input string (UTF-16 keys) to its
+	  /// replacement text. Duplicate inputs are rejected.
+	  /// </summary>
+	  private FST<CharsRef> parseConversions(LineNumberReader reader, int num)
+	  {
+		IDictionary<string, string> mappings = new SortedDictionary<string, string>();
+
+		for (int i = 0; i < num; i++)
+		{
+		  string line = reader.readLine();
+		  string[] parts = line.Split("\\s+", true);
+		  if (parts.Length != 3)
+		  {
+			throw new ParseException("invalid syntax: " + line, reader.LineNumber);
+		  }
+		  // NOTE(review): mappings.put(...) is a Java Map call; IDictionary has
+		  // no put — the duplicate check needs ContainsKey/TryAdd in the port.
+		  if (mappings.put(parts[1], parts[2]) != null)
+		  {
+			throw new System.InvalidOperationException("duplicate mapping specified for: " + parts[1]);
+		  }
+		}
+
+		Outputs<CharsRef> outputs = CharSequenceOutputs.Singleton;
+		Builder<CharsRef> builder = new Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
+		IntsRef scratchInts = new IntsRef();
+		foreach (KeyValuePair<string, string> entry in mappings.SetOfKeyValuePairs())
+		{
+		  Util.toUTF16(entry.Key, scratchInts);
+		  builder.add(scratchInts, new CharsRef(entry.Value));
+		}
+
+		return builder.finish();
+	  }
+
+	  /// <summary>
+	  /// pattern accepts optional BOM + SET + any whitespace </summary>
+	  // NOTE(review): Pattern/Matcher are Java regex types; the port needs
+	  // System.Text.RegularExpressions.Regex.
+	  internal static readonly Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
+
+	  /// <summary>
+	  /// Parses the encoding specified in the affix file readable through the provided InputStream
+	  /// </summary>
+	  /// <param name="affix"> InputStream for reading the affix file </param>
+	  /// <returns> Encoding specified in the affix file </returns>
+	  /// <exception cref="IOException"> Can be thrown while reading from the InputStream </exception>
+	  /// <exception cref="ParseException"> Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>} </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static String getDictionaryEncoding(java.io.InputStream affix) throws java.io.IOException, java.text.ParseException
+	  internal static string getDictionaryEncoding(InputStream affix)
+	  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final StringBuilder encoding = new StringBuilder();
+		StringBuilder encoding = new StringBuilder();
+		// Read the stream one raw line at a time (bytes interpreted as chars,
+		// '\r' dropped) until a SET line is found.
+		for (;;)
+		{
+		  encoding.Length = 0;
+		  int ch;
+		  while ((ch = affix.read()) >= 0)
+		  {
+			if (ch == '\n')
+			{
+			  break;
+			}
+			if (ch != '\r')
+			{
+			  encoding.Append((char)ch);
+			}
+		  }
+		  // skip blank and '#'-comment lines
+		  if (encoding.Length == 0 || encoding[0] == '#' || encoding.ToString().Trim().Length == 0)
+		  {
+			  // this test only at the end as ineffective but would allow lines only containing spaces:
+			if (ch < 0)
+			{
+			  throw new ParseException("Unexpected end of affix file.", 0);
+			}
+			continue;
+		  }
+		  Matcher matcher = ENCODING_PATTERN.matcher(encoding);
+		  if (matcher.find())
+		  {
+			// return whatever follows "SET " with surrounding whitespace trimmed
+			int last = matcher.end();
+			return encoding.Substring(last).Trim();
+		  }
+		}
+	  }
+
+	  // Maps hunspell charset spellings to canonical encoding names used by
+	  // getJavaEncoding. NOTE(review): Collections.unmodifiableMap is a Java
+	  // API; the port needs a read-only wrapper from System.Collections.
+	  internal static readonly IDictionary<string, string> CHARSET_ALIASES;
+	  static Dictionary()
+	  {
+		IDictionary<string, string> m = new Dictionary<string, string>();
+		m["microsoft-cp1251"] = "windows-1251";
+		m["TIS620-2533"] = "TIS-620";
+		CHARSET_ALIASES = Collections.unmodifiableMap(m);
+	  }
+
+	  /// <summary>
+	  /// Retrieves the CharsetDecoder for the given encoding.  Note, This isn't perfect as I think ISCII-DEVANAGARI and
+	  /// MICROSOFT-CP1251 etc are allowed...
+	  /// </summary>
+	  /// <param name="encoding"> Encoding to retrieve the CharsetDecoder for </param>
+	  /// <returns> CharSetDecoder for the given encoding </returns>
+	  private CharsetDecoder getJavaEncoding(string encoding)
+	  {
+		// ISO8859-14 has no built-in decoder, so a hand-rolled one is used
+		if ("ISO8859-14".Equals(encoding))
+		{
+		  return new ISO8859_14Decoder();
+		}
+		// NOTE(review): a .NET Dictionary indexer throws for a missing key,
+		// unlike Java's Map.get which returned null; this needs TryGetValue.
+		string canon = CHARSET_ALIASES[encoding];
+		if (canon != null)
+		{
+		  encoding = canon;
+		}
+		// NOTE(review): Charset/CodingErrorAction are Java NIO types; the port
+		// needs System.Text.Encoding with a replacement fallback.
+		Charset charset = Charset.forName(encoding);
+		return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE);
+	  }
+
+	  /// <summary>
+	  /// Determines the appropriate <seealso cref="FlagParsingStrategy"/> based on the FLAG definition line taken from the affix file
+	  /// </summary>
+	  /// <param name="flagLine"> Line containing the flag information </param>
+	  /// <returns> FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition </returns>
+	  /// <exception cref="ArgumentException"> Thrown when the line is malformed or names an unknown flag type </exception>
+	  internal static FlagParsingStrategy getFlagParsingStrategy(string flagLine)
+	  {
+		string[] parts = flagLine.Split("\\s+", true);
+		if (parts.Length != 2)
+		{
+		  throw new System.ArgumentException("Illegal FLAG specification: " + flagLine);
+		}
+		string flagType = parts[1];
+
+		if (NUM_FLAG_TYPE.Equals(flagType))
+		{
+		  return new NumFlagParsingStrategy();
+		}
+		else if (UTF8_FLAG_TYPE.Equals(flagType))
+		{
+		  // UTF-8 flags are single characters, so the simple strategy applies
+		  return new SimpleFlagParsingStrategy();
+		}
+		else if (LONG_FLAG_TYPE.Equals(flagType))
+		{
+		  return new DoubleASCIIFlagParsingStrategy();
+		}
+
+		throw new System.ArgumentException("Unknown flag type: " + flagType);
+	  }
+
+	  internal readonly char FLAG_SEPARATOR = (char)0x1f; // flag separator after escaping
+
+	  /// <summary>
+	  /// Unescapes a .dic entry: backslash-escaped characters are emitted
+	  /// literally, and each unescaped '/' (the word/flags separator) is
+	  /// replaced with the internal FLAG_SEPARATOR marker (0x1f).
+	  /// </summary>
+	  internal virtual string unescapeEntry(string entry)
+	  {
+		StringBuilder sb = new StringBuilder();
+		for (int i = 0; i < entry.Length; i++)
+		{
+		  char ch = entry[i];
+		  if (ch == '\\' && i + 1 < entry.Length)
+		  {
+			// escaped character: copy the next char verbatim and skip it
+			sb.Append(entry[i + 1]);
+			i++;
+		  }
+		  else if (ch == '/')
+		  {
+			sb.Append(FLAG_SEPARATOR);
+		  }
+		  else
+		  {
+			sb.Append(ch);
+		  }
+		}
+		return sb.ToString();
+	  }
+
+	  /// <summary>
+	  /// Reads the dictionary file through the provided InputStreams, building up the words map
+	  /// </summary>
+	  /// <param name="dictionaries"> InputStreams to read the dictionary file through </param>
+	  /// <param name="decoder"> CharsetDecoder used to decode the contents of the file </param>
+	  /// <exception cref="IOException"> Can be thrown while reading from the file </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void readDictionaryFiles(java.util.List<java.io.InputStream> dictionaries, java.nio.charset.CharsetDecoder decoder, org.apache.lucene.util.fst.Builder<org.apache.lucene.util.IntsRef> words) throws java.io.IOException
	  /// <summary>
	  /// Reads all dictionary (.dic) entries, normalizes them, sorts them with an
	  /// offline sorter, and folds each entry's encoded flag ordinal(s) into the
	  /// words FST builder.
	  /// </summary>
	  /// <param name="dictionaries"> open streams over the *.dic files; the first line of each is an entry count </param>
	  /// <param name="decoder"> charset decoder declared by the affix file </param>
	  /// <param name="words"> FST builder mapping each word form to its list of flag ordinals </param>
	  private void readDictionaryFiles(IList<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words)
	  {
		BytesRef flagsScratch = new BytesRef();
		IntsRef scratchInts = new IntsRef();

		StringBuilder sb = new StringBuilder();

		// Phase 1: write every (optionally cleaned) entry line to a temporary unsorted file.
		File unsorted = File.createTempFile("unsorted", "dat", tempDir);
		OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(unsorted);
		bool success = false;
		try
		{
		  foreach (InputStream dictionary in dictionaries)
		  {
			BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
			string line = lines.readLine(); // first line is number of entries (approximately, sometimes)

			while ((line = lines.readLine()) != null)
			{
			  line = unescapeEntry(line);
			  if (needsInputCleaning)
			  {
				int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
				if (flagSep == -1)
				{
				  // no flags on this entry: clean the whole line
				  CharSequence cleansed = cleanInput(line, sb);
				  writer.write(cleansed.ToString().GetBytes(StandardCharsets.UTF_8));
				}
				else
				{
				  // clean only the word part; the flag part is appended verbatim
				  string text = line.Substring(0, flagSep);
				  CharSequence cleansed = cleanInput(text, sb);
				  if (cleansed != sb)
				  {
					// make sure the cleaned text is in sb before appending the flags
					sb.Length = 0;
					sb.Append(cleansed);
				  }
				  sb.Append(line.Substring(flagSep));
				  writer.write(sb.ToString().GetBytes(StandardCharsets.UTF_8));
				}
			  }
			  else
			  {
				writer.write(line.GetBytes(StandardCharsets.UTF_8));
			  }
			}
		  }
		  success = true;
		}
		finally
		{
		  // close normally on success; on failure, suppress secondary close exceptions
		  if (success)
		  {
			IOUtils.close(writer);
		  }
		  else
		  {
			IOUtils.closeWhileHandlingException(writer);
		  }
		}
		// Phase 2: offline-sort the entries by word (flag part ignored -- see the comparator below).
		File sorted = File.createTempFile("sorted", "dat", tempDir);

		OfflineSorter sorter = new OfflineSorter(new ComparatorAnonymousInnerClassHelper(this));
		sorter.sort(unsorted, sorted);
		unsorted.delete();

		// Phase 3: stream the sorted file, merging consecutive duplicate words into a single FST entry.
		OfflineSorter.ByteSequencesReader reader = new OfflineSorter.ByteSequencesReader(sorted);
		BytesRef scratchLine = new BytesRef();

		// TODO: the flags themselves can be double-chars (long) or also numeric
		// either way the trick is to encode them as char... but they must be parsed differently

		string currentEntry = null;
		IntsRef currentOrds = new IntsRef();

		string line;
		while (reader.read(scratchLine))
		{
		  line = scratchLine.utf8ToString();
		  string entry;
		  char[] wordForm;

		  int flagSep = line.LastIndexOf(FLAG_SEPARATOR);
		  if (flagSep == -1)
		  {
			wordForm = NOFLAGS;
			entry = line;
		  }
		  else
		  {
			// note, there can be comments (morph description) after a flag.
			// we should really look for any whitespace: currently just tab and space
			int end = line.IndexOf('\t', flagSep);
			if (end == -1)
			{
			  end = line.Length;
			}
			int end2 = line.IndexOf(' ', flagSep);
			if (end2 == -1)
			{
			  end2 = line.Length;
			}
			end = Math.Min(end, end2);

			string flagPart = StringHelperClass.SubstringSpecial(line, flagSep + 1, end);
			if (aliasCount > 0)
			{
			  // numeric alias (AF line) -> resolve to the real flag string
			  flagPart = getAliasValue(int.Parse(flagPart));
			}

			wordForm = flagParsingStrategy.parseFlags(flagPart);
			Arrays.sort(wordForm); // sorted so hasFlag can binary-search later
			entry = line.Substring(0, flagSep);
		  }

		  // the sorter guarantees non-decreasing order; anything else is corrupt input
		  int cmp = currentEntry == null ? 1 : entry.CompareTo(currentEntry);
		  if (cmp < 0)
		  {
			throw new System.ArgumentException("out of order: " + entry + " < " + currentEntry);
		  }
		  else
		  {
			encodeFlags(flagsScratch, wordForm);
			int ord = flagLookup.add(flagsScratch);
			if (ord < 0)
			{
			  // already exists in our hash
			  ord = (-ord) - 1;
			}
			// finalize current entry, and switch "current" if necessary
			if (cmp > 0 && currentEntry != null)
			{
			  Util.toUTF32(currentEntry, scratchInts);
			  words.add(scratchInts, currentOrds);
			}
			// swap current
			if (cmp > 0 || currentEntry == null)
			{
			  currentEntry = entry;
			  currentOrds = new IntsRef(); // must be this way
			}
			currentOrds.grow(currentOrds.length + 1);
			currentOrds.ints[currentOrds.length++] = ord;
		  }
		}

		// finalize last entry
		Util.toUTF32(currentEntry, scratchInts);
		words.add(scratchInts, currentOrds);

		reader.close();
		sorted.delete();
	  }
+
	  /// <summary>
	  /// Comparator used when offline-sorting dictionary lines: compares only the
	  /// word part (text before the last FLAG_SEPARATOR), falling back to the whole
	  /// row on ties so the ordering stays total and deterministic.
	  /// NOTE(review): reuses two scratch BytesRefs across calls, so a single
	  /// instance is not safe for concurrent use -- confirm the sorter is single-threaded.
	  /// </summary>
	  private class ComparatorAnonymousInnerClassHelper : IComparer<BytesRef>
	  {
		  private readonly Dictionary outerInstance;

		  public ComparatorAnonymousInnerClassHelper(Dictionary outerInstance)
		  {
			  this.outerInstance = outerInstance;
			  scratch1 = new BytesRef();
			  scratch2 = new BytesRef();
		  }

		  // scratch refs that alias the compared arguments (no copying)
		  internal BytesRef scratch1;
		  internal BytesRef scratch2;

		  public virtual int Compare(BytesRef o1, BytesRef o2)
		  {
			// alias o1, then truncate at the last flag separator (if any)
			scratch1.bytes = o1.bytes;
			scratch1.offset = o1.offset;
			scratch1.length = o1.length;

			for (int i = scratch1.length - 1; i >= 0; i--)
			{
			  if (scratch1.bytes[scratch1.offset + i] == outerInstance.FLAG_SEPARATOR)
			  {
				scratch1.length = i;
				break;
			  }
			}

			// alias o2, truncated the same way
			scratch2.bytes = o2.bytes;
			scratch2.offset = o2.offset;
			scratch2.length = o2.length;

			for (int i = scratch2.length - 1; i >= 0; i--)
			{
			  if (scratch2.bytes[scratch2.offset + i] == outerInstance.FLAG_SEPARATOR)
			  {
				scratch2.length = i;
				break;
			  }
			}

			int cmp = scratch1.compareTo(scratch2);
			if (cmp == 0)
			{
			  // tie break on whole row
			  return o1.compareTo(o2);
			}
			else
			{
			  return cmp;
			}
		  }
	  }
+
+	  internal static char[] decodeFlags(BytesRef b)
+	  {
+		if (b.length == 0)
+		{
+		  return CharsRef.EMPTY_CHARS;
+		}
+		int len = (int)((uint)b.length >> 1);
+		char[] flags = new char[len];
+		int upto = 0;
+		int end = b.offset + b.length;
+		for (int i = b.offset; i < end; i += 2)
+		{
+		  flags[upto++] = (char)((b.bytes[i] << 8) | (b.bytes[i + 1] & 0xff));
+		}
+		return flags;
+	  }
+
+	  internal static void encodeFlags(BytesRef b, char[] flags)
+	  {
+		int len = flags.Length << 1;
+		b.grow(len);
+		b.length = len;
+		int upto = b.offset;
+		for (int i = 0; i < flags.Length; i++)
+		{
+		  int flag = flags[i];
+		  b.bytes[upto++] = unchecked((sbyte)((flag >> 8) & 0xff));
+		  b.bytes[upto++] = unchecked((sbyte)(flag & 0xff));
+		}
+	  }
+
+	  private void parseAlias(string line)
+	  {
+		string[] ruleArgs = line.Split("\\s+", true);
+		if (aliases == null)
+		{
+		  //first line should be the aliases count
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int count = Integer.parseInt(ruleArgs[1]);
+		  int count = int.Parse(ruleArgs[1]);
+		  aliases = new string[count];
+		}
+		else
+		{
+		  // an alias can map to no flags
+		  string aliasValue = ruleArgs.Length == 1 ? "" : ruleArgs[1];
+		  aliases[aliasCount++] = aliasValue;
+		}
+	  }
+
	  /// <summary>
	  /// Resolves a numeric flag alias (1-based ordinal from an AF line) to the
	  /// flag string registered by <seealso cref="parseAlias"/>.
	  /// </summary>
	  /// <param name="id"> 1-based alias ordinal </param>
	  /// <returns> the alias value </returns>
	  /// <exception cref="System.ArgumentException"> if the id is outside the registered range </exception>
	  private string getAliasValue(int id)
	  {
		try
		{
		  return aliases[id - 1];
		}
		catch (System.IndexOutOfRangeException ex)
		{
		  // surface bad alias numbers as an argument error, keeping the original cause
		  throw new System.ArgumentException("Bad flag alias number:" + id, ex);
		}
	  }
+
+	  /// <summary>
+	  /// Abstraction of the process of parsing flags taken from the affix and dic files
+	  /// </summary>
+	  internal abstract class FlagParsingStrategy
+	  {
+
+		/// <summary>
+		/// Parses the given String into a single flag
+		/// </summary>
+		/// <param name="rawFlag"> String to parse into a flag </param>
+		/// <returns> Parsed flag </returns>
+		internal virtual char parseFlag(string rawFlag)
+		{
+		  char[] flags = parseFlags(rawFlag);
+		  if (flags.Length != 1)
+		  {
+			throw new System.ArgumentException("expected only one flag, got: " + rawFlag);
+		  }
+		  return flags[0];
+		}
+
+		/// <summary>
+		/// Parses the given String into multiple flags
+		/// </summary>
+		/// <param name="rawFlags"> String to parse into flags </param>
+		/// <returns> Parsed flags </returns>
+		internal abstract char[] parseFlags(string rawFlags);
+	  }
+
+	  /// <summary>
+	  /// Simple implementation of <seealso cref="FlagParsingStrategy"/> that treats the chars in each String as a individual flags.
+	  /// Can be used with both the ASCII and UTF-8 flag types.
+	  /// </summary>
+	  private class SimpleFlagParsingStrategy : FlagParsingStrategy
+	  {
+		public override char[] parseFlags(string rawFlags)
+		{
+		  return rawFlags.ToCharArray();
+		}
+	  }
+
+	  /// <summary>
+	  /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded in its numerical form.  In the case
+	  /// of multiple flags, each number is separated by a comma.
+	  /// </summary>
+	  private class NumFlagParsingStrategy : FlagParsingStrategy
+	  {
+		public override char[] parseFlags(string rawFlags)
+		{
+		  string[] rawFlagParts = rawFlags.Trim().Split(",", true);
+		  char[] flags = new char[rawFlagParts.Length];
+		  int upto = 0;
+
+		  for (int i = 0; i < rawFlagParts.Length; i++)
+		  {
+			// note, removing the trailing X/leading I for nepali... what is the rule here?! 
+			string replacement = rawFlagParts[i].replaceAll("[^0-9]", "");
+			// note, ignoring empty flags (this happens in danish, for example)
+			if (replacement.Length == 0)
+			{
+			  continue;
+			}
+			flags[upto++] = (char) int.Parse(replacement);
+		  }
+
+		  if (upto < flags.Length)
+		  {
+			flags = Arrays.copyOf(flags, upto);
+		  }
+		  return flags;
+		}
+	  }
+
+	  /// <summary>
+	  /// Implementation of <seealso cref="FlagParsingStrategy"/> that assumes each flag is encoded as two ASCII characters whose codes
+	  /// must be combined into a single character.
+	  /// 
+	  /// TODO (rmuir) test
+	  /// </summary>
+	  private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy
+	  {
+
+		public override char[] parseFlags(string rawFlags)
+		{
+		  if (rawFlags.Length == 0)
+		  {
+			return new char[0];
+		  }
+
+		  StringBuilder builder = new StringBuilder();
+		  if (rawFlags.Length % 2 == 1)
+		  {
+			throw new System.ArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
+		  }
+		  for (int i = 0; i < rawFlags.Length; i += 2)
+		  {
+			char cookedFlag = (char)((int) rawFlags[i] + (int) rawFlags[i + 1]);
+			builder.Append(cookedFlag);
+		  }
+
+		  char[] flags = new char[builder.Length];
+		  builder.getChars(0, builder.Length, flags, 0);
+		  return flags;
+		}
+	  }
+
+	  internal static bool hasFlag(char[] flags, char flag)
+	  {
+		return Arrays.binarySearch(flags, flag) >= 0;
+	  }
+
	  /// <summary>
	  /// Normalizes input text before lookup: drops characters listed in the affix
	  /// IGNORE set, lowercases when ignoreCase is set, and applies ICONV input
	  /// conversion mappings when present.
	  /// </summary>
	  /// <param name="input"> raw text </param>
	  /// <param name="reuse"> scratch buffer the cleaned text is written into </param>
	  /// <returns> the <paramref name="reuse"/> buffer containing the cleaned text </returns>
	  internal virtual CharSequence cleanInput(CharSequence input, StringBuilder reuse)
	  {
		reuse.Length = 0;

		for (int i = 0; i < input.length(); i++)
		{
		  char ch = input.charAt(i);

		  // skip characters listed in the IGNORE directive (ignore array is sorted)
		  if (ignore != null && Arrays.binarySearch(ignore, ch) >= 0)
		  {
			continue;
		  }

		  if (ignoreCase && iconv == null)
		  {
			// if we have no input conversion mappings, do this on-the-fly
			ch = char.ToLower(ch);
		  }

		  reuse.Append(ch);
		}

		if (iconv != null)
		{
		  try
		  {
			applyMappings(iconv, reuse);
		  }
		  catch (IOException bogus)
		  {
			// NOTE(review): System.Exception has no (Exception) constructor in .NET;
			// this raw-port line likely needs a (message, innerException) overload -- confirm.
			throw new Exception(bogus);
		  }
		  if (ignoreCase)
		  {
			// lowercase the converted text
			for (int i = 0; i < reuse.Length; i++)
			{
			  reuse[i] = char.ToLower(reuse[i]);
			}
		  }
		}

		return reuse;
	  }
+
	  // TODO: this could be more efficient!
	  /// <summary>
	  /// Applies conversion mappings (stored as an FST with CharsRef outputs) to
	  /// <paramref name="sb"/> in place: at each position, the longest matching
	  /// input sequence is replaced by its mapped output, then scanning resumes
	  /// after the inserted text.
	  /// </summary>
	  /// <param name="sb"> buffer rewritten in place </param>
	  internal static void applyMappings(FST<CharsRef> fst, StringBuilder sb)
	  {
		FST.BytesReader bytesReader = fst.BytesReader;
		FST.Arc<CharsRef> firstArc = fst.getFirstArc(new FST.Arc<CharsRef>());
		CharsRef NO_OUTPUT = fst.outputs.NoOutput;

		// temporary stuff
		FST.Arc<CharsRef> arc = new FST.Arc<CharsRef>();
		int longestMatch;
		CharsRef longestOutput;

		for (int i = 0; i < sb.Length; i++)
		{
		  // restart matching from the FST root at every position
		  arc.copyFrom(firstArc);
		  CharsRef output = NO_OUTPUT;
		  longestMatch = -1;
		  longestOutput = null;

		  for (int j = i; j < sb.Length; j++)
		  {
			char ch = sb[j];
			if (fst.findTargetArc(ch, arc, arc, bytesReader) == null)
			{
			  // no arc for this char: no longer a prefix of any mapping
			  break;
			}
			else
			{
			  output = fst.outputs.add(output, arc.output);
			}
			if (arc.Final)
			{
			  // remember the longest complete match seen so far
			  longestOutput = fst.outputs.add(output, arc.nextFinalOutput);
			  longestMatch = j;
			}
		  }

		  if (longestMatch >= 0)
		  {
			// replace sb[i..longestMatch] with the mapped output and skip past it
			sb.Remove(i, longestMatch + 1 - i);
			sb.Insert(i, longestOutput);
			i += (longestOutput.length - 1);
		  }
		}
	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
new file mode 100644
index 0000000..bf59a70
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilter.cs
@@ -0,0 +1,171 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using CharsRef = org.apache.lucene.util.CharsRef;
+
+	/// <summary>
+	/// TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
+	/// stems, this filter can emit multiple tokens for each consumed token
+	/// 
+	/// <para>
+	/// Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	/// certain terms from being passed to the stemmer
+	/// <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+	/// in a previous <seealso cref="TokenStream"/>.
+	/// 
+	/// Note: For including the original term as well as the stemmed version, see
+	/// <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+	/// </para>
+	/// 
+	/// @lucene.experimental
+	/// </summary>
+	public sealed class HunspellStemFilter : TokenFilter
+	{
+
+	  private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+	  private readonly PositionIncrementAttribute posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+	  private readonly KeywordAttribute keywordAtt = addAttribute(typeof(KeywordAttribute));
+	  private readonly Stemmer stemmer;
+
+	  private IList<CharsRef> buffer;
+	  private State savedState;
+
+	  private readonly bool dedup;
+	  private readonly bool longestOnly;
+
+	  /// <summary>
+	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean)  </seealso>
+	  public HunspellStemFilter(TokenStream input, Dictionary dictionary) : this(input, dictionary, true)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a <seealso cref="HunspellStemFilter"/> outputting all possible stems. </summary>
+	  ///  <seealso cref= #HunspellStemFilter(TokenStream, Dictionary, boolean, boolean)  </seealso>
+	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup) : this(input, dictionary, dedup, false)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
+	  /// Dictionary
+	  /// </summary>
+	  /// <param name="input"> TokenStream whose tokens will be stemmed </param>
+	  /// <param name="dictionary"> HunspellDictionary containing the affix rules and words that will be used to stem the tokens </param>
+	  /// <param name="longestOnly"> true if only the longest term should be output. </param>
+	  public HunspellStemFilter(TokenStream input, Dictionary dictionary, bool dedup, bool longestOnly) : base(input)
+	  {
+		this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
+		this.stemmer = new Stemmer(dictionary);
+		this.longestOnly = longestOnly;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+	  public override bool incrementToken()
+	  {
+		if (buffer != null && buffer.Count > 0)
+		{
+		  CharsRef nextStem = buffer.Remove(0);
+		  restoreState(savedState);
+		  posIncAtt.PositionIncrement = 0;
+		  termAtt.setEmpty().append(nextStem);
+		  return true;
+		}
+
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		if (keywordAtt.Keyword)
+		{
+		  return true;
+		}
+
+		buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
+
+		if (buffer.Count == 0) // we do not know this word, return it unchanged
+		{
+		  return true;
+		}
+
+		if (longestOnly && buffer.Count > 1)
+		{
+		  buffer.Sort(lengthComparator);
+		}
+
+		CharsRef stem = buffer.Remove(0);
+		termAtt.setEmpty().append(stem);
+
+		if (longestOnly)
+		{
+		  buffer.Clear();
+		}
+		else
+		{
+		  if (buffer.Count > 0)
+		  {
+			savedState = captureState();
+		  }
+		}
+
+		return true;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
+	  public override void reset()
+	  {
+		base.reset();
+		buffer = null;
+	  }
+
+	  internal static readonly IComparer<CharsRef> lengthComparator = new ComparatorAnonymousInnerClassHelper();
+
+	  private class ComparatorAnonymousInnerClassHelper : IComparer<CharsRef>
+	  {
+		  public ComparatorAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  public virtual int Compare(CharsRef o1, CharsRef o2)
+		  {
+			if (o2.length == o1.length)
+			{
+			  // tie break on text
+			  return o2.compareTo(o1);
+			}
+			else
+			{
+			  return o2.length < o1.length ? - 1 : 1;
+			}
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
new file mode 100644
index 0000000..c9888fd
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/HunspellStemFilterFactory.cs
@@ -0,0 +1,116 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+
+	/// <summary>
+	/// TokenFilterFactory that creates instances of <seealso cref="HunspellStemFilter"/>.
+	/// Example config for British English:
+	/// <pre class="prettyprint">
+	/// &lt;filter class=&quot;solr.HunspellStemFilterFactory&quot;
+	///         dictionary=&quot;en_GB.dic,my_custom.dic&quot;
+	///         affix=&quot;en_GB.aff&quot; 
+	///         ignoreCase=&quot;false&quot;
+	///         longestOnly=&quot;false&quot; /&gt;</pre>
+	/// Both parameters dictionary and affix are mandatory.
+	/// Dictionaries for many languages are available through the OpenOffice project.
+	/// 
+	/// See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
+	/// @lucene.experimental
+	/// </summary>
+	public class HunspellStemFilterFactory : TokenFilterFactory, ResourceLoaderAware
+	{
+	  private const string PARAM_DICTIONARY = "dictionary";
+	  private const string PARAM_AFFIX = "affix";
+	  private const string PARAM_RECURSION_CAP = "recursionCap";
+	  private const string PARAM_IGNORE_CASE = "ignoreCase";
+	  private const string PARAM_LONGEST_ONLY = "longestOnly";
+
+	  private readonly string dictionaryFiles;
+	  private readonly string affixFile;
+	  private readonly bool ignoreCase;
+	  private readonly bool longestOnly;
+	  private Dictionary dictionary;
+
+	  /// <summary>
+	  /// Creates a new HunspellStemFilterFactory </summary>
+	  public HunspellStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		dictionaryFiles = require(args, PARAM_DICTIONARY);
+		affixFile = get(args, PARAM_AFFIX);
+		ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
+		longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false);
+		// this isnt necessary: we properly load all dictionaries.
+		// but recognize and ignore for back compat
+		getBoolean(args, "strictAffixParsing", true);
+		// this isn't necessary: multi-stage stripping is fixed and 
+		// flags like COMPLEXPREFIXES in the data itself control this.
+		// but recognize and ignore for back compat
+		getInt(args, "recursionCap", 0);
+		if (args.Count > 0)
+		{
+		  throw new System.ArgumentException("Unknown parameters: " + args);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
+	  public virtual void inform(ResourceLoader loader)
+	  {
+		string[] dicts = dictionaryFiles.Split(",", true);
+
+		InputStream affix = null;
+		IList<InputStream> dictionaries = new List<InputStream>();
+
+		try
+		{
+		  dictionaries = new List<>();
+		  foreach (string file in dicts)
+		  {
+			dictionaries.Add(loader.openResource(file));
+		  }
+		  affix = loader.openResource(affixFile);
+
+		  this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
+		}
+		catch (ParseException e)
+		{
+		  throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(affix);
+		  IOUtils.closeWhileHandlingException(dictionaries);
+		}
+	  }
+
+	  public override TokenStream create(TokenStream tokenStream)
+	  {
+		return new HunspellStemFilter(tokenStream, dictionary, true, longestOnly);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
new file mode 100644
index 0000000..87872c9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/ISO8859_14Decoder.cs
@@ -0,0 +1,47 @@
+namespace org.apache.lucene.analysis.hunspell
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	// many hunspell dictionaries use this encoding, yet java does not have it?!?!
+	internal sealed class ISO8859_14Decoder : CharsetDecoder
+	{
+
+	  internal static readonly char[] TABLE = new char[] {0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178, 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF, 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF};
+
+	  internal ISO8859_14Decoder() : base(StandardCharsets.ISO_88591, 1f, 1f); / / fake with similar properties
+	  {
+	  }
+
+	  protected internal override CoderResult decodeLoop(ByteBuffer @in, CharBuffer @out)
+	  {
+		while (@in.hasRemaining() && @out.hasRemaining())
+		{
+		  char ch = (char)(@in.get() & 0xff);
+		  if (ch >= 0xA0)
+		  {
+			ch = TABLE[ch - 0xA0];
+		  }
+		  @out.put(ch);
+		}
+		return @in.hasRemaining() ? CoderResult.OVERFLOW : CoderResult.UNDERFLOW;
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message