lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [23/34] lucenenet git commit: Raw porting of Lucene.Net.Analysis.Common
Date Fri, 07 Nov 2014 23:12:27 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
new file mode 100644
index 0000000..91d84ee
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/KStemmer.cs
@@ -0,0 +1,2044 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+This file was partially derived from the
+original CIIR University of Massachusetts Amherst version of KStemmer.java (license for
+the original shown below)
+ */
+
+/*
+ Copyright © 2003,
+ Center for Intelligent Information Retrieval,
+ University of Massachusetts, Amherst.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. The names "Center for Intelligent Information Retrieval" and
+ "University of Massachusetts" must not be used to endorse or promote products
+ derived from this software without prior written permission. To obtain
+ permission, contact info@ciir.cs.umass.edu.
+
+ THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+ */
+namespace org.apache.lucene.analysis.en
+{
+
+	using org.apache.lucene.analysis.util;
+	using OpenStringBuilder = org.apache.lucene.analysis.util.OpenStringBuilder;
+	/// <summary>
+	/// <para>Title: Kstemmer</para>
+	/// <para>Description: This is a java version of Bob Krovetz' kstem stemmer</para>
+	/// <para>Copyright: Copyright 2008, Lucid Imagination, Inc. </para>
+	/// <para>Copyright: Copyright 2003, CIIR University of Massachusetts Amherst (http://ciir.cs.umass.edu) </para>
+	/// </summary>
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// This class implements the Kstem algorithm
+	/// </summary>
+	public class KStemmer
+	{
+	  private const int MaxWordLen = 50;
+
+	  private static readonly string[] exceptionWords = new string[] {"aide", "bathe", "caste", "cute", "dame", "dime", "doge", "done", "dune", "envelope", "gage", "grille", "grippe", "lobe", "mane", "mare", "nape", "node", "pane", "pate", "plane", "pope", "programme", "quite", "ripe", "rote", "rune", "sage", "severe", "shoppe", "sine", "slime", "snipe", "steppe", "suite", "swinge", "tare", "tine", "tope", "tripe", "twine"};
+
+	  private static readonly string[][] directConflations = new string[][]
+	  {
+		  new string[] {"aging", "age"},
+		  new string[] {"going", "go"},
+		  new string[] {"goes", "go"},
+		  new string[] {"lying", "lie"},
+		  new string[] {"using", "use"},
+		  new string[] {"owing", "owe"},
+		  new string[] {"suing", "sue"},
+		  new string[] {"dying", "die"},
+		  new string[] {"tying", "tie"},
+		  new string[] {"vying", "vie"},
+		  new string[] {"aged", "age"},
+		  new string[] {"used", "use"},
+		  new string[] {"vied", "vie"},
+		  new string[] {"cued", "cue"},
+		  new string[] {"died", "die"},
+		  new string[] {"eyed", "eye"},
+		  new string[] {"hued", "hue"},
+		  new string[] {"iced", "ice"},
+		  new string[] {"lied", "lie"},
+		  new string[] {"owed", "owe"},
+		  new string[] {"sued", "sue"},
+		  new string[] {"toed", "toe"},
+		  new string[] {"tied", "tie"},
+		  new string[] {"does", "do"},
+		  new string[] {"doing", "do"},
+		  new string[] {"aeronautical", "aeronautics"},
+		  new string[] {"mathematical", "mathematics"},
+		  new string[] {"political", "politics"},
+		  new string[] {"metaphysical", "metaphysics"},
+		  new string[] {"cylindrical", "cylinder"},
+		  new string[] {"nazism", "nazi"},
+		  new string[] {"ambiguity", "ambiguous"},
+		  new string[] {"barbarity", "barbarous"},
+		  new string[] {"credulity", "credulous"},
+		  new string[] {"generosity", "generous"},
+		  new string[] {"spontaneity", "spontaneous"},
+		  new string[] {"unanimity", "unanimous"},
+		  new string[] {"voracity", "voracious"},
+		  new string[] {"fled", "flee"},
+		  new string[] {"miscarriage", "miscarry"}
+	  };
+
+	  private static readonly string[][] countryNationality = new string[][]
+	  {
+		  new string[] {"afghan", "afghanistan"},
+		  new string[] {"african", "africa"},
+		  new string[] {"albanian", "albania"},
+		  new string[] {"algerian", "algeria"},
+		  new string[] {"american", "america"},
+		  new string[] {"andorran", "andorra"},
+		  new string[] {"angolan", "angola"},
+		  new string[] {"arabian", "arabia"},
+		  new string[] {"argentine", "argentina"},
+		  new string[] {"armenian", "armenia"},
+		  new string[] {"asian", "asia"},
+		  new string[] {"australian", "australia"},
+		  new string[] {"austrian", "austria"},
+		  new string[] {"azerbaijani", "azerbaijan"},
+		  new string[] {"azeri", "azerbaijan"},
+		  new string[] {"bangladeshi", "bangladesh"},
+		  new string[] {"belgian", "belgium"},
+		  new string[] {"bermudan", "bermuda"},
+		  new string[] {"bolivian", "bolivia"},
+		  new string[] {"bosnian", "bosnia"},
+		  new string[] {"botswanan", "botswana"},
+		  new string[] {"brazilian", "brazil"},
+		  new string[] {"british", "britain"},
+		  new string[] {"bulgarian", "bulgaria"},
+		  new string[] {"burmese", "burma"},
+		  new string[] {"californian", "california"},
+		  new string[] {"cambodian", "cambodia"},
+		  new string[] {"canadian", "canada"},
+		  new string[] {"chadian", "chad"},
+		  new string[] {"chilean", "chile"},
+		  new string[] {"chinese", "china"},
+		  new string[] {"colombian", "colombia"},
+		  new string[] {"croat", "croatia"},
+		  new string[] {"croatian", "croatia"},
+		  new string[] {"cuban", "cuba"},
+		  new string[] {"cypriot", "cyprus"},
+		  new string[] {"czechoslovakian", "czechoslovakia"},
+		  new string[] {"danish", "denmark"},
+		  new string[] {"egyptian", "egypt"},
+		  new string[] {"equadorian", "equador"},
+		  new string[] {"eritrean", "eritrea"},
+		  new string[] {"estonian", "estonia"},
+		  new string[] {"ethiopian", "ethiopia"},
+		  new string[] {"european", "europe"},
+		  new string[] {"fijian", "fiji"},
+		  new string[] {"filipino", "philippines"},
+		  new string[] {"finnish", "finland"},
+		  new string[] {"french", "france"},
+		  new string[] {"gambian", "gambia"},
+		  new string[] {"georgian", "georgia"},
+		  new string[] {"german", "germany"},
+		  new string[] {"ghanian", "ghana"},
+		  new string[] {"greek", "greece"},
+		  new string[] {"grenadan", "grenada"},
+		  new string[] {"guamian", "guam"},
+		  new string[] {"guatemalan", "guatemala"},
+		  new string[] {"guinean", "guinea"},
+		  new string[] {"guyanan", "guyana"},
+		  new string[] {"haitian", "haiti"},
+		  new string[] {"hawaiian", "hawaii"},
+		  new string[] {"holland", "dutch"},
+		  new string[] {"honduran", "honduras"},
+		  new string[] {"hungarian", "hungary"},
+		  new string[] {"icelandic", "iceland"},
+		  new string[] {"indonesian", "indonesia"},
+		  new string[] {"iranian", "iran"},
+		  new string[] {"iraqi", "iraq"},
+		  new string[] {"iraqui", "iraq"},
+		  new string[] {"irish", "ireland"},
+		  new string[] {"israeli", "israel"},
+		  new string[] {"italian", "italy"},
+		  new string[] {"jamaican", "jamaica"},
+		  new string[] {"japanese", "japan"},
+		  new string[] {"jordanian", "jordan"},
+		  new string[] {"kampuchean", "cambodia"},
+		  new string[] {"kenyan", "kenya"},
+		  new string[] {"korean", "korea"},
+		  new string[] {"kuwaiti", "kuwait"},
+		  new string[] {"lankan", "lanka"},
+		  new string[] {"laotian", "laos"},
+		  new string[] {"latvian", "latvia"},
+		  new string[] {"lebanese", "lebanon"},
+		  new string[] {"liberian", "liberia"},
+		  new string[] {"libyan", "libya"},
+		  new string[] {"lithuanian", "lithuania"},
+		  new string[] {"macedonian", "macedonia"},
+		  new string[] {"madagascan", "madagascar"},
+		  new string[] {"malaysian", "malaysia"},
+		  new string[] {"maltese", "malta"},
+		  new string[] {"mauritanian", "mauritania"},
+		  new string[] {"mexican", "mexico"},
+		  new string[] {"micronesian", "micronesia"},
+		  new string[] {"moldovan", "moldova"},
+		  new string[] {"monacan", "monaco"},
+		  new string[] {"mongolian", "mongolia"},
+		  new string[] {"montenegran", "montenegro"},
+		  new string[] {"moroccan", "morocco"},
+		  new string[] {"myanmar", "burma"},
+		  new string[] {"namibian", "namibia"},
+		  new string[] {"nepalese", "nepal"},
+		  new string[] {"nicaraguan", "nicaragua"},
+		  new string[] {"nigerian", "nigeria"},
+		  new string[] {"norwegian", "norway"},
+		  new string[] {"omani", "oman"},
+		  new string[] {"pakistani", "pakistan"},
+		  new string[] {"panamanian", "panama"},
+		  new string[] {"papuan", "papua"},
+		  new string[] {"paraguayan", "paraguay"},
+		  new string[] {"peruvian", "peru"},
+		  new string[] {"portuguese", "portugal"},
+		  new string[] {"romanian", "romania"},
+		  new string[] {"rumania", "romania"},
+		  new string[] {"rumanian", "romania"},
+		  new string[] {"russian", "russia"},
+		  new string[] {"rwandan", "rwanda"},
+		  new string[] {"samoan", "samoa"},
+		  new string[] {"scottish", "scotland"},
+		  new string[] {"serb", "serbia"},
+		  new string[] {"serbian", "serbia"},
+		  new string[] {"siam", "thailand"},
+		  new string[] {"siamese", "thailand"},
+		  new string[] {"slovakia", "slovak"},
+		  new string[] {"slovakian", "slovak"},
+		  new string[] {"slovenian", "slovenia"},
+		  new string[] {"somali", "somalia"},
+		  new string[] {"somalian", "somalia"},
+		  new string[] {"spanish", "spain"},
+		  new string[] {"swedish", "sweden"},
+		  new string[] {"swiss", "switzerland"},
+		  new string[] {"syrian", "syria"},
+		  new string[] {"taiwanese", "taiwan"},
+		  new string[] {"tanzanian", "tanzania"},
+		  new string[] {"texan", "texas"},
+		  new string[] {"thai", "thailand"},
+		  new string[] {"tunisian", "tunisia"},
+		  new string[] {"turkish", "turkey"},
+		  new string[] {"ugandan", "uganda"},
+		  new string[] {"ukrainian", "ukraine"},
+		  new string[] {"uruguayan", "uruguay"},
+		  new string[] {"uzbek", "uzbekistan"},
+		  new string[] {"venezuelan", "venezuela"},
+		  new string[] {"vietnamese", "viet"},
+		  new string[] {"virginian", "virginia"},
+		  new string[] {"yemeni", "yemen"},
+		  new string[] {"yugoslav", "yugoslavia"},
+		  new string[] {"yugoslavian", "yugoslavia"},
+		  new string[] {"zambian", "zambia"},
+		  new string[] {"zealander", "zealand"},
+		  new string[] {"zimbabwean", "zimbabwe"}
+	  };
+
+	  private static readonly string[] supplementDict = new string[] {"aids", "applicator", "capacitor", "digitize", "electromagnet", "ellipsoid", "exosphere", "extensible", "ferromagnet", "graphics", "hydromagnet", "polygraph", "toroid", "superconduct", "backscatter", "connectionism"};
+
+	  private static readonly string[] properNouns = new string[] {"abrams", "achilles", "acropolis", "adams", "agnes", "aires", "alexander", "alexis", "alfred", "algiers", "alps", "amadeus", "ames", "amos", "andes", "angeles", "annapolis", "antilles", "aquarius", "archimedes", "arkansas", "asher", "ashly", "athens", "atkins", "atlantis", "avis", "bahamas", "bangor", "barbados", "barger", "bering", "brahms", "brandeis", "brussels", "bruxelles", "cairns", "camoros", "camus", "carlos", "celts", "chalker", "charles", "cheops", "ching", "christmas", "cocos", "collins", "columbus", "confucius", "conners", "connolly", "copernicus", "cramer", "cyclops", "cygnus", "cyprus", "dallas", "damascus", "daniels", "davies", "davis", "decker", "denning", "dennis", "descartes", "dickens", "doris", "douglas", "downs", "dreyfus", "dukakis", "dulles", "dumfries", "ecclesiastes", "edwards", "emily", "erasmus", "euphrates", "evans", "everglades", "fairbanks", "federales", "fisher", "fitzsimmons", "fleming", 
 "forbes", "fowler", "france", "francis", "goering", "goodling", "goths", "grenadines", "guiness", "hades", "harding", "harris", "hastings", "hawkes", "hawking", "hayes", "heights", "hercules", "himalayas", "hippocrates", "hobbs", "holmes", "honduras", "hopkins", "hughes", "humphreys", "illinois", "indianapolis", "inverness", "iris", "iroquois", "irving", "isaacs", "italy", "james", "jarvis", "jeffreys", "jesus", "jones", "josephus", "judas", "julius", "kansas", "keynes", "kipling", "kiwanis", "lansing", "laos", "leeds", "levis", "leviticus", "lewis", "louis", "maccabees", "madras", "maimonides", "maldive", "massachusetts", "matthews", "mauritius", "memphis", "mercedes", "midas", "mingus", "minneapolis", "mohammed", "moines", "morris", "moses", "myers", "myknos", "nablus", "nanjing", "nantes", "naples", "neal", "netherlands", "nevis", "nostradamus", "oedipus", "olympus", "orleans", "orly", "papas", "paris", "parker", "pauling", "peking", "pershing", "peter", "peters", "philippines", 
 "phineas", "pisces", "pryor", "pythagoras", "queens", "rabelais", "ramses", "reynolds", "rhesus", "rhodes", "richards", "robins", "rodgers", "rogers", "rubens", "sagittarius", "seychelles", "socrates", "texas", "thames", "thomas", "tiberias", "tunis", "venus", "vilnius", "wales", "warner", "wilkins", "williams", "wyoming", "xmas", "yonkers", "zeus", "frances", "aarhus", "adonis", "andrews", "angus", "antares", "aquinas", "arcturus", "ares", "artemis", "augustus", "ayers", "barnabas", "barnes", "becker", "bejing", "biggs", "billings", "boeing", "boris", "borroughs", "briggs", "buenos", "calais", "caracas", "cassius", "cerberus", "ceres", "cervantes", "chantilly", "chartres", "chester", "connally", "conner", "coors", "cummings", "curtis", "daedalus", "dionysus", "dobbs", "dolores", "edmonds"};
+
+	  /// <summary>
+	  /// Value stored in the stemmer dictionary for a single word.
+	  /// </summary>
+	  internal class DictEntry
+	  {
+		/// <summary>Root form recorded for the word; may be <c>null</c>.</summary>
+		internal string root;
+
+		/// <summary>Flag supplied at construction that marks the word as an exception.</summary>
+		internal bool exception;
+
+		internal DictEntry(string root, bool isException)
+		{
+		  exception = isException;
+		  this.root = root;
+		}
+	  }
+
+	  private static readonly CharArrayMap<DictEntry> dict_ht = initializeDictHash();
+
+	  /*
+	   * Caching is turned off. The disabled members were:
+	   *   private int maxCacheSize;
+	   *   private CharArrayMap<String> cache = null;
+	   *   private static final String SAME = "SAME"; // use if stemmed form is the same
+	   */
+
+	  private readonly OpenStringBuilder word = new OpenStringBuilder();
+	  private int j; // index of final letter in stem (within word)
+	  private int k; /*
+	                  * INDEX of final letter in word. You must add 1 to k to get
+	                  * the current length of word. When you want the length of
+	                  * word, use the method wordLength, which returns (k+1).
+	                  */
+
+	  /*
+	   * Disabled cache initialization (caching is turned off):
+	   *   private void initializeStemHash() {
+	   *     if (maxCacheSize > 0) cache = new CharArrayMap<String>(maxCacheSize, false);
+	   *   }
+	   */
+
+	  /// <summary>Returns the character of <c>word</c> at index <c>k</c>.</summary>
+	  private char finalChar()
+	  {
+		int lastIndex = k;
+		return word.charAt(lastIndex);
+	  }
+
+	  /// <summary>Returns the character of <c>word</c> at index <c>k - 1</c>.</summary>
+	  private char penultChar()
+	  {
+		int beforeLast = k - 1;
+		return word.charAt(beforeLast);
+	  }
+
+	  /// <summary>
+	  /// Returns <c>true</c> when the character at <paramref name="index"/> is not
+	  /// a consonant according to <c>isCons</c>.
+	  /// </summary>
+	  private bool isVowel(int index)
+	  {
+		bool consonant = isCons(index);
+		return consonant == false;
+	  }
+
+	  /// <summary>
+	  /// Returns <c>true</c> when the character of <c>word</c> at
+	  /// <paramref name="index"/> counts as a consonant. The letters a, e, i, o, u
+	  /// never do; 'y' does at index 0 or when the preceding character is not a
+	  /// consonant; every other character does.
+	  /// </summary>
+	  private bool isCons(int index)
+	  {
+		switch (word.charAt(index))
+		{
+		  case 'a':
+		  case 'e':
+		  case 'i':
+		  case 'o':
+		  case 'u':
+			return false;
+
+		  case 'y':
+			// 'y' is judged by its left neighbour, except at the start of the word
+			return index == 0 || !isCons(index - 1);
+
+		  default:
+			return true;
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds the stemmer dictionary from the exception words, the direct
+	  /// conflations, the country/nationality pairs, the KStemData1..KStemData8
+	  /// word lists, the supplemental list and the proper nouns.
+	  /// </summary>
+	  /// <returns>The populated map from word to <see cref="DictEntry"/>.</returns>
+	  /// <exception cref="InvalidOperationException">
+	  /// A word occurs more than once across the source tables.
+	  /// </exception>
+	  private static CharArrayMap<DictEntry> initializeDictHash()
+	  {
+		CharArrayMap<DictEntry> d = new CharArrayMap<DictEntry>(Version.LUCENE_CURRENT, 1000, false);
+
+		// dictionary 1: each exception word is its own root and is flagged as an exception
+		foreach (string exceptionWord in exceptionWords)
+		{
+		  putUnique(d, exceptionWord, new DictEntry(exceptionWord, true), 1);
+		}
+
+		// dictionary 2: {variant, root} pairs
+		foreach (string[] conflation in directConflations)
+		{
+		  putUnique(d, conflation[0], new DictEntry(conflation[1], false), 2);
+		}
+
+		// dictionary 3: {variant, root} pairs for countries and nationalities
+		foreach (string[] nationality in countryNationality)
+		{
+		  putUnique(d, nationality[0], new DictEntry(nationality[1], false), 3);
+		}
+
+		// every remaining word shares one entry that has no root and is not an exception
+		DictEntry defaultEntry = new DictEntry(null, false);
+
+		// dictionary 4: the bulk word lists
+		string[][] wordLists = new string[][]
+		{
+		  KStemData1.data, KStemData2.data, KStemData3.data, KStemData4.data,
+		  KStemData5.data, KStemData6.data, KStemData7.data, KStemData8.data
+		};
+		foreach (string[] wordList in wordLists)
+		{
+		  foreach (string listedWord in wordList)
+		  {
+			putUnique(d, listedWord, defaultEntry, 4);
+		  }
+		}
+
+		// dictionary 5: supplemental words
+		foreach (string supplement in supplementDict)
+		{
+		  putUnique(d, supplement, defaultEntry, 5);
+		}
+
+		// dictionary 6: proper nouns
+		foreach (string properNoun in properNouns)
+		{
+		  putUnique(d, properNoun, defaultEntry, 6);
+		}
+
+		return d;
+	  }
+
+	  /// <summary>
+	  /// Adds <paramref name="key"/> to <paramref name="d"/>, failing if the key is
+	  /// already present.
+	  /// </summary>
+	  /// <param name="d">Dictionary being populated.</param>
+	  /// <param name="key">Word to register.</param>
+	  /// <param name="entry">Entry to store for the word.</param>
+	  /// <param name="dictNumber">Number of the source table, reported in the error message.</param>
+	  /// <exception cref="InvalidOperationException">The key is already in the dictionary.</exception>
+	  private static void putUnique(CharArrayMap<DictEntry> d, string key, DictEntry entry, int dictNumber)
+	  {
+		if (d.containsKey(key))
+		{
+		  throw new InvalidOperationException("Warning: Entry [" + key + "] already in dictionary " + dictNumber);
+		}
+		d.put(key, entry);
+	  }
+
+	  /// <summary>
+	  /// Returns <c>true</c> when <paramref name="ch"/> lies in the range 'a'..'z'.
+	  /// Terms must already be lowercased.
+	  /// </summary>
+	  private bool isAlpha(char ch)
+	  {
+		bool belowRange = ch < 'a';
+		bool aboveRange = ch > 'z';
+		return !(belowRange || aboveRange);
+	  }
+
+	  /// <summary>
+	  /// Length of the stem within the word.
+	  /// </summary>
+	  /// <returns><c>j + 1</c>, where <c>j</c> is the index of the final letter of the stem.</returns>
+	  private int stemLength()
+	  {
+		return j + 1;
+	  }
+
+	  /// <summary>
+	  /// Tests whether the word ends with the characters in <paramref name="s"/>.
+	  /// Returns <c>false</c> without touching <c>j</c> when the suffix is longer
+	  /// than <c>k</c>. Otherwise <c>j</c> is set to <c>k</c> on a mismatch, or to
+	  /// the index of the character before the suffix on a match.
+	  /// </summary>
+	  private bool endsIn(char[] s)
+	  {
+		int suffixLength = s.Length;
+		if (suffixLength > k)
+		{
+		  return false;
+		}
+
+		// index in word at which the suffix would start
+		int suffixStart = word.length() - suffixLength;
+		j = k;
+
+		int position = suffixStart;
+		foreach (char expected in s)
+		{
+		  if (expected != word.charAt(position))
+		  {
+			return false;
+		  }
+		  position++;
+		}
+
+		// index of the character BEFORE the suffix
+		j = suffixStart - 1;
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Tests whether the word ends with the two characters
+	  /// <paramref name="a"/><paramref name="b"/>. On a match <c>j</c> is set to
+	  /// <c>k - 2</c>; otherwise <c>j</c> is left unchanged.
+	  /// </summary>
+	  private bool endsIn(char a, char b)
+	  {
+		if (k < 2)
+		{
+		  return false;
+		}
+
+		// compared left to right since the endings have often already matched
+		bool matches = word.charAt(k - 1) == a && word.charAt(k) == b;
+		if (!matches)
+		{
+		  return false;
+		}
+
+		j = k - 2;
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Tests whether the word ends with the three characters
+	  /// <paramref name="a"/><paramref name="b"/><paramref name="c"/>. On a match
+	  /// <c>j</c> is set to <c>k - 3</c>; otherwise <c>j</c> is left unchanged.
+	  /// </summary>
+	  private bool endsIn(char a, char b, char c)
+	  {
+		if (k < 3)
+		{
+		  return false;
+		}
+
+		bool matches = word.charAt(k - 2) == a
+			&& word.charAt(k - 1) == b
+			&& word.charAt(k) == c;
+		if (!matches)
+		{
+		  return false;
+		}
+
+		j = k - 3;
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Tests whether the word ends with the four characters
+	  /// <paramref name="a"/><paramref name="b"/><paramref name="c"/><paramref name="d"/>.
+	  /// On a match <c>j</c> is set to <c>k - 4</c>; otherwise <c>j</c> is left
+	  /// unchanged.
+	  /// </summary>
+	  private bool endsIn(char a, char b, char c, char d)
+	  {
+		if (k < 4)
+		{
+		  return false;
+		}
+
+		bool matches = word.charAt(k - 3) == a
+			&& word.charAt(k - 2) == b
+			&& word.charAt(k - 1) == c
+			&& word.charAt(k) == d;
+		if (!matches)
+		{
+		  return false;
+		}
+
+		j = k - 4;
+		return true;
+	  }
+
+	  /// <summary>
+	  /// Returns the dictionary entry for the current word. A previously stored
+	  /// <c>matchedEntry</c> is returned without a new lookup; otherwise the
+	  /// dictionary is consulted and the result is stored in <c>matchedEntry</c>
+	  /// only when it is not an exception entry.
+	  /// </summary>
+	  /// <returns>The entry found, or <c>null</c> when the lookup yields none.</returns>
+	  private DictEntry wordInDict()
+	  {
+		// Disabled consistency check kept from the original source:
+		//   if (matchedEntry != null) {
+		//     if (dict_ht.get(word.getArray(), 0, word.size()) != matchedEntry) {
+		//       System.out.println("Uh oh... cached entry doesn't match");
+		//     }
+		//     return matchedEntry;
+		//   }
+		DictEntry cached = matchedEntry;
+		if (cached != null)
+		{
+		  return cached;
+		}
+
+		DictEntry found = dict_ht.get(word.Array, 0, word.length());
+		bool cacheable = found != null && !found.exception;
+		if (cacheable)
+		{
+		  // only cache if it's not an exception.
+		  matchedEntry = found;
+		}
+		// lookups.add(word.toString());
+		return found;
+	  }
+
+	  /// <summary>
+	  /// Convert plurals to singular form, and '-ies' to 'y'.
+	  /// Only acts when the final character of the word is 's'. Each candidate
+	  /// form is written into <c>word</c> (with <c>k</c> kept in step) and checked
+	  /// with <c>lookup()</c>; the method returns as soon as a candidate is found.
+	  /// </summary>
+	  private void plural()
+	  {
+		if (word.charAt(k) == 's')
+		{
+		  if (endsIn('i', 'e', 's'))
+		  {
+			// drop the final 's' and try the "-ie" form first
+			word.Length = j + 3;
+			k--;
+			if (lookup()) // ensure calories -> calorie
+			{
+			return;
+			}
+			// not found: put the 's' back, then replace the whole suffix with "y"
+			k++;
+			word.unsafeWrite('s');
+			Suffix = "y";
+			lookup();
+		  }
+		  else if (endsIn('e', 's'))
+		  {
+			/* try just removing the "s" */
+			word.Length = j + 2;
+			k--;
+
+			/*
+			 * note: don't check for exceptions here. So, `aides' -> `aide', but
+			 * `aided' -> `aid'. The exception for double s is used to prevent
+			 * crosses -> crosse. This is actually correct if crosses is a plural
+			 * noun (a type of racket used in lacrosse), but the verb is much more
+			 * common
+			 */
+
+			/*
+			 * YCS: this was the one place where lookup was not followed by return.
+			 * So restructure it. The previous form was:
+			 *   if ((j>0)&&(lookup(word.toString())) &&
+			 *       !((word.charAt(j) == 's') && (word.charAt(j-1) == 's'))) return;
+			 */
+			bool tryE = j > 0 && !((word.charAt(j) == 's') && (word.charAt(j - 1) == 's'));
+			if (tryE && lookup())
+			{
+				return;
+			}
+
+			/* try removing the "es" */
+
+			word.Length = j + 1;
+			k--;
+			if (lookup())
+			{
+				return;
+			}
+
+			/* the default is to retain the "e" */
+			word.unsafeWrite('e');
+			k++;
+
+			if (!tryE) // if we didn't try the "e" ending before
+			{
+				lookup();
+			}
+			return;
+		  }
+		  else
+		  {
+			if (word.length() > 3 && penultChar() != 's' && !endsIn('o', 'u', 's'))
+			{
+			  /* unless the word ends in "ous" or a double "s", remove the final "s" */
+
+			  word.Length = k;
+			  k--;
+			  lookup();
+			}
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Write-only shortcut for <c>setSuff</c>: replaces the current suffix with
+	  /// the assigned string, using its full length.
+	  /// </summary>
+	  private string Suffix
+	  {
+		  set
+		  {
+			string replacement = value;
+			setSuff(replacement, replacement.Length);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Replaces the old suffix with the first <paramref name="len"/> characters
+	  /// of <paramref name="s"/>: the word is cut back to the stem (<c>j + 1</c>
+	  /// characters), the new characters are written after it, and <c>k</c> is
+	  /// set to <c>j + len</c>.
+	  /// </summary>
+	  private void setSuff(string s, int len)
+	  {
+		word.Length = j + 1;
+
+		int written = 0;
+		while (written < len)
+		{
+		  word.unsafeWrite(s[written]);
+		  written++;
+		}
+
+		k = j + len;
+	  }
+
+	  /* Returns true if the word is found in the dictionary */
+	  // almost all uses of lookup() return immediately and are
+	  // followed by another lookup in the dict. Store the match
+	  // to avoid this double lookup.
+	  internal DictEntry matchedEntry = null;
+
+	  /// <summary>
+	  /// Looks the current word up in the dictionary and stores the result
+	  /// (possibly <c>null</c>) in <c>matchedEntry</c>.
+	  /// </summary>
+	  /// <returns><c>true</c> when an entry was found.</returns>
+	  private bool lookup()
+	  {
+		// Disabled debugging code kept from the original source:
+		//   String thisLookup = word.toString();
+		//   boolean added = lookups.add(thisLookup);
+		//   if (!added) {
+		//     System.out.println("######extra lookup:" + thisLookup);
+		//     // occasional extra lookups aren't necessarily errors... could happen by diff manipulations
+		//     // throw new RuntimeException("######extra lookup:" + thisLookup);
+		//   } else {
+		//     // System.out.println("new lookup:" + thisLookup);
+		//   }
+
+		DictEntry hit = dict_ht.get(word.Array, 0, word.size());
+		matchedEntry = hit;
+		return hit != null;
+	  }
+
+	  // Set<String> lookups = new HashSet<>();
+
+	  /// <summary>
+	  /// Convert past tense (-ed) to present, and `-ied' to `y'.
+	  /// Words of four letters or fewer are left alone (they are handled by a
+	  /// direct mapping). Each candidate form is written into <c>word</c> (with
+	  /// <c>k</c> kept in step) and checked against the dictionary; the method
+	  /// returns as soon as a candidate is accepted.
+	  /// </summary>
+	  private void pastTense()
+	  {
+		/*
+		 * Handle words less than 5 letters with a direct mapping This prevents
+		 * (fled -> fl).
+		 */
+		if (word.length() <= 4)
+		{
+			return;
+		}
+
+		if (endsIn('i', 'e', 'd'))
+		{
+		  word.Length = j + 3;
+		  k--;
+		  if (lookup()) // we almost always want to convert -ied to -y, but
+		  {
+		  return; // this isn't true for short words (died->die)
+		  }
+		  k++; // I don't know any long words that this applies to,
+		  word.unsafeWrite('d'); // but just in case...
+		  Suffix = "y";
+		  lookup();
+		  return;
+		}
+
+		/* the vowelInStem() is necessary so we don't stem acronyms */
+		if (endsIn('e', 'd') && vowelInStem())
+		{
+		  /* see if the root ends in `e' */
+		  word.Length = j + 2;
+		  k = j + 1;
+
+		  // Accept the "-e" form only when it is in the dictionary AND is not
+		  // flagged as an exception; otherwise fall through to the other forms.
+		  DictEntry entry = wordInDict();
+		  if (entry != null)
+		  {
+			if (!entry.exception) // if it's in the dictionary and not an exception
+			{
+			  return;
+			}
+		  }
+
+		  /* try removing the "ed" */
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /*
+		   * try removing a doubled consonant. if the root isn't found in the
+		   * dictionary, the default is to leave it doubled. This will correctly
+		   * capture `backfilled' -> `backfill' instead of `backfill' ->
+		   * `backfille', and seems correct most of the time
+		   */
+
+		  if (doubleC(k))
+		  {
+			word.Length = k;
+			k--;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(k));
+			k++;
+			lookup();
+			return;
+		  }
+
+		  /* if we have a `un-' prefix, then leave the word alone */
+		  /* (this will sometimes screw up with `under-', but we */
+		  /* will take care of that later) */
+
+		  if ((word.charAt(0) == 'u') && (word.charAt(1) == 'n'))
+		  {
+			word.unsafeWrite('e');
+			word.unsafeWrite('d');
+			k = k + 2;
+			// nolookup()
+			return;
+		  }
+
+		  /*
+		   * it wasn't found by just removing the `d' or the `ed', so prefer to end
+		   * with an `e' (e.g., `microcoded' -> `microcode').
+		   */
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  // nolookup() - we already tried the "e" ending
+		  return;
+		}
+	  }
+
+	  /// <summary>
+	  /// Returns <c>true</c> when the characters at <paramref name="i"/> and
+	  /// <c>i - 1</c> are the same and that character is a consonant.
+	  /// Always <c>false</c> for <c>i &lt; 1</c>.
+	  /// </summary>
+	  private bool doubleC(int i)
+	  {
+		bool hasPredecessor = i >= 1;
+		return hasPredecessor
+			&& word.charAt(i) == word.charAt(i - 1)
+			&& isCons(i);
+	  }
+
+	  /// <summary>
+	  /// Returns <c>true</c> when at least one of the first <c>stemLength()</c>
+	  /// characters of the word is a vowel according to <c>isVowel</c>.
+	  /// </summary>
+	  private bool vowelInStem()
+	  {
+		int position = 0;
+		while (position < stemLength())
+		{
+		  if (isVowel(position))
+		  {
+			return true;
+		  }
+		  position++;
+		}
+		return false;
+	  }
+
+	  /// <summary>
+	  /// Handle `-ing' endings.
+	  /// Words of five letters or fewer are left alone. For longer words ending
+	  /// in "ing" with a vowel in the stem, candidate forms are written into
+	  /// <c>word</c> (with <c>k</c> kept in step) in this order: stem + "e", bare
+	  /// stem, stem with a doubled final consonant reduced to one, and finally a
+	  /// default chosen without a dictionary check.
+	  /// </summary>
+	  private void aspect()
+	  {
+		/*
+		 * handle short words (aging -> age) via a direct mapping. This prevents
+		 * (thing -> the) in the version of this routine that ignores inflectional
+		 * variants that are mentioned in the dictionary (when the root is also
+		 * present)
+		 */
+
+		if (word.length() <= 5)
+		{
+			return;
+		}
+
+		/* the vowelinstem() is necessary so we don't stem acronyms */
+		if (endsIn('i', 'n', 'g') && vowelInStem())
+		{
+
+		  /* try adding an `e' to the stem and check against the dictionary */
+		  word.setCharAt(j + 1, 'e');
+		  word.Length = j + 2;
+		  k = j + 1;
+
+		  DictEntry entry = wordInDict();
+		  if (entry != null)
+		  {
+			if (!entry.exception) // if it's in the dictionary and not an exception
+			{
+			return;
+			}
+		  }
+
+		  /* adding on the `e' didn't work, so remove it */
+		  word.Length = k;
+		  k--; // note that `ing' has also been removed
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /* if I can remove a doubled consonant and get a word, then do so */
+		  if (doubleC(k))
+		  {
+			k--;
+			word.Length = k + 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(k)); // restore the doubled consonant
+
+			/* the default is to leave the consonant doubled */
+			/* (e.g.,`fingerspelling' -> `fingerspell'). Unfortunately */
+			/* `bookselling' -> `booksell' and `mislabelling' -> `mislabell'). */
+			/* Without making the algorithm significantly more complicated, this */
+			/* is the best I can do */
+			k++;
+			lookup();
+			return;
+		  }
+
+		  /*
+		   * the word wasn't in the dictionary after removing the stem, and then
+		   * checking with and without a final `e'. The default is to add an `e'
+		   * unless the word ends in two consonants, so `microcoding' ->
+		   * `microcode'. The two consonants restriction wouldn't normally be
+		   * necessary, but is needed because we don't try to deal with prefixes and
+		   * compounds, and most of the time it is correct (e.g., footstamping ->
+		   * footstamp, not footstampe; however, decoupled -> decoupl). We can
+		   * prevent almost all of the incorrect stems if we try to do some prefix
+		   * analysis first
+		   */
+
+		  if ((j > 0) && isCons(j) && isCons(j - 1))
+		  {
+			// stem ends in two consonants: keep the bare stem
+			k = j;
+			word.Length = k + 1;
+			// nolookup() because we already did according to the comment
+			return;
+		  }
+
+		  // default: bare stem plus a final 'e'
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  // nolookup(); we already tried an 'e' ending
+		  return;
+		}
+	  }
+
+	  /*
+	   * this routine deals with -ity endings. It accepts -ability, -ibility, and
+	   * -ality, even without checking the dictionary because they are so
+	   * productive. The first two are mapped to -ble, and the -ity is removed for
+	   * the latter
+	   */
+	  private void ityEndings()
+	  {
+		// k as it was on entry, so the original word can be put back after a
+		// failed attempt
+		int old_k = k;
+
+		if (endsIn('i', 't', 'y'))
+		{
+		  word.Length = j + 1; // try just removing -ity
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite('e'); // try removing -ity and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  // both attempts failed: turn the trial 'e' back into 'i' and re-append
+		  // "ty" so the word reads as it did on entry
+		  word.setCharAt(j + 1, 'i');
+		  word.append("ty");
+		  k = old_k;
+		  /*
+		   * the -ability and -ibility endings are highly productive, so just accept
+		   * them
+		   */
+		  if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'l'))
+		  {
+			word.Length = j - 1;
+			word.append("le"); // convert to -ble
+			k = j;
+			lookup();
+			return;
+		  }
+
+		  /* ditto for -ivity */
+		  if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'v'))
+		  {
+			word.Length = j + 1;
+			word.unsafeWrite('e'); // convert to -ive
+			k = j + 1;
+			lookup();
+			return;
+		  }
+		  /* ditto for -ality */
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l'))
+		  {
+			word.Length = j + 1;
+			k = j;
+			lookup();
+			return;
+		  }
+
+		  /*
+		   * if the root isn't in the dictionary, and the variant *is* there, then
+		   * use the variant. This allows `immunity'->`immune', but prevents
+		   * `capacity'->`capac'. If neither the variant nor the root form are in
+		   * the dictionary, then remove the ending as a default
+		   */
+
+		  // at this point the word has been restored, so this looks up the
+		  // unmodified -ity form
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /* the default is to remove -ity altogether */
+		  word.Length = j + 1;
+		  k = j;
+		  // nolookup(), we already did it.
+		  return;
+		}
+	  }
+
+	  /* handle -ence and -ance */
+	  private void nceEndings()
+	  {
+		// k as it was on entry, used to restore the word if nothing matches
+		int old_k = k;
+		char word_char;
+
+		if (endsIn('n', 'c', 'e'))
+		{
+		  // the character in front of -nce tells -ence from -ance; it is kept so
+		  // the original ending can be written back later
+		  word_char = word.charAt(j);
+		  if (!((word_char == 'e') || (word_char == 'a')))
+		  {
+			  return;
+		  }
+		  word.Length = j;
+		  word.unsafeWrite('e'); // try converting -e/ance to -e (adherence/adhere)
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  // try removing -e/ance altogether (disappearance/disappear)
+		  word.Length = j;
+		  k = j - 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite(word_char); // restore the original ending
+		  word.append("nce");
+		  k = old_k;
+		  // nolookup() because we restored the original ending
+		}
+		return;
+	  }
+
+	  /* handle -ness */
+	  private void nessEndings()
+	  {
+		// nothing to do unless the word ends in -ness
+		if (!endsIn('n', 'e', 's', 's'))
+		{
+		  return;
+		}
+
+		// -ness is a very productive ending, so the suffix is always stripped;
+		// the dictionary is only consulted afterwards via lookup()
+		k = j;
+		word.Length = k + 1;
+		if ('i' == word.charAt(k))
+		{
+		  // a stem that now ends in 'i' is rewritten to end in 'y'
+		  word.setCharAt(k, 'y');
+		}
+		lookup();
+	  }
+
+	  /* handle -ism */
+	  private void ismEndings()
+	  {
+		// nothing to do unless the word ends in -ism
+		if (!endsIn('i', 's', 'm'))
+		{
+		  return;
+		}
+
+		// -ism is a very productive ending, so the suffix is always stripped;
+		// the dictionary is only consulted afterwards via lookup()
+		k = j;
+		word.Length = k + 1;
+		lookup();
+	  }
+
+	  /* this routine deals with -ment endings. */
+	  private void mentEndings()
+	  {
+		int savedEnd = k;
+
+		if (!endsIn('m', 'e', 'n', 't'))
+		{
+		  return;
+		}
+
+		// strip -ment and keep the shorter form only if lookup() accepts it
+		word.Length = j + 1;
+		k = j;
+		if (!lookup())
+		{
+		  // not accepted: put the suffix back and leave the word as it was
+		  word.append("ment");
+		  k = savedEnd;
+		  // nolookup
+		}
+	  }
+
+	  /* this routine deals with -ize endings. */
+	  private void izeEndings()
+	  {
+		// k as it was on entry, used to restore the word if nothing matches
+		int old_k = k;
+
+		if (endsIn('i', 'z', 'e'))
+		{
+		  word.Length = j + 1; // try removing -ize entirely
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  // write the 'i' of the suffix back before trying the other variants
+		  word.unsafeWrite('i');
+
+		  if (doubleC(j)) // allow for a doubled consonant
+		  {
+			// drop the second copy of the consonant and try again
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			// not found: re-append the consonant that was dropped
+			word.unsafeWrite(word.charAt(j - 1));
+		  }
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e'); // try removing -ize and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  // nothing matched: rebuild the original -ize form
+		  word.Length = j + 1;
+		  word.append("ize");
+		  k = old_k;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  /* handle -ency and -ancy */
+	  private void ncyEndings()
+	  {
+		if (endsIn('n', 'c', 'y'))
+		{
+		  // only -ency and -ancy are handled; any other letter before -ncy
+		  // leaves the word untouched
+		  if (!((word.charAt(j) == 'e') || (word.charAt(j) == 'a')))
+		  {
+			  return;
+		  }
+		  word.setCharAt(j + 2, 't'); // try converting -ncy to -nt
+		  word.Length = j + 3;
+		  k = j + 2;
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 2, 'c'); // the default is to convert it to -nce
+		  word.unsafeWrite('e');
+		  k = j + 3;
+		  // the -nce form is kept whether or not this lookup finds it
+		  lookup();
+		}
+		return;
+	  }
+
+	  /* handle -able and -ible */
+	  private void bleEndings()
+	  {
+		// k as it was on entry, used to restore the word if nothing matches
+		int old_k = k;
+		char word_char;
+
+		if (endsIn('b', 'l', 'e'))
+		{
+		  // only -able and -ible are handled
+		  if (!((word.charAt(j) == 'a') || (word.charAt(j) == 'i')))
+		  {
+			  return;
+		  }
+		  // remember whether it was 'a' or 'i' so the ending can be rebuilt
+		  word_char = word.charAt(j);
+		  word.Length = j; // try just removing the ending
+		  k = j - 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  if (doubleC(k)) // allow for a doubled consonant
+		  {
+			// drop the second copy of the consonant and try again
+			word.Length = k;
+			k--;
+			if (lookup())
+			{
+				return;
+			}
+			// not found: re-append the consonant that was dropped
+			k++;
+			word.unsafeWrite(word.charAt(k - 1));
+		  }
+		  word.Length = j;
+		  word.unsafeWrite('e'); // try removing -a/ible and adding -e
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j;
+		  word.append("ate"); // try removing -able and adding -ate
+		  /* (e.g., compensable/compensate) */
+		  k = j + 2;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j;
+		  word.unsafeWrite(word_char); // restore the original values
+		  word.append("ble");
+		  k = old_k;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  /*
+	   * handle -ic endings. This is fairly straightforward, but this is also the
+	   * only place we try *expanding* an ending, -ic -> -ical. This is to handle
+	   * cases like `canonic' -> `canonical'
+	   */
+	  private void icEndings()
+	  {
+		if (endsIn('i', 'c'))
+		{
+		  // NOTE(review): j + 3 looks like the current length for a word ending
+		  // in -ic, so this only positions the append - confirm against endsIn
+		  word.Length = j + 3;
+		  word.append("al"); // try converting -ic to -ical
+		  k = j + 4;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 1, 'y'); // try converting -ic to -y
+		  word.Length = j + 2;
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  // same length as the -y attempt, so k stays as it is
+		  word.setCharAt(j + 1, 'e'); // try converting -ic to -e
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1; // try removing -ic altogether
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.append("ic"); // restore the original ending
+		  k = j + 2;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  // Suffixes tested by ionEndings(); readonly so the shared static
+	  // references cannot be reassigned after type initialization.
+	  private static readonly char[] ization = "ization".ToCharArray();
+	  private static readonly char[] ition = "ition".ToCharArray();
+	  private static readonly char[] ation = "ation".ToCharArray();
+	  private static readonly char[] ication = "ication".ToCharArray();
+
+	  /* handle some derivational endings */
+	  /*
+	   * this routine deals with -ion, -ition, -ation, -ization, and -ication. The
+	   * -ization ending is always converted to -ize
+	   */
+	  private void ionEndings()
+	  {
+		// k as it was on entry, used to restore the word after failed attempts
+		int old_k = k;
+		if (!endsIn('i', 'o', 'n'))
+		{
+		  return;
+		}
+
+		if (endsIn(ization))
+		{
+		  /*
+		   * the -ize ending is very productive, so simply accept it as the root
+		   */
+		  word.Length = j + 3;
+		  word.unsafeWrite('e');
+		  k = j + 3;
+		  lookup();
+		  return;
+		}
+
+		if (endsIn(ition))
+		{
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  // remove -ition and add `e', and check against the dictionary
+		  if (lookup())
+		  {
+		  return; // (e.g., definition->define, opposition->oppose)
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ition");
+		  k = old_k;
+		  // nolookup()
+		}
+		else if (endsIn(ation))
+		{
+		  word.Length = j + 3;
+		  word.unsafeWrite('e');
+		  k = j + 3;
+		  if (lookup()) // remove -ion and add `e', and check against the dictionary
+		  {
+		  return; // (elimination -> eliminate)
+		  }
+
+		  // remove -ation and add `e', and check against the dictionary
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  // just remove -ation (resignation->resign) and check dictionary
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ation");
+		  k = old_k;
+		  // nolookup()
+
+		}
+
+		/*
+		 * test -ication after -ation is attempted (e.g., `complication->complicate'
+		 * rather than `complication->comply')
+		 */
+
+		if (endsIn(ication))
+		{
+		  word.Length = j + 1;
+		  word.unsafeWrite('y');
+		  k = j + 1;
+		  // remove -ication and add `y', and check against the dictionary
+		  if (lookup())
+		  {
+		  return; // (e.g., amplification -> amplify)
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ication");
+		  k = old_k;
+		  // nolookup()
+		}
+
+		// if (endsIn(ion)) {
+		if (true) // we checked for this earlier... just need to set "j"
+		{
+		  // the longer-suffix tests above moved j, so point it back at the
+		  // character just before the three-letter -ion
+		  j = k - 3; // YCS
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e');
+		  k = j + 1;
+		  if (lookup()) // remove -ion and add `e', and check against the dictionary
+		  {
+		  return;
+		  }
+
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup()) // remove -ion, and if it's found, treat that as the root
+		  {
+		  return;
+		  }
+
+		  /* restore original values */
+		  word.Length = j + 1;
+		  word.append("ion");
+		  k = old_k;
+		  // nolookup()
+		}
+
+		// nolookup(); all of the other paths restored original values
+		return;
+	  }
+
+	  /*
+	   * this routine deals with -er, -or, -ier, and -eer. The -izer ending is
+	   * always converted to -ize
+	   */
+	  private void erAndOrEndings()
+	  {
+		// k as it was on entry, used to restore the word if nothing matches
+		int old_k = k;
+
+		// cheap exit: every ending handled here finishes in 'r'
+		if (word.charAt(k) != 'r') // YCS
+		{
+			return;
+		}
+
+		char word_char; // so we can remember if it was -er or -or
+
+		if (endsIn('i', 'z', 'e', 'r'))
+		{
+		  /*
+		   * -ize is very productive, so accept it as the root
+		   */
+		  word.Length = j + 4;
+		  k = j + 3;
+		  lookup();
+		  return;
+		}
+
+		if (endsIn('e', 'r') || endsIn('o', 'r'))
+		{
+		  word_char = word.charAt(j + 1);
+		  if (doubleC(j))
+		  {
+			// drop the ending together with the second copy of the consonant
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite(word.charAt(j - 1)); // restore the doubled consonant
+		  }
+
+		  if (word.charAt(j) == 'i') // do we have a -ier ending?
+		  {
+			word.setCharAt(j, 'y');
+			word.Length = j + 1;
+			k = j;
+			if (lookup()) // yes, so check against the dictionary
+			{
+			return;
+			}
+			word.setCharAt(j, 'i'); // restore the endings
+			word.unsafeWrite('e');
+		  }
+
+		  if (word.charAt(j) == 'e') // handle -eer
+		  {
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			word.unsafeWrite('e');
+		  }
+
+		  // NOTE(review): this grows the length back over characters that were
+		  // cut off above, so it presumably relies on the buffer still holding
+		  // them - confirm against the word buffer implementation
+		  word.Length = j + 2; // remove the -r ending
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j + 1; // try removing -er/-or
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.unsafeWrite('e'); // try removing -or and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  word.Length = j + 1;
+		  word.unsafeWrite(word_char);
+		  word.unsafeWrite('r'); // restore the word to the way it was
+		  k = old_k;
+		  // nolookup()
+		}
+
+	  }
+
+	  /*
+	   * this routine deals with -ly endings. The -ally ending is always converted
+	   * to -al. Sometimes this will temporarily leave us with a non-word (e.g.,
+	   * heuristically maps to heuristical), but then the -al is removed in the next
+	   * step.
+	   */
+	  private void lyEndings()
+	  {
+		// k as it was on entry, used when the -ly form is rebuilt below
+		int old_k = k;
+
+		if (endsIn('l', 'y'))
+		{
+
+		  word.setCharAt(j + 2, 'e'); // try converting -ly to -le
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  // not found: write the 'y' back before the next attempt
+		  word.setCharAt(j + 2, 'y');
+
+		  word.Length = j + 1; // try just removing the -ly
+		  k = j;
+
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  // always convert -ally to -al
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'l'))
+		  {
+		  return;
+		  }
+		  word.append("ly");
+		  k = old_k;
+
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 'b'))
+		  {
+			// always convert -ably to -able
+			word.setCharAt(j + 2, 'e');
+			k = j + 2;
+			return;
+		  }
+
+		  if (word.charAt(j) == 'i') // e.g., militarily -> military
+		  {
+			word.Length = j;
+			word.unsafeWrite('y');
+			k = j;
+			if (lookup())
+			{
+				return;
+			}
+			// not found: rebuild the -ily form
+			word.Length = j;
+			word.append("ily");
+			k = old_k;
+		  }
+
+		  word.Length = j + 1; // the default is to remove -ly
+
+		  k = j;
+		  // nolookup()... we already tried removing the "ly" variant
+		}
+		return;
+	  }
+
+	  /*
+	   * this routine deals with -al endings. Some of the endings from the previous
+	   * routine are finished up here.
+	   */
+	  private void alEndings()
+	  {
+		// k as it was on entry, used to restore the word after failed attempts
+		int old_k = k;
+
+		// words shorter than four characters are left alone
+		if (word.length() < 4)
+		{
+			return;
+		}
+		if (endsIn('a', 'l'))
+		{
+		  word.Length = j + 1;
+		  k = j;
+		  if (lookup()) // try just removing the -al
+		  {
+		  return;
+		  }
+
+		  if (doubleC(j)) // allow for a doubled consonant
+		  {
+			// drop the second copy of the consonant and try again
+			word.Length = j;
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			// not found: re-append the consonant that was dropped
+			word.unsafeWrite(word.charAt(j - 1));
+		  }
+
+		  word.Length = j + 1;
+		  word.unsafeWrite('e'); // try removing the -al and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1;
+		  word.append("um"); // try converting -al to -um
+		  /* (e.g., optimal - > optimum ) */
+		  k = j + 2;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.Length = j + 1;
+		  word.append("al"); // restore the ending to the way it was
+		  k = old_k;
+
+		  if ((j > 0) && (word.charAt(j - 1) == 'i') && (word.charAt(j) == 'c'))
+		  {
+			word.Length = j - 1; // try removing -ical
+			k = j - 2;
+			if (lookup())
+			{
+				return;
+			}
+
+			word.Length = j - 1;
+			word.unsafeWrite('y'); // try turning -ical to -y (e.g., bibliographical)
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+
+			word.Length = j - 1;
+			word.append("ic"); // the default is to convert -ical to -ic
+			k = j;
+			// nolookup() ... converting ical to ic means removing "al" which we
+			// already tried
+			// ERROR
+			// (despite the note above, lookup() is still called here)
+			lookup();
+			return;
+		  }
+
+		  if (word.charAt(j) == 'i') // sometimes -ial endings should be removed
+		  {
+			word.Length = j; // (sometimes it gets turned into -y, but we
+			k = j - 1; // aren't dealing with that case for now)
+			if (lookup())
+			{
+				return;
+			}
+			// not found: rebuild the -ial form and look it up as it was
+			word.append("ial");
+			k = old_k;
+			lookup();
+		  }
+
+		}
+		return;
+	  }
+
+	  /*
+	   * this routine deals with -ive endings. It normalizes some of the -ative
+	   * endings directly, and also maps some -ive endings to -ion.
+	   */
+	  private void iveEndings()
+	  {
+		// k as it was on entry, used to restore the word after failed attempts
+		int old_k = k;
+
+		if (endsIn('i', 'v', 'e'))
+		{
+		  word.Length = j + 1; // try removing -ive entirely
+		  k = j;
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.unsafeWrite('e'); // try removing -ive and adding -e
+		  k = j + 1;
+		  if (lookup())
+		  {
+			  return;
+		  }
+		  // not found: rebuild the -ive form before testing for -ative
+		  word.Length = j + 1;
+		  word.append("ive");
+		  if ((j > 0) && (word.charAt(j - 1) == 'a') && (word.charAt(j) == 't'))
+		  {
+			word.setCharAt(j - 1, 'e'); // try removing -ative and adding -e
+			word.Length = j; // (e.g., determinative -> determine)
+			k = j - 1;
+			if (lookup())
+			{
+				return;
+			}
+			// NOTE(review): k is not updated for this shorter form before the
+			// lookup - confirm lookup() does not depend on k
+			word.Length = j - 1; // try just removing -ative
+			if (lookup())
+			{
+				return;
+			}
+
+			word.append("ative");
+			k = old_k;
+		  }
+
+		  /* try mapping -ive to -ion (e.g., injunctive/injunction) */
+		  word.setCharAt(j + 2, 'o');
+		  word.setCharAt(j + 3, 'n');
+		  if (lookup())
+		  {
+			  return;
+		  }
+
+		  word.setCharAt(j + 2, 'v'); // restore the original values
+		  word.setCharAt(j + 3, 'e');
+		  k = old_k;
+		  // nolookup()
+		}
+		return;
+	  }
+
+	  /// <summary>
+	  /// Creates a stemmer; the constructor itself performs no work.
+	  /// </summary>
+	  internal KStemmer()
+	  {
+	  }
+
+	  /// <summary>
+	  /// Stems <paramref name="term"/> and returns the stem, or the term itself
+	  /// when the char[] overload reports that it was not changed.
+	  /// </summary>
+	  internal virtual string stem(string term)
+	  {
+		// a false result from the char[] overload means "hand the input back"
+		return stem(term.ToCharArray(), term.Length) ? asString() : term;
+	  }
+
+	  /// <summary>
+	  /// Returns the result of the stem (assuming the word was changed) as a String.
+	  /// </summary>
+	  internal virtual string asString()
+	  {
+		// Read the String property (the recorded dictionary root). The raw port
+		// had the C# keyword `string` here, which is not an expression and does
+		// not compile.
+		string s = this.String;
+		if (s != null)
+		{
+			return s;
+		}
+		// no root was recorded: the stem is whatever is left in the word buffer
+		return word.ToString();
+	  }
+
+	  /// <summary>
+	  /// Returns the recorded root when there is one, otherwise the word buffer.
+	  /// </summary>
+	  // NOTE(review): CharSequence is a Java type carried over by the raw port;
+	  // confirm that both string and the word buffer type convert to it in C#.
+	  internal virtual CharSequence asCharSequence()
+	  {
+		return result != null ? result : word;
+	  }
+
+	  /// <summary>
+	  /// The root recorded by the last call to stem(char[], int), or null when
+	  /// none was recorded.
+	  /// </summary>
+	  internal virtual string String
+	  {
+		get { return result; }
+	  }
+
+	  /// <summary>
+	  /// The array exposed by the word buffer (word.Array).
+	  /// </summary>
+	  internal virtual char[] Chars
+	  {
+		get { return word.Array; }
+	  }
+
+	  /// <summary>
+	  /// The current length reported by the word buffer.
+	  /// </summary>
+	  internal virtual int Length
+	  {
+		get { return word.length(); }
+	  }
+
+	  // Root taken from a dictionary entry by stem(char[], int); reset to null at
+	  // the start of every call and left null when no root was found.
+	  internal string result;
+
+	  private bool matched()
+	  {
+		// Debugging aid kept from the Java original (disabled):
+		//
+		// if (!lookups.contains(word.toString())) { throw new
+		// RuntimeException("didn't look up "+word.toString()+" prev="+prevLookup);
+		// }
+		//
+		// lookup();
+
+		// true once a dictionary entry has been stored in matchedEntry
+		return matchedEntry != null;
+	  }
+
+	  /// <summary>
+	  /// Stems the text in the token. Returns true if changed.
+	  /// </summary>
+	  internal virtual bool stem(char[] term, int len)
+	  {
+
+		// forget the root recorded by any previous call
+		result = null;
+
+		k = len - 1;
+		// terms of one or two characters, or of MaxWordLen characters and more,
+		// are not stemmed
+		if ((k <= 1) || (k >= MaxWordLen - 1))
+		{
+		  return false; // don't stem
+		}
+
+		// first check the stemmer dictionaries, and avoid using the
+		// cache if it's in there.
+		DictEntry entry = dict_ht.get(term, 0, len);
+		if (entry != null)
+		{
+		  if (entry.root != null)
+		  {
+			result = entry.root;
+			return true;
+		  }
+		  // a dictionary word without a root is reported as unchanged
+		  return false;
+		}
+
+		// Disabled cache lookup kept from the Java original:
+		//
+		// caching off is normally faster if (cache == null) initializeStemHash();
+		//
+		// // now check the cache, before we copy chars to "word" if (cache != null)
+		// { String val = cache.get(term, 0, len); if (val != null) { if (val !=
+		// SAME) { result = val; return true; } return false; } }
+
+		word.reset();
+		// allocate enough space so that an expansion is never needed
+		word.reserve(len + 10);
+		for (int i = 0; i < len; i++)
+		{
+		  char ch = term[i];
+		  if (!isAlpha(ch)) // don't stem
+		  {
+			  return false;
+		  }
+		  // don't lowercase... it's a requirement that lowercase filter be
+		  // used before this stemmer.
+		  word.unsafeWrite(ch);
+		}
+
+		matchedEntry = null;
+		// Disabled debugging aid kept from the Java original:
+		//
+		// lookups.clear(); lookups.add(word.toString());
+
+		/*
+		 * This while loop will never be executed more than one time; it is here
+		 * only to allow the break statement to be used to escape as soon as a word
+		 * is recognized
+		 */
+		while (true)
+		{
+		  // YCS: extra lookup()s were inserted so we don't need to
+		  // do an extra wordInDict() here.
+		  plural();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  pastTense();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  aspect();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ityEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  nessEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ionEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  erAndOrEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  lyEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  alEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  // NOTE(review): the value assigned here is overwritten after the loop
+		  // without being read; the call is presumably kept for a side effect of
+		  // wordInDict() - confirm before removing
+		  entry = wordInDict();
+		  iveEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  izeEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  mentEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  bleEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ismEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  icEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  ncyEndings();
+		  if (matched())
+		  {
+			  break;
+		  }
+		  nceEndings();
+		  // last rule: the result of matched() is not needed, the loop ends anyway
+		  matched();
+		  break;
+		}
+
+		/*
+		 * try for a direct mapping (allows for cases like `Italian'->`Italy' and
+		 * `Italians'->`Italy')
+		 */
+		entry = matchedEntry;
+		if (entry != null)
+		{
+		  result = entry.root; // may be null, which means that "word" is the stem
+		}
+
+		// Disabled cache update kept from the Java original:
+		//
+		// caching off is normally faster if (cache != null && cache.size() <
+		// maxCacheSize) { char[] key = new char[len]; System.arraycopy(term, 0,
+		// key, 0, len); if (result != null) { cache.put(key, result); } else {
+		// cache.put(key, word.toString()); } }
+
+		// Disabled debugging aid kept from the Java original:
+		//
+		// if (entry == null) { if (!word.toString().equals(new String(term,0,len)))
+		// { System.out.println("CASE:" + word.toString() + "," + new
+		// String(term,0,len));
+		//
+		// } }
+
+		// no entry matched means result is "word"
+		// (true is returned on this path whether or not a rule altered the word)
+		return true;
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs
new file mode 100644
index 0000000..7933292
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilter.cs
@@ -0,0 +1,81 @@
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+	/// <summary>
+	/// Transforms the token stream as per the Porter stemming algorithm.
+	///    Note: the input to the stemming filter must already be in lower case,
+	///    so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
+	///    down the Tokenizer chain in order for this to work properly!
+	///    <P>
+	///    To use this filter with other analyzers, you'll want to write an
+	///    Analyzer class that sets up the TokenStream chain as you want it.
+	///    To use this with LowerCaseTokenizer, for example, you'd write an
+	///    analyzer like this:
+	///    <P>
+	///    <PRE class="prettyprint">
+	///    class MyAnalyzer extends Analyzer {
+	///      {@literal @Override}
+	///      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+	///        Tokenizer source = new LowerCaseTokenizer(version, reader);
+	///        return new TokenStreamComponents(source, new PorterStemFilter(source));
+	///      }
+	///    }
+	///    </PRE>
+	///    <para>
+	///    Note: This filter is aware of the <seealso cref="KeywordAttribute"/>. To prevent
+	///    certain terms from being passed to the stemmer
+	///    <seealso cref="KeywordAttribute#isKeyword()"/> should be set to <code>true</code>
+	///    in a previous <seealso cref="TokenStream"/>.
+	/// 
+	///    Note: For including the original term as well as the stemmed version, see
+	///   <seealso cref="org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory"/>
+	///    </para>
+	/// </summary>
+	public sealed class PorterStemFilter : TokenFilter
+	{
+	  private readonly PorterStemmer stemmer = new PorterStemmer();
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  /// <summary>
+	  /// Wraps <paramref name="in"/> and registers the term and keyword
+	  /// attributes this filter reads. </summary>
+	  public PorterStemFilter(TokenStream @in) : base(@in)
+	  {
+		// A C# field initializer cannot call an instance method (CS0236), so the
+		// attributes are registered here, after the base constructor has run.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances the wrapped stream and, unless the token is flagged as a
+	  /// keyword, replaces the term with its stem when the stemmer reports a
+	  /// change. </summary>
+	  /// <returns> false once the wrapped stream has no more tokens </returns>
+	  public override bool incrementToken()
+	  {
+		if (!input.incrementToken())
+		{
+		  return false;
+		}
+
+		// keyword tokens are passed through untouched
+		if ((!keywordAttr.Keyword) && stemmer.stem(termAtt.buffer(), 0, termAtt.length()))
+		{
+		  termAtt.copyBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
+		}
+		return true;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs
new file mode 100644
index 0000000..588b559
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemFilterFactory.cs
@@ -0,0 +1,55 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Factory for <seealso cref="PorterStemFilter"/>.
+	/// <pre class="prettyprint">
+	/// &lt;fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100"&gt;
+	///   &lt;analyzer&gt;
+	///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+	///     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+	///     &lt;filter class="solr.PorterStemFilterFactory"/&gt;
+	///   &lt;/analyzer&gt;
+	/// &lt;/fieldType&gt;</pre>
+	/// </summary>
+	public class PorterStemFilterFactory : TokenFilterFactory
+	{
+
+	  /// <summary>
+	  /// Creates a new PorterStemFilterFactory </summary>
+	  /// <exception cref="System.ArgumentException"> if <paramref name="args"/>
+	  /// is not empty after the base constructor has run </exception>
+	  public PorterStemFilterFactory(IDictionary<string, string> args) : base(args)
+	  {
+		if (args.Count > 0)
+		{
+		  // Concatenating the dictionary itself only prints its type name in
+		  // .NET, so the offending entries are listed explicitly.
+		  throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args));
+		}
+	  }
+
+	  /// <summary>
+	  /// Wraps <paramref name="input"/> in a new <see cref="PorterStemFilter"/>. </summary>
+	  public override PorterStemFilter create(TokenStream input)
+	  {
+		return new PorterStemFilter(input);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
new file mode 100644
index 0000000..16d01e1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/En/PorterStemmer.cs
@@ -0,0 +1,871 @@
+using System;
+
+namespace org.apache.lucene.analysis.en
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/*
+	
+	   Porter stemmer in Java. The original paper is in
+	
+	       Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+	       no. 3, pp 130-137,
+	
+	   See also http://www.tartarus.org/~martin/PorterStemmer/index.html
+	
+	   Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+	   The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+	   is then outside the bounds of b.
+	
+	   Similarly,
+	
+	   Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+	   'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+	   b[j] is then outside the bounds of b.
+	
+	   Release 3.
+	
+	   [ This version is derived from Release 3, modified by Brian Goetz to
+	     optimize for fewer object creations.  ]
+	
+	*/
+
+
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+
+	/// 
+	/// <summary>
+	/// Stemmer, implementing the Porter Stemming Algorithm
+	/// 
+	/// The Stemmer class transforms a word into its root form.  The input
+	/// word can be provided a character at a time (by calling add()), or at once
+	/// by calling one of the various stem(something) methods.
+	/// </summary>
+
+	internal class PorterStemmer
+	{
+	  private char[] b;
+	  private int i, j, k, k0; // offset into b
+	  private bool dirty = false;
+	  private const int INITIAL_SIZE = 50;
+
+	  /// <summary>
+	  /// Creates a stemmer whose buffer starts with room for INITIAL_SIZE
+	  /// characters and which holds no word yet.
+	  /// </summary>
+	  public PorterStemmer()
+	  {
+		i = 0;
+		b = new char[INITIAL_SIZE];
+	  }
+
+	  /// <summary>
+	  /// Prepares the stemmer for a new word: the write position goes back to the
+	  /// start of the buffer and the "changed" flag is cleared.  Call this between
+	  /// words when feeding characters through add(char) followed by stem().
+	  /// </summary>
+	  public virtual void reset()
+	  {
+		  dirty = false;
+		  i = 0;
+	  }
+
+	  /// <summary>
+	  /// Appends one character to the word being built, growing the buffer first
+	  /// when it is already full.  Call stem() once the whole word has been added.
+	  /// </summary>
+	  public virtual void add(char ch)
+	  {
+		int position = i;
+		if (position >= b.Length)
+		{
+		  b = ArrayUtil.grow(b, position + 1);
+		}
+		b[position] = ch;
+		i = position + 1;
+	  }
+
+	  /// <summary>
+	  /// Returns the current word (the first ResultLength characters of the
+	  /// buffer) as a new string.  Reading ResultBuffer and ResultLength directly
+	  /// avoids this allocation.
+	  /// </summary>
+	  public override string ToString()
+	  {
+		  int length = i;
+		  return new string(b, 0, length);
+	  }
+
+	  /// <summary>
+	  /// Number of valid characters in ResultBuffer, i.e. the length of the word
+	  /// left in the buffer after stemming.
+	  /// </summary>
+	  public virtual int ResultLength
+	  {
+		  get { return i; }
+	  }
+
+	  /// <summary>
+	  /// The internal character buffer itself (not a copy).  Only the first
+	  /// ResultLength characters belong to the result.
+	  /// </summary>
+	  public virtual char[] ResultBuffer
+	  {
+		  get { return b; }
+	  }
+
+	  /* cons(i) is true <=> b[i] is a consonant. a, e, i, o, u are never
+	     consonants; y is one at the start of the word (k0) or right after a
+	     non-consonant. */
+
+	  private bool cons(int i)
+	  {
+		char c = b[i];
+		if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
+		{
+		  return false;
+		}
+		if (c == 'y')
+		{
+		  return i == k0 || !cons(i - 1);
+		}
+		return true;
+	  }
+
+	  /* m() measures the number of consonant sequences between k0 and j. if c is
+	     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+	     presence,
+	
+	          <c><v>       gives 0
+	          <c>vc<v>     gives 1
+	          <c>vcvc<v>   gives 2
+	          <c>vcvcvc<v> gives 3
+	          ....
+	  */
+
+	  private int m()
+	  {
+		int count = 0;
+		int pos = k0;
+		// skip the optional leading consonant run
+		while (pos <= j && cons(pos))
+		{
+		  pos++;
+		}
+		if (pos > j)
+		{
+		  return count;
+		}
+		pos++;
+		for (;;)
+		{
+		  // rest of the vowel run
+		  while (pos <= j && !cons(pos))
+		  {
+			pos++;
+		  }
+		  if (pos > j)
+		  {
+			return count;
+		  }
+		  // a consonant follows a vowel: one more "vc" pair
+		  pos++;
+		  count++;
+		  // rest of the consonant run
+		  while (pos <= j && cons(pos))
+		  {
+			pos++;
+		  }
+		  if (pos > j)
+		  {
+			return count;
+		  }
+		  pos++;
+		}
+	  }
+
+	  /* vowelinstem() is true <=> some position in k0,...j is not a consonant */
+
+	  private bool vowelinstem()
+	  {
+		int pos = k0;
+		while (pos <= j)
+		{
+		  if (!cons(pos))
+		  {
+			return true;
+		  }
+		  pos++;
+		}
+		return false;
+	  }
+
+	  /* doublec(j) is true <=> b[j-1] and b[j] are the same consonant; always
+	     false when j has no predecessor inside the word (j < k0 + 1). */
+
+	  private bool doublec(int j)
+	  {
+		bool hasPredecessor = j >= k0 + 1;
+		return hasPredecessor && b[j] == b[j - 1] && cons(j);
+	  }
+
+	  /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+	     and also if the second c is not w,x or y. this is used when trying to
+	     restore an e at the end of a short word. e.g.
+	
+	          cav(e), lov(e), hop(e), crim(e), but
+	          snow, box, tray.
+	
+	  */
+
+	  private bool cvc(int i)
+	  {
+		bool shaped = i >= k0 + 2 && cons(i) && !cons(i - 1) && cons(i - 2);
+		if (!shaped)
+		{
+		  return false;
+		}
+		char last = b[i];
+		return !(last == 'w' || last == 'x' || last == 'y');
+	  }
+
+	  /* ends(s) is true <=> k0,...k ends with the string s. On a match j is set
+	     to k - s.Length, the index just before the suffix; otherwise j is left
+	     untouched. */
+
+	  private bool ends(string s)
+	  {
+		int length = s.Length;
+		int start = k - length + 1;
+		if (start < k0)
+		{
+		  return false;
+		}
+		int idx = 0;
+		while (idx < length)
+		{
+		  if (s[idx] != b[start + idx])
+		  {
+			return false;
+		  }
+		  idx++;
+		}
+		j = k - length;
+		return true;
+	  }
+
+	  /* setto(s) writes the characters of s into b starting at j+1, moves k to
+	     the last character written, and marks the word as changed. */
+
+	  internal virtual void setto(string s)
+	  {
+		int length = s.Length;
+		int dest = j + 1;
+		foreach (char c in s)
+		{
+		  b[dest] = c;
+		  dest++;
+		}
+		k = j + length;
+		dirty = true;
+	  }
+
+	  /* r(s) calls setto(s) only when m() > 0; used by step3 and step4. */
+
+	  internal virtual void r(string s)
+	  {
+		  if (m() <= 0)
+		  {
+			  return;
+		  }
+		  setto(s);
+	  }
+
+	  /* step1() gets rid of plurals and -ed or -ing. e.g.
+	
+	           caresses  ->  caress
+	           ponies    ->  poni
+	           ties      ->  ti
+	           caress    ->  caress
+	           cats      ->  cat
+	
+	           feed      ->  feed
+	           agreed    ->  agree
+	           disabled  ->  disable
+	
+	           matting   ->  mat
+	           mating    ->  mate
+	           meeting   ->  meet
+	           milling   ->  mill
+	           messing   ->  mess
+	
+	           meetings  ->  meet
+	
+	  */
+
+	  private void step1()
+	  {
+		// plurals: -sses -> -ss, -ies -> -i, otherwise drop a final s not preceded by s
+		if (b[k] == 's')
+		{
+		  if (ends("sses"))
+		  {
+			k -= 2;
+		  }
+		  else if (ends("ies"))
+		  {
+			setto("i");
+		  }
+		  else if (b[k - 1] != 's')
+		  {
+			k--;
+		  }
+		}
+		// -eed -> -ee when the stem has m() > 0; -eed never goes on to the -ed rule
+		if (ends("eed"))
+		{
+		  if (m() > 0)
+		  {
+			k--;
+		  }
+		  return;
+		}
+		bool stripped = (ends("ed") || ends("ing")) && vowelinstem();
+		if (!stripped)
+		{
+		  return;
+		}
+		// drop the -ed / -ing suffix, then tidy up what is left
+		k = j;
+		if (ends("at"))
+		{
+		  setto("ate");
+		}
+		else if (ends("bl"))
+		{
+		  setto("ble");
+		}
+		else if (ends("iz"))
+		{
+		  setto("ize");
+		}
+		else if (doublec(k))
+		{
+		  // undouble the final consonant unless it is l, s or z
+		  char last = b[k];
+		  if (last != 'l' && last != 's' && last != 'z')
+		  {
+			k--;
+		  }
+		}
+		else if (m() == 1 && cvc(k))
+		{
+		  setto("e");
+		}
+	  }
+
+	  /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+	  private void step2()
+	  {
+		if (!ends("y"))
+		{
+		  return;
+		}
+		if (!vowelinstem())
+		{
+		  return;
+		}
+		b[k] = 'i';
+		dirty = true;
+	  }
+
+	  /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+	     -ation) maps to -ize etc. note that the string before the suffix must give
+	     m() > 0 (enforced by r()). The switch is keyed on the penultimate
+	     character so only the suffixes that can match are tested. */
+
+	  private void step3()
+	  {
+		if (k == k0) // For Bug 1: b[k - 1] below would lie before the word
+		{
+			return;
+		}
+		switch (b[k - 1])
+		{
+		case 'a':
+		  if (ends("ational"))
+		  {
+			  r("ate");
+		  }
+		  else if (ends("tional"))
+		  {
+			  r("tion");
+		  }
+		  break;
+		case 'c':
+		  if (ends("enci"))
+		  {
+			  r("ence");
+		  }
+		  else if (ends("anci"))
+		  {
+			  r("ance");
+		  }
+		  break;
+		case 'e':
+		  if (ends("izer"))
+		  {
+			  r("ize");
+		  }
+		  break;
+		case 'l':
+		  if (ends("bli"))
+		  {
+			  r("ble");
+		  }
+		  else if (ends("alli"))
+		  {
+			  r("al");
+		  }
+		  else if (ends("entli"))
+		  {
+			  r("ent");
+		  }
+		  else if (ends("eli"))
+		  {
+			  r("e");
+		  }
+		  else if (ends("ousli"))
+		  {
+			  r("ous");
+		  }
+		  break;
+		case 'o':
+		  if (ends("ization"))
+		  {
+			  r("ize");
+		  }
+		  else if (ends("ation"))
+		  {
+			  r("ate");
+		  }
+		  else if (ends("ator"))
+		  {
+			  r("ate");
+		  }
+		  break;
+		case 's':
+		  if (ends("alism"))
+		  {
+			  r("al");
+		  }
+		  else if (ends("iveness"))
+		  {
+			  r("ive");
+		  }
+		  else if (ends("fulness"))
+		  {
+			  r("ful");
+		  }
+		  else if (ends("ousness"))
+		  {
+			  r("ous");
+		  }
+		  break;
+		case 't':
+		  if (ends("aliti"))
+		  {
+			  r("al");
+		  }
+		  else if (ends("iviti"))
+		  {
+			  r("ive");
+		  }
+		  else if (ends("biliti"))
+		  {
+			  r("ble");
+		  }
+		  break;
+		case 'g':
+		  if (ends("logi"))
+		  {
+			  r("log");
+		  }
+		  // C# does not allow control to fall out of the last switch section,
+		  // so this break is required even though Java tolerated its absence.
+		  break;
+		}
+	  }
+
+	  /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3,
+	     but keyed on the last character of the word. */
+
+	  private void step4()
+	  {
+		switch (b[k])
+		{
+		case 'e':
+		  if (ends("icate"))
+		  {
+			  r("ic");
+		  }
+		  else if (ends("ative"))
+		  {
+			  r("");
+		  }
+		  else if (ends("alize"))
+		  {
+			  r("al");
+		  }
+		  break;
+		case 'i':
+		  if (ends("iciti"))
+		  {
+			  r("ic");
+		  }
+		  break;
+		case 'l':
+		  if (ends("ical"))
+		  {
+			  r("ic");
+		  }
+		  else if (ends("ful"))
+		  {
+			  r("");
+		  }
+		  break;
+		case 's':
+		  if (ends("ness"))
+		  {
+			  r("");
+		  }
+		  break;
+		}
+	  }
+
+	  /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. A suffix is
+	     first matched via the penultimate character; it is removed only when
+	     the remaining stem has m() > 1. */
+
+	  private void step5()
+	  {
+		if (k == k0) // for Bug 1
+		{
+			return;
+		}
+		bool matched;
+		switch (b[k - 1])
+		{
+		case 'a':
+		  matched = ends("al");
+		  break;
+		case 'c':
+		  matched = ends("ance") || ends("ence");
+		  break;
+		case 'e':
+		  matched = ends("er");
+		  break;
+		case 'i':
+		  matched = ends("ic");
+		  break;
+		case 'l':
+		  matched = ends("able") || ends("ible");
+		  break;
+		case 'n':
+		  /* element etc. not stripped before the m */
+		  matched = ends("ant") || ends("ement") || ends("ment") || ends("ent");
+		  break;
+		case 'o':
+		  /* j >= 0 fixes Bug 2; -ou takes care of -ous */
+		  matched = (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) || ends("ou");
+		  break;
+		case 's':
+		  matched = ends("ism");
+		  break;
+		case 't':
+		  matched = ends("ate") || ends("iti");
+		  break;
+		case 'u':
+		  matched = ends("ous");
+		  break;
+		case 'v':
+		  matched = ends("ive");
+		  break;
+		case 'z':
+		  matched = ends("ize");
+		  break;
+		default:
+		  matched = false;
+		  break;
+		}
+		if (!matched)
+		{
+		  return;
+		}
+		if (m() > 1)
+		{
+		  k = j;
+		}
+	  }
+
+	  /* step6() removes a final -e if m() > 1, or if m() == 1 and the letters
+	     before the e are not consonant-vowel-consonant; it then drops one l
+	     from a final double l if m() > 1. */
+
+	  private void step6()
+	  {
+		j = k;
+		if (b[k] == 'e')
+		{
+		  int measure = m();
+		  bool dropE = measure > 1 || (measure == 1 && !cvc(k - 1));
+		  if (dropE)
+		  {
+			k--;
+		  }
+		}
+		bool dropL = b[k] == 'l' && doublec(k) && m() > 1;
+		if (dropL)
+		{
+		  k--;
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Stems a word given as a string.  Returns the stemmed form, or the
+	  /// original string instance when stemming reported no change.
+	  /// </summary>
+	  public virtual string stem(string s)
+	  {
+		bool changed = stem(s.ToCharArray(), s.Length);
+		return changed ? ToString() : s;
+	  }
+
+	  /// <summary>
+	  /// Stems the word occupying the whole of the given array.  Returns true if
+	  /// the result differs from the input; read it back through ResultLength /
+	  /// ResultBuffer or ToString().
+	  /// </summary>
+	  public virtual bool stem(char[] word)
+	  {
+		int length = word.Length;
+		return stem(word, length);
+	  }
+
+	  /// <summary>
+	  /// Stem a word contained in a portion of a char[] array.  The characters
+	  /// are copied into the internal buffer (which is replaced by a larger one
+	  /// when too small) and stemmed there; the caller's array is not modified.
+	  /// Returns true if the stemming process resulted in a word different from
+	  /// the input.  You can retrieve the result with
+	  /// ResultLength/ResultBuffer or ToString().
+	  /// </summary>
+	  /// <param name="wordBuffer"> array holding the word </param>
+	  /// <param name="offset"> index of the word's first character in <paramref name="wordBuffer"/> </param>
+	  /// <param name="wordLen"> number of characters in the word </param>
+	  public virtual bool stem(char[] wordBuffer, int offset, int wordLen)
+	  {
+		reset();
+		if (b.Length < wordLen)
+		{
+		  // The Java source reached NUM_BYTES_CHAR through a static import that the
+		  // converter could not translate, so the constant is qualified explicitly.
+		  b = new char[ArrayUtil.oversize(wordLen, org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR)];
+		}
+		Array.Copy(wordBuffer, offset, b, 0, wordLen);
+		i = wordLen;
+		return stem(0);
+	  }
+
+	  /// <summary>
+	  /// Stems the word held in the first <paramref name="wordLen"/> characters
+	  /// of the given array.  Returns true if the result differs from the input;
+	  /// read it back through ResultLength / ResultBuffer or ToString().
+	  /// </summary>
+	  public virtual bool stem(char[] word, int wordLen)
+	  {
+		const int start = 0;
+		return stem(word, start, wordLen);
+	  }
+
+	  /// <summary>
+	  /// Stems the word that was placed in the buffer through calls to add().
+	  /// Returns true if the result differs from the input; read it back through
+	  /// ResultLength / ResultBuffer or ToString().
+	  /// </summary>
+	  public virtual bool stem()
+	  {
+		const int start = 0;
+		return stem(start);
+	  }
+
+	  /// <summary>
+	  /// Runs the stemming steps over the buffer from index <paramref name="i0"/>
+	  /// up to the current end of the word.  Words whose last index is not
+	  /// greater than i0 + 1 skip the steps.  Returns true if the word was
+	  /// rewritten or shortened.
+	  /// </summary>
+	  public virtual bool stem(int i0)
+	  {
+		k0 = i0;
+		k = i - 1;
+		bool longEnough = k > k0 + 1;
+		if (longEnough)
+		{
+		  step1();
+		  step2();
+		  step3();
+		  step4();
+		  step5();
+		  step6();
+		}
+		// Also, a word is considered dirty if we lopped off letters
+		// Thanks to Ifigenia Vairelles for pointing this out.
+		int newLength = k + 1;
+		if (newLength != i)
+		{
+		  dirty = true;
+		}
+		i = newLength;
+		return dirty;
+	  }
+
+	  /* Test program for demonstrating the Stemmer.  It reads a file and
+	   * stems each word, writing the result to standard out.
+	   * Usage: Stemmer file-name
+	  public static void main(String[] args) {
+	    PorterStemmer s = new PorterStemmer();
+	
+	    for (int i = 0; i < args.length; i++) {
+	      try {
+	        InputStream in = new FileInputStream(args[i]);
+	        byte[] buffer = new byte[1024];
+	        int bufferLen, offset, ch;
+	
+	        bufferLen = in.read(buffer);
+	        offset = 0;
+	        s.reset();
+	
+	        while(true) {
+	          if (offset < bufferLen)
+	            ch = buffer[offset++];
+	          else {
+	            bufferLen = in.read(buffer);
+	            offset = 0;
+	            if (bufferLen < 0)
+	              ch = -1;
+	            else
+	              ch = buffer[offset++];
+	          }
+	
+	          if (Character.isLetter((char) ch)) {
+	            s.add(Character.toLowerCase((char) ch));
+	          }
+	          else {
+	             s.stem();
+	             System.out.print(s.toString());
+	             s.reset();
+	             if (ch < 0)
+	               break;
+	             else {
+	               System.out.print((char) ch);
+	             }
+	           }
+	        }
+	
+	        in.close();
+	      }
+	      catch (IOException e) {
+	        System.out.println("error reading " + args[i]);
+	      }
+	    }
+	  }*/
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs
new file mode 100644
index 0000000..e6d4f76
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishAnalyzer.cs
@@ -0,0 +1,155 @@
+using System;
+
+namespace org.apache.lucene.analysis.es
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using LowerCaseFilter = org.apache.lucene.analysis.core.LowerCaseFilter;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using SnowballFilter = org.apache.lucene.analysis.snowball.SnowballFilter;
+	using StandardFilter = org.apache.lucene.analysis.standard.StandardFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+	using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+	using SpanishStemmer = org.tartarus.snowball.ext.SpanishStemmer;
+
+	/// <summary>
+	/// <seealso cref="Analyzer"/> for Spanish.
+	/// <para>
+	/// <a name="version"/>
+	/// </para>
+	/// <para>You must specify the required <seealso cref="Version"/>
+	/// compatibility when creating SpanishAnalyzer:
+	/// <ul>
+	///   <li> As of 3.6, SpanishLightStemFilter is used for less aggressive stemming.</li>
+	/// </ul>
+	/// </para>
+	/// </summary>
+	public sealed class SpanishAnalyzer : StopwordAnalyzerBase
+	{
+	  private readonly CharArraySet stemExclusionSet;
+
+	  /// <summary>
+	  /// File containing default Spanish stopwords. </summary>
+	  public const string DEFAULT_STOPWORD_FILE = "spanish_stop.txt";
+
+	  /// <summary>
+	  /// Returns the default stop words set held by DefaultSetHolder. </summary>
+	  /// <returns> default stop words set. </returns>
+	  public static CharArraySet DefaultStopSet
+	  {
+		  get
+		  {
+			return DefaultSetHolder.DEFAULT_STOP_SET;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Loads the DEFAULT_STOP_SET in a lazy fashion: the static constructor runs
+	  /// once, the first time the outer class accesses the set.
+	  /// </summary>
+	  private class DefaultSetHolder
+	  {
+		internal static readonly CharArraySet DEFAULT_STOP_SET;
+
+		static DefaultSetHolder()
+		{
+		  try
+		  {
+			DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(typeof(SnowballFilter), DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
+		  }
+		  catch (System.IO.IOException ex)
+		  {
+			// default set should always be present as it is part of the
+			// distribution; keep the I/O failure as the inner exception so the
+			// root cause is not lost.
+			throw new Exception("Unable to load default stopword set", ex);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the default stop words: <seealso cref="DEFAULT_STOPWORD_FILE"/>.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  public SpanishAnalyzer(Version matchVersion) : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words and an empty stem exclusion set.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords) : this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
+	  {
+	  }
+
+	  /// <summary>
+	  /// Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+	  /// provided this analyzer will add a <seealso cref="SetKeywordMarkerFilter"/> before
+	  /// stemming.
+	  /// </summary>
+	  /// <param name="matchVersion"> lucene compatibility version </param>
+	  /// <param name="stopwords"> a stopword set </param>
+	  /// <param name="stemExclusionSet"> a set of terms not to be stemmed; an unmodifiable
+	  ///        copy is stored </param>
+	  public SpanishAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) : base(matchVersion, stopwords)
+	  {
+		this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
+	  }
+
+	  /// <summary>
+	  /// Creates a
+	  /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  /// which tokenizes all the text in the provided <seealso cref="Reader"/>.
+	  /// </summary>
+	  /// <returns> A
+	  ///         <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
+	  ///         built from an <seealso cref="StandardTokenizer"/> filtered with
+	  ///         <seealso cref="StandardFilter"/>, <seealso cref="LowerCaseFilter"/>, <seealso cref="StopFilter"/>
+	  ///         , <seealso cref="SetKeywordMarkerFilter"/> if a stem exclusion set is
+	  ///         provided, and <seealso cref="SpanishLightStemFilter"/> (or a
+	  ///         <seealso cref="SnowballFilter"/> with SpanishStemmer when matchVersion
+	  ///         is before 3.6). </returns>
+	  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+	  {
+		Tokenizer source = new StandardTokenizer(matchVersion, reader);
+		TokenStream result = new StandardFilter(matchVersion, source);
+		result = new LowerCaseFilter(matchVersion, result);
+		result = new StopFilter(matchVersion, result, stopwords);
+		if (!stemExclusionSet.Empty)
+		{
+		  // mark excluded terms as keywords so the stemmer below leaves them alone
+		  result = new SetKeywordMarkerFilter(result, stemExclusionSet);
+		}
+		if (matchVersion.onOrAfter(Version.LUCENE_36))
+		{
+		  result = new SpanishLightStemFilter(result);
+		}
+		else
+		{
+		  result = new SnowballFilter(result, new SpanishStemmer());
+		}
+		return new TokenStreamComponents(source, result);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs
new file mode 100644
index 0000000..73834e1
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Es/SpanishLightStemFilter.cs
@@ -0,0 +1,66 @@
+namespace org.apache.lucene.analysis.es
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+	/// <summary>
+	/// A <seealso cref="TokenFilter"/> that applies <seealso cref="SpanishLightStemmer"/> to stem Spanish
+	/// words.
+	/// <para>
+	/// To prevent terms from being stemmed use an instance of
+	/// <seealso cref="SetKeywordMarkerFilter"/> or a custom <seealso cref="TokenFilter"/> that sets
+	/// the <seealso cref="KeywordAttribute"/> before this <seealso cref="TokenStream"/>.
+	/// </para>
+	/// </summary>
+	public sealed class SpanishLightStemFilter : TokenFilter
+	{
+	  private readonly SpanishLightStemmer stemmer = new SpanishLightStemmer();
+	  private readonly CharTermAttribute termAtt;
+	  private readonly KeywordAttribute keywordAttr;
+
+	  /// <summary>
+	  /// Creates a stem filter reading tokens from <paramref name="input"/>. </summary>
+	  /// <param name="input"> the token stream whose terms are to be stemmed </param>
+	  public SpanishLightStemFilter(TokenStream input) : base(input)
+	  {
+		// C# field initializers may not call instance members, so the attributes
+		// are registered here rather than at their declarations.
+		termAtt = addAttribute(typeof(CharTermAttribute));
+		keywordAttr = addAttribute(typeof(KeywordAttribute));
+	  }
+
+	  /// <summary>
+	  /// Advances the wrapped stream by one token and, unless the token is flagged
+	  /// as a keyword, stems its term in place and shortens the term to the
+	  /// length the stemmer returns.
+	  /// </summary>
+	  /// <returns> false when the wrapped stream has no further token, true otherwise </returns>
+	  public override bool incrementToken()
+	  {
+		if (input.incrementToken())
+		{
+		  if (!keywordAttr.Keyword)
+		  {
+			int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+			termAtt.Length = newlen;
+		  }
+		  return true;
+		}
+		else
+		{
+		  return false;
+		}
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message