lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [05/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:38:54 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sinks/TokenTypeSinkTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sinks/TokenTypeSinkTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sinks/TokenTypeSinkTokenizerTest.cs
new file mode 100644
index 0000000..ecb782a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sinks/TokenTypeSinkTokenizerTest.cs
@@ -0,0 +1,95 @@
+namespace org.apache.lucene.analysis.sinks
+{
+
+	/// <summary>
+	/// Copyright 2004 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+	/// <summary>
+	/// Exercises <c>TokenTypeSinkFilter</c> through a <c>TeeSinkTokenFilter</c>: the test
+	/// expects exactly one token (the one retyped to "D") to reach the sink.
+	/// </summary>
+	public class TokenTypeSinkTokenizerTest : BaseTokenStreamTestCase
+	{
+
+		/// <summary>
+		/// Consumes the tee stream while checking the type of every token, then
+		/// counts the tokens that were captured by the "D" sink.
+		/// </summary>
+		public virtual void test()
+		{
+			TokenTypeSinkFilter sinkFilter = new TokenTypeSinkFilter("D");
+			string text = "The quick red fox jumped over the lazy brown dogs";
+
+			TeeSinkTokenFilter ttf = new TeeSinkTokenFilter(new WordTokenFilter(this, new MockTokenizer(new StringReader(text), MockTokenizer.WHITESPACE, false)));
+			TeeSinkTokenFilter.SinkTokenStream sink = ttf.newSinkTokenStream(sinkFilter);
+
+			bool seenDogs = false;
+
+			CharTermAttribute termAtt = ttf.addAttribute(typeof(CharTermAttribute));
+			TypeAttribute typeAtt = ttf.addAttribute(typeof(TypeAttribute));
+			ttf.reset();
+			while (ttf.incrementToken())
+			{
+				if (termAtt.ToString().Equals("dogs"))
+				{
+					// WordTokenFilter retypes this one term to "D".
+					seenDogs = true;
+					assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().Equals("D"));
+				}
+				else
+				{
+					// Every other token is expected to carry the type "word".
+					assertTrue(typeAtt.type() + " is not equal to " + "word", typeAtt.type().Equals("word"));
+				}
+			}
+			assertTrue(seenDogs + " does not equal: " + true, seenDogs);
+
+			// Drain the sink and count what it captured.
+			int sinkCount = 0;
+			sink.reset();
+			while (sink.incrementToken())
+			{
+				sinkCount++;
+			}
+
+			assertTrue("sink Size: " + sinkCount + " is not: " + 1, sinkCount == 1);
+		}
+
+		/// <summary>
+		/// Pass-through filter that sets the type of the term "dogs" to "D" and
+		/// leaves every other token untouched.
+		/// </summary>
+		private class WordTokenFilter : TokenFilter
+		{
+			private readonly TokenTypeSinkTokenizerTest outerInstance;
+
+			internal readonly CharTermAttribute termAtt;
+			internal readonly TypeAttribute typeAtt;
+
+			/// <param name="outerInstance">the enclosing test (kept from the Java inner class)</param>
+			/// <param name="input">the stream whose tokens are inspected</param>
+			internal WordTokenFilter(TokenTypeSinkTokenizerTest outerInstance, TokenStream input) : base(input)
+			{
+				this.outerInstance = outerInstance;
+				// C# field initializers may not call instance members (CS0236), so the
+				// attributes are registered here, after the base constructor has run.
+				termAtt = addAttribute(typeof(CharTermAttribute));
+				typeAtt = addAttribute(typeof(TypeAttribute));
+			}
+
+			/// <summary>
+			/// Advances the wrapped stream by one token and retypes the term "dogs".
+			/// </summary>
+			/// <returns><c>false</c> once the wrapped stream is exhausted, otherwise <c>true</c></returns>
+			public override bool incrementToken()
+			{
+				if (!input.incrementToken())
+				{
+					return false;
+				}
+
+				if (termAtt.ToString().Equals("dogs"))
+				{
+					typeAtt.Type = "D";
+				}
+				return true;
+			}
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs
new file mode 100644
index 0000000..43de13c
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowball.cs
@@ -0,0 +1,232 @@
+using System;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TokenStreamComponents = org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PayloadAttribute = org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Tests for <c>SnowballAnalyzer</c> and <c>SnowballFilter</c>.
+	/// </summary>
+	public class TestSnowball : BaseTokenStreamTestCase
+	{
+
+		/// <summary>
+		/// Checks English stemming through <c>SnowballAnalyzer</c>.
+		/// </summary>
+		public virtual void testEnglish()
+		{
+			Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
+			assertAnalyzesTo(a, "he abhorred accents", new string[]{"he", "abhor", "accent"});
+		}
+
+		/// <summary>
+		/// Checks that the supplied stop word set removes "the" from the output.
+		/// </summary>
+		public virtual void testStopwords()
+		{
+			Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English", StandardAnalyzer.STOP_WORDS_SET);
+			assertAnalyzesTo(a, "the quick brown fox jumped", new string[]{"quick", "brown", "fox", "jump"});
+		}
+
+		/// <summary>
+		/// Test english lowercasing. Test both cases (pre-3.1 and post-3.1) to ensure
+		/// we lowercase I correctly for non-Turkish languages in either case.
+		/// </summary>
+		public virtual void testEnglishLowerCase()
+		{
+			Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
+			assertAnalyzesTo(a, "cryogenic", new string[] {"cryogen"});
+			assertAnalyzesTo(a, "CRYOGENIC", new string[] {"cryogen"});
+
+			Analyzer b = new SnowballAnalyzer(Version.LUCENE_30, "English");
+			assertAnalyzesTo(b, "cryogenic", new string[] {"cryogen"});
+			assertAnalyzesTo(b, "CRYOGENIC", new string[] {"cryogen"});
+		}
+
+		/// <summary>
+		/// Test turkish lowercasing
+		/// </summary>
+		public virtual void testTurkish()
+		{
+			Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "Turkish");
+
+			assertAnalyzesTo(a, "ağacı", new string[] {"ağaç"});
+			assertAnalyzesTo(a, "AĞACI", new string[] {"ağaç"});
+		}
+
+		/// <summary>
+		/// Test turkish lowercasing (old buggy behavior).
+		/// Deprecated (3.1): remove this when support for 3.0 indexes is no longer required (5.0).
+		/// </summary>
+		[Obsolete("(3.1) Remove this when support for 3.0 indexes is no longer required (5.0)")]
+		public virtual void testTurkishBWComp()
+		{
+			Analyzer a = new SnowballAnalyzer(Version.LUCENE_30, "Turkish");
+			// AĞACI in turkish lowercases to ağacı, but with lowercase filter ağaci.
+			// this fails due to wrong casing, because the stemmer
+			// will only remove -ı, not -i
+			assertAnalyzesTo(a, "ağacı", new string[] {"ağaç"});
+			assertAnalyzesTo(a, "AĞACI", new string[] {"ağaci"});
+		}
+
+		/// <summary>
+		/// Runs two different inputs through the same analyzer instance.
+		/// </summary>
+		public virtual void testReusableTokenStream()
+		{
+			Analyzer a = new SnowballAnalyzer(TEST_VERSION_CURRENT, "English");
+			assertAnalyzesTo(a, "he abhorred accents", new string[]{"he", "abhor", "accent"});
+			assertAnalyzesTo(a, "she abhorred him", new string[]{"she", "abhor", "him"});
+		}
+
+		/// <summary>
+		/// Feeds the single token produced by <c>TestTokenStream</c> through a
+		/// <c>SnowballFilter</c> and asserts that the term is stemmed ("accents" to
+		/// "accent") while offsets, type, position increment, flags and payload
+		/// still hold the values the stream assigned.
+		/// </summary>
+		public virtual void testFilterTokens()
+		{
+			SnowballFilter filter = new SnowballFilter(new TestTokenStream(this), "English");
+			CharTermAttribute termAtt = filter.getAttribute(typeof(CharTermAttribute));
+			OffsetAttribute offsetAtt = filter.getAttribute(typeof(OffsetAttribute));
+			TypeAttribute typeAtt = filter.getAttribute(typeof(TypeAttribute));
+			PayloadAttribute payloadAtt = filter.getAttribute(typeof(PayloadAttribute));
+			PositionIncrementAttribute posIncAtt = filter.getAttribute(typeof(PositionIncrementAttribute));
+			FlagsAttribute flagsAtt = filter.getAttribute(typeof(FlagsAttribute));
+
+			filter.incrementToken();
+
+			assertEquals("accent", termAtt.ToString());
+			assertEquals(2, offsetAtt.startOffset());
+			assertEquals(7, offsetAtt.endOffset());
+			assertEquals("wrd", typeAtt.type());
+			assertEquals(3, posIncAtt.PositionIncrement);
+			assertEquals(77, flagsAtt.Flags);
+			assertEquals(new BytesRef(new sbyte[]{0,1,2,3}), payloadAtt.Payload);
+		}
+
+		/// <summary>
+		/// Token stream that returns the same fixed token ("accents", with known
+		/// attribute values) on every call to <c>incrementToken</c>.
+		/// </summary>
+		private sealed class TestTokenStream : TokenStream
+		{
+			private readonly TestSnowball outerInstance;
+
+			internal readonly CharTermAttribute termAtt;
+			internal readonly OffsetAttribute offsetAtt;
+			internal readonly TypeAttribute typeAtt;
+			internal readonly PayloadAttribute payloadAtt;
+			internal readonly PositionIncrementAttribute posIncAtt;
+			internal readonly FlagsAttribute flagsAtt;
+
+			/// <param name="outerInstance">the enclosing test (kept from the Java inner class)</param>
+			internal TestTokenStream(TestSnowball outerInstance) : base()
+			{
+				this.outerInstance = outerInstance;
+				// C# field initializers may not call instance members (CS0236), so the
+				// attributes are registered here, after the base constructor has run.
+				termAtt = addAttribute(typeof(CharTermAttribute));
+				offsetAtt = addAttribute(typeof(OffsetAttribute));
+				typeAtt = addAttribute(typeof(TypeAttribute));
+				payloadAtt = addAttribute(typeof(PayloadAttribute));
+				posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+				flagsAtt = addAttribute(typeof(FlagsAttribute));
+			}
+
+			/// <summary>
+			/// Clears the attributes and fills them with the fixed test values.
+			/// </summary>
+			/// <returns>always <c>true</c></returns>
+			public override bool incrementToken()
+			{
+				clearAttributes();
+				termAtt.setEmpty().append("accents");
+				offsetAtt.setOffset(2, 7);
+				typeAtt.Type = "wrd";
+				posIncAtt.PositionIncrement = 3;
+				payloadAtt.Payload = new BytesRef(new sbyte[]{0,1,2,3});
+				flagsAtt.Flags = 77;
+				return true;
+			}
+		}
+
+		/// <summary>
+		/// for testing purposes ONLY </summary>
+		public static string[] SNOWBALL_LANGS = new string[] {"Armenian", "Basque", "Catalan", "Danish", "Dutch", "English", "Finnish", "French", "German2", "German", "Hungarian", "Irish", "Italian", "Kp", "Lovins", "Norwegian", "Porter", "Portuguese", "Romanian", "Russian", "Spanish", "Swedish", "Turkish"};
+
+		/// <summary>
+		/// For every language in <c>SNOWBALL_LANGS</c>, runs <c>checkOneTerm</c> with an
+		/// empty input and an empty expected output.
+		/// </summary>
+		public virtual void testEmptyTerm()
+		{
+			foreach (string lang in SNOWBALL_LANGS)
+			{
+				// The language must be handed over explicitly: the helper class cannot
+				// see the loop variable the way the Java anonymous class did.
+				Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, lang);
+				checkOneTerm(a, "", "");
+			}
+		}
+
+		/// <summary>
+		/// Analyzer that chains a <c>KeywordTokenizer</c> into a <c>SnowballFilter</c>
+		/// for the language given at construction.
+		/// </summary>
+		private class AnalyzerAnonymousInnerClassHelper : Analyzer
+		{
+			private readonly TestSnowball outerInstance;
+
+			private readonly string snowballLanguage;
+
+			/// <param name="outerInstance">the enclosing test (kept from the Java inner class)</param>
+			/// <param name="snowballLanguage">language name passed to <c>SnowballFilter</c></param>
+			public AnalyzerAnonymousInnerClassHelper(TestSnowball outerInstance, string snowballLanguage)
+			{
+				this.outerInstance = outerInstance;
+				this.snowballLanguage = snowballLanguage;
+			}
+
+			protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
+			{
+				Tokenizer tokenizer = new KeywordTokenizer(reader);
+				return new Analyzer.TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, snowballLanguage));
+			}
+		}
+
+		/// <summary>
+		/// Runs <see cref="checkRandomStrings"/> for every language in <c>SNOWBALL_LANGS</c>.
+		/// </summary>
+		public virtual void testRandomStrings()
+		{
+			foreach (string lang in SNOWBALL_LANGS)
+			{
+				checkRandomStrings(lang);
+			}
+		}
+
+		/// <summary>
+		/// Runs <c>checkRandomData</c> against a <c>MockTokenizer</c> + <c>SnowballFilter</c>
+		/// chain for one language.
+		/// </summary>
+		/// <param name="snowballLanguage">language name passed to <c>SnowballFilter</c></param>
+		public virtual void checkRandomStrings(string snowballLanguage)
+		{
+			Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, snowballLanguage);
+			checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
+		}
+
+		/// <summary>
+		/// Analyzer that chains a <c>MockTokenizer</c> into a <c>SnowballFilter</c>
+		/// for the language given at construction.
+		/// </summary>
+		private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+		{
+			private readonly TestSnowball outerInstance;
+
+			private string snowballLanguage;
+
+			/// <param name="outerInstance">the enclosing test (kept from the Java inner class)</param>
+			/// <param name="snowballLanguage">language name passed to <c>SnowballFilter</c></param>
+			public AnalyzerAnonymousInnerClassHelper2(TestSnowball outerInstance, string snowballLanguage)
+			{
+				this.outerInstance = outerInstance;
+				this.snowballLanguage = snowballLanguage;
+			}
+
+			protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
+			{
+				Tokenizer t = new MockTokenizer(reader);
+				return new Analyzer.TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
+			}
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballPorterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballPorterFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballPorterFilterFactory.cs
new file mode 100644
index 0000000..2197f19
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballPorterFilterFactory.cs
@@ -0,0 +1,82 @@
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/// <summary>
+	/// Copyright 2004 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
+	using EnglishStemmer = org.tartarus.snowball.ext.EnglishStemmer;
+
+
+	/// <summary>
+	/// Tests for the "SnowballPorter" token filter factory.
+	/// </summary>
+	public class TestSnowballPorterFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+		/// <summary>
+		/// Stems every word of a sentence directly with <c>EnglishStemmer</c> and
+		/// expects the factory-built filter to produce the same tokens.
+		/// </summary>
+		public virtual void test()
+		{
+			string text = "The fledgling banks were counting on a big boom in banking";
+			EnglishStemmer stemmer = new EnglishStemmer();
+			// The Java original used String.split("\\s"), a regex split on whitespace;
+			// string.Split has no equivalent overload, so Regex.Split is used instead.
+			string[] words = System.Text.RegularExpressions.Regex.Split(text, "\\s");
+			string[] gold = new string[words.Length];
+			for (int i = 0; i < words.Length; i++)
+			{
+				stemmer.Current = words[i];
+				stemmer.stem();
+				gold[i] = stemmer.Current;
+			}
+
+			Reader reader = new StringReader(text);
+			TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			stream = tokenFilterFactory("SnowballPorter", "language", "English").create(stream);
+			assertTokenStreamContents(stream, gold);
+		}
+
+		/// <summary>
+		/// Test the protected words mechanism of SnowballPorterFilterFactory
+		/// </summary>
+		public virtual void testProtected()
+		{
+			Reader reader = new StringReader("ridding of some stemming");
+			TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			stream = tokenFilterFactory("SnowballPorter", TEST_VERSION_CURRENT, new StringMockResourceLoader("ridding"), "protected", "protwords.txt", "language", "English").create(stream);
+
+			// "ridding" is listed as protected, so it is expected back unstemmed.
+			assertTokenStreamContents(stream, new string[] {"ridding", "of", "some", "stem"});
+		}
+
+		/// <summary>
+		/// Test that bogus arguments result in exception </summary>
+		public virtual void testBogusArguments()
+		{
+			try
+			{
+				tokenFilterFactory("SnowballPorter", "bogusArg", "bogusValue");
+				fail();
+			}
+			catch (System.ArgumentException expected)
+			{
+				assertTrue(expected.Message.Contains("Unknown parameters"));
+			}
+		}
+	}
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballVocab.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballVocab.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballVocab.cs
new file mode 100644
index 0000000..b598421
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Snowball/TestSnowballVocab.cs
@@ -0,0 +1,106 @@
+using System;
+
+namespace org.apache.lucene.analysis.snowball
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+	/// <summary>
+	/// Test the snowball filters against the snowball data tests
+	/// </summary>
+	public class TestSnowballVocab : LuceneTestCase
+	{
+		/// <summary>
+		/// Run all languages against their snowball vocabulary tests.
+		/// </summary>
+		public virtual void testStemmers()
+		{
+			assertCorrectOutput("Danish", "danish");
+			assertCorrectOutput("Dutch", "dutch");
+			assertCorrectOutput("English", "english");
+			// disabled due to snowball java code generation bug: 
+			// see http://article.gmane.org/gmane.comp.search.snowball/1139
+			// assertCorrectOutput("Finnish", "finnish");
+			assertCorrectOutput("French", "french");
+			assertCorrectOutput("German", "german");
+			assertCorrectOutput("German2", "german2");
+			assertCorrectOutput("Hungarian", "hungarian");
+			assertCorrectOutput("Italian", "italian");
+			assertCorrectOutput("Kp", "kraaij_pohlmann");
+			// disabled due to snowball java code generation bug: 
+			// see http://article.gmane.org/gmane.comp.search.snowball/1139
+			// assertCorrectOutput("Lovins", "lovins");
+			assertCorrectOutput("Norwegian", "norwegian");
+			assertCorrectOutput("Porter", "porter");
+			assertCorrectOutput("Portuguese", "portuguese");
+			assertCorrectOutput("Romanian", "romanian");
+			assertCorrectOutput("Russian", "russian");
+			assertCorrectOutput("Spanish", "spanish");
+			assertCorrectOutput("Swedish", "swedish");
+			assertCorrectOutput("Turkish", "turkish");
+		}
+
+		/// <summary>
+		/// For the supplied language, run the stemmer against all strings in voc.txt
+		/// The output should be the same as the string in output.txt
+		/// </summary>
+		/// <param name="snowballLanguage">language name passed to <c>SnowballFilter</c></param>
+		/// <param name="dataDirectory">directory inside TestSnowballVocabData.zip holding voc.txt and output.txt</param>
+		private void assertCorrectOutput(string snowballLanguage, string dataDirectory)
+		{
+			if (VERBOSE)
+			{
+				Console.WriteLine("checking snowball language: " + snowballLanguage);
+			}
+
+			Analyzer a = new AnalyzerAnonymousInnerClassHelper(this, snowballLanguage);
+
+			// The Java source reached assertVocabulary through
+			// "import static org.apache.lucene.analysis.VocabularyAssert.*", which has no
+			// converted counterpart in this file, so the call is qualified explicitly.
+			VocabularyAssert.assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"), dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
+		}
+
+		/// <summary>
+		/// Analyzer that chains a <c>KeywordTokenizer</c> into a <c>SnowballFilter</c>
+		/// for the language given at construction.
+		/// </summary>
+		private class AnalyzerAnonymousInnerClassHelper : Analyzer
+		{
+			private readonly TestSnowballVocab outerInstance;
+
+			private string snowballLanguage;
+
+			/// <param name="outerInstance">the enclosing test (kept from the Java inner class)</param>
+			/// <param name="snowballLanguage">language name passed to <c>SnowballFilter</c></param>
+			public AnalyzerAnonymousInnerClassHelper(TestSnowballVocab outerInstance, string snowballLanguage)
+			{
+				this.outerInstance = outerInstance;
+				this.snowballLanguage = snowballLanguage;
+			}
+
+			protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+			{
+				Tokenizer t = new KeywordTokenizer(reader);
+				return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
+			}
+		}
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestStandardFactories.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestStandardFactories.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestStandardFactories.cs
new file mode 100644
index 0000000..c6f2e09
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestStandardFactories.cs
@@ -0,0 +1,249 @@
+using System.Text;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the standard lucene factories are working.
+	/// </summary>
+	public class TestStandardFactories : BaseTokenStreamFactoryTestCase
+	{
+		/// <summary>
+		/// Test StandardTokenizerFactory
+		/// </summary>
+		public virtual void testStandardTokenizer()
+		{
+			Reader reader = new StringReader("Wha\u0301t's this thing do?");
+			TokenStream stream = tokenizerFactory("Standard").create(reader);
+			assertTokenStreamContents(stream, new string[] {"Wha\u0301t's", "this", "thing", "do"});
+		}
+
+		/// <summary>
+		/// Test StandardTokenizerFactory with maxTokenLength=1000: the 700-character
+		/// word is expected back as a single token.
+		/// </summary>
+		public virtual void testStandardTokenizerMaxTokenLength()
+		{
+			StringBuilder builder = new StringBuilder();
+			for (int i = 0 ; i < 100 ; ++i)
+			{
+				builder.Append("abcdefg"); // 7 * 100 = 700 char "word"
+			}
+			string longWord = builder.ToString();
+			string content = "one two three " + longWord + " four five six";
+			Reader reader = new StringReader(content);
+			Tokenizer stream = tokenizerFactory("Standard", "maxTokenLength", "1000").create(reader);
+			assertTokenStreamContents(stream, new string[] {"one", "two", "three", longWord, "four", "five", "six"});
+		}
+
+		/// <summary>
+		/// Test ClassicTokenizerFactory
+		/// </summary>
+		public virtual void testClassicTokenizer()
+		{
+			Reader reader = new StringReader("What's this thing do?");
+			TokenStream stream = tokenizerFactory("Classic").create(reader);
+			assertTokenStreamContents(stream, new string[] {"What's", "this", "thing", "do"});
+		}
+
+		/// <summary>
+		/// Test ClassicTokenizerFactory with maxTokenLength=1000: the 700-character
+		/// word is expected back as a single token.
+		/// </summary>
+		public virtual void testClassicTokenizerMaxTokenLength()
+		{
+			StringBuilder builder = new StringBuilder();
+			for (int i = 0 ; i < 100 ; ++i)
+			{
+				builder.Append("abcdefg"); // 7 * 100 = 700 char "word"
+			}
+			string longWord = builder.ToString();
+			string content = "one two three " + longWord + " four five six";
+			Reader reader = new StringReader(content);
+			Tokenizer stream = tokenizerFactory("Classic", "maxTokenLength", "1000").create(reader);
+			assertTokenStreamContents(stream, new string[] {"one", "two", "three", longWord, "four", "five", "six"});
+		}
+
+		/// <summary>
+		/// Test ClassicFilterFactory
+		/// </summary>
+		public virtual void testStandardFilter()
+		{
+			Reader reader = new StringReader("What's this thing do?");
+			TokenStream stream = tokenizerFactory("Classic").create(reader);
+			stream = tokenFilterFactory("Classic").create(stream);
+			assertTokenStreamContents(stream, new string[] {"What", "this", "thing", "do"});
+		}
+
+		/// <summary>
+		/// Test KeywordTokenizerFactory
+		/// </summary>
+		public virtual void testKeywordTokenizer()
+		{
+			Reader reader = new StringReader("What's this thing do?");
+			TokenStream stream = tokenizerFactory("Keyword").create(reader);
+			assertTokenStreamContents(stream, new string[] {"What's this thing do?"});
+		}
+
+		/// <summary>
+		/// Test WhitespaceTokenizerFactory
+		/// </summary>
+		public virtual void testWhitespaceTokenizer()
+		{
+			Reader reader = new StringReader("What's this thing do?");
+			TokenStream stream = tokenizerFactory("Whitespace").create(reader);
+			assertTokenStreamContents(stream, new string[] {"What's", "this", "thing", "do?"});
+		}
+
+		/// <summary>
+		/// Test LetterTokenizerFactory
+		/// </summary>
+		public virtual void testLetterTokenizer()
+		{
+			Reader reader = new StringReader("What's this thing do?");
+			TokenStream stream = tokenizerFactory("Letter").create(reader);
+			assertTokenStreamContents(stream, new string[] {"What", "s", "this", "thing", "do"});
+		}
+
+		/// <summary>
+		/// Test LowerCaseTokenizerFactory
+		/// </summary>
+		public virtual void testLowerCaseTokenizer()
+		{
+			Reader reader = new StringReader("What's this thing do?");
+			TokenStream stream = tokenizerFactory("LowerCase").create(reader);
+			assertTokenStreamContents(stream, new string[] {"what", "s", "this", "thing", "do"});
+		}
+
+		/// <summary>
+		/// Ensure the ASCIIFoldingFilterFactory works
+		/// </summary>
+		public virtual void testASCIIFolding()
+		{
+			Reader reader = new StringReader("Česká");
+			TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			stream = tokenFilterFactory("ASCIIFolding").create(stream);
+			assertTokenStreamContents(stream, new string[] {"Ceska"});
+		}
+
+		/// <summary>
+		/// Test that bogus arguments result in exception </summary>
+		public virtual void testBogusArguments()
+		{
+			// Same order as the original sequence of try/catch blocks.
+			assertUnknownParameters(() => tokenizerFactory("Standard", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenizerFactory("Classic", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenizerFactory("Whitespace", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenizerFactory("Letter", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenizerFactory("LowerCase", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenFilterFactory("ASCIIFolding", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenFilterFactory("Standard", "bogusArg", "bogusValue"));
+			assertUnknownParameters(() => tokenFilterFactory("Classic", "bogusArg", "bogusValue"));
+		}
+
+		/// <summary>
+		/// Invokes a factory lookup that is expected to throw an
+		/// <see cref="System.ArgumentException"/> whose message contains "Unknown parameters";
+		/// calls <c>fail()</c> if the lookup returns normally.
+		/// </summary>
+		/// <param name="createFactory">the factory lookup to run</param>
+		private void assertUnknownParameters(System.Action createFactory)
+		{
+			try
+			{
+				createFactory();
+				fail();
+			}
+			catch (System.ArgumentException expected)
+			{
+				assertTrue(expected.Message.Contains("Unknown parameters"));
+			}
+		}
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestUAX29URLEmailTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestUAX29URLEmailTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestUAX29URLEmailTokenizerFactory.cs
new file mode 100644
index 0000000..50542ff
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Standard/TestUAX29URLEmailTokenizerFactory.cs
@@ -0,0 +1,169 @@
+using System;
+using System.Text;
+
+namespace org.apache.lucene.analysis.standard
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using ClasspathResourceLoader = org.apache.lucene.analysis.util.ClasspathResourceLoader;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// A few tests based on org.apache.lucene.analysis.TestUAX29URLEmailTokenizer
+	/// </summary>
+
+	public class TestUAX29URLEmailTokenizerFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// A combining accent stays inside its word; the trailing '?' is not emitted. </summary>
+	  public virtual void testUAX29URLEmailTokenizer()
+	  {
+		Reader reader = new StringReader("Wha\u0301t's this thing do?");
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"Wha\u0301t's", "this", "thing", "do"});
+	  }
+
+	  /// <summary>
+	  /// Arabic text with embedded Latin words and digits is split into words, punctuation dropped. </summary>
+	  public virtual void testArabic()
+	  {
+		Reader reader = new StringReader("الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.");
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008"});
+	  }
+
+	  /// <summary>
+	  /// Each Han ideograph is expected as its own token; digits and Latin words stay whole. </summary>
+	  public virtual void testChinese()
+	  {
+		Reader reader = new StringReader("我是中国人。 1234 Tests ");
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"我", "是", "中", "国", "人", "1234", "Tests"});
+	  }
+
+	  /// <summary>
+	  /// Each space-separated Hangul word is expected as a single token. </summary>
+	  public virtual void testKorean()
+	  {
+		Reader reader = new StringReader("안녕하세요 한글입니다");
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"안녕하세요", "한글입니다"});
+	  }
+
+	  /// <summary>
+	  /// Hyphens separate tokens: "some-dashed-phrase" yields three words. </summary>
+	  public virtual void testHyphen()
+	  {
+		Reader reader = new StringReader("some-dashed-phrase");
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"some", "dashed", "phrase"});
+	  }
+
+	  /// <summary>
+	  /// Test with some URLs from TestUAX29URLEmailTokenizer's
+	  /// urls.from.random.text.with.urls.txt: every URL is expected as one token,
+	  /// interleaved with the ordinary word tokens around it. </summary>
+	  public virtual void testURLs()
+	  {
+		string textWithURLs = "http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on\n" + " some extra\nWords thrown in here. " + "http://c5-3486.bisynxu.FR/aI.YnNms/" + " samba Halta gamba " + "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n" + "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n" + "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m" + " inter Locutio " + "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n" + "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7" + " blah Sirrah woof " + "http://[a42:a7b6::]/qSmxSUU4z/%52qVl4\n";
+		Reader reader = new StringReader(textWithURLs);
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on", "some", "extra", "Words", "thrown", "in", "here", "http://c5-3486.bisynxu.FR/aI.YnNms/", "samba", "Halta", "gamba", "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R", "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb", "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m", "inter", "Locutio", "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/", "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7", "blah", "Sirrah", "woof", "http://[a42:a7b6::]/qSmxSUU4z/%52qVl4"});
+	  }
+
+	  /// <summary>
+	  /// Test with some emails from TestUAX29URLEmailTokenizer's
+	  /// email.addresses.from.random.text.with.email.addresses.txt: every address is
+	  /// expected as one token, interleaved with the ordinary word tokens around it. </summary>
+	  public virtual void testEmails()
+	  {
+		string textWithEmails = " some extra\nWords thrown in here. " + "dJ8ngFi@avz13m.CC\n" + "kU-l6DS@[082.015.228.189]\n" + "\"%U\u0012@?\\B\"@Fl2d.md" + " samba Halta gamba " + "Bvd#@tupjv.sn\n" + "SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt\n" + "~+Kdz@3mousnl.SE\n" + " inter Locutio " + "C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY\n" + "}0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM" + " blah Sirrah woof " + "lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae\n" + "lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H\n";
+		Reader reader = new StringReader(textWithEmails);
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"some", "extra", "Words", "thrown", "in", "here", "dJ8ngFi@avz13m.CC", "kU-l6DS@[082.015.228.189]", "\"%U\u0012@?\\B\"@Fl2d.md", "samba", "Halta", "gamba", "Bvd#@tupjv.sn", "SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt", "~+Kdz@3mousnl.SE", "inter", "Locutio", "C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY", "}0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM", "blah", "Sirrah", "woof", "lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae", "lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H"});
+	  }
+
+	  /// <summary>
+	  /// With maxTokenLength=1000 a 700-character word is expected to come through as one token. </summary>
+	  public virtual void testMaxTokenLength()
+	  {
+		StringBuilder builder = new StringBuilder();
+		for (int i = 0; i < 100; ++i)
+		{
+		  builder.Append("abcdefg"); // 7 * 100 = 700 char "word"
+		}
+		string longWord = builder.ToString();
+		string content = "one two three " + longWord + " four five six";
+		Reader reader = new StringReader(content);
+		TokenStream stream = tokenizerFactory("UAX29URLEmail", "maxTokenLength", "1000").create(reader);
+		assertTokenStreamContents(stream, new string[] {"one", "two", "three", longWord, "four", "five", "six"});
+	  }
+
+	  /// <summary>
+	  /// By default "ざ" is expected unchanged; with Version.LUCENE_31 the expected token
+	  /// is "さ" (old broken behavior). Nuke this test in Lucene 5.0. </summary>
+	  [Obsolete("nuke this test in lucene 5.0")]
+	  public virtual void testMatchVersion()
+	  {
+		Reader reader = new StringReader("ざ");
+		TokenStream stream = tokenizerFactory("UAX29URLEmail").create(reader);
+		assertTokenStreamContents(stream, new string[] {"ざ"});
+
+		reader = new StringReader("ざ");
+		stream = tokenizerFactory("UAX29URLEmail", Version.LUCENE_31, new ClasspathResourceLoader(this.GetType())).create(reader);
+		assertTokenStreamContents(stream, new string[] {"さ"}); // old broken behavior
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception: an unrecognized factory
+	  /// argument must raise an ArgumentException whose message mentions
+	  /// "Unknown parameters". </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenizerFactory("UAX29URLEmail", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+
+	  /// <summary>
+	  /// A maxTokenLength of -1 must be rejected with an ArgumentException naming the constraint. </summary>
+	  public virtual void testIllegalArguments()
+	  {
+		try
+		{
+		  tokenizerFactory("UAX29URLEmail", "maxTokenLength", "-1").create(new StringReader("hello"));
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("maxTokenLength must be greater than zero"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishAnalyzer.cs
new file mode 100644
index 0000000..1848f66
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishAnalyzer.cs
@@ -0,0 +1,70 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	public class TestSwedishAnalyzer : BaseTokenStreamTestCase
+	{
+	  /// <summary>
+	  /// Constructing the analyzer is the whole test: per the original note it
+	  /// fails with an NPE when the stopwords file is missing from the classpath.
+	  /// </summary>
+	  public virtual void testResourcesAvailable()
+	  {
+		new SwedishAnalyzer(TEST_VERSION_CURRENT);
+	  }
+
+	  /// <summary>
+	  /// Two inflected forms are expected to reduce to the stem "jaktkarl", and
+	  /// the stopword "och" is expected to produce no tokens at all.
+	  /// </summary>
+	  public virtual void testBasics()
+	  {
+		Analyzer analyzer = new SwedishAnalyzer(TEST_VERSION_CURRENT);
+		string[] noTokens = new string[0];
+		// both inflections collapse to one stem
+		checkOneTerm(analyzer, "jaktkarlarne", "jaktkarl");
+		checkOneTerm(analyzer, "jaktkarlens", "jaktkarl");
+		assertAnalyzesTo(analyzer, "och", noTokens);
+	  }
+
+	  /// <summary>
+	  /// A term listed in the exclusion set is expected to pass through
+	  /// unchanged, while a term outside the set is still stemmed.
+	  /// </summary>
+	  public virtual void testExclude()
+	  {
+		CharArraySet keepAsIs = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false);
+		Analyzer analyzer = new SwedishAnalyzer(TEST_VERSION_CURRENT, SwedishAnalyzer.DefaultStopSet, keepAsIs);
+		checkOneTerm(analyzer, "jaktkarlarne", "jaktkarlarne");
+		checkOneTerm(analyzer, "jaktkarlens", "jaktkarl");
+	  }
+
+	  /// <summary>
+	  /// Pushes random strings through a fresh analyzer, passing
+	  /// 1000 * RANDOM_MULTIPLIER as the final checkRandomData argument.
+	  /// </summary>
+	  public virtual void testRandomStrings()
+	  {
+		checkRandomData(random(), new SwedishAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilter.cs
new file mode 100644
index 0000000..f4b29bd
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilter.cs
@@ -0,0 +1,123 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using SetKeywordMarkerFilter = org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
+//	import static org.apache.lucene.analysis.VocabularyAssert.*;
+
+	/// <summary>
+	/// Simple tests for <seealso cref="SwedishLightStemFilter"/>
+	/// </summary>
+	public class TestSwedishLightStemFilter : BaseTokenStreamTestCase
+	{
+	  /// <summary>
+	  /// Analyzer shared by the vocabulary and random-data tests.
+	  /// </summary>
+	  private readonly Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Whitespace MockTokenizer followed by SwedishLightStemFilter.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(source, new SwedishLightStemFilter(source));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Test against a vocabulary from the reference impl.
+	  /// The Java static import of VocabularyAssert could not be converted, so the
+	  /// helper is called through its declaring class. </summary>
+	  public virtual void testVocabulary()
+	  {
+		VocabularyAssert.assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
+	  }
+
+	  /// <summary>
+	  /// A term flagged as a keyword is expected to come back unstemmed.
+	  /// </summary>
+	  public virtual void testKeyword()
+	  {
+		CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlens"), false);
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(exclusionSet);
+		checkOneTerm(a, "jaktkarlens", "jaktkarlens");
+	  }
+
+	  /// <summary>
+	  /// Same chain as the shared analyzer, with a SetKeywordMarkerFilter built from
+	  /// the supplied set inserted between the tokenizer and the stemmer.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  // Terms handed to SetKeywordMarkerFilter; assigned once in the constructor.
+		  private readonly CharArraySet exclusionSet;
+
+		  public AnalyzerAnonymousInnerClassHelper2(CharArraySet exclusionSet)
+		  {
+			  this.exclusionSet = exclusionSet;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			// the keyword marker wraps the tokenizer, and the stemmer wraps the marker
+			TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
+			return new TokenStreamComponents(source, new SwedishLightStemFilter(sink));
+		  }
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer
+	  /// </summary>
+	  public virtual void testRandomStrings()
+	  {
+		checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  /// <summary>
+	  /// An empty input term is expected to yield an empty output term.
+	  /// </summary>
+	  public virtual void testEmptyTerm()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper3();
+		checkOneTerm(a, "", "");
+	  }
+
+	  /// <summary>
+	  /// KeywordTokenizer followed by SwedishLightStemFilter.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, new SwedishLightStemFilter(tokenizer));
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilterFactory.cs
new file mode 100644
index 0000000..6a53c73
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Sv/TestSwedishLightStemFilterFactory.cs
@@ -0,0 +1,57 @@
+namespace org.apache.lucene.analysis.sv
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure the Swedish Light stem factory is working.
+	/// </summary>
+	public class TestSwedishLightStemFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Both "äpplen" and "äpple" are expected to stem to "äppl". </summary>
+	  public virtual void testStemming()
+	  {
+		Reader reader = new StringReader("äpplen äpple");
+		TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+		stream = tokenFilterFactory("SwedishLightStem").create(stream);
+		assertTokenStreamContents(stream, new string[] {"äppl", "äppl"});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception: an unrecognized factory
+	  /// argument must raise an ArgumentException whose message mentions
+	  /// "Unknown parameters". </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("SwedishLightStem", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestMultiWordSynonyms.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestMultiWordSynonyms.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestMultiWordSynonyms.cs
new file mode 100644
index 0000000..98f531f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestMultiWordSynonyms.cs
@@ -0,0 +1,62 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
+
+
+	/// <summary>
+	/// @since solr 1.4
+	/// </summary>
+	public class TestMultiWordSynonyms : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// Applies the rule "a b c,d" to the input "a e" through SlowSynonymFilter and
+	  /// expects the tokens back unchanged. Remove this test in 5.0. </summary>
+	  [Obsolete("Remove this test in 5.0")]
+	  public virtual void testMultiWordSynonymsOld()
+	  {
+		IList<string> synonymRules = new List<string> { "a b c,d" };
+		SlowSynonymMap synonyms = new SlowSynonymMap(true);
+		SlowSynonymFilterFactory.parseRules(synonymRules, synonyms, "=>", ",", true, null);
+
+		MockTokenizer tokenizer = new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false);
+		SlowSynonymFilter filtered = new SlowSynonymFilter(tokenizer, synonyms);
+		// Note kept from the original test: it fails when the stream yields ["e","e"]
+		assertTokenStreamContents(filtered, new string[] {"a", "e"});
+	  }
+
+	  /// <summary>
+	  /// Same rule and input, this time through the "Synonym" factory fed by a string resource. </summary>
+	  public virtual void testMultiWordSynonyms()
+	  {
+		TokenStream tokens = new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false);
+		StringMockResourceLoader loader = new StringMockResourceLoader("a b c,d");
+		tokens = tokenFilterFactory("Synonym", TEST_VERSION_CURRENT, loader, "synonyms", "synonyms.txt").create(tokens);
+		// Note kept from the original test: it fails when the stream yields ["e","e"]
+		assertTokenStreamContents(tokens, new string[] {"a", "e"});
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs
new file mode 100644
index 0000000..0a337cd
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSlowSynonymFilter.cs
@@ -0,0 +1,387 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using org.apache.lucene.analysis.tokenattributes;
+
+	/// @deprecated Remove this test in Lucene 5.0 
+	[Obsolete("Remove this test in Lucene 5.0")]
+	public class TestSlowSynonymFilter : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>
+	  /// Splits <paramref name="str"/> on single spaces and returns the pieces as a list.
+	  /// </summary>
+	  /// <param name="str">Space-separated terms, e.g. "a b".</param>
+	  /// <returns>The split terms; the list is the backing array and therefore fixed-size.</returns>
+	  internal static IList<string> strings(string str)
+	  {
+		// Java's Arrays.asList has no .NET counterpart. A string[] already implements
+		// IList<string> (fixed-size, like the Java wrapper), so the array is returned directly.
+		// NOTE(review): Split(string, bool) is presumably the converter's Java-compatible split helper - confirm it is in scope.
+		return str.Split(" ", true);
+	  }
+
+	  /// <summary>
+	  /// Tokenizes <paramref name="input"/> with a MockTokenizer in WHITESPACE mode, runs it through a
+	  /// SlowSynonymFilter built on <paramref name="dict"/> and asserts the emitted terms equal <paramref name="expected"/>.
+	  /// </summary>
+//ORIGINAL LINE: static void assertTokenizesTo(SlowSynonymMap dict, String input, String expected[]) throws java.io.IOException
+	  internal static void assertTokenizesTo(SlowSynonymMap dict, string input, string[] expected)
+	  {
+		SlowSynonymFilter filtered = new SlowSynonymFilter(new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false), dict);
+		assertTokenStreamContents(filtered, expected);
+	  }
+
+	  /// <summary>
+	  /// Same check as the terms-only overload, additionally asserting the position increments
+	  /// of the emitted tokens against <paramref name="posIncs"/>.
+	  /// </summary>
+//ORIGINAL LINE: static void assertTokenizesTo(SlowSynonymMap dict, String input, String expected[], int posIncs[]) throws java.io.IOException
+	  internal static void assertTokenizesTo(SlowSynonymMap dict, string input, string[] expected, int[] posIncs)
+	  {
+		Tokenizer source = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(new SlowSynonymFilter(source, dict), expected, posIncs);
+	  }
+
+	  /// <summary>
+	  /// Replays the pre-built tokens in <paramref name="input"/> through a SlowSynonymFilter built on
+	  /// <paramref name="dict"/> and asserts the emitted terms and position increments.
+	  /// </summary>
+//ORIGINAL LINE: static void assertTokenizesTo(SlowSynonymMap dict, java.util.List<org.apache.lucene.analysis.Token> input, String expected[], int posIncs[]) throws java.io.IOException
+	  internal static void assertTokenizesTo(SlowSynonymMap dict, IList<Token> input, string[] expected, int[] posIncs)
+	  {
+		TokenStream replayed = new IterTokenStream(input);
+		assertTokenStreamContents(new SlowSynonymFilter(replayed, dict), expected, posIncs);
+	  }
+
+	  /// <summary>
+	  /// Replays the pre-built tokens in <paramref name="input"/> through a SlowSynonymFilter built on
+	  /// <paramref name="dict"/> and asserts terms, start offsets, end offsets and position increments.
+	  /// </summary>
+//ORIGINAL LINE: static void assertTokenizesTo(SlowSynonymMap dict, java.util.List<org.apache.lucene.analysis.Token> input, String expected[], int startOffsets[], int endOffsets[], int posIncs[]) throws java.io.IOException
+	  internal static void assertTokenizesTo(SlowSynonymMap dict, IList<Token> input, string[] expected, int[] startOffsets, int[] endOffsets, int[] posIncs)
+	  {
+		SlowSynonymFilter filtered = new SlowSynonymFilter(new IterTokenStream(input), dict);
+		assertTokenStreamContents(filtered, expected, startOffsets, endOffsets, posIncs);
+	  }
+
+	  /// <summary>Adds single- and multi-word rules with orig=false, merge=true and asserts the replaced token sequences, including repeated rules and rules that map "zoo" onto itself.</summary>
+//ORIGINAL LINE: public void testMatching() throws java.io.IOException
+	  public virtual void testMatching()
+	  {
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = false;
+		bool merge = true;
+		map.add(strings("a b"), tokens("ab"), orig, merge);
+		map.add(strings("a c"), tokens("ac"), orig, merge);
+		map.add(strings("a"), tokens("aa"), orig, merge);
+		map.add(strings("b"), tokens("bb"), orig, merge);
+		map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
+		map.add(strings("x c"), tokens("xc"), orig, merge);
+
+		assertTokenizesTo(map, "$", new string[] {"$"});
+		assertTokenizesTo(map, "a", new string[] {"aa"});
+		assertTokenizesTo(map, "a $", new string[] {"aa", "$"});
+		assertTokenizesTo(map, "$ a", new string[] {"$", "aa"});
+		assertTokenizesTo(map, "a a", new string[] {"aa", "aa"});
+		assertTokenizesTo(map, "b", new string[] {"bb"});
+		assertTokenizesTo(map, "z x c v", new string[] {"zxcv"});
+		assertTokenizesTo(map, "z x c $", new string[] {"z", "xc", "$"});
+
+		// repeats
+		map.add(strings("a b"), tokens("ab"), orig, merge);
+		map.add(strings("a b"), tokens("ab"), orig, merge);
+
+		// FIXME: the below test intended to be { "ab" }
+		assertTokenizesTo(map, "a b", new string[] {"ab", "ab", "ab"});
+
+		// check for lack of recursion
+		map.add(strings("zoo"), tokens("zoo"), orig, merge);
+		assertTokenizesTo(map, "zoo zoo $ zoo", new string[] {"zoo", "zoo", "$", "zoo"});
+		map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
+		// FIXME: the below test intended to be { "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo" }
+		// maybe this was just a typo in the old test????
+		assertTokenizesTo(map, "zoo zoo $ zoo", new string[] {"zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"});
+	  }
+
+	  /// <summary>Adds rules with orig=true, merge=true and asserts that each matched input token is emitted together with its synonym, checking the position increments of every output token.</summary>
+//ORIGINAL LINE: public void testIncludeOrig() throws java.io.IOException
+	  public virtual void testIncludeOrig()
+	  {
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = true;
+		bool merge = true;
+		map.add(strings("a b"), tokens("ab"), orig, merge);
+		map.add(strings("a c"), tokens("ac"), orig, merge);
+		map.add(strings("a"), tokens("aa"), orig, merge);
+		map.add(strings("b"), tokens("bb"), orig, merge);
+		map.add(strings("z x c v"), tokens("zxcv"), orig, merge);
+		map.add(strings("x c"), tokens("xc"), orig, merge);
+
+		assertTokenizesTo(map, "$", new string[] {"$"}, new int[] {1});
+		assertTokenizesTo(map, "a", new string[] {"a", "aa"}, new int[] {1, 0});
+		assertTokenizesTo(map, "a", new string[] {"a", "aa"}, new int[] {1, 0});
+		assertTokenizesTo(map, "$ a", new string[] {"$", "a", "aa"}, new int[] {1, 1, 0});
+		assertTokenizesTo(map, "a $", new string[] {"a", "aa", "$"}, new int[] {1, 0, 1});
+		assertTokenizesTo(map, "$ a !", new string[] {"$", "a", "aa", "!"}, new int[] {1, 1, 0, 1});
+		assertTokenizesTo(map, "a a", new string[] {"a", "aa", "a", "aa"}, new int[] {1, 0, 1, 0});
+		assertTokenizesTo(map, "b", new string[] {"b", "bb"}, new int[] {1, 0});
+		assertTokenizesTo(map, "z x c v", new string[] {"z", "zxcv", "x", "c", "v"}, new int[] {1, 0, 1, 1, 1});
+		assertTokenizesTo(map, "z x c $", new string[] {"z", "x", "xc", "c", "$"}, new int[] {1, 1, 0, 1, 1});
+
+		// check for lack of recursion
+		map.add(strings("zoo zoo"), tokens("zoo"), orig, merge);
+		// CHECKME: I think the previous test (with 4 zoo's), was just a typo.
+		assertTokenizesTo(map, "zoo zoo $ zoo", new string[] {"zoo", "zoo", "zoo", "$", "zoo"}, new int[] {1, 0, 1, 1, 1});
+
+		map.add(strings("zoo"), tokens("zoo zoo"), orig, merge);
+		assertTokenizesTo(map, "zoo zoo $ zoo", new string[] {"zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"}, new int[] {1, 0, 1, 1, 1, 0, 1});
+	  }
+
+
+	  /// <summary>Adds several replacement lists for the same key with merge=true and asserts the order and position increments of the merged output after each addition.</summary>
+//ORIGINAL LINE: public void testMapMerge() throws java.io.IOException
+	  public virtual void testMapMerge()
+	  {
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = false;
+		bool merge = true;
+		map.add(strings("a"), tokens("a5,5"), orig, merge);
+		map.add(strings("a"), tokens("a3,3"), orig, merge);
+
+		assertTokenizesTo(map, "a", new string[] {"a3", "a5"}, new int[] {1, 2});
+
+		map.add(strings("b"), tokens("b3,3"), orig, merge);
+		map.add(strings("b"), tokens("b5,5"), orig, merge);
+
+		assertTokenizesTo(map, "b", new string[] {"b3", "b5"}, new int[] {1, 2});
+
+		map.add(strings("a"), tokens("A3,3"), orig, merge);
+		map.add(strings("a"), tokens("A5,5"), orig, merge);
+
+		assertTokenizesTo(map, "a", new string[] {"a3", "A3", "a5", "A5"}, new int[] {1, 0, 2, 0});
+
+		map.add(strings("a"), tokens("a1"), orig, merge);
+		assertTokenizesTo(map, "a", new string[] {"a1", "a3", "A3", "a5", "A5"}, new int[] {1, 2, 0, 2, 0});
+
+		map.add(strings("a"), tokens("a2,2"), orig, merge);
+		map.add(strings("a"), tokens("a4,4 a6,2"), orig, merge);
+		assertTokenizesTo(map, "a", new string[] {"a1", "a2", "a3", "A3", "a4", "a5", "A5", "a6"}, new int[] {1, 1, 1, 0, 1, 1, 0, 1});
+	  }
+
+
+	  /// <summary>Asserts that '/'-stacked tokens and separately added synonyms for one key all come out at the same position, then checks the merge of two position-incremented token lists.</summary>
+//ORIGINAL LINE: public void testOverlap() throws java.io.IOException
+	  public virtual void testOverlap()
+	  {
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = false;
+		bool merge = true;
+		map.add(strings("qwe"), tokens("qq/ww/ee"), orig, merge);
+		map.add(strings("qwe"), tokens("xx"), orig, merge);
+		map.add(strings("qwe"), tokens("yy"), orig, merge);
+		map.add(strings("qwe"), tokens("zz"), orig, merge);
+		assertTokenizesTo(map, "$", new string[] {"$"});
+		assertTokenizesTo(map, "qwe", new string[] {"qq", "ww", "ee", "xx", "yy", "zz"}, new int[] {1, 0, 0, 0, 0, 0});
+
+		// test merging within the map
+
+		map.add(strings("a"), tokens("a5,5 a8,3 a10,2"), orig, merge);
+		map.add(strings("a"), tokens("a3,3 a7,4 a9,2 a11,2 a111,100"), orig, merge);
+		assertTokenizesTo(map, "a", new string[] {"a3", "a5", "a7", "a8", "a9", "a10", "a11", "a111"}, new int[] {1, 2, 2, 1, 1, 1, 1, 100});
+	  }
+
+	  /// <summary>With orig=false, asserts the position increments of replacement tokens when the input tokens carry explicit position increments.</summary>
+//ORIGINAL LINE: public void testPositionIncrements() throws java.io.IOException
+	  public virtual void testPositionIncrements()
+	  {
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = false;
+		bool merge = true;
+
+		// test that generated tokens start at the same posInc as the original
+		map.add(strings("a"), tokens("aa"), orig, merge);
+		assertTokenizesTo(map, tokens("a,5"), new string[] {"aa"}, new int[] {5});
+		assertTokenizesTo(map, tokens("b,1 a,0"), new string[] {"b", "aa"}, new int[] {1, 0});
+
+		// test that offset of first replacement is ignored (always takes the orig offset)
+		map.add(strings("b"), tokens("bb,100"), orig, merge);
+		assertTokenizesTo(map, tokens("b,5"), new string[] {"bb"}, new int[] {5});
+		assertTokenizesTo(map, tokens("c,1 b,0"), new string[] {"c", "bb"}, new int[] {1, 0});
+
+		// test that subsequent tokens are adjusted accordingly
+		map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
+		assertTokenizesTo(map, tokens("c,5"), new string[] {"cc", "c2"}, new int[] {5, 2});
+		assertTokenizesTo(map, tokens("d,1 c,0"), new string[] {"d", "cc", "c2"}, new int[] {1, 0, 2});
+	  }
+
+
+	  /// <summary>With orig=true, asserts the position increments of the original token and its replacement tokens when the input tokens carry explicit position increments.</summary>
+//ORIGINAL LINE: public void testPositionIncrementsWithOrig() throws java.io.IOException
+	  public virtual void testPositionIncrementsWithOrig()
+	  {
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = true;
+		bool merge = true;
+
+		// test that generated tokens start at the same offset as the original
+		map.add(strings("a"), tokens("aa"), orig, merge);
+		assertTokenizesTo(map, tokens("a,5"), new string[] {"a", "aa"}, new int[] {5, 0});
+		assertTokenizesTo(map, tokens("b,1 a,0"), new string[] {"b", "a", "aa"}, new int[] {1, 0, 0});
+
+		// test that offset of first replacement is ignored (always takes the orig offset)
+		map.add(strings("b"), tokens("bb,100"), orig, merge);
+		assertTokenizesTo(map, tokens("b,5"), new string[] {"b", "bb"}, new int[] {5, 0});
+		assertTokenizesTo(map, tokens("c,1 b,0"), new string[] {"c", "b", "bb"}, new int[] {1, 0, 0});
+
+		// test that subsequent tokens are adjusted accordingly
+		map.add(strings("c"), tokens("cc,100 c2,2"), orig, merge);
+		assertTokenizesTo(map, tokens("c,5"), new string[] {"c", "cc", "c2"}, new int[] {5, 0, 2});
+		assertTokenizesTo(map, tokens("d,1 c,0"), new string[] {"d", "c", "cc", "c2"}, new int[] {1, 0, 0, 2});
+	  }
+
+
+	  /// <summary>Regression test for SOLR-167: asserts the start and end offsets produced for "a a x" under the rules "a a=>b" and "x=>y".</summary>
+//ORIGINAL LINE: public void testOffsetBug() throws java.io.IOException
+	  public virtual void testOffsetBug()
+	  {
+		// With the following rules:
+		// a a=>b
+		// x=>y
+		// analysing "a x" causes "y" to have a bad offset (end less than start)
+		// SOLR-167
+		SlowSynonymMap map = new SlowSynonymMap();
+
+		bool orig = false;
+		bool merge = true;
+
+		map.add(strings("a a"), tokens("b"), orig, merge);
+		map.add(strings("x"), tokens("y"), orig, merge);
+
+		// "a a x" => "b y"
+		assertTokenizesTo(map, tokens("a,1,0,1 a,1,2,3 x,1,4,5"), new string[] {"b", "y"}, new int[] {0, 4}, new int[] {3, 5}, new int[] {1, 1});
+	  }
+
+
+	  /// <summary>
+	  /// Returns a list of tokens according to a test string format
+	  /// (omitted fields default to positionIncrement=1, startOffset=0, endOffset=startOffset + term length):
+	  /// a b c  =>  returns List&lt;Token&gt; [a,b,c]
+	  /// a/b   => tokens a and b share the same spot (b.positionIncrement=0)
+	  /// a,3/b/c => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
+	  /// a,1,10,11  => "a" with positionIncrement=1, startOffset=10, endOffset=11 </summary>
+	  /// <remarks>Deprecated (3.0): does not support the attributes API.</remarks>
+	  [Obsolete("(3.0) does not support attributes api")]
+	  private IList<Token> tokens(string str)
+	  {
+		string[] arr = str.Split(" ", true);
+		IList<Token> result = new List<Token>();
+		for (int i = 0; i < arr.Length; i++)
+		{
+		  string[] toks = arr[i].Split("/", true); // '/' separates tokens stacked at one position
+		  string[] @params = toks[0].Split(",", true); // first token only: term[,posInc[,start[,end]]]
+
+		  int posInc;
+		  int start;
+		  int end;
+
+		  if (@params.Length > 1)
+		  {
+			posInc = int.Parse(@params[1]);
+		  }
+		  else
+		  {
+			posInc = 1;
+		  }
+
+		  if (@params.Length > 2)
+		  {
+			start = int.Parse(@params[2]);
+		  }
+		  else
+		  {
+			start = 0;
+		  }
+
+		  if (@params.Length > 3)
+		  {
+			end = int.Parse(@params[3]);
+		  }
+		  else
+		  {
+			end = start + @params[0].Length;
+		  }
+
+		  Token t = new Token(@params[0],start,end,"TEST");
+		  t.PositionIncrement = posInc;
+
+		  result.Add(t);
+		  for (int j = 1; j < toks.Length; j++)
+		  {
+			t = new Token(toks[j],0,0,"TEST"); // stacked tokens are not comma-parsed and get zero offsets
+			t.PositionIncrement = 0;
+			result.Add(t);
+		  }
+		}
+		return result;
+	  }
+
+	  /// <summary>
+	  /// TokenStream that replays a fixed array of <c>Token</c> instances, copying each token's term,
+	  /// offsets, position increment, flags, type and payload into the stream's attributes.
+	  /// </summary>
+	  /// <remarks>Deprecated (3.0): does not support custom attributes.</remarks>
+	  [Obsolete("(3.0) does not support custom attributes")]
+	  private class IterTokenStream : TokenStream
+	  {
+		internal readonly Token[] tokens;
+		internal int index = 0;
+		internal CharTermAttribute termAtt;
+		internal OffsetAttribute offsetAtt;
+		internal PositionIncrementAttribute posIncAtt;
+		internal FlagsAttribute flagsAtt;
+		internal TypeAttribute typeAtt;
+		internal PayloadAttribute payloadAtt;
+
+		/// <summary>Creates a stream over <paramref name="tokens"/>, replayed in array order.</summary>
+		public IterTokenStream(params Token[] tokens) : base()
+		{
+		  this.tokens = tokens;
+
+		  // C# field initializers cannot call instance methods (CS0236), so the attributes are
+		  // registered here; this still runs after the base constructor, as in the Java original.
+		  termAtt = addAttribute(typeof(CharTermAttribute));
+		  offsetAtt = addAttribute(typeof(OffsetAttribute));
+		  posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
+		  flagsAtt = addAttribute(typeof(FlagsAttribute));
+		  typeAtt = addAttribute(typeof(TypeAttribute));
+		  payloadAtt = addAttribute(typeof(PayloadAttribute));
+		}
+
+		/// <summary>Creates a stream over a snapshot of <paramref name="tokens"/>.</summary>
+		public IterTokenStream(ICollection<Token> tokens) : this(new List<Token>(tokens).ToArray())
+		{
+		  // ICollection<T> has no Java-style toArray; List<T>.ToArray() produces the same copy.
+		}
+
+		/// <summary>
+		/// Advances to the next stored token and publishes its values through the attributes.
+		/// </summary>
+		/// <returns><c>false</c> once every stored token has been emitted; otherwise <c>true</c>.</returns>
+//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
+		public override bool incrementToken()
+		{
+		  if (index >= tokens.Length)
+		  {
+			return false;
+		  }
+
+		  clearAttributes();
+		  Token token = tokens[index++];
+		  termAtt.setEmpty().append(token);
+		  offsetAtt.setOffset(token.startOffset(), token.endOffset());
+		  posIncAtt.PositionIncrement = token.PositionIncrement;
+		  flagsAtt.Flags = token.Flags;
+		  typeAtt.Type = token.type();
+		  payloadAtt.Payload = token.Payload;
+		  return true;
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSolrSynonymParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSolrSynonymParser.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSolrSynonymParser.cs
new file mode 100644
index 0000000..735a6c2
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Synonym/TestSolrSynonymParser.cs
@@ -0,0 +1,177 @@
+namespace org.apache.lucene.analysis.synonym
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using EnglishAnalyzer = org.apache.lucene.analysis.en.EnglishAnalyzer;
+	using Test = org.junit.Test;
+
+	/// <summary>
+	/// Tests parser for the Solr synonyms format
+	/// @lucene.experimental
+	/// </summary>
+	public class TestSolrSynonymParser : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>
+	  /// Parses a small synonym file (examples from the Solr wiki) and asserts the analyzed terms and position increments. </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSimple() throws Exception
+	  public virtual void testSimple()
+	  {
+		string testFile = "i-pod, ipod, ipoooood\n" + "foo => foo bar\n" + "foo => baz\n" + "this test, that testing";
+
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
+		parser.parse(new StringReader(testFile));
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final SynonymMap map = parser.build();
+		SynonymMap map = parser.build();
+
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, map);
+
+		assertAnalyzesTo(analyzer, "ball", new string[] {"ball"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "i-pod", new string[] {"i-pod", "ipod", "ipoooood"}, new int[] {1, 0, 0});
+
+		assertAnalyzesTo(analyzer, "foo", new string[] {"foo", "baz", "bar"}, new int[] {1, 0, 1});
+
+		assertAnalyzesTo(analyzer, "this test", new string[] {"this", "that", "test", "testing"}, new int[] {1, 0, 1, 0});
+	  }
+
+	  /// <summary>
+	  /// Analyzer used by testSimple: a MockTokenizer in WHITESPACE mode followed by a
+	  /// SynonymFilter over the supplied map.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly TestSolrSynonymParser outerInstance;
+
+		  private SynonymMap map;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestSolrSynonymParser outerInstance, SynonymMap map)
+		  {
+			  this.map = map;
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>Builds the tokenizer/filter chain for <paramref name="reader"/>.</summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+			TokenStream withSynonyms = new SynonymFilter(source, map, true);
+			return new TokenStreamComponents(source, withSynonyms);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Feeds the parser a rule that chains two "=>" mappings ("a => b => c"). </summary>
+//NOTE(review): the Java original expected java.text.ParseException (see ORIGINAL LINE); nothing in this port asserts that an exception is thrown - confirm how it should be expressed.
+//ORIGINAL LINE: @Test(expected=java.text.ParseException.class) public void testInvalidDoubleMap() throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  public virtual void testInvalidDoubleMap()
+	  {
+		string testFile = "a => b => c";
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
+		parser.parse(new StringReader(testFile));
+	  }
+
+	  /// <summary>
+	  /// Feeds the parser the rule "a => 1" using a MockAnalyzer in SIMPLE mode; per the test name, the output side is expected to analyze to nothing. </summary>
+//NOTE(review): the Java original expected java.text.ParseException (see ORIGINAL LINE); nothing in this port asserts that an exception is thrown - confirm how it should be expressed.
+//ORIGINAL LINE: @Test(expected=java.text.ParseException.class) public void testInvalidAnalyzesToNothingOutput() throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  public virtual void testInvalidAnalyzesToNothingOutput()
+	  {
+		string testFile = "a => 1";
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
+		parser.parse(new StringReader(testFile));
+	  }
+
+	  /// <summary>
+	  /// Feeds the parser the rule "1 => a" using a MockAnalyzer in SIMPLE mode; per the test name, the input side is expected to analyze to nothing. </summary>
+//NOTE(review): the Java original expected java.text.ParseException (see ORIGINAL LINE); nothing in this port asserts that an exception is thrown - confirm how it should be expressed.
+//ORIGINAL LINE: @Test(expected=java.text.ParseException.class) public void testInvalidAnalyzesToNothingInput() throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  public virtual void testInvalidAnalyzesToNothingInput()
+	  {
+		string testFile = "1 => a";
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
+		parser.parse(new StringReader(testFile));
+	  }
+
+	  /// <summary>
+	  /// Feeds the parser the rule "testola => the test" using an EnglishAnalyzer. </summary>
+//NOTE(review): the Java original expected java.text.ParseException (see ORIGINAL LINE); nothing in this port asserts that an exception is thrown - confirm how it should be expressed.
+//ORIGINAL LINE: @Test(expected=java.text.ParseException.class) public void testInvalidPositionsInput() throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  public virtual void testInvalidPositionsInput()
+	  {
+		string testFile = "testola => the test";
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
+		parser.parse(new StringReader(testFile));
+	  }
+
+	  /// <summary>
+	  /// Feeds the parser the rule "the test => testola" using an EnglishAnalyzer. </summary>
+//NOTE(review): the Java original expected java.text.ParseException (see ORIGINAL LINE); nothing in this port asserts that an exception is thrown - confirm how it should be expressed.
+//ORIGINAL LINE: @Test(expected=java.text.ParseException.class) public void testInvalidPositionsOutput() throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  public virtual void testInvalidPositionsOutput()
+	  {
+		string testFile = "the test => testola";
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
+		parser.parse(new StringReader(testFile));
+	  }
+
+	  /// <summary>
+	  /// Parses a synonym file whose rules contain backslash-escaped "=>" and "," and asserts that the escaped text is matched and emitted literally. </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapedStuff() throws Exception
+	  public virtual void testEscapedStuff()
+	  {
+		string testFile = "a\\=>a => b\\=>b\n" + "a\\,a => b\\,b";
+		SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
+		parser.parse(new StringReader(testFile));
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final SynonymMap map = parser.build();
+		SynonymMap map = parser.build();
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, map);
+
+		assertAnalyzesTo(analyzer, "ball", new string[] {"ball"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "a=>a", new string[] {"b=>b"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "a,a", new string[] {"b,b"}, new int[] {1});
+	  }
+
+	  /// <summary>
+	  /// Analyzer used by testEscapedStuff: a MockTokenizer in KEYWORD mode followed by a
+	  /// SynonymFilter over the supplied map.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly TestSolrSynonymParser outerInstance;
+
+		  private SynonymMap map;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestSolrSynonymParser outerInstance, SynonymMap map)
+		  {
+			  this.map = map;
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>Builds the tokenizer/filter chain for <paramref name="reader"/>.</summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+			TokenStream withSynonyms = new SynonymFilter(source, map, false);
+			return new TokenStreamComponents(source, withSynonyms);
+		  }
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message