lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [20/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:39:09 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilterFactory.cs
new file mode 100644
index 0000000..9b204a9
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestStopFilterFactory.cs
@@ -0,0 +1,116 @@
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using ClasspathResourceLoader = org.apache.lucene.analysis.util.ClasspathResourceLoader;
+	using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
+
+	/// <summary>
+	/// Tests for <seealso cref="StopFilterFactory"/>: loading stop-word files, the default
+	/// configuration, and rejection of unknown arguments and formats.
+	/// </summary>
+	public class TestStopFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// Creates the factory from one word file, from two comma-separated word files,
+	  /// from a snowball-format file and with no arguments at all, checking the resulting
+	  /// stop set (and, where asserted, the IgnoreCase flag) each time.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInform() throws Exception
+	  public virtual void testInform()
+	  {
+		ResourceLoader loader = new ClasspathResourceLoader(this.GetType());
+		assertTrue("loader is null and it shouldn't be", loader != null);
+
+		// single word file: two entries expected
+		StopFilterFactory factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
+		CharArraySet words = factory.StopWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
+		assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);
+
+		// two word files in one comma-separated value: four entries expected
+		factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
+		words = factory.StopWords;
+		assertTrue("words is null and it shouldn't be", words != null);
+		assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
+		assertTrue(factory.IgnoreCase + " does not equal: " + true, factory.IgnoreCase == true);
+
+		// snowball-format word file: exactly these eight words expected
+		factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
+		words = factory.StopWords;
+		assertEquals(8, words.size());
+		assertTrue(words.contains("he"));
+		assertTrue(words.contains("him"));
+		assertTrue(words.contains("his"));
+		assertTrue(words.contains("himself"));
+		assertTrue(words.contains("she"));
+		assertTrue(words.contains("her"));
+		assertTrue(words.contains("hers"));
+		assertTrue(words.contains("herself"));
+
+		// defaults
+		factory = (StopFilterFactory) tokenFilterFactory("Stop");
+		assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, factory.StopWords);
+		assertEquals(false, factory.IgnoreCase);
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("Stop", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (capital C); the Java-style contains() does not exist on it.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+
+	  /// <summary>
+	  /// Test that a bogus "format" value results in an exception, both with an explicit
+	  /// words file and when no words file is given. </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusFormats() throws Exception
+	  public virtual void testBogusFormats()
+	  {
+		try
+		{
+		  tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "bogus");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  string msg = expected.Message;
+		  assertTrue(msg, msg.Contains("Unknown"));
+		  assertTrue(msg, msg.Contains("format"));
+		  assertTrue(msg, msg.Contains("bogus"));
+		}
+		try
+		{
+		  tokenFilterFactory("Stop", "format", "bogus");
+							 // implicit default words file
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  string msg = expected.Message;
+		  assertTrue(msg, msg.Contains("can not be specified"));
+		  assertTrue(msg, msg.Contains("format"));
+		  assertTrue(msg, msg.Contains("bogus"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilter.cs
new file mode 100644
index 0000000..793da91
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilter.cs
@@ -0,0 +1,121 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using English = org.apache.lucene.util.English;
+	using Version = org.apache.lucene.util.Version;
+
+
+
+	/// <summary>
+	/// Tests for <seealso cref="TypeTokenFilter"/>: removing tokens by type, position-increment
+	/// handling, and whitelist mode.
+	/// </summary>
+	public class TestTypeTokenFilter : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>
+	  /// With "&lt;NUM&gt;" as the stop type, "121" and "123" must be dropped from the stream
+	  /// and only the five word tokens remain.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTypeFilter() throws java.io.IOException
+	  public virtual void testTypeFilter()
+	  {
+		StringReader reader = new StringReader("121 is palindrome, while 123 is not");
+		ISet<string> stopTypes = asSet("<NUM>");
+		TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, true, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopTypes);
+		assertTokenStreamContents(stream, new string[]{"is", "palindrome", "while", "is", "not"});
+	  }
+
+	  /// <summary>
+	  /// Test Position increments applied by TypeTokenFilter with and without enabling this option.
+	  /// The input is the numbers 10..19, where multiples of three are spelled out in English
+	  /// and every other value is written as digits.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStopPositons() throws java.io.IOException
+	  public virtual void testStopPositons()
+	  {
+		StringBuilder sb = new StringBuilder();
+		for (int i = 10; i < 20; i++)
+		{
+		  if (i % 3 != 0)
+		  {
+			sb.Append(i).Append(" ");
+		  }
+		  else
+		  {
+			// string.Trim() is the .NET spelling; the Java-style trim() does not exist on System.String.
+			string w = English.intToEnglish(i).Trim();
+			sb.Append(w).Append(" ");
+		  }
+		}
+		log(sb.ToString());
+		string[] stopTypes = new string[]{"<NUM>"};
+		ISet<string> stopSet = asSet(stopTypes);
+
+		// with increments
+		StringReader reader = new StringReader(sb.ToString());
+		TypeTokenFilter typeTokenFilter = new TypeTokenFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+		testPositons(typeTokenFilter);
+
+		// without increments
+		reader = new StringReader(sb.ToString());
+		typeTokenFilter = new TypeTokenFilter(Version.LUCENE_43, false, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
+		testPositons(typeTokenFilter);
+
+	  }
+
+	  /// <summary>
+	  /// Consumes <paramref name="stpf"/> and asserts that every emitted token has a position
+	  /// increment of 3 when the filter's EnablePositionIncrements is true, and 1 otherwise.
+	  /// The filter is reset before and ended/closed after consumption.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void testPositons(TypeTokenFilter stpf) throws java.io.IOException
+	  private void testPositons(TypeTokenFilter stpf)
+	  {
+		TypeAttribute typeAtt = stpf.getAttribute(typeof(TypeAttribute));
+		CharTermAttribute termAttribute = stpf.getAttribute(typeof(CharTermAttribute));
+		PositionIncrementAttribute posIncrAtt = stpf.getAttribute(typeof(PositionIncrementAttribute));
+		stpf.reset();
+		bool enablePositionIncrements = stpf.EnablePositionIncrements;
+		while (stpf.incrementToken())
+		{
+		  log("Token: " + termAttribute.ToString() + ": " + typeAtt.type() + " - " + posIncrAtt.PositionIncrement);
+		  // NOTE(review): the observed value is passed before the expected one here; confirm against assertEquals' parameter order.
+		  assertEquals("if position increment is enabled the positionIncrementAttribute value should be 3, otherwise 1", posIncrAtt.PositionIncrement, enablePositionIncrements ? 3 : 1);
+		}
+		stpf.end();
+		stpf.close();
+	  }
+
+	  /// <summary>
+	  /// With the trailing constructor flag set to true (whitelist mode, per the test name),
+	  /// only the "&lt;NUM&gt;" tokens "121" and "123" are expected in the output.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTypeFilterWhitelist() throws java.io.IOException
+	  public virtual void testTypeFilterWhitelist()
+	  {
+		StringReader reader = new StringReader("121 is palindrome, while 123 is not");
+		ISet<string> stopTypes = Collections.singleton("<NUM>");
+		TokenStream stream = new TypeTokenFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, reader), stopTypes, true);
+		assertTokenStreamContents(stream, new string[]{"121", "123"});
+	  }
+
+	  // print debug info depending on VERBOSE
+	  private static void log(string s)
+	  {
+		if (VERBOSE)
+		{
+		  Console.WriteLine(s);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
new file mode 100644
index 0000000..f298e33
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestTypeTokenFilterFactory.cs
@@ -0,0 +1,102 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+	/// <summary>
+	/// Testcase for <seealso cref="TypeTokenFilterFactory"/>
+	/// </summary>
+	public class TestTypeTokenFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// Creates the factory from one and from two type files and checks the number of
+	  /// loaded stop types (2, then 4) and the parsed enablePositionIncrements flag.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInform() throws Exception
+	  public virtual void testInform()
+	  {
+		TypeTokenFilterFactory factory = (TypeTokenFilterFactory) tokenFilterFactory("Type", "types", "stoptypes-1.txt", "enablePositionIncrements", "true");
+		ISet<string> types = factory.StopTypes;
+		assertTrue("types is null and it shouldn't be", types != null);
+		assertTrue("types Size: " + types.Count + " is not: " + 2, types.Count == 2);
+		assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.EnablePositionIncrements);
+
+		factory = (TypeTokenFilterFactory) tokenFilterFactory("Type", "types", "stoptypes-1.txt, stoptypes-2.txt", "enablePositionIncrements", "false", "useWhitelist", "true");
+		types = factory.StopTypes;
+		assertTrue("types is null and it shouldn't be", types != null);
+		assertTrue("types Size: " + types.Count + " is not: " + 4, types.Count == 4);
+		assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.EnablePositionIncrements);
+	  }
+
+	  /// <summary>
+	  /// Smoke test: creating a filter over a NumericTokenStream without "useWhitelist" must not throw.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCreationWithBlackList() throws Exception
+	  public virtual void testCreationWithBlackList()
+	  {
+		TokenFilterFactory factory = tokenFilterFactory("Type", "types", "stoptypes-1.txt, stoptypes-2.txt", "enablePositionIncrements", "true");
+		NumericTokenStream input = new NumericTokenStream();
+		input.IntValue = 123;
+		factory.create(input);
+	  }
+
+	  /// <summary>
+	  /// Smoke test: creating a filter over a NumericTokenStream with "useWhitelist" set to "true" must not throw.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCreationWithWhiteList() throws Exception
+	  public virtual void testCreationWithWhiteList()
+	  {
+		TokenFilterFactory factory = tokenFilterFactory("Type", "types", "stoptypes-1.txt, stoptypes-2.txt", "enablePositionIncrements", "true", "useWhitelist", "true");
+		NumericTokenStream input = new NumericTokenStream();
+		input.IntValue = 123;
+		factory.create(input);
+	  }
+
+	  /// <summary>
+	  /// Omitting the "types" parameter must raise a System.ArgumentException.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMissingTypesParameter() throws Exception
+	  public virtual void testMissingTypesParameter()
+	  {
+		try
+		{
+		  tokenFilterFactory("Type", "enablePositionIncrements", "false");
+		  fail("not supplying 'types' parameter should cause an IllegalArgumentException");
+		}
+		catch (System.ArgumentException)
+		{
+		  // everything ok
+		}
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBogusArguments() throws Exception
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  tokenFilterFactory("Type", "types", "stoptypes-1.txt", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (capital C); the Java-style contains() does not exist on it.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailAnalyzer.cs
new file mode 100644
index 0000000..debb86f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailAnalyzer.cs
@@ -0,0 +1,361 @@
+using System;
+using System.Text;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using UAX29URLEmailAnalyzer = org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer;
+	using Version = org.apache.lucene.util.Version;
+
+
+	public class TestUAX29URLEmailAnalyzer : BaseTokenStreamTestCase
+	{
+
+	  // Analyzer under test, built with TEST_VERSION_CURRENT; the test methods below pass it as `a`.
+	  private Analyzer a = new UAX29URLEmailAnalyzer(TEST_VERSION_CURRENT);
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHugeDoc() throws java.io.IOException
+	  /// <summary>
+	  /// Analyzes an input of 4094 spaces followed by "testing 1234" and expects the two
+	  /// trailing terms to be produced.
+	  /// </summary>
+	  public virtual void testHugeDoc()
+	  {
+		// java.util.Arrays.fill has no .NET counterpart; string(char, int) builds the same run of spaces.
+		string input = new string(' ', 4094) + "testing 1234";
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, input, new string[]{"testing", "1234"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testArmenian() throws Exception
+	  /// <summary>
+	  /// Analyzes an Armenian sample sentence with <c>a</c> and asserts the exact sequence of expected terms.
+	  /// </summary>
+	  public virtual void testArmenian()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։", new string[] {"վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "վիքիպեդիայի", "կայքը"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAmharic() throws Exception
+	  /// <summary>
+	  /// Analyzes an Amharic sample with <c>a</c> and asserts the exact sequence of expected terms.
+	  /// </summary>
+	  public virtual void testAmharic()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም", new string[] {"ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testArabic() throws Exception
+	  /// <summary>
+	  /// Analyzes an Arabic sample containing an embedded English title and asserts the expected terms.
+	  /// The expected list omits "in" and "The" from the English title.
+	  /// </summary>
+	  public virtual void testArabic()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.", new string[] {"الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", "بالإنجليزية", "truth", "numbers", "wikipedia", "story", "سيتم", "إطلاقه", "في", "2008"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAramaic() throws Exception
+	  /// <summary>
+	  /// Analyzes an Aramaic sample (with one embedded Latin-script word) and asserts the expected terms.
+	  /// </summary>
+	  public virtual void testAramaic()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀", new string[] {"ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBengali() throws Exception
+	  /// <summary>
+	  /// Analyzes a Bengali sample with <c>a</c> and asserts the exact sequence of expected terms.
+	  /// </summary>
+	  public virtual void testBengali()
+	  {
+		// The final expected term had been split across two lines (and lost a character) by mail line-wrapping;
+		// it is restored here from the last word of the input sentence.
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।", new string[] {"এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার", "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsi() throws Exception
+	  /// <summary>
+	  /// Analyzes a Farsi sample with <c>a</c> and asserts the exact sequence of expected terms.
+	  /// </summary>
+	  public virtual void testFarsi()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.", new string[] {"ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی", "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testGreek() throws Exception
+	  /// <summary>
+	  /// Analyzes a Greek sample with <c>a</c> and asserts the exact sequence of expected terms.
+	  /// </summary>
+	  public virtual void testGreek()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.", new string[] {"γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που", "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testThai() throws Exception
+	  /// <summary>
+	  /// Analyzes a Thai sample and asserts three expected terms: the two runs of Thai text
+	  /// between punctuation are each expected as a single term, followed by the Thai digits.
+	  /// </summary>
+	  public virtual void testThai()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔", new string[] {"การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLao() throws Exception
+	  /// <summary>
+	  /// Analyzes a Lao sample of three space-separated words and expects each word as one term.
+	  /// </summary>
+	  public virtual void testLao()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ", new string[] {"ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTibetan() throws Exception
+	  /// <summary>
+	  /// Analyzes a Tibetan sample with <c>a</c> and asserts the exact sequence of expected terms.
+	  /// </summary>
+	  public virtual void testTibetan()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །", new string[] {"སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར", "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ"});
+	  }
+
+	  /*
+	   * For chinese, tokenize as char (these can later form bigrams or whatever)
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testChinese() throws Exception
+	  /// <summary>
+	  /// Expects each Chinese character as its own term, followed by "1234" and the lower-cased "tests".
+	  /// </summary>
+	  public virtual void testChinese()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 1234 Tests ", new string[] {"我", "是", "中", "国", "人", "1234", "tests"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmpty() throws Exception
+	  /// <summary>
+	  /// The empty string, a lone period and a lone space must each analyze to no terms.
+	  /// </summary>
+	  public virtual void testEmpty()
+	  {
+		string[] blankInputs = new string[] {"", ".", " "};
+		foreach (string blank in blankInputs)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, blank, new string[] {});
+		}
+	  }
+
+	  /* test various jira issues this analyzer is related to */
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLUCENE1545() throws Exception
+	  /// <summary>
+	  /// Regression check for LUCENE-1545: a word containing a combining character must come back as a single term.
+	  /// </summary>
+	  public virtual void testLUCENE1545()
+	  {
+		/*
+		 * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
+		 * The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
+		 * Expected result is only one token "moͤchte".
+		 */
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new string[] {"moͤchte"});
+	  }
+
+	  /* Tests from StandardAnalyzer, just to show behavior is similar */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAlphanumericSA() throws Exception
+	  /// <summary>
+	  /// Mixed letter/digit inputs are expected back as one lower-cased term each.
+	  /// </summary>
+	  public virtual void testAlphanumericSA()
+	  {
+		// each row: { input, expected single term }
+		string[][] alphanumericCases = new string[][]
+		{
+		  new string[] {"B2B", "b2b"},
+		  new string[] {"2B", "2b"}
+		};
+		foreach (string[] pair in alphanumericCases)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, pair[0], new string[] {pair[1]});
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDelimitersSA() throws Exception
+	  /// <summary>
+	  /// The delimiters "-", "," and "/" are expected to split their input into separate terms.
+	  /// </summary>
+	  public virtual void testDelimitersSA()
+	  {
+		string[] delimited = new string[] {"some-dashed-phrase", "dogs,chase,cats", "ac/dc"};
+		string[][] pieces = new string[][]
+		{
+		  new string[] {"some", "dashed", "phrase"},
+		  new string[] {"dogs", "chase", "cats"},
+		  new string[] {"ac", "dc"}
+		};
+		for (int idx = 0; idx < delimited.Length; idx++)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, delimited[idx], pieces[idx]);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testApostrophesSA() throws Exception
+	  /// <summary>
+	  /// Words with internal apostrophes are expected back as one lower-cased term that keeps the apostrophe.
+	  /// </summary>
+	  public virtual void testApostrophesSA()
+	  {
+		// each row: { input, expected single term }
+		string[][] apostropheCases = new string[][]
+		{
+		  new string[] {"O'Reilly", "o'reilly"},
+		  new string[] {"you're", "you're"},
+		  new string[] {"she's", "she's"},
+		  new string[] {"Jim's", "jim's"},
+		  new string[] {"don't", "don't"},
+		  new string[] {"O'Reilly's", "o'reilly's"}
+		};
+		foreach (string[] pair in apostropheCases)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, pair[0], new string[] {pair[1]});
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNumericSA() throws Exception
+	  /// <summary>
+	  /// Decimal numbers, model-number style words and dotted IP addresses are checked against their expected terms.
+	  /// The IP-address case is run twice, exactly as in the original sequence of assertions.
+	  /// </summary>
+	  public virtual void testNumericSA()
+	  {
+		string[] numericInputs = new string[] {"21.35", "R2D2 C3PO", "216.239.63.104", "216.239.63.104"};
+		string[][] numericTerms = new string[][]
+		{
+		  new string[] {"21.35"},
+		  new string[] {"r2d2", "c3po"},
+		  new string[] {"216.239.63.104"},
+		  new string[] {"216.239.63.104"}
+		};
+		for (int idx = 0; idx < numericInputs.Length; idx++)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, numericInputs[idx], numericTerms[idx]);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTextWithNumbersSA() throws Exception
+	  /// <summary>
+	  /// A sentence mixing words and a number is expected to yield all four items as lower-cased terms.
+	  /// </summary>
+	  public virtual void testTextWithNumbersSA()
+	  {
+		string sentence = "David has 5000 bones";
+		string[] terms = new string[] {"david", "has", "5000", "bones"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, sentence, terms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testVariousTextSA() throws Exception
+	  /// <summary>
+	  /// Assorted plain-text inputs (single letters, repeated words in mixed case, stray punctuation,
+	  /// quoted words) are checked against their expected lower-cased terms.
+	  /// </summary>
+	  public virtual void testVariousTextSA()
+	  {
+		string[] samples = new string[]
+		{
+		  "C embedded developers wanted",
+		  "foo bar FOO BAR",
+		  "foo      bar .  FOO <> BAR",
+		  "\"QUOTED\" word"
+		};
+		string[][] sampleTerms = new string[][]
+		{
+		  new string[] {"c", "embedded", "developers", "wanted"},
+		  new string[] {"foo", "bar", "foo", "bar"},
+		  new string[] {"foo", "bar", "foo", "bar"},
+		  new string[] {"quoted", "word"}
+		};
+		for (int idx = 0; idx < samples.Length; idx++)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, samples[idx], sampleTerms[idx]);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKoreanSA() throws Exception
+	  /// <summary>
+	  /// Two space-separated Korean words are expected back as two terms.
+	  /// </summary>
+	  public virtual void testKoreanSA()
+	  {
+		// Korean words
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new string[]{"안녕하세요", "한글입니다"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+	  /// <summary>
+	  /// Checks the terms of "David has 5000 bones" together with two int arrays
+	  /// (presumably each term's start and end offsets, per the test name).
+	  /// </summary>
+	  public virtual void testOffsets()
+	  {
+		string[] terms = new string[] {"david", "has", "5000", "bones"};
+		int[] startOffsets = new int[] {0, 6, 10, 15};
+		int[] endOffsets = new int[] {5, 9, 14, 20};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", terms, startOffsets, endOffsets);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTypes() throws Exception
+	  /// <summary>
+	  /// Checks the terms of "david has 5000 bones" together with a parallel array of type labels:
+	  /// "&lt;NUM&gt;" for "5000" and "&lt;ALPHANUM&gt;" for the words.
+	  /// </summary>
+	  public virtual void testTypes()
+	  {
+		string[] terms = new string[] {"david", "has", "5000", "bones"};
+		string[] termTypes = new string[] {"<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "david has 5000 bones", terms, termTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSupplementary() throws Exception
+	  /// <summary>
+	  /// Six ideographs are each expected as their own term, all typed "&lt;IDEOGRAPHIC&gt;".
+	  /// </summary>
+	  public virtual void testSupplementary()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "𩬅艱鍟䇹愯瀛", new string[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"}, new string[] {"<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKorean() throws Exception
+	  /// <summary>
+	  /// A single Korean word is expected as one term typed "&lt;HANGUL&gt;".
+	  /// </summary>
+	  public virtual void testKorean()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음", new string[] {"훈민정음"}, new string[] {"<HANGUL>"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testJapanese() throws Exception
+	  /// <summary>
+	  /// Japanese sample: three single-character "&lt;IDEOGRAPHIC&gt;" terms and one "&lt;HIRAGANA&gt;" term are
+	  /// expected from the first word, and the second word is expected whole as a "&lt;KATAKANA&gt;" term.
+	  /// </summary>
+	  public virtual void testJapanese()
+	  {
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ", new string[] {"仮", "名", "遣", "い", "カタカナ"}, new string[] {"<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>"});
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCombiningMarks() throws Exception
+	  /// <summary>
+	  /// For each script noted in the trailing comments, the input is expected back unchanged as a single term.
+	  /// NOTE(review): the exact code-point sequence of these literals matters to the test; keep them byte-for-byte.
+	  /// </summary>
+	  public virtual void testCombiningMarks()
+	  {
+		checkOneTerm(a, "ざ", "ざ"); // hiragana
+		checkOneTerm(a, "ザ", "ザ"); // katakana
+		checkOneTerm(a, "壹゙", "壹゙"); // ideographic
+		checkOneTerm(a, "아゙", "아゙"); // hangul
+	  }
+
+	  /// <summary>
+	  /// Same inputs as testCombiningMarks, run against a local analyzer built with Version.LUCENE_33
+	  /// (the local <c>a</c> hides the field of the same name). The trailing comments mark which
+	  /// expectations record buggy output ("Bug") and which record correct output ("Works").
+	  /// Deprecated: remove this and sophisticated backwards layer in 5.0.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("remove this and sophisticated backwards layer in 5.0") public void testCombiningMarksBackwards() throws Exception
+	  [Obsolete("remove this and sophisticated backwards layer in 5.0")]
+	  public virtual void testCombiningMarksBackwards()
+	  {
+		Analyzer a = new UAX29URLEmailAnalyzer(Version.LUCENE_33);
+		checkOneTerm(a, "ざ", "さ"); // hiragana Bug
+		checkOneTerm(a, "ザ", "ザ"); // katakana Works
+		checkOneTerm(a, "壹゙", "壹"); // ideographic Bug
+		checkOneTerm(a, "아゙", "아゙"); // hangul Works
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBasicEmails() throws Exception
+	  /// <summary>
+	  /// E-mail addresses embedded in brackets, quotes and angle brackets are expected as lower-cased
+	  /// "&lt;EMAIL&gt;" terms, with the surrounding words typed "&lt;ALPHANUM&gt;".
+	  /// </summary>
+	  public virtual void testBasicEmails()
+	  {
+		string text = "one test@example.com two three [A@example.CO.UK] \"ArakaBanassaMassanaBakarA\" <info@Info.info>";
+		string[] terms = new string[] {"one", "test@example.com", "two", "three", "a@example.co.uk", "arakabanassamassanabakara", "info@info.info"};
+		string[] termTypes = new string[] {"<ALPHANUM>", "<EMAIL>", "<ALPHANUM>", "<ALPHANUM>", "<EMAIL>", "<ALPHANUM>", "<EMAIL>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, text, terms, termTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMailtoSchemeEmails() throws Exception
+	  /// <summary>
+	  /// Passes two "mailto:" inputs to assertAnalyzesTo with the terms and type labels
+	  /// currently expected for them. See LUCENE-3880. </summary>
+	  public virtual void testMailtoSchemeEmails()
+	  {
+		string simpleInput = "MAILTO:Test@Example.ORG";
+		string[] simpleTerms = new string[] {"mailto", "test@example.org"};
+		string[] simpleTypes = new string[] {"<ALPHANUM>", "<EMAIL>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, simpleInput, simpleTerms, simpleTypes);
+
+		// TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
+		// TODO: Hex decoding + re-tokenization
+		// TODO: split field keys/values
+		// TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
+		string fullInput = "mailto:personA@example.com,personB@example.com?cc=personC@example.com" + "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that";
+		string[] fullTerms = new string[] {"mailto", "persona@example.com", ",personb@example.com", "?cc=personc@example.com", "subject", "subjectivity", "body", "corpusivity", "20or", "20something", "20like", "20that"};
+		string[] fullTypes = new string[] {"<ALPHANUM>", "<EMAIL>", "<EMAIL>", "<EMAIL>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, fullInput, fullTerms, fullTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBasicURLs() throws Exception
+	  /// <summary>
+	  /// Passes text mixing https, ftp and file URLs with plain words to assertAnalyzesTo
+	  /// with the expected (lower-cased) terms and their type labels. </summary>
+	  public virtual void testBasicURLs()
+	  {
+		string text = "a <HTTPs://example.net/omg/isnt/that/NICE?no=its&n%30t#mntl-E>b-D ftp://www.example.com/ABC.txt file:///C:/path/to/a/FILE.txt C";
+		string[] expectedTerms = new string[] {"https://example.net/omg/isnt/that/nice?no=its&n%30t#mntl-e", "b", "d", "ftp://www.example.com/abc.txt", "file:///c:/path/to/a/file.txt", "c"};
+		string[] expectedTypes = new string[] {"<URL>", "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>", "<ALPHANUM>"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTerms, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNoSchemeURLs() throws Exception
+	  /// <summary>
+	  /// Scheme-less inputs: each one is passed to assertAnalyzesTo with the terms and
+	  /// type labels expected for it. </summary>
+	  public virtual void testNoSchemeURLs()
+	  {
+		// ".ph" is a Top Level Domain
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "<index.ph>",
+			new string[] {"index.ph"},
+			new string[] {"<URL>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "index.ph",
+			new string[] {"index.ph"},
+			new string[] {"<URL>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "index.php",
+			new string[] {"index.php"},
+			new string[] {"<ALPHANUM>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "index.phα",
+			new string[] {"index.phα"},
+			new string[] {"<ALPHANUM>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "index-h.php",
+			new string[] {"index", "h.php"},
+			new string[] {"<ALPHANUM>", "<ALPHANUM>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "index2.php",
+			new string[] {"index2", "php"},
+			new string[] {"<ALPHANUM>", "<ALPHANUM>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "index2.ph9,",
+			new string[] {"index2", "ph9"},
+			new string[] {"<ALPHANUM>", "<ALPHANUM>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "example.com,example.ph,index.php,index2.php,example2.ph",
+			new string[] {"example.com", "example.ph", "index.php", "index2", "php", "example2.ph"},
+			new string[] {"<URL>", "<URL>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<URL>"});
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, "example.com:8080 example.com/path/here example.com?query=something example.com#fragment",
+			new string[] {"example.com:8080", "example.com/path/here", "example.com?query=something", "example.com#fragment"},
+			new string[] {"<URL>", "<URL>", "<URL>", "<URL>"});
+
+		// Every remaining input is expected back as one term labelled "<URL>".
+		string[] singleUrlInputs = new string[]
+		{
+			"example.com:8080/path/here?query=something#fragment",
+			"example.com:8080/path/here?query=something",
+			"example.com:8080/path/here#fragment",
+			"example.com:8080/path/here",
+			"example.com:8080?query=something#fragment",
+			"example.com:8080?query=something",
+			"example.com:8080#fragment",
+			"example.com/path/here?query=something#fragment",
+			"example.com/path/here?query=something",
+			"example.com/path/here#fragment",
+			"example.com?query=something#fragment"
+		};
+		foreach (string url in singleUrlInputs)
+		{
+		  BaseTokenStreamTestCase.assertAnalyzesTo(a, url, new string[] {url}, new string[] {"<URL>"});
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Hands checkRandomData a fresh UAX29URLEmailAnalyzer and an iteration count of
+	  /// 1000 * RANDOM_MULTIPLIER (blasts random strings through the analyzer). </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		Analyzer analyzer = new UAX29URLEmailAnalyzer(TEST_VERSION_CURRENT);
+		int iterations = 1000 * RANDOM_MULTIPLIER;
+		checkRandomData(random(), analyzer, iterations);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailTokenizer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailTokenizer.cs
new file mode 100644
index 0000000..d8ccc13
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestUAX29URLEmailTokenizer.cs
@@ -0,0 +1,737 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
+	using UAX29URLEmailTokenizer = org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using Version = org.apache.lucene.util.Version;
+
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	public class TestUAX29URLEmailTokenizer : BaseTokenStreamTestCase
+	{
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHugeDoc() throws java.io.IOException
+	  /// <summary>
+	  /// Builds an input of 4094 spaces followed by "testing 1234", tokenizes it with
+	  /// UAX29URLEmailTokenizer and expects exactly the terms "testing" and "1234". </summary>
+	  public virtual void testHugeDoc()
+	  {
+		StringBuilder sb = new StringBuilder();
+		// Append(char, int) produces the run of spaces directly; the Java-only
+		// Arrays.fill call and its temporary char[] are not needed in .NET.
+		sb.Append(' ', 4094);
+		sb.Append("testing 1234");
+		string input = sb.ToString();
+		UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+		BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new string[] {"testing", "1234"});
+	  }
+
+	  // Analyzer used by most tests in this class.
+	  private Analyzer a = new AnalyzerAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Analyzer whose components consist of a single UAX29URLEmailTokenizer over the reader. </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader));
+		  }
+	  }
+
+
+	  /// <summary>
+	  /// Passes through tokens with type "&lt;URL&gt;" and blocks all other types. </summary>
+	  private class URLFilter : TokenFilter
+	  {
+		private readonly TestUAX29URLEmailTokenizer outerInstance;
+
+		// Assigned in the constructor: a C# field initializer may not call an instance method.
+		internal readonly TypeAttribute typeAtt;
+
+		/// <summary>
+		/// Creates the filter without an enclosing test instance (the filter never reads it). </summary>
+		/// <param name="in"> the stream to filter </param>
+		public URLFilter(TokenStream @in) : this(null, @in)
+		{
+		}
+
+		/// <param name="outerInstance"> enclosing test instance; stored but not otherwise used </param>
+		/// <param name="in"> the stream to filter </param>
+		public URLFilter(TestUAX29URLEmailTokenizer outerInstance, TokenStream @in) : base(@in)
+		{
+			this.outerInstance = outerInstance;
+			this.typeAtt = addAttribute(typeof(TypeAttribute));
+		}
+
+		/// <summary>
+		/// Advances the wrapped stream until a token whose type equals the tokenizer's URL type is found. </summary>
+		/// <returns> true when positioned on such a token, false once the wrapped stream is exhausted </returns>
+		public override bool incrementToken()
+		{
+		  while (input.incrementToken())
+		  {
+			if (typeAtt.type() == UAX29URLEmailTokenizer.TOKEN_TYPES[UAX29URLEmailTokenizer.URL])
+			{
+			  return true;
+			}
+		  }
+		  return false;
+		}
+	  }
+
+	  /// <summary>
+	  /// Passes through tokens with type "&lt;EMAIL&gt;" and blocks all other types. </summary>
+	  private class EmailFilter : TokenFilter
+	  {
+		private readonly TestUAX29URLEmailTokenizer outerInstance;
+
+		// Assigned in the constructor: a C# field initializer may not call an instance method.
+		internal readonly TypeAttribute typeAtt;
+
+		/// <summary>
+		/// Creates the filter without an enclosing test instance (the filter never reads it). </summary>
+		/// <param name="in"> the stream to filter </param>
+		public EmailFilter(TokenStream @in) : this(null, @in)
+		{
+		}
+
+		/// <param name="outerInstance"> enclosing test instance; stored but not otherwise used </param>
+		/// <param name="in"> the stream to filter </param>
+		public EmailFilter(TestUAX29URLEmailTokenizer outerInstance, TokenStream @in) : base(@in)
+		{
+			this.outerInstance = outerInstance;
+			this.typeAtt = addAttribute(typeof(TypeAttribute));
+		}
+
+		/// <summary>
+		/// Advances the wrapped stream until a token whose type equals the tokenizer's EMAIL type is found. </summary>
+		/// <returns> true when positioned on such a token, false once the wrapped stream is exhausted </returns>
+		public override bool incrementToken()
+		{
+		  while (input.incrementToken())
+		  {
+			if (typeAtt.type() == UAX29URLEmailTokenizer.TOKEN_TYPES[UAX29URLEmailTokenizer.EMAIL])
+			{
+			  return true;
+			}
+		  }
+		  return false;
+		}
+	  }
+
+	  // Analyzer that keeps only the URL tokens of its input (tokenizer + URLFilter).
+	  private Analyzer urlAnalyzer = new UrlAnalyzerAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Analyzer chaining a UAX29URLEmailTokenizer with unlimited token length into a URLFilter.
+	  /// Named distinctly because the class backing field "a" already uses the name
+	  /// AnalyzerAnonymousInnerClassHelper; two nested types cannot share a name. </summary>
+	  private class UrlAnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  public UrlAnalyzerAnonymousInnerClassHelper()
+		  {
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
+			tokenizer.MaxTokenLength = int.MaxValue; // Tokenize arbitrary length URLs
+			TokenFilter filter = new URLFilter(tokenizer);
+			return new TokenStreamComponents(tokenizer, filter);
+		  }
+	  }
+
+	  // Analyzer that keeps only the e-mail tokens of its input (tokenizer + EmailFilter).
+	  private Analyzer emailAnalyzer = new AnalyzerAnonymousInnerClassHelper2();
+
+	  /// <summary>
+	  /// Analyzer chaining a UAX29URLEmailTokenizer into an EmailFilter. </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			UAX29URLEmailTokenizer source = new UAX29URLEmailTokenizer(TEST_VERSION_CURRENT, reader);
+			TokenFilter emailsOnly = new EmailFilter(source);
+			return new TokenStreamComponents(source, emailsOnly);
+		  }
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testArmenian() throws Exception
+	  /// <summary>
+	  /// Armenian sample sentence, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testArmenian()
+	  {
+		string text = "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։";
+		string[] expectedTerms = new string[] {"Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից", "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAmharic() throws Exception
+	  /// <summary>
+	  /// Amharic sample sentence, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testAmharic()
+	  {
+		string text = "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም";
+		string[] expectedTerms = new string[] {"ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testArabic() throws Exception
+	  /// <summary>
+	  /// Arabic sample sentence with embedded English, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testArabic()
+	  {
+		string text = "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.";
+		string[] expectedTerms = new string[] {"الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا", "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAramaic() throws Exception
+	  /// <summary>
+	  /// Aramaic (Syriac script) sample sentence, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testAramaic()
+	  {
+		string text = "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀";
+		string[] expectedTerms = new string[] {"ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ", "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBengali() throws Exception
+	  /// <summary>
+	  /// Bengali sample text, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testBengali()
+	  {
+		// The statement is split over several lines so that no line is long enough to be
+		// re-wrapped in transit; the last expected term is the final word of the input.
+		string text = "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।";
+		string[] expectedTerms = new string[] {"এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার", "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে"};
+		BaseTokenStreamTestCase.assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFarsi() throws Exception
+	  /// <summary>
+	  /// Farsi sample sentence, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testFarsi()
+	  {
+		string text = "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.";
+		string[] expectedTerms = new string[] {"ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی", "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testGreek() throws Exception
+	  /// <summary>
+	  /// Greek sample sentence, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testGreek()
+	  {
+		string text = "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.";
+		string[] expectedTerms = new string[] {"Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που", "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testThai() throws Exception
+	  /// <summary>
+	  /// Thai sample text, passed to assertAnalyzesTo; each punctuation-delimited run is expected as one term. </summary>
+	  public virtual void testThai()
+	  {
+		string text = "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔";
+		string[] expectedTerms = new string[] {"การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLao() throws Exception
+	  /// <summary>
+	  /// Lao sample text, passed to assertAnalyzesTo; each space-delimited run is expected as one term. </summary>
+	  public virtual void testLao()
+	  {
+		string text = "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ";
+		string[] expectedTerms = new string[] {"ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTibetan() throws Exception
+	  /// <summary>
+	  /// Tibetan sample sentence, passed to assertAnalyzesTo with its expected terms. </summary>
+	  public virtual void testTibetan()
+	  {
+		string text = "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །";
+		string[] expectedTerms = new string[] {"སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར", "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+	  /*
+	   * For chinese, tokenize as char (these can later form bigrams or whatever)
+	   */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testChinese() throws Exception
+	  /// <summary>
+	  /// Chinese sample: every ideograph is expected as its own term, followed by "1234" and "Tests". </summary>
+	  public virtual void testChinese()
+	  {
+		string text = "我是中国人。 1234 Tests ";
+		string[] expectedTerms = new string[] {"我", "是", "中", "国", "人", "1234", "Tests"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmpty() throws Exception
+	  /// <summary>
+	  /// An empty string, a lone "." and a lone space are each expected to produce no terms. </summary>
+	  public virtual void testEmpty()
+	  {
+		foreach (string blankInput in new string[] {"", ".", " "})
+		{
+		  assertAnalyzesTo(a, blankInput, new string[] {});
+		}
+	  }
+
+	  /* test various jira issues this analyzer is related to */
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLUCENE1545() throws Exception
+	  /// <summary>
+	  /// LUCENE-1545: a word containing U+0364 COMBINING LATIN SMALL LETTER E must stay one term. </summary>
+	  public virtual void testLUCENE1545()
+	  {
+		// Standard analyzer used to split "moͤchte" into "mo" and "chte", losing the
+		// combining character; the expected result is the single term "moͤchte".
+		string word = "moͤchte";
+		assertAnalyzesTo(a, word, new string[] {"moͤchte"});
+	  }
+
+	  /* Tests from StandardAnalyzer, just to show behavior is similar */
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testAlphanumericSA() throws Exception
+	  /// <summary>
+	  /// Alphanumeric inputs: each one is expected back as a single, unchanged term. </summary>
+	  public virtual void testAlphanumericSA()
+	  {
+		foreach (string token in new string[] {"B2B", "2B"})
+		{
+		  assertAnalyzesTo(a, token, new string[] {token});
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testDelimitersSA() throws Exception
+	  /// <summary>
+	  /// Inputs joined by "-", "," and "/" are expected to split at those delimiters. </summary>
+	  public virtual void testDelimitersSA()
+	  {
+		string[] dashedParts = new string[] {"some", "dashed", "phrase"};
+		assertAnalyzesTo(a, "some-dashed-phrase", dashedParts);
+
+		string[] commaParts = new string[] {"dogs", "chase", "cats"};
+		assertAnalyzesTo(a, "dogs,chase,cats", commaParts);
+
+		string[] slashParts = new string[] {"ac", "dc"};
+		assertAnalyzesTo(a, "ac/dc", slashParts);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testApostrophesSA() throws Exception
+	  /// <summary>
+	  /// Words with internal apostrophes: each one is expected back as a single, unchanged term. </summary>
+	  public virtual void testApostrophesSA()
+	  {
+		string[] words = new string[] {"O'Reilly", "you're", "she's", "Jim's", "don't", "O'Reilly's"};
+		foreach (string word in words)
+		{
+		  assertAnalyzesTo(a, word, new string[] {word});
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNumericSA() throws Exception
+	  /// <summary>
+	  /// Floating point, model-number and IP-address style inputs with their expected terms. </summary>
+	  public virtual void testNumericSA()
+	  {
+		string[] decimalTerms = new string[] {"21.35"};
+		assertAnalyzesTo(a, "21.35", decimalTerms);
+
+		string[] modelTerms = new string[] {"R2D2", "C3PO"};
+		assertAnalyzesTo(a, "R2D2 C3PO", modelTerms);
+
+		string[] addressTerms = new string[] {"216.239.63.104"};
+		assertAnalyzesTo(a, "216.239.63.104", addressTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTextWithNumbersSA() throws Exception
+	  /// <summary>
+	  /// A sentence containing a number is expected to split into its four space-separated terms. </summary>
+	  public virtual void testTextWithNumbersSA()
+	  {
+		string text = "David has 5000 bones";
+		string[] expectedTerms = new string[] {"David", "has", "5000", "bones"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testVariousTextSA() throws Exception
+	  /// <summary>
+	  /// Assorted plain-text inputs (mixed case, extra whitespace and punctuation, quotes)
+	  /// with their expected terms. </summary>
+	  public virtual void testVariousTextSA()
+	  {
+		string[] jobAdTerms = new string[] {"C", "embedded", "developers", "wanted"};
+		assertAnalyzesTo(a, "C embedded developers wanted", jobAdTerms);
+
+		string[] mixedCaseTerms = new string[] {"foo", "bar", "FOO", "BAR"};
+		assertAnalyzesTo(a, "foo bar FOO BAR", mixedCaseTerms);
+
+		string[] noisyTerms = new string[] {"foo", "bar", "FOO", "BAR"};
+		assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", noisyTerms);
+
+		string[] quotedTerms = new string[] {"QUOTED", "word"};
+		assertAnalyzesTo(a, "\"QUOTED\" word", quotedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKoreanSA() throws Exception
+	  /// <summary>
+	  /// Two space-separated Korean words are expected as two terms. </summary>
+	  public virtual void testKoreanSA()
+	  {
+		string text = "안녕하세요 한글입니다";
+		string[] expectedTerms = new string[] {"안녕하세요", "한글입니다"};
+		assertAnalyzesTo(a, text, expectedTerms);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+	  /// <summary>
+	  /// Passes a four-word sentence to assertAnalyzesTo with the expected terms and two
+	  /// int arrays holding the start and end character positions of each word. </summary>
+	  public virtual void testOffsets()
+	  {
+		string text = "David has 5000 bones";
+		string[] expectedTerms = new string[] {"David", "has", "5000", "bones"};
+		int[] starts = new int[] {0, 6, 10, 15};
+		int[] ends = new int[] {5, 9, 14, 20};
+		assertAnalyzesTo(a, text, expectedTerms, starts, ends);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTypes() throws Exception
+	  /// <summary>
+	  /// Passes a four-word sentence to assertAnalyzesTo with the expected terms and the
+	  /// type label expected for each ("&lt;NUM&gt;" for the number, "&lt;ALPHANUM&gt;" otherwise). </summary>
+	  public virtual void testTypes()
+	  {
+		string text = "David has 5000 bones";
+		string[] expectedTerms = new string[] {"David", "has", "5000", "bones"};
+		string[] expectedTypes = new string[] {"<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>"};
+		assertAnalyzesTo(a, text, expectedTerms, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testWikiURLs() throws Exception
+	  /// <summary>
+	  /// Reads "LuceneResourcesWikiPage.html" and the non-blank, trimmed lines of
+	  /// "LuceneResourcesWikiPageURLs.txt", then passes the page text and that list to
+	  /// assertAnalyzesTo with urlAnalyzer. </summary>
+	  public virtual void testWikiURLs()
+	  {
+		// NOTE(review): getResourceAsStream is carried over from the Java original; presumably
+		// a project-supplied helper that opens the test resource - confirm.
+		string luceneResourcesWikiPage;
+		using (System.IO.StreamReader reader = new System.IO.StreamReader(this.GetType().getResourceAsStream("LuceneResourcesWikiPage.html"), Encoding.UTF8))
+		{
+		  // ReadToEnd replaces the Java-style read loop: StreamReader.Read signals end of
+		  // stream with 0, so comparing its result against -1 would never terminate.
+		  luceneResourcesWikiPage = reader.ReadToEnd();
+		}
+		assertTrue(null != luceneResourcesWikiPage && luceneResourcesWikiPage.Length > 0);
+
+		string[] urls;
+		using (System.IO.StreamReader bufferedReader = new System.IO.StreamReader(this.GetType().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), Encoding.UTF8))
+		{
+		  // Declared as List<string>: ToArray() is a List<T> member, whereas on IList<T>
+		  // it would require System.Linq, which this file does not import.
+		  List<string> urlList = new List<string>();
+		  string line;
+		  while (null != (line = bufferedReader.ReadLine()))
+		  {
+			line = line.Trim();
+			if (line.Length > 0)
+			{
+			  urlList.Add(line);
+			}
+		  }
+		  urls = urlList.ToArray();
+		}
+		assertTrue(null != urls && urls.Length > 0);
+		BaseTokenStreamTestCase.assertAnalyzesTo(urlAnalyzer, luceneResourcesWikiPage, urls);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEmails() throws Exception
+	  /// <summary>
+	  /// Reads "random.text.with.email.addresses.txt" and the non-blank, trimmed lines of
+	  /// "email.addresses.from.random.text.with.email.addresses.txt", then passes the text
+	  /// and that list to assertAnalyzesTo with emailAnalyzer. </summary>
+	  public virtual void testEmails()
+	  {
+		// NOTE(review): getResourceAsStream is carried over from the Java original; presumably
+		// a project-supplied helper that opens the test resource - confirm.
+		string randomTextWithEmails;
+		using (System.IO.StreamReader reader = new System.IO.StreamReader(this.GetType().getResourceAsStream("random.text.with.email.addresses.txt"), Encoding.UTF8))
+		{
+		  // ReadToEnd replaces the Java-style read loop: StreamReader.Read signals end of
+		  // stream with 0, so comparing its result against -1 would never terminate.
+		  randomTextWithEmails = reader.ReadToEnd();
+		}
+		assertTrue(null != randomTextWithEmails && randomTextWithEmails.Length > 0);
+
+		string[] emails;
+		using (System.IO.StreamReader bufferedReader = new System.IO.StreamReader(this.GetType().getResourceAsStream("email.addresses.from.random.text.with.email.addresses.txt"), Encoding.UTF8))
+		{
+		  // Declared as List<string>: ToArray() is a List<T> member, whereas on IList<T>
+		  // it would require System.Linq, which this file does not import.
+		  List<string> emailList = new List<string>();
+		  string line;
+		  while (null != (line = bufferedReader.ReadLine()))
+		  {
+			line = line.Trim();
+			if (line.Length > 0)
+			{
+			  emailList.Add(line);
+			}
+		  }
+		  emails = emailList.ToArray();
+		}
+		assertTrue(null != emails && emails.Length > 0);
+		BaseTokenStreamTestCase.assertAnalyzesTo(emailAnalyzer, randomTextWithEmails, emails);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMailtoSchemeEmails() throws Exception
+	  /// <summary>
+	  /// Passes two "mailto:" inputs to assertAnalyzesTo with the terms and type labels
+	  /// currently expected for them. See LUCENE-3880. </summary>
+	  public virtual void testMailtoSchemeEmails()
+	  {
+		string simpleInput = "mailto:test@example.org";
+		string[] simpleTerms = new string[] {"mailto", "test@example.org"};
+		string[] simpleTypes = new string[] {"<ALPHANUM>", "<EMAIL>"};
+		assertAnalyzesTo(a, simpleInput, simpleTerms, simpleTypes);
+
+		// TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
+		// TODO: Hex decoding + re-tokenization
+		// TODO: split field keys/values
+		// TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
+		string fullInput = "mailto:personA@example.com,personB@example.com?cc=personC@example.com" + "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that";
+		string[] fullTerms = new string[] {"mailto", "personA@example.com", ",personB@example.com", "?cc=personC@example.com", "subject", "Subjectivity", "body", "Corpusivity", "20or", "20something", "20like", "20that"};
+		string[] fullTypes = new string[] {"<ALPHANUM>", "<EMAIL>", "<EMAIL>", "<EMAIL>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"};
+		assertAnalyzesTo(a, fullInput, fullTerms, fullTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testURLs() throws Exception
+	  /// <summary>
+	  /// Reads "random.text.with.urls.txt" and the non-blank, trimmed lines of
+	  /// "urls.from.random.text.with.urls.txt", then passes the text and that list to
+	  /// assertAnalyzesTo with urlAnalyzer. </summary>
+	  public virtual void testURLs()
+	  {
+		// NOTE(review): getResourceAsStream is carried over from the Java original; presumably
+		// a project-supplied helper that opens the test resource - confirm.
+		string randomTextWithURLs;
+		using (System.IO.StreamReader reader = new System.IO.StreamReader(this.GetType().getResourceAsStream("random.text.with.urls.txt"), Encoding.UTF8))
+		{
+		  // ReadToEnd replaces the Java-style read loop: StreamReader.Read signals end of
+		  // stream with 0, so comparing its result against -1 would never terminate.
+		  randomTextWithURLs = reader.ReadToEnd();
+		}
+		assertTrue(null != randomTextWithURLs && randomTextWithURLs.Length > 0);
+
+		string[] urls;
+		using (System.IO.StreamReader bufferedReader = new System.IO.StreamReader(this.GetType().getResourceAsStream("urls.from.random.text.with.urls.txt"), Encoding.UTF8))
+		{
+		  // Declared as List<string>: ToArray() is a List<T> member, whereas on IList<T>
+		  // it would require System.Linq, which this file does not import.
+		  List<string> urlList = new List<string>();
+		  string line;
+		  while (null != (line = bufferedReader.ReadLine()))
+		  {
+			line = line.Trim();
+			if (line.Length > 0)
+			{
+			  urlList.Add(line);
+			}
+		  }
+		  urls = urlList.ToArray();
+		}
+		assertTrue(null != urls && urls.Length > 0);
+		BaseTokenStreamTestCase.assertAnalyzesTo(urlAnalyzer, randomTextWithURLs, urls);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUnicodeWordBreaks() throws Exception
+	  /// <summary>
+	  /// Runs WordBreakTestUnicode_6_3_0.test against analyzer a. </summary>
+	  public virtual void testUnicodeWordBreaks()
+	  {
+		new WordBreakTestUnicode_6_3_0().test(a);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSupplementary() throws Exception
+	  /// <summary>
+	  /// Six ideographs (the first is written as a surrogate pair in UTF-16) are each expected as their own
+	  /// term labelled "&lt;IDEOGRAPHIC&gt;". </summary>
+	  public virtual void testSupplementary()
+	  {
+		string text = "𩬅艱鍟䇹愯瀛";
+		string[] expectedTerms = new string[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"};
+		string[] expectedTypes = new string[] {"<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>"};
+		assertAnalyzesTo(a, text, expectedTerms, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testKorean() throws Exception
+	  /// <summary>
+	  /// A Hangul word is expected as one term labelled "&lt;HANGUL&gt;". </summary>
+	  public virtual void testKorean()
+	  {
+		string text = "훈민정음";
+		string[] expectedTerms = new string[] {"훈민정음"};
+		string[] expectedTypes = new string[] {"<HANGUL>"};
+		assertAnalyzesTo(a, text, expectedTerms, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testJapanese() throws Exception
+	  /// <summary>
+	  /// Passes a mixed kanji / hiragana / katakana phrase to assertAnalyzesTo together with
+	  /// the expected terms and the expected type label of each term. </summary>
+	  public virtual void testJapanese()
+	  {
+		string text = "仮名遣い カタカナ";
+		string[] expectedTerms = new string[] {"仮", "名", "遣", "い", "カタカナ"};
+		string[] expectedTypes = new string[] {"<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>"};
+		assertAnalyzesTo(a, text, expectedTerms, expectedTypes);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCombiningMarks() throws Exception
+	  /// <summary>
+	  /// Runs checkOneTerm on four base characters, each followed by U+3099
+	  /// (combining voiced sound mark), expecting every sequence back unchanged. </summary>
+	  public virtual void testCombiningMarks()
+	  {
+		// Escapes keep each base + combining-mark pair decomposed even if an editor,
+		// VCS filter or mail transport NFC-normalizes this file.
+		checkOneTerm(a, "\u3055\u3099", "\u3055\u3099"); // hiragana
+		checkOneTerm(a, "\u30B5\u3099", "\u30B5\u3099"); // katakana
+		checkOneTerm(a, "\u58F9\u3099", "\u58F9\u3099"); // ideographic
+		checkOneTerm(a, "\uC544\u3099", "\uC544\u3099"); // hangul
+	  }
+
+	  /// <summary>
+	  /// A single \p{Word_Break = MidLetter}, \p{Word_Break = MidNumLet} or
+	  /// \p{Word_Break = MidNum} char between suitable neighbours keeps the token together;
+	  /// two or more consecutive ones should trigger a token split.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMid() throws Exception
+	  public virtual void testMid()
+	  {
+		// ':' is in \p{WB:MidLetter}: splits unless a Letter char sits on both sides
+		assertAnalyzesTo(a, "A:B", new string[] {"A:B"});
+		assertAnalyzesTo(a, "A::B", new string[] {"A", "B"});
+
+		// '.' is in \p{WB:MidNumLet}: splits unless a Letter or Numeric char sits on both sides
+		assertAnalyzesTo(a, "1.2", new string[] {"1.2"});
+		assertAnalyzesTo(a, "A.B", new string[] {"A.B"});
+		assertAnalyzesTo(a, "1..2", new string[] {"1", "2"});
+		assertAnalyzesTo(a, "A..B", new string[] {"A", "B"});
+
+		// ',' is in \p{WB:MidNum}: splits unless a Numeric char sits on both sides
+		assertAnalyzesTo(a, "1,2", new string[] {"1,2"});
+		assertAnalyzesTo(a, "1,,2", new string[] {"1", "2"});
+
+		// \p{WB:MidLetter} next to \p{WB:MidNumLet}: split
+		assertAnalyzesTo(a, "A.:B", new string[] {"A", "B"});
+		assertAnalyzesTo(a, "A:.B", new string[] {"A", "B"});
+
+		// \p{WB:MidNum} next to \p{WB:MidNumLet}: split
+		assertAnalyzesTo(a, "1,.2", new string[] {"1", "2"});
+		assertAnalyzesTo(a, "1.,2", new string[] {"1", "2"});
+
+		// '_' is in \p{WB:ExtendNumLet}; the same cases again with an '_'-joined prefix
+		assertAnalyzesTo(a, "A:B_A:B", new string[] {"A:B_A:B"});
+		assertAnalyzesTo(a, "A:B_A::B", new string[] {"A:B_A", "B"});
+
+		assertAnalyzesTo(a, "1.2_1.2", new string[] {"1.2_1.2"});
+		assertAnalyzesTo(a, "A.B_A.B", new string[] {"A.B_A.B"});
+		assertAnalyzesTo(a, "1.2_1..2", new string[] {"1.2_1", "2"});
+		assertAnalyzesTo(a, "A.B_A..B", new string[] {"A.B_A", "B"});
+
+		assertAnalyzesTo(a, "1,2_1,2", new string[] {"1,2_1,2"});
+		assertAnalyzesTo(a, "1,2_1,,2", new string[] {"1,2_1", "2"});
+
+		assertAnalyzesTo(a, "C_A.:B", new string[] {"C_A", "B"});
+		assertAnalyzesTo(a, "C_A:.B", new string[] {"C_A", "B"});
+
+		assertAnalyzesTo(a, "3_1,.2", new string[] {"3_1", "2"});
+		assertAnalyzesTo(a, "3_1.,2", new string[] {"3_1", "2"});
+	  }
+
+	  /// <summary>
+	  /// Pins the combining-mark behaviour of the tokenizer when built with
+	  /// <c>Version.LUCENE_31</c>: the hiragana and ideographic samples lose their
+	  /// mark (marked "Bug" below), while katakana and hangul keep it.
+	  /// Deprecated: remove this and sophisticated backwards layer in 5.0.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("remove this and sophisticated backwards layer in 5.0") public void testCombiningMarksBackwards() throws Exception
+	  [Obsolete("remove this and sophisticated backwards layer in 5.0")]
+	  public virtual void testCombiningMarksBackwards()
+	  {
+		Analyzer legacyAnalyzer = new AnalyzerAnonymousInnerClassHelper3(this);
+
+		// Bug: the mark is dropped from the hiragana base character
+		checkOneTerm(legacyAnalyzer, "ざ", "さ");
+		// Works: katakana comes back unchanged
+		checkOneTerm(legacyAnalyzer, "ザ", "ザ");
+		// Bug: the mark is dropped from the ideographic base character
+		checkOneTerm(legacyAnalyzer, "壹゙", "壹");
+		// Works: hangul comes back unchanged
+		checkOneTerm(legacyAnalyzer, "아゙", "아゙");
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a <c>UAX29URLEmailTokenizer</c> created with
+	  /// <c>Version.LUCENE_31</c>. Stands in for a Java anonymous inner class.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored but not read inside this class.
+		  private readonly TestUAX29URLEmailTokenizer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestUAX29URLEmailTokenizer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Wraps a 3.1-version tokenizer over <paramref name="reader"/>; <paramref name="fieldName"/> is not used.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new UAX29URLEmailTokenizer(Version.LUCENE_31, reader));
+		  }
+	  }
+
+	  // LUCENE-3880
+	  /// <summary>
+	  /// Pins the output of the tokenizer built with <c>Version.LUCENE_34</c> for a
+	  /// <c>mailto:</c> address: the text is expected to split at the '@' sign.
+	  /// Deprecated: remove this and sophisticated backwards layer in 5.0.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("remove this and sophisticated backwards layer in 5.0") public void testMailtoBackwards() throws Exception
+	  [Obsolete("remove this and sophisticated backwards layer in 5.0")]
+	  public virtual void testMailtoBackwards()
+	  {
+		Analyzer legacyAnalyzer = new AnalyzerAnonymousInnerClassHelper4(this);
+		string[] expectedTerms = new string[] {"mailto:test", "example.org"};
+
+		assertAnalyzesTo(legacyAnalyzer, "mailto:test@example.org", expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a <c>UAX29URLEmailTokenizer</c> created with
+	  /// <c>Version.LUCENE_34</c>. Stands in for a Java anonymous inner class.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored but not read inside this class.
+		  private readonly TestUAX29URLEmailTokenizer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper4(TestUAX29URLEmailTokenizer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Wraps a 3.4-version tokenizer over <paramref name="reader"/>; <paramref name="fieldName"/> is not used.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new UAX29URLEmailTokenizer(Version.LUCENE_34, reader));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Smoke test for the tokenizer built with <c>Version.LUCENE_36</c>, which uses
+	  /// older Unicode data (6.0). U+08E6 is a combining mark added in Unicode 6.1, so
+	  /// this version is expected to split the word containing it into "t" and "st".
+	  /// Deprecated: uses older unicode (6.0); simple test to make sure it is basically working.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("uses older unicode (6.0). simple test to make sure its basically working") public void testVersion36() throws Exception
+	  [Obsolete("uses older unicode (6.0). simple test to make sure its basically working")]
+	  public virtual void testVersion36()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper5(this);
+		assertAnalyzesTo(a, "this is just a t\u08E6st lucene@apache.org", new string[] {"this", "is", "just", "a", "t", "st", "lucene@apache.org"}); // new combining mark in 6.1
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a <c>UAX29URLEmailTokenizer</c> created with
+	  /// <c>Version.LUCENE_36</c>. Stands in for a Java anonymous inner class.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored but not read inside this class.
+		  private readonly TestUAX29URLEmailTokenizer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper5(TestUAX29URLEmailTokenizer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Wraps a 3.6-version tokenizer over <paramref name="reader"/>; <paramref name="fieldName"/> is not used.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new UAX29URLEmailTokenizer(Version.LUCENE_36, reader));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Smoke test for the tokenizer built with <c>Version.LUCENE_40</c>, which uses
+	  /// older Unicode data (6.1). U+061C is a combining mark added in Unicode 6.3, so
+	  /// this version is expected to split the word containing it into "t" and "st".
+	  /// Deprecated: uses older unicode (6.1); simple test to make sure it is basically working.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Deprecated("uses older unicode (6.1). simple test to make sure its basically working") public void testVersion40() throws Exception
+	  [Obsolete("uses older unicode (6.1). simple test to make sure its basically working")]
+	  public virtual void testVersion40()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper6(this);
+		// U+061C is a new combining mark in 6.3, found using "[[\p{WB:Format}\p{WB:Extend}]&[^\p{Age:6.2}]]"
+		// on the online UnicodeSet utility: <http://unicode.org/cldr/utility/list-unicodeset.jsp>
+		assertAnalyzesTo(a, "this is just a t\u061Cst lucene@apache.org", new string[] {"this", "is", "just", "a", "t", "st", "lucene@apache.org"});
+	  }
+
+	  /// <summary>
+	  /// Analyzer whose only component is a <c>UAX29URLEmailTokenizer</c> created with
+	  /// <c>Version.LUCENE_40</c>. Stands in for a Java anonymous inner class.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper6 : Analyzer
+	  {
+		  // Reference to the enclosing test instance; stored but not read inside this class.
+		  private readonly TestUAX29URLEmailTokenizer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper6(TestUAX29URLEmailTokenizer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Wraps a 4.0-version tokenizer over <paramref name="reader"/>; <paramref name="fieldName"/> is not used.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			return new TokenStreamComponents(new UAX29URLEmailTokenizer(Version.LUCENE_40, reader));
+		  }
+	  }
+
+	  /// <summary>
+	  /// Runs <c>checkRandomData</c> against the shared analyzer with an iteration
+	  /// count of 1000 scaled by <c>RANDOM_MULTIPLIER</c>.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		var iterations = 1000 * RANDOM_MULTIPLIER;
+		checkRandomData(random(), a, iterations);
+	  }
+
+	  /// <summary>
+	  /// Runs <c>checkRandomData</c> against the shared analyzer with an iteration
+	  /// count of 100 scaled by <c>RANDOM_MULTIPLIER</c> and a final argument of 8192
+	  /// (presumably the maximum random string length; confirm against the
+	  /// <c>checkRandomData</c> overload).
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception
+	  public virtual void testRandomHugeStrings()
+	  {
+		// Pass random() directly: a local named "random" would be in scope inside its own
+		// initializer in C#, so "Random random = random();" does not compile.
+		checkRandomData(random(), a, 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message