lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [22/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:39:11 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
new file mode 100644
index 0000000..73ccad8
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestBugInSomething.cs
@@ -0,0 +1,383 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+
+	using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
+	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+	using CommonGramsFilter = org.apache.lucene.analysis.commongrams.CommonGramsFilter;
+	using WordDelimiterFilter = org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
+	using EdgeNGramTokenizer = org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
+	using NGramTokenFilter = org.apache.lucene.analysis.ngram.NGramTokenFilter;
+	using ShingleFilter = org.apache.lucene.analysis.shingle.ShingleFilter;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+	using WikipediaTokenizer = org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
+	using SuppressCodecs = org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressCodecs("Direct") public class TestBugInSomething extends org.apache.lucene.analysis.BaseTokenStreamTestCase
+	public class TestBugInSomething : BaseTokenStreamTestCase
+	{
+	  /// <summary>
+	  /// Runs a consistency check for one fixed input string against the analyzer built
+	  /// from a three-word CharArraySet and a three-entry NormalizeCharMap.
+	  /// </summary>
+	  public virtual void test()
+	  {
+		// Word set handed to the analyzer (initial capacity 3, ignoreCase = false).
+		CharArraySet commonWords = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
+		commonWords.add("jjp");
+		commonWords.add("wlmwoknt");
+		commonWords.add("tcgyreo");
+
+		// Character-level replacements handed to the analyzer.
+		NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
+		mapBuilder.add("mtqlpi", "");
+		mapBuilder.add("mwoknt", "jjp");
+		mapBuilder.add("tcgyreo", "zpfpajyws");
+		NormalizeCharMap charMap = mapBuilder.build();
+
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, commonWords, charMap);
+		checkAnalysisConsistency(random(), analyzer, false, "wmgddzunizdomqyj");
+	  }
+
+	  /// <summary>
+	  /// Analyzer constructed by test(): a MockTokenizer followed by a CommonGramsFilter,
+	  /// reading its input through a MockCharFilter and then a MappingCharFilter.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  // Enclosing test instance (kept from the Java anonymous-class conversion).
+		  private readonly TestBugInSomething outerInstance;
+
+		  // Word set passed to the CommonGramsFilter.
+		  private CharArraySet cas;
+		  // Replacement map passed to the MappingCharFilter.
+		  private NormalizeCharMap map;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestBugInSomething outerInstance, CharArraySet cas, NormalizeCharMap map)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.cas = cas;
+			  this.map = map;
+		  }
+
+		  /// <summary>
+		  /// Builds the tokenizer/filter pair; the reader is wrapped in a
+		  /// CheckThatYouDidntReadAnythingReaderWrapper before it reaches the tokenizer.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Reader guarded = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);
+			Tokenizer source = new MockTokenizer(guarded, MockTokenFilter.ENGLISH_STOPSET, false, -65);
+			TokenFilter sink = new CommonGramsFilter(TEST_VERSION_CURRENT, source, this.cas);
+			return new TokenStreamComponents(source, sink);
+		  }
+
+		  /// <summary>
+		  /// Wraps the raw reader in a MockCharFilter and then in a MappingCharFilter.
+		  /// </summary>
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			Reader mocked = new MockCharFilter(reader, 0);
+			return new MappingCharFilter(this.map, mocked);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Stub stream used by testWrapping(); every overridden member throws.
+	  /// </summary>
+	  internal CharFilter wrappedStream = new CharFilterAnonymousInnerClassHelper(new StringReader("bogus"));
+
+	  /// <summary>
+	  /// CharFilter whose overridden members all throw <see cref="System.NotSupportedException"/>.
+	  /// The exception message is the Java-style signature of the member that was called,
+	  /// which lets a caller tell which member was reached.
+	  /// </summary>
+	  private class CharFilterAnonymousInnerClassHelper : CharFilter
+	  {
+		  /// <param name="input">reader handed to the CharFilter base constructor</param>
+		  // Fix: the converted code passed the type name "StringReader" to base(...)
+		  // instead of the constructor argument.
+		  public CharFilterAnonymousInnerClassHelper(StringReader input) : base(input)
+		  {
+		  }
+
+		  public override void mark(int readAheadLimit)
+		  {
+			throw new System.NotSupportedException("mark(int)");
+		  }
+
+		  public override bool markSupported()
+		  {
+			throw new System.NotSupportedException("markSupported()");
+		  }
+
+		  public override int read()
+		  {
+			throw new System.NotSupportedException("read()");
+		  }
+
+		  public override int read(char[] cbuf)
+		  {
+			throw new System.NotSupportedException("read(char[])");
+		  }
+
+		  public override int read(CharBuffer target)
+		  {
+			throw new System.NotSupportedException("read(CharBuffer)");
+		  }
+
+		  public override bool ready()
+		  {
+			throw new System.NotSupportedException("ready()");
+		  }
+
+		  public override void reset()
+		  {
+			throw new System.NotSupportedException("reset()");
+		  }
+
+		  public override long skip(long n)
+		  {
+			throw new System.NotSupportedException("skip(long)");
+		  }
+
+		  public override int correct(int currentOff)
+		  {
+			throw new System.NotSupportedException("correct(int)");
+		  }
+
+		  public override void close()
+		  {
+			throw new System.NotSupportedException("close()");
+		  }
+
+		  public override int read(char[] arg0, int arg1, int arg2)
+		  {
+			throw new System.NotSupportedException("read(char[], int, int)");
+		  }
+	  }
+
+	  /// <summary>
+	  /// Calls each member of a CheckThatYouDidntReadAnythingReaderWrapper built around
+	  /// wrappedStream and checks that the call fails with the message thrown by the
+	  /// corresponding member of the wrapped stub.
+	  /// </summary>
+	  public virtual void testWrapping()
+	  {
+		CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
+
+		assertUnsupported("mark(int)", () => cs.mark(1));
+		assertUnsupported("markSupported()", () => cs.markSupported());
+		assertUnsupported("read()", () => cs.read());
+		assertUnsupported("read(char[])", () => cs.read(new char[0]));
+		assertUnsupported("read(CharBuffer)", () => cs.read(CharBuffer.wrap(new char[0])));
+		assertUnsupported("reset()", () => cs.reset());
+		assertUnsupported("skip(long)", () => cs.skip(1));
+		// correctOffset(int) is expected to end up in the stub's correct(int).
+		assertUnsupported("correct(int)", () => cs.correctOffset(1));
+		assertUnsupported("close()", () => cs.close());
+		assertUnsupported("read(char[], int, int)", () => cs.read(new char[0], 0, 0));
+	  }
+
+	  /// <summary>
+	  /// Runs <paramref name="call"/> and asserts that it throws a NotSupportedException
+	  /// whose message equals <paramref name="expectedMessage"/>.
+	  /// </summary>
+	  /// <remarks>
+	  /// The failure for "nothing was thrown" is raised outside the try block. The converted
+	  /// code called fail() inside a try with catch (Exception); if fail() throws an
+	  /// Exception-derived type, that handler catches it and the test then fails with an
+	  /// unrelated message mismatch. NOTE(review): confirm the exception type fail() throws.
+	  /// </remarks>
+	  private void assertUnsupported(string expectedMessage, Action call)
+	  {
+		string actualMessage = null;
+		try
+		{
+		  call();
+		}
+		catch (NotSupportedException e)
+		{
+		  actualMessage = e.Message;
+		}
+
+		if (actualMessage == null)
+		{
+		  fail("expected NotSupportedException with message: " + expectedMessage);
+		}
+		assertEquals(expectedMessage, actualMessage);
+	  }
+
+	  // todo: test framework?
+
+	  /// <summary>
+	  /// Tracing filter: delegates to the wrapped stream and writes one console line for
+	  /// every token produced and for each end(), close() and reset() call.
+	  /// </summary>
+	  internal sealed class SopTokenFilter : TokenFilter
+	  {
+
+		internal SopTokenFilter(TokenStream input) : base(input)
+		{
+		}
+
+		/// <summary>
+		/// Advances the wrapped stream; when it yields a token, prints the wrapped stream's
+		/// type name followed by this filter's reflected attribute state.
+		/// </summary>
+		public override bool incrementToken()
+		{
+		  if (!input.incrementToken())
+		  {
+			return false;
+		  }
+
+		  string sourceName = input.GetType().Name;
+		  Console.WriteLine(sourceName + "->" + this.reflectAsString(false));
+		  return true;
+		}
+
+		/// <summary>Calls the base end() and then logs it.</summary>
+		public override void end()
+		{
+		  base.end();
+		  string sourceName = input.GetType().Name;
+		  Console.WriteLine(sourceName + ".end()");
+		}
+
+		/// <summary>Calls the base close() and then logs it.</summary>
+		public override void close()
+		{
+		  base.close();
+		  string sourceName = input.GetType().Name;
+		  Console.WriteLine(sourceName + ".close()");
+		}
+
+		/// <summary>Calls the base reset() and then logs it.</summary>
+		public override void reset()
+		{
+		  base.reset();
+		  string sourceName = input.GetType().Name;
+		  Console.WriteLine(sourceName + ".reset()");
+		}
+	  }
+
+	  /// <summary>
+	  /// LUCENE-5269: pushes random data through an EdgeNGramTokenizer followed by a
+	  /// ShingleFilter and an NGramTokenFilter.
+	  /// </summary>
+	  public virtual void testUnicodeShinglesAndNgrams()
+	  {
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);
+		checkRandomData(random(), analyzer, 2000);
+	  }
+
+	  /// <summary>
+	  /// Analyzer used by testUnicodeShinglesAndNgrams().
+	  /// </summary>
+	  /// <remarks>
+	  /// Named with the suffix 3 because the enclosing class already declares nested types
+	  /// AnalyzerAnonymousInnerClassHelper and AnalyzerAnonymousInnerClassHelper2; the
+	  /// converter emitted this one under the first name as well, which declares the same
+	  /// nested type twice.
+	  /// </remarks>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  // Enclosing test instance (kept from the Java anonymous-class conversion).
+		  private readonly TestBugInSomething outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestBugInSomething outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  /// <summary>
+		  /// Chain: EdgeNGramTokenizer(2, 94), then ShingleFilter(5), then NGramTokenFilter(55, 83).
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			// Debugging aid: any stage below can be wrapped in a SopTokenFilter to trace it.
+			Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, reader, 2, 94);
+			TokenStream stream = new ShingleFilter(tokenizer, 5);
+			stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83);
+			return new TokenStreamComponents(tokenizer, stream);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Runs a consistency check for one fixed input string against the analyzer built
+	  /// from a set of protected words and a fixed table of signed bytes.
+	  /// </summary>
+	  public virtual void testCuriousWikipediaString()
+	  {
+		// The converter left Java's "new HashSet<>(Arrays.asList(...))" here, which is not
+		// valid C#; an explicitly typed HashSet built from an array is the direct equivalent.
+		// NOTE(review): assumes CharArraySet has a constructor accepting this collection - confirm.
+		HashSet<string> words = new HashSet<string>(new string[] {"rrdpafa", "pupmmlu", "xlq", "dyy", "zqrxrrck", "o", "hsrlfvcha"});
+		CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, words, false);
+
+		// Java's byte is signed, hence sbyte for the negative values in this table.
+		sbyte[] table = new sbyte[] {-57, 26, 1, 48, 63, -23, 55, -84, 18, 120, -97, 103, 58, 13, 84, 89, 57, -13, -63, 5, 28, 97, -54, -94, 102, -108, -5, 5, 46, 40, 43, 78, 43, -72, 36, 29, 124, -106, -22, -51, 65, 5, 31, -42, 6, -99, 97, 14, 81, -128, 74, 100, 54, -55, -25, 53, -71, -98, 44, 33, 86, 106, -42, 47, 115, -89, -18, -26, 22, -95, -43, 83, -125, 105, -104, -24, 106, -16, 126, 115, -105, 97, 65, -33, 57, 44, -1, 123, -68, 100, 13, -41, -64, -119, 0, 92, 94, -36, 53, -9, -102, -18, 90, 94, -26, 31, 71, -20};
+
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, protWords, table);
+		checkAnalysisConsistency(random(), a, false, "B\u28c3\ue0f8[ \ud800\udfc2 </p> jb");
+	  }
+
+	  /// <summary>
+	  /// Analyzer constructed by testCuriousWikipediaString(): a WikipediaTokenizer followed
+	  /// by a WordDelimiterFilter, with a SopTokenFilter before and after the delimiter filter.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  // Enclosing test instance (kept from the Java anonymous-class conversion).
+		  private readonly TestBugInSomething outerInstance;
+
+		  // Both values are passed straight through to the WordDelimiterFilter.
+		  private CharArraySet protWords;
+		  private sbyte[] table;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestBugInSomething outerInstance, CharArraySet protWords, sbyte[] table)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.protWords = protWords;
+			  this.table = table;
+		  }
+
+		  /// <summary>
+		  /// Chain: WikipediaTokenizer, SopTokenFilter, WordDelimiterFilter(table, -50, protWords),
+		  /// SopTokenFilter.
+		  /// </summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new WikipediaTokenizer(reader);
+			TokenStream tracedSource = new SopTokenFilter(source);
+			TokenStream delimited = new WordDelimiterFilter(TEST_VERSION_CURRENT, tracedSource, this.table, -50, this.protWords);
+			TokenStream tracedResult = new SopTokenFilter(delimited);
+			return new TokenStreamComponents(source, tracedResult);
+		  }
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs
new file mode 100644
index 0000000..9b3f425
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestClassicAnalyzer.cs
@@ -0,0 +1,395 @@
+using System;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	using ClassicAnalyzer = org.apache.lucene.analysis.standard.ClassicAnalyzer;
+	using Document = org.apache.lucene.document.Document;
+	using Field = org.apache.lucene.document.Field;
+	using TextField = org.apache.lucene.document.TextField;
+	using DocsAndPositionsEnum = org.apache.lucene.index.DocsAndPositionsEnum;
+	using DocsEnum = org.apache.lucene.index.DocsEnum;
+	using IndexReader = org.apache.lucene.index.IndexReader;
+	using IndexWriter = org.apache.lucene.index.IndexWriter;
+	using IndexWriterConfig = org.apache.lucene.index.IndexWriterConfig;
+	using MultiFields = org.apache.lucene.index.MultiFields;
+	using Term = org.apache.lucene.index.Term;
+	using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+	using RAMDirectory = org.apache.lucene.store.RAMDirectory;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using Version = org.apache.lucene.util.Version;
+
+
+
+	/// <summary>
+	/// Copyright 2004 The Apache Software Foundation
+	/// <p/>
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// <p/>
+	/// http://www.apache.org/licenses/LICENSE-2.0
+	/// <p/>
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+	public class TestClassicAnalyzer : BaseTokenStreamTestCase
+	{
+
+	  private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+
+	  /// <summary>
+	  /// With MaxTokenLength set to 5, "toolong" is expected to be absent from the output.
+	  /// </summary>
+	  public virtual void testMaxTermLength()
+	  {
+		ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+		analyzer.MaxTokenLength = 5;
+
+		string[] expectedTerms = {"ab", "cd", "xy", "z"};
+		assertAnalyzesTo(analyzer, "ab cd toolong xy z", expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// Analyzes the same text before and after lowering MaxTokenLength to 5 on one analyzer.
+	  /// </summary>
+	  public virtual void testMaxTermLength2()
+	  {
+		const string text = "ab cd toolong xy z";
+		ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+
+		// Default limit: every term is expected.
+		assertAnalyzesTo(analyzer, text, new[] {"ab", "cd", "toolong", "xy", "z"});
+
+		// Limit of 5: "toolong" is expected to be gone. The int array is presumably the
+		// expected position increments (2 where the term was skipped) - TODO confirm.
+		analyzer.MaxTokenLength = 5;
+		assertAnalyzesTo(analyzer, text, new[] {"ab", "cd", "xy", "z"}, new[] {1, 1, 2, 1});
+	  }
+
+	  /// <summary>
+	  /// A 255-character term is expected to be kept by the shared analyzer, while a
+	  /// 256-character term is expected to be dropped.
+	  /// </summary>
+	  public virtual void testMaxTermLength3()
+	  {
+		// 255 copies of 'a'.
+		string longTerm = new string('a', 255);
+
+		string textAtLimit = "ab cd " + longTerm + " xy z";
+		string textOverLimit = "ab cd " + longTerm + "a xy z";
+
+		assertAnalyzesTo(a, textAtLimit, new[] {"ab", "cd", longTerm, "xy", "z"});
+		assertAnalyzesTo(a, textOverLimit, new[] {"ab", "cd", "xy", "z"});
+	  }
+
+	  /// <summary>
+	  /// Mixed letter/digit input is expected to come back as one lower-cased token.
+	  /// </summary>
+	  public virtual void testAlphanumeric()
+	  {
+		assertAnalyzesTo(a, "B2B", new[] {"b2b"});
+		assertAnalyzesTo(a, "2B", new[] {"2b"});
+	  }
+
+	  /// <summary>
+	  /// Underscores are expected to act as delimiters here (e-mail addresses are covered
+	  /// separately); "with" and "and" are expected to be absent from the second result.
+	  /// </summary>
+	  public virtual void testUnderscores()
+	  {
+		string[] plainExpected = {"word", "having", "underscore"};
+		assertAnalyzesTo(a, "word_having_underscore", plainExpected);
+
+		string[] stopwordExpected = {"word", "underscore", "stopwords"};
+		assertAnalyzesTo(a, "word_with_underscore_and_stopwords", stopwordExpected);
+	  }
+
+	  /// <summary>
+	  /// The delimiters "-", "," and "/" are each expected to split the input into separate tokens.
+	  /// </summary>
+	  public virtual void testDelimiters()
+	  {
+		assertAnalyzesTo(a, "some-dashed-phrase", new[] {"some", "dashed", "phrase"});
+		assertAnalyzesTo(a, "dogs,chase,cats", new[] {"dogs", "chase", "cats"});
+		assertAnalyzesTo(a, "ac/dc", new[] {"ac", "dc"});
+	  }
+
+	  /// <summary>
+	  /// Internal apostrophes (O'Reilly, you're, don't) are expected to be kept, while a
+	  /// trailing possessive 's is expected to be removed.
+	  /// </summary>
+	  public virtual void testApostrophes()
+	  {
+		// Internal apostrophes stay part of the token.
+		assertAnalyzesTo(a, "O'Reilly", new[] {"o'reilly"});
+		assertAnalyzesTo(a, "you're", new[] {"you're"});
+
+		// Possessives are actually removed by StandardFilter, not by the tokenizer.
+		assertAnalyzesTo(a, "she's", new[] {"she"});
+		assertAnalyzesTo(a, "Jim's", new[] {"jim"});
+
+		assertAnalyzesTo(a, "don't", new[] {"don't"});
+		assertAnalyzesTo(a, "O'Reilly's", new[] {"o'reilly"});
+	  }
+
+	  /// <summary>
+	  /// "s" and "t" are expected to survive as tokens; "a" is expected to be dropped.
+	  /// </summary>
+	  public virtual void testTSADash()
+	  {
+		// Up to Lucene 2.0, "t" and "s" were stopwords, which made these terms unsearchable.
+		assertAnalyzesTo(a, "s-class", new[] {"s", "class"});
+		assertAnalyzesTo(a, "t-com", new[] {"t", "com"});
+
+		// "a" remains a stopword.
+		assertAnalyzesTo(a, "a-class", new[] {"class"});
+	  }
+
+	  /// <summary>
+	  /// Company names containing "&amp;" or "@" are expected to come back as one lower-cased token.
+	  /// </summary>
+	  public virtual void testCompanyNames()
+	  {
+		string[] attExpected = {"at&t"};
+		assertAnalyzesTo(a, "AT&T", attExpected);
+
+		string[] exciteExpected = {"excite@home"};
+		assertAnalyzesTo(a, "Excite@Home", exciteExpected);
+	  }
+
+	  /// <summary>
+	  /// Analyzing a host name with a trailing dot must not raise a NullReferenceException;
+	  /// if one is raised the test fails with an explicit message.
+	  /// </summary>
+	  public virtual void testLucene1140()
+	  {
+		try
+		{
+		  // Construction stays inside the try so a null dereference there is reported the same way.
+		  ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+		  string[] expectedTerms = {"www.nutch.org"};
+		  string[] expectedTypes = {"<HOST>"};
+		  assertAnalyzesTo(analyzer, "www.nutch.org.", expectedTerms, expectedTypes);
+		}
+		catch (System.NullReferenceException)
+		{
+		  fail("Should not throw an NPE and it did");
+		}
+	  }
+
+	  /// <summary>
+	  /// Host names, with and without a trailing dot, are expected to come back as a single
+	  /// token, both for the current version and for LUCENE_31.
+	  /// </summary>
+	  public virtual void testDomainNames()
+	  {
+		// An analyzer at the current version should not show the bug.
+		ClassicAnalyzer current = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+		assertAnalyzesTo(current, "www.nutch.org", new[] {"www.nutch.org"});
+
+		// Note the trailing dot, see https://issues.apache.org/jira/browse/LUCENE-1068.
+		// The token should still be recognized as HOST.
+		assertAnalyzesTo(current, "www.nutch.org.", new[] {"www.nutch.org"}, new[] {"<HOST>"});
+
+		// Version 2.3 would show the bug (yielding "wwwnutchorg" typed <ACRONYM>) and 2.4
+		// would not, but both are obsolete and unsupported, so LUCENE_31 is checked instead.
+		ClassicAnalyzer legacy = new ClassicAnalyzer(Version.LUCENE_31);
+		assertAnalyzesTo(legacy, "www.nutch.org.", new[] {"www.nutch.org"}, new[] {"<HOST>"});
+	  }
+
+	  /// <summary>
+	  /// E-mail addresses, including ones with a period or underscore in the local part,
+	  /// are expected to come back unchanged as a single token.
+	  /// </summary>
+	  public virtual void testEMailAddresses()
+	  {
+		assertAnalyzesTo(a, "test@example.com", new[] {"test@example.com"});
+		assertAnalyzesTo(a, "first.lastname@example.com", new[] {"first.lastname@example.com"});
+		assertAnalyzesTo(a, "first_lastname@example.com", new[] {"first_lastname@example.com"});
+	  }
+
+	  /// <summary>
+	  /// Floating point values, serial/model numbers and IP addresses are expected to be
+	  /// kept as whole tokens.
+	  /// </summary>
+	  public virtual void testNumeric()
+	  {
+		// Dotted numbers.
+		assertAnalyzesTo(a, "21.35", new[] {"21.35"});
+		assertAnalyzesTo(a, "216.239.63.104", new[] {"216.239.63.104"});
+
+		// Letter/digit mixes.
+		assertAnalyzesTo(a, "R2D2 C3PO", new[] {"r2d2", "c3po"});
+
+		// Dashed segments: every other segment must have at least one digit.
+		assertAnalyzesTo(a, "1-2-3", new[] {"1-2-3"});
+		assertAnalyzesTo(a, "a1-b2-c3", new[] {"a1-b2-c3"});
+		assertAnalyzesTo(a, "a1-b-c3", new[] {"a1-b-c3"});
+	  }
+
+	  /// <summary>
+	  /// A number inside ordinary text is expected to be kept as its own token.
+	  /// </summary>
+	  public virtual void testTextWithNumbers()
+	  {
+		string[] expectedTerms = {"david", "has", "5000", "bones"};
+		assertAnalyzesTo(a, "David has 5000 bones", expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// Assorted inputs: upper case, repeated whitespace, stray punctuation and quotes.
+	  /// </summary>
+	  public virtual void testVariousText()
+	  {
+		assertAnalyzesTo(a, "C embedded developers wanted", new[] {"c", "embedded", "developers", "wanted"});
+
+		// Case and punctuation differences are expected to produce the same four tokens.
+		assertAnalyzesTo(a, "foo bar FOO BAR", new[] {"foo", "bar", "foo", "bar"});
+		assertAnalyzesTo(a, "foo      bar .  FOO <> BAR", new[] {"foo", "bar", "foo", "bar"});
+
+		assertAnalyzesTo(a, "\"QUOTED\" word", new[] {"quoted", "word"});
+	  }
+
+	  /// <summary>
+	  /// Acronyms are expected to have their dots stripped.
+	  /// </summary>
+	  public virtual void testAcronyms()
+	  {
+		string[] expectedTerms = {"usa"};
+		assertAnalyzesTo(a, "U.S.A.", expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// "C++" and "C#" are both expected to be reduced to the single token "c".
+	  /// </summary>
+	  public virtual void testCPlusPlusHash()
+	  {
+		// It would be nice if the grammar in StandardTokenizer.jj produced "C#" and "C++"
+		// as tokens; for now only the letter is expected.
+		foreach (string text in new[] {"C++", "C#"})
+		{
+		  assertAnalyzesTo(a, text, new[] {"c"});
+		}
+	  }
+
+	  /// <summary>
+	  /// Two space-separated Korean words are expected to come back as two unchanged tokens.
+	  /// </summary>
+	  public virtual void testKorean()
+	  {
+		string[] expectedTerms = {"안녕하세요", "한글입니다"};
+		assertAnalyzesTo(a, "안녕하세요 한글입니다", expectedTerms);
+	  }
+
+	  // Compliance with the "old" JavaCC-based analyzer, see:
+	  // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752
+
+	  /// <summary>
+	  /// A file name such as "2004.jpg" is expected as one token with type &lt;HOST&gt;.
+	  /// </summary>
+	  public virtual void testComplianceFileName()
+	  {
+		string[] expectedTerms = {"2004.jpg"};
+		string[] expectedTypes = {"<HOST>"};
+		assertAnalyzesTo(a, "2004.jpg", expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// "62.46" is expected as one token with type &lt;HOST&gt;.
+	  /// </summary>
+	  public virtual void testComplianceNumericIncorrect()
+	  {
+		string[] expectedTerms = {"62.46"};
+		string[] expectedTypes = {"<HOST>"};
+		assertAnalyzesTo(a, "62.46", expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// A long dashed number is expected as one token with type &lt;NUM&gt;.
+	  /// </summary>
+	  public virtual void testComplianceNumericLong()
+	  {
+		string[] expectedTerms = {"978-0-94045043-1"};
+		string[] expectedTypes = {"<NUM>"};
+		assertAnalyzesTo(a, "978-0-94045043-1", expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// A path starting with digits is expected as one token with type &lt;NUM&gt;.
+	  /// </summary>
+	  public virtual void testComplianceNumericFile()
+	  {
+		string[] expectedTerms = {"78academyawards/rules/rule02.html"};
+		string[] expectedTypes = {"<NUM>"};
+		assertAnalyzesTo(a, "78academyawards/rules/rule02.html", expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// A digit-bearing identifier joined by dashes and underscores is expected as one
+	  /// token with type &lt;NUM&gt;.
+	  /// </summary>
+	  public virtual void testComplianceNumericWithUnderscores()
+	  {
+		string[] expectedTerms = {"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"};
+		string[] expectedTypes = {"<NUM>"};
+		assertAnalyzesTo(a, "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs", expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// "mid-20th" is expected as one token with type &lt;NUM&gt;.
+	  /// </summary>
+	  public virtual void testComplianceNumericWithDash()
+	  {
+		string[] expectedTerms = {"mid-20th"};
+		string[] expectedTypes = {"<NUM>"};
+		assertAnalyzesTo(a, "mid-20th", expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// A URL path followed by a dashed file name is expected to yield ten tokens with the
+	  /// listed terms and types.
+	  /// </summary>
+	  public virtual void testComplianceManyTokens()
+	  {
+		string text = "/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm " + "safari-0-sheikh-zayed-grand-mosque.jpg";
+
+		// expectedTerms[i] pairs with expectedTypes[i].
+		string[] expectedTerms = {"money.cnn.com", "magazines", "fortune", "fortune", "archive/2007/03/19/8402357", "index.htm", "safari-0-sheikh", "zayed", "grand", "mosque.jpg"};
+		string[] expectedTypes = {"<HOST>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>", "<ALPHANUM>", "<HOST>"};
+
+		assertAnalyzesTo(a, text, expectedTerms, expectedTypes);
+	  }
+
+	  /// <summary>
+	  /// With Version.LUCENE_30, U+02C6 between two words is expected to split them.
+	  /// </summary>
+	  public virtual void testJava14BWCompatibility()
+	  {
+		ClassicAnalyzer legacy = new ClassicAnalyzer(Version.LUCENE_30);
+		string[] expectedTerms = {"test", "test"};
+		assertAnalyzesTo(legacy, "test\u02C6test", expectedTerms);
+	  }
+
+	  /// <summary>
+	  /// Make sure we skip wicked long terms: a document containing a term longer than
+	  /// IndexWriter.MAX_TERM_LENGTH must still be indexed (minus that term), and a term of
+	  /// exactly the maximum length must be indexable and searchable.
+	  /// </summary>
+	  public virtual void testWickedLongTerm()
+	  {
+		RAMDirectory dir = new RAMDirectory();
+		IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));
+
+		// A term of exactly IndexWriter.MAX_TERM_LENGTH 'x' characters. The converter left
+		// Java's Arrays.fill(chars, 'x') here, which has no .NET counterpart in scope; the
+		// string(char, int) constructor builds the same value directly.
+		string bigTerm = new string('x', IndexWriter.MAX_TERM_LENGTH);
+		Document doc = new Document();
+
+		// This produces a too-long term (one extra leading 'x'):
+		string contents = "abc xyz x" + bigTerm + " another term";
+		doc.add(new TextField("content", contents, Field.Store.NO));
+		writer.addDocument(doc);
+
+		// Make sure we can add another normal document
+		doc = new Document();
+		doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
+		writer.addDocument(doc);
+		writer.close();
+
+		IndexReader reader = IndexReader.open(dir);
+
+		// Make sure all terms < max size were indexed
+		assertEquals(2, reader.docFreq(new Term("content", "abc")));
+		assertEquals(1, reader.docFreq(new Term("content", "bbb")));
+		assertEquals(1, reader.docFreq(new Term("content", "term")));
+		assertEquals(1, reader.docFreq(new Term("content", "another")));
+
+		// Make sure position is still incremented when
+		// massive term is skipped:
+		DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", new BytesRef("another"));
+		assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+		assertEquals(1, tps.freq());
+		assertEquals(3, tps.nextPosition());
+
+		// Make sure the doc that has the massive term is in
+		// the index:
+		assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());
+
+		reader.close();
+
+		// Make sure we can add a document with exactly the
+		// maximum length term, and search on that term:
+		doc = new Document();
+		doc.add(new TextField("content", bigTerm, Field.Store.NO));
+		ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+		sa.MaxTokenLength = 100000;
+		writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
+		writer.addDocument(doc);
+		writer.close();
+		reader = IndexReader.open(dir);
+		assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
+		reader.close();
+
+		dir.close();
+	  }
+
+	  /// <summary>
+	  /// Blasts random strings through a ClassicAnalyzer (1000 * RANDOM_MULTIPLIER iterations).
+	  /// </summary>
+	  public virtual void testRandomStrings()
+	  {
+		ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
+		int iterations = 1000 * RANDOM_MULTIPLIER;
+		checkRandomData(random(), analyzer, iterations);
+	  }
+
+	  /// <summary>
+	  /// Blasts random large strings through a ClassicAnalyzer
+	  /// (100 * RANDOM_MULTIPLIER iterations, with 8192 as the last argument to checkRandomData).
+	  /// </summary>
+	  public virtual void testRandomHugeStrings()
+	  {
+		// The local must not be called "random": in C# that name would hide the random()
+		// method inside its own initializer ("Random random = random();" does not compile).
+		Random rnd = random();
+		checkRandomData(rnd, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs
new file mode 100644
index 0000000..6155918
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestDuelingAnalyzers.cs
@@ -0,0 +1,302 @@
+using System;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+	using LuceneTestCase = org.apache.lucene.util.LuceneTestCase;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+	using Automaton = org.apache.lucene.util.automaton.Automaton;
+	using BasicOperations = org.apache.lucene.util.automaton.BasicOperations;
+	using CharacterRunAutomaton = org.apache.lucene.util.automaton.CharacterRunAutomaton;
+	using State = org.apache.lucene.util.automaton.State;
+	using Transition = org.apache.lucene.util.automaton.Transition;
+
+	/// <summary>
+	/// Compares MockTokenizer (which is simple with no optimizations) with equivalent 
+	/// core tokenizers (that have optimizations like buffering).
+	/// 
+	/// Tests here probably need to take the Unicode version of the JRE into account
+	/// (a mismatch could cause false failures).
+	/// </summary>
+	public class TestDuelingAnalyzers : LuceneTestCase
+	{
+	  private CharacterRunAutomaton jvmLetter;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+	  /// <summary>
+	  /// Builds <c>jvmLetter</c>: the repetition (BasicOperations.repeat) of an automaton that
+	  /// accepts any single code point the runtime classifies as a letter.
+	  /// </summary>
+	  public override void setUp()
+	  {
+		base.setUp();
+		// build an automaton matching this runtime's letter definition
+		State initial = new State();
+		State accept = new State();
+		accept.Accept = true;
+		for (int i = 0; i <= 0x10FFFF; i++)
+		{
+		  // Lone surrogate code points are never letters, and char.ConvertFromUtf32
+		  // throws ArgumentOutOfRangeException for them, so skip U+D800..U+DFFF.
+		  if (i >= 0xD800 && i <= 0xDFFF)
+		  {
+			continue;
+		  }
+		  // char.IsLetter has no overload taking an int code point; the (string, index)
+		  // overload classifies the (possibly surrogate-pair) character at that index.
+		  if (char.IsLetter(char.ConvertFromUtf32(i), 0))
+		  {
+			initial.addTransition(new Transition(i, i, accept));
+		  }
+		}
+		Automaton single = new Automaton(initial);
+		single.reduce();
+		Automaton repeat = BasicOperations.repeat(single);
+		jvmLetter = new CharacterRunAutomaton(repeat);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterAscii() throws Exception
+	  /// <summary>
+	  /// Runs 1000 random simple strings through a MockAnalyzer built from <c>jvmLetter</c> and
+	  /// through a LetterTokenizer-based analyzer, asserting that both produce the same tokens. </summary>
+	  public virtual void testLetterAscii()
+	  {
+		// Named "rnd" because a local called "random" would hide random() inside its own
+		// initializer (C# error CS0841).
+		Random rnd = random();
+		Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+		Analyzer right = new AnalyzerAnonymousInnerClassHelper(this);
+		for (int i = 0; i < 1000; i++)
+		{
+		  string s = TestUtil.randomSimpleString(rnd);
+		  assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  // Enclosing test instance; stored by the constructor but not read anywhere in this class.
+		  private readonly TestDuelingAnalyzers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestDuelingAnalyzers outerInstance)
+		  {
+			this.outerInstance = outerInstance;
+		  }
+
+		  // A single LetterTokenizer over the reader is passed as both arguments of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(letters, letters);
+		  }
+	  }
+
+	  // not so useful since it's all one token?!
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterAsciiHuge() throws Exception
+	  /// <summary>
+	  /// Same comparison as the small ASCII test, but with random simple strings generated with a
+	  /// limit of 8192 and with the mock analyzer's MaxTokenLength set to 255. </summary>
+	  public virtual void testLetterAsciiHuge()
+	  {
+		// Named "rnd" because a local called "random" would hide random() inside its own
+		// initializer (C# error CS0841).
+		Random rnd = random();
+		int maxLength = 8192; // CharTokenizer.IO_BUFFER_SIZE*2
+		MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+		left.MaxTokenLength = 255; // match CharTokenizer's max token length
+		Analyzer right = new AnalyzerAnonymousInnerClassHelper2(this);
+		int numIterations = atLeast(50);
+		for (int i = 0; i < numIterations; i++)
+		{
+		  string s = TestUtil.randomSimpleString(rnd, maxLength);
+		  assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  // Enclosing test instance; stored by the constructor but not read anywhere in this class.
+		  private readonly TestDuelingAnalyzers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestDuelingAnalyzers outerInstance)
+		  {
+			this.outerInstance = outerInstance;
+		  }
+
+		  // A single LetterTokenizer over the reader is passed as both arguments of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(letters, letters);
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterHtmlish() throws Exception
+	  /// <summary>
+	  /// Runs 1000 random HTML-ish strings (generated with a limit of 20) through both analyzers
+	  /// and asserts that they produce the same tokens. </summary>
+	  public virtual void testLetterHtmlish()
+	  {
+		// Named "rnd" because a local called "random" would hide random() inside its own
+		// initializer (C# error CS0841).
+		Random rnd = random();
+		Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+		Analyzer right = new AnalyzerAnonymousInnerClassHelper3(this);
+		for (int i = 0; i < 1000; i++)
+		{
+		  string s = TestUtil.randomHtmlishString(rnd, 20);
+		  assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  // Enclosing test instance; stored by the constructor but not read anywhere in this class.
+		  private readonly TestDuelingAnalyzers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestDuelingAnalyzers outerInstance)
+		  {
+			this.outerInstance = outerInstance;
+		  }
+
+		  // A single LetterTokenizer over the reader is passed as both arguments of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(letters, letters);
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterHtmlishHuge() throws Exception
+	  /// <summary>
+	  /// Same comparison as the small HTML-ish test, but with strings generated with a limit of
+	  /// 1024 and with the mock analyzer's MaxTokenLength set to 255. </summary>
+	  public virtual void testLetterHtmlishHuge()
+	  {
+		// Named "rnd" because a local called "random" would hide random() inside its own
+		// initializer (C# error CS0841).
+		Random rnd = random();
+		int maxLength = 1024; // this is number of elements, not chars!
+		MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+		left.MaxTokenLength = 255; // match CharTokenizer's max token length
+		Analyzer right = new AnalyzerAnonymousInnerClassHelper4(this);
+		int numIterations = atLeast(50);
+		for (int i = 0; i < numIterations; i++)
+		{
+		  string s = TestUtil.randomHtmlishString(rnd, maxLength);
+		  assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper4 : Analyzer
+	  {
+		  // Enclosing test instance; stored by the constructor but not read anywhere in this class.
+		  private readonly TestDuelingAnalyzers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper4(TestDuelingAnalyzers outerInstance)
+		  {
+			this.outerInstance = outerInstance;
+		  }
+
+		  // A single LetterTokenizer over the reader is passed as both arguments of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(letters, letters);
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterUnicode() throws Exception
+	  /// <summary>
+	  /// Runs 1000 random Unicode strings through both analyzers and asserts that they produce
+	  /// the same tokens. </summary>
+	  public virtual void testLetterUnicode()
+	  {
+		// Named "rnd" because a local called "random" would hide random() for the whole method
+		// body (C# error CS0841); the mock analyzer takes this same instance instead of calling
+		// random() a second time.
+		Random rnd = random();
+		Analyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+		Analyzer right = new AnalyzerAnonymousInnerClassHelper5(this);
+		for (int i = 0; i < 1000; i++)
+		{
+		  string s = TestUtil.randomUnicodeString(rnd);
+		  assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper5 : Analyzer
+	  {
+		  // Enclosing test instance; stored by the constructor but not read anywhere in this class.
+		  private readonly TestDuelingAnalyzers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper5(TestDuelingAnalyzers outerInstance)
+		  {
+			this.outerInstance = outerInstance;
+		  }
+
+		  // A single LetterTokenizer over the reader is passed as both arguments of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(letters, letters);
+		  }
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLetterUnicodeHuge() throws Exception
+	  /// <summary>
+	  /// Same comparison as the small Unicode test, but with strings generated with a limit of
+	  /// 4300 and with the mock analyzer's MaxTokenLength set to 255. </summary>
+	  public virtual void testLetterUnicodeHuge()
+	  {
+		// Named "rnd" because a local called "random" would hide random() inside its own
+		// initializer (C# error CS0841).
+		Random rnd = random();
+		int maxLength = 4300; // CharTokenizer.IO_BUFFER_SIZE + fudge
+		MockAnalyzer left = new MockAnalyzer(rnd, jvmLetter, false);
+		left.MaxTokenLength = 255; // match CharTokenizer's max token length
+		Analyzer right = new AnalyzerAnonymousInnerClassHelper6(this);
+		int numIterations = atLeast(50);
+		for (int i = 0; i < numIterations; i++)
+		{
+		  string s = TestUtil.randomUnicodeString(rnd, maxLength);
+		  assertEquals(s, left.tokenStream("foo", newStringReader(s)), right.tokenStream("foo", newStringReader(s)));
+		}
+	  }
+
+	  private class AnalyzerAnonymousInnerClassHelper6 : Analyzer
+	  {
+		  // Enclosing test instance; stored by the constructor but not read anywhere in this class.
+		  private readonly TestDuelingAnalyzers outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper6(TestDuelingAnalyzers outerInstance)
+		  {
+			this.outerInstance = outerInstance;
+		  }
+
+		  // A single LetterTokenizer over the reader is passed as both arguments of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer letters = new LetterTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(letters, letters);
+		  }
+	  }
+
+	  // we only check a few core attributes here.
+	  // TODO: test other things
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void assertEquals(String s, org.apache.lucene.analysis.TokenStream left, org.apache.lucene.analysis.TokenStream right) throws Exception
+	  /// <summary>
+	  /// Walks both streams in lock-step and asserts that each token pair has the same term text,
+	  /// position increment and start/end offsets, that both streams run out of tokens together,
+	  /// and that the end offsets read after end() agree. Both streams are closed when every
+	  /// check has passed. </summary>
+	  /// <param name="s"> the analyzed input; only used to build failure messages </param>
+	  /// <param name="left"> stream whose values are passed as the expected side </param>
+	  /// <param name="right"> stream whose values are passed as the actual side </param>
+	  public virtual void assertEquals(string s, TokenStream left, TokenStream right)
+	  {
+		// Same message is used both inside the loop and for the final exhaustion check.
+		string tokenCountMessage = "wrong number of tokens for input: " + s;
+
+		left.reset();
+		right.reset();
+		CharTermAttribute expectedTerm = left.addAttribute(typeof(CharTermAttribute));
+		CharTermAttribute actualTerm = right.addAttribute(typeof(CharTermAttribute));
+		OffsetAttribute expectedOffset = left.addAttribute(typeof(OffsetAttribute));
+		OffsetAttribute actualOffset = right.addAttribute(typeof(OffsetAttribute));
+		PositionIncrementAttribute expectedPos = left.addAttribute(typeof(PositionIncrementAttribute));
+		PositionIncrementAttribute actualPos = right.addAttribute(typeof(PositionIncrementAttribute));
+
+		while (left.incrementToken())
+		{
+		  // right must still have a token whenever left does
+		  assertTrue(tokenCountMessage, right.incrementToken());
+		  assertEquals("wrong term text for input: " + s, expectedTerm.ToString(), actualTerm.ToString());
+		  assertEquals("wrong position for input: " + s, expectedPos.PositionIncrement, actualPos.PositionIncrement);
+		  assertEquals("wrong start offset for input: " + s, expectedOffset.startOffset(), actualOffset.startOffset());
+		  assertEquals("wrong end offset for input: " + s, expectedOffset.endOffset(), actualOffset.endOffset());
+		}
+		// left is exhausted, so right must be exhausted too
+		assertFalse(tokenCountMessage, right.incrementToken());
+
+		left.end();
+		right.end();
+		assertEquals("wrong final offset for input: " + s, expectedOffset.endOffset(), actualOffset.endOffset());
+
+		left.close();
+		right.close();
+	  }
+
+	  // TODO: maybe push this out to TestUtil or LuceneTestCase and always use it instead?
+	  /// <summary>
+	  /// Returns a reader over <paramref name="s"/>; on a random coin flip the StringReader is
+	  /// additionally wrapped in a MockReaderWrapper. </summary>
+	  private static Reader newStringReader(string s)
+	  {
+		// Named "rnd" because a local called "random" would hide random() inside its own
+		// initializer (C# error CS0841).
+		Random rnd = random();
+		Reader r = new StringReader(s);
+		if (rnd.nextBoolean())
+		{
+		  r = new MockReaderWrapper(rnd, r);
+		}
+		return r;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs
new file mode 100644
index 0000000..8af7962
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestFactories.cs
@@ -0,0 +1,263 @@
+using System;
+using System.Diagnostics;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+	using CharFilterFactory = org.apache.lucene.analysis.util.CharFilterFactory;
+	using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+	using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
+	using StringMockResourceLoader = org.apache.lucene.analysis.util.StringMockResourceLoader;
+	using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+	using TokenizerFactory = org.apache.lucene.analysis.util.TokenizerFactory;
+	using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
+
+	/// <summary>
+	/// Sanity check some things about all factories,
+	/// we do our best to see if we can sanely initialize it with
+	/// no parameters and smoke test it, etc.
+	/// </summary>
+	// TODO: move this, TestRandomChains, and TestAllAnalyzersHaveFactories
+	// to an integration test module that sucks in all analysis modules.
+	// currently the only way to do this is via eclipse etc (LUCENE-3974)
+	public class TestFactories : BaseTokenStreamTestCase
+	{
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws java.io.IOException
+	  /// <summary>
+	  /// Smoke-tests every registered factory: first all tokenizers, then all token filters,
+	  /// then all char filters. </summary>
+	  public virtual void test()
+	  {
+		foreach (string name in TokenizerFactory.availableTokenizers())
+		{
+		  doTestTokenizer(name);
+		}
+
+		foreach (string name in TokenFilterFactory.availableTokenFilters())
+		{
+		  doTestTokenFilter(name);
+		}
+
+		foreach (string name in CharFilterFactory.availableCharFilters())
+		{
+		  doTestCharFilter(name);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestTokenizer(String tokenizer) throws java.io.IOException
+	  /// <summary>
+	  /// Looks up the tokenizer factory registered under <paramref name="tokenizer"/>, tries to
+	  /// instantiate it, and if that succeeds sanity-checks it and runs random data through it. </summary>
+	  private void doTestTokenizer(string tokenizer)
+	  {
+		// Java's Class<? extends TokenizerFactory> has no wildcard form in C#; System.Type carries the class.
+		Type factoryClazz = TokenizerFactory.lookupClass(tokenizer);
+		TokenizerFactory factory = (TokenizerFactory) initialize(factoryClazz);
+		if (factory != null)
+		{
+		  // we managed to fully create an instance. check a few more things:
+
+		  // if it implements MultiTermAware, sanity check its impl
+		  if (factory is MultiTermAwareComponent)
+		  {
+			AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent;
+			assertNotNull(mtc);
+			// it's not ok to return e.g. a charfilter here: but a tokenizer could wrap a filter around it
+			assertFalse(mtc is CharFilterFactory);
+		  }
+
+		  // beast it just a little, it shouldn't throw exceptions:
+		  // (it should have thrown them in initialize)
+		  checkRandomData(random(), new FactoryAnalyzer(factory, null, null), 100, 20, false, false);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestTokenFilter(String tokenfilter) throws java.io.IOException
+	  /// <summary>
+	  /// Looks up the token filter factory registered under <paramref name="tokenfilter"/>, tries
+	  /// to instantiate it, and if that succeeds sanity-checks it and runs random data through it
+	  /// behind the asserting tokenizer. </summary>
+	  private void doTestTokenFilter(string tokenfilter)
+	  {
+		// Java's Class<? extends TokenFilterFactory> has no wildcard form in C#; System.Type carries the class.
+		Type factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
+		TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
+		if (factory != null)
+		{
+		  // we managed to fully create an instance. check a few more things:
+
+		  // if it implements MultiTermAware, sanity check its impl
+		  if (factory is MultiTermAwareComponent)
+		  {
+			AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent;
+			assertNotNull(mtc);
+			// it's not ok to return a charfilter or tokenizer here, this makes no sense
+			assertTrue(mtc is TokenFilterFactory);
+		  }
+
+		  // beast it just a little, it shouldn't throw exceptions:
+		  // (it should have thrown them in initialize)
+		  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void doTestCharFilter(String charfilter) throws java.io.IOException
+	  /// <summary>
+	  /// Looks up the char filter factory registered under <paramref name="charfilter"/>, tries to
+	  /// instantiate it, and if that succeeds sanity-checks it and runs random data through it
+	  /// in front of the asserting tokenizer. </summary>
+	  private void doTestCharFilter(string charfilter)
+	  {
+		// Java's Class<? extends CharFilterFactory> has no wildcard form in C#; System.Type carries the class.
+		Type factoryClazz = CharFilterFactory.lookupClass(charfilter);
+		CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
+		if (factory != null)
+		{
+		  // we managed to fully create an instance. check a few more things:
+
+		  // if it implements MultiTermAware, sanity check its impl
+		  if (factory is MultiTermAwareComponent)
+		  {
+			AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).MultiTermComponent;
+			assertNotNull(mtc);
+			// it's not ok to return a tokenizer or tokenfilter here, this makes no sense
+			assertTrue(mtc is CharFilterFactory);
+		  }
+
+		  // beast it just a little, it shouldn't throw exceptions:
+		  // (it should have thrown them in initialize)
+		  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, null, factory), 100, 20, false, false);
+		}
+	  }
+
+	  /// <summary>
+	  /// tries to initialize a factory with no arguments </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private org.apache.lucene.analysis.util.AbstractAnalysisFactory initialize(Class<? extends org.apache.lucene.analysis.util.AbstractAnalysisFactory> factoryClazz) throws java.io.IOException
+	  // Returns the constructed factory, or null when construction was rejected (see the
+	  // TargetInvocationException handler). Throws Exception when no usable constructor exists
+	  // or the constructor cannot be accessed.
+	  private AbstractAnalysisFactory initialize(Type factoryClazz)
+	  {
+		IDictionary<string, string> args = new Dictionary<string, string>();
+		args["luceneMatchVersion"] = TEST_VERSION_CURRENT.ToString();
+
+		// NOTE(review): assumes every factory exposes a public constructor whose single
+		// parameter is IDictionary<string, string> - confirm against the factory classes.
+		System.Reflection.ConstructorInfo ctor;
+		try
+		{
+		  ctor = factoryClazz.GetConstructor(new Type[] { typeof(IDictionary<string, string>) });
+		}
+		catch (Exception e)
+		{
+		  throw new Exception("factory '" + factoryClazz + "' does not have a proper ctor!", e);
+		}
+		if (ctor == null)
+		{
+		  // Type.GetConstructor signals "no matching public constructor" by returning null, not by throwing
+		  throw new Exception("factory '" + factoryClazz + "' does not have a proper ctor!");
+		}
+
+		AbstractAnalysisFactory factory = null;
+		try
+		{
+		  factory = (AbstractAnalysisFactory) ctor.Invoke(new object[] { args });
+		}
+		catch (MemberAccessException e)
+		{
+		  // abstract class or inaccessible constructor: the .NET counterparts of
+		  // Java's InstantiationException / IllegalAccessException
+		  throw new Exception(e.Message, e);
+		}
+		catch (System.Reflection.TargetInvocationException e)
+		{
+		  if (e.InnerException is System.ArgumentException)
+		  {
+			// it's ok if we don't provide the right parameters to throw this
+			return null;
+		  }
+		  // NOTE(review): any other constructor failure is dropped here, so the method
+		  // falls through and returns null - confirm this best-effort behavior is intended.
+		}
+
+		if (factory is ResourceLoaderAware)
+		{
+		  try
+		  {
+			((ResourceLoaderAware) factory).inform(new StringMockResourceLoader(""));
+		  }
+		  catch (System.IO.IOException)
+		  {
+			// it's ok if the right files aren't available or whatever to throw this
+		  }
+		  catch (System.ArgumentException)
+		  {
+			// is this ok? I guess so
+		  }
+		}
+		return factory;
+	  }
+
+	  // some silly classes just so we can use checkRandomData
+	  private TokenizerFactory assertingTokenizer = new TokenizerFactoryAnonymousInnerClassHelper(new Dictionary<string, string>());
+
+	  // Minimal TokenizerFactory whose create() returns a MockTokenizer.
+	  private class TokenizerFactoryAnonymousInnerClassHelper : TokenizerFactory
+	  {
+		  // Forwards the supplied argument map to the base constructor; the converter had left a
+		  // type name ("Hashtable<string, string>") where the argument expression belongs.
+		  public TokenizerFactoryAnonymousInnerClassHelper(Dictionary<string, string> java) : base(java)
+		  {
+		  }
+
+		  // NOTE(review): the declared return type is MockTokenizer; if the base create() is
+		  // declared as returning Tokenizer, C# versions before 9 require the override to use
+		  // the same return type - confirm against TokenizerFactory.
+		  public override MockTokenizer create(AttributeFactory factory, Reader input)
+		  {
+			return new MockTokenizer(factory, input);
+		  }
+	  }
+
+	  // Analyzer assembled from a mandatory tokenizer factory plus optional
+	  // token-filter and char-filter factories.
+	  private class FactoryAnalyzer : Analyzer
+	  {
+		internal readonly TokenizerFactory tokenizer;
+		internal readonly CharFilterFactory charFilter;
+		internal readonly TokenFilterFactory tokenfilter;
+
+		// tokenizer must be non-null (asserted); either filter factory may be null.
+		internal FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter)
+		{
+		  Debug.Assert(tokenizer != null);
+		  this.tokenizer = tokenizer;
+		  this.charFilter = charFilter;
+		  this.tokenfilter = tokenfilter;
+		}
+
+		// Creates the tokenizer and, when a token filter factory was supplied, wraps it.
+		protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		{
+		  Tokenizer source = tokenizer.create(reader);
+		  if (tokenfilter == null)
+		  {
+			return new TokenStreamComponents(source);
+		  }
+		  return new TokenStreamComponents(source, tokenfilter.create(source));
+		}
+
+		// Passes the reader through the char filter when one was supplied; otherwise returns it untouched.
+		protected internal override Reader initReader(string fieldName, Reader reader)
+		{
+		  if (charFilter == null)
+		  {
+			return reader;
+		  }
+		  return charFilter.create(reader);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs
new file mode 100644
index 0000000..ecde6df
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Core/TestKeywordAnalyzer.cs
@@ -0,0 +1,143 @@
+namespace org.apache.lucene.analysis.core
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+	using Document = org.apache.lucene.document.Document;
+	using Field = org.apache.lucene.document.Field;
+	using StringField = org.apache.lucene.document.StringField;
+	using TextField = org.apache.lucene.document.TextField;
+	using DirectoryReader = org.apache.lucene.index.DirectoryReader;
+	using DocsEnum = org.apache.lucene.index.DocsEnum;
+	using IndexReader = org.apache.lucene.index.IndexReader;
+	using IndexWriter = org.apache.lucene.index.IndexWriter;
+	using IndexWriterConfig = org.apache.lucene.index.IndexWriterConfig;
+	using MultiFields = org.apache.lucene.index.MultiFields;
+	using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+	using IndexSearcher = org.apache.lucene.search.IndexSearcher;
+	using Directory = org.apache.lucene.store.Directory;
+	using RAMDirectory = org.apache.lucene.store.RAMDirectory;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using TestUtil = org.apache.lucene.util.TestUtil;
+
+	public class TestKeywordAnalyzer : BaseTokenStreamTestCase
+	{
+
+	  private Directory directory;
+	  private IndexSearcher searcher;
+	  private IndexReader reader;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+	  /// <summary>
+	  /// Indexes a single document (a "partnum" StringField and a "description" TextField) with a
+	  /// SimpleAnalyzer, then opens the reader and searcher fields over that index. </summary>
+	  public override void setUp()
+	  {
+		base.setUp();
+		directory = newDirectory();
+
+		IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT));
+		IndexWriter indexWriter = new IndexWriter(directory, config);
+
+		Document part = new Document();
+		part.add(new StringField("partnum", "Q36", Field.Store.YES));
+		part.add(new TextField("description", "Illidium Space Modulator", Field.Store.YES));
+		indexWriter.addDocument(part);
+		indexWriter.close();
+
+		reader = DirectoryReader.open(directory);
+		searcher = newSearcher(reader);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void tearDown() throws Exception
+	  /// <summary>
+	  /// Closes the reader and then the directory held in this fixture's fields before
+	  /// delegating to the base tear-down. </summary>
+	  public override void tearDown()
+	  {
+		this.reader.close();
+		this.directory.close();
+		base.tearDown();
+	  }
+
+	  /*
+	  public void testPerFieldAnalyzer() throws Exception {
+	    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT));
+	    analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
+	
+	    QueryParser queryParser = new QueryParser(TEST_VERSION_CURRENT, "description", analyzer);
+	    Query query = queryParser.parse("partnum:Q36 AND SPACE");
+	
+	    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+	    assertEquals("Q36 kept as-is",
+	              "+partnum:Q36 +space", query.toString("description"));
+	    assertEquals("doc found!", 1, hits.length);
+	  }
+	  */
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMutipleDocument() throws Exception
+	  /// <summary>
+	  /// Indexes two documents whose "partnum" TextField is analyzed with KeywordAnalyzer and
+	  /// checks that each value ("Q36", "Q37") can be found as a term with at least one document. </summary>
+	  public virtual void testMutipleDocument()
+	  {
+		RAMDirectory dir = new RAMDirectory();
+		try
+		{
+		  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer()));
+		  Document doc = new Document();
+		  doc.add(new TextField("partnum", "Q36", Field.Store.YES));
+		  writer.addDocument(doc);
+		  doc = new Document();
+		  doc.add(new TextField("partnum", "Q37", Field.Store.YES));
+		  writer.addDocument(doc);
+		  writer.close();
+
+		  // Distinct name so the local does not hide the fixture's "reader" field.
+		  IndexReader indexReader = DirectoryReader.open(dir);
+		  try
+		  {
+			DocsEnum td = TestUtil.docs(random(), indexReader, "partnum", new BytesRef("Q36"), MultiFields.getLiveDocs(indexReader), null, 0);
+			assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+			td = TestUtil.docs(random(), indexReader, "partnum", new BytesRef("Q37"), MultiFields.getLiveDocs(indexReader), null, 0);
+			assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+		  }
+		  finally
+		  {
+			// previously the reader was never closed
+			indexReader.close();
+		  }
+		}
+		finally
+		{
+		  // previously the directory was never closed
+		  dir.close();
+		}
+	  }
+
+	  // LUCENE-1441
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+	  /// <summary>
+	  /// Checks that KeywordAnalyzer emits exactly one token for "abcd" with offsets 0 and 4. </summary>
+	  public virtual void testOffsets()
+	  {
+		KeywordAnalyzer analyzer = new KeywordAnalyzer();
+		TokenStream tokens = analyzer.tokenStream("field", new StringReader("abcd"));
+		try
+		{
+		  OffsetAttribute offsets = tokens.addAttribute(typeof(OffsetAttribute));
+		  tokens.reset();
+
+		  // exactly one token, covering the whole input
+		  assertTrue(tokens.incrementToken());
+		  assertEquals(0, offsets.startOffset());
+		  assertEquals(4, offsets.endOffset());
+		  assertFalse(tokens.incrementToken());
+
+		  tokens.end();
+		}
+		finally
+		{
+		  IOUtils.closeWhileHandlingException(tokens);
+		}
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomStrings() throws Exception
+	  public virtual void testRandomStrings()
+	  {
+		// The analyzer under test is built up front and then handed to the random-data checker.
+		KeywordAnalyzer analyzer = new KeywordAnalyzer();
+		// Iteration count is scaled by RANDOM_MULTIPLIER.
+		checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message