lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject lucenenet git commit: Finish porting CommonGrams support
Date Sun, 04 Jan 2015 12:53:58 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/master fbf7f122f -> 67f47ad1b


Finish porting CommonGrams support


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/67f47ad1
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/67f47ad1
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/67f47ad1

Branch: refs/heads/master
Commit: 67f47ad1b63cc2b9b9758d7b4e68e40c129f1226
Parents: fbf7f12
Author: Itamar Syn-Hershko <itamar@code972.com>
Authored: Sun Jan 4 14:53:42 2015 +0200
Committer: Itamar Syn-Hershko <itamar@code972.com>
Committed: Sun Jan 4 14:53:42 2015 +0200

----------------------------------------------------------------------
 .../Analysis/CommonGrams/CommonGramsFilter.cs   | 325 ++++++++++---------
 .../CommonGrams/CommonGramsFilterFactory.cs     | 172 +++++-----
 .../CommonGrams/CommonGramsQueryFilter.cs       | 220 ++++++-------
 .../CommonGramsQueryFilterFactory.cs            |  91 +++---
 4 files changed, 404 insertions(+), 404 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/67f47ad1/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
index 0d083c4..a91da7a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
@@ -18,7 +18,6 @@ using System.Text;
 using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.CommonGrams
 {
@@ -26,163 +25,169 @@ namespace Lucene.Net.Analysis.CommonGrams
 	 * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors 
 	 */
 
-	/// <summary>
-	/// Construct bigrams for frequently occurring terms while indexing. Single terms
-	/// are still indexed too, with bigrams overlaid. This is achieved through the
-	/// use of <seealso cref="PositionIncrementAttribute#setPositionIncrement(int)"/>. Bigrams have a type
-	/// of <seealso cref="#GRAM_TYPE"/> Example:
-	/// <ul>
-	/// <li>input:"the quick brown fox"</li>
-	/// <li>output:|"the","the-quick"|"brown"|"fox"|</li>
-	/// <li>"the-quick" has a position increment of 0 so it is in the same position
-	/// as "the" "the-quick" has a term.type() of "gram"</li>
-	/// 
-	/// </ul>
-	/// </summary>
-
-	/*
-	 * Constructors and makeCommonSet based on similar code in StopFilter
-	 */
-	public sealed class CommonGramsFilter : TokenFilter
-	{
-
-	  public const string GRAM_TYPE = "gram";
-	  private const char SEPARATOR = '_';
-
-	  private readonly CharArraySet commonWords;
-
-	  private readonly StringBuilder buffer = new StringBuilder();
-
-	  private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
-	  private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
-	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
-	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
-	  private readonly PositionLengthAttribute posLenAttribute = addAttribute(typeof(PositionLengthAttribute));
-
-	  private int lastStartOffset;
-	  private bool lastWasCommon;
-	  private State savedState;
-
-	  /// <summary>
-	  /// Construct a token stream filtering the given input using a Set of common
-	  /// words to create bigrams. Outputs both unigrams with position increment and
-	  /// bigrams with position increment 0 type=gram where one or both of the words
-	  /// in a potential bigram are in the set of common words .
-	  /// </summary>
-	  /// <param name="input"> TokenStream input in filter chain </param>
-	  /// <param name="commonWords"> The set of common words. </param>
-	  public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords) : base(input)
-	  {
-		this.commonWords = commonWords;
-	  }
-
-	  /// <summary>
-	  /// Inserts bigrams for common words into a token stream. For each input token,
-	  /// output the token. If the token and/or the following token are in the list
-	  /// of common words also output a bigram with position increment 0 and
-	  /// type="gram"
-	  /// 
-	  /// TODO:Consider adding an option to not emit unigram stopwords
-	  /// as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be
-	  /// changed to work with this.
-	  /// 
-	  /// TODO: Consider optimizing for the case of three
-	  /// commongrams i.e "man of the year" normally produces 3 bigrams: "man-of",
-	  /// "of-the", "the-year" but with proper management of positions we could
-	  /// eliminate the middle bigram "of-the"and save a disk seek and a whole set of
-	  /// position lookups.
-	  /// </summary>
-	  public override bool IncrementToken()
-	  {
-		// get the next piece of input
-		if (savedState != null)
-		{
-		  RestoreState(savedState);
-		  savedState = null;
-		  SaveTermBuffer();
-		  return true;
-		}
-		else if (!input.incrementToken())
-		{
-			return false;
-		}
-
-		/* We build n-grams before and after stopwords. 
-		 * When valid, the buffer always contains at least the separator.
-		 * If its empty, there is nothing before this stopword.
-		 */
-		if (lastWasCommon || (Common && buffer.Length > 0))
-		{
-		  savedState = CaptureState();
-		  GramToken();
-		  return true;
-		}
-
-		SaveTermBuffer();
-		return true;
-	  }
-
-	  /// <summary>
-	  /// {@inheritDoc}
-	  /// </summary>
-	  public override void Reset()
-	  {
-		base.Reset();
-		lastWasCommon = false;
-		savedState = null;
-		buffer.Length = 0;
-	  }
-
-	  // ================================================= Helper Methods ================================================
-
-	  /// <summary>
-	  /// Determines if the current token is a common term
-	  /// </summary>
-	  /// <returns> {@code true} if the current token is a common term, {@code false} otherwise </returns>
-	  private bool Common
-	  {
-		  get
-		  {
-			return commonWords != null && commonWords.contains(termAttribute.Buffer(), 0, termAttribute.Length);
-		  }
-	  }
-
-	  /// <summary>
-	  /// Saves this information to form the left part of a gram
-	  /// </summary>
-	  private void SaveTermBuffer()
-	  {
-		buffer.Length = 0;
-		buffer.Append(termAttribute.Buffer(), 0, termAttribute.Length);
-		buffer.Append(SEPARATOR);
-		lastStartOffset = offsetAttribute.StartOffset();
-		lastWasCommon = Common;
-	  }
-
-	  /// <summary>
-	  /// Constructs a compound token.
-	  /// </summary>
-	  private void GramToken()
-	  {
-		buffer.Append(termAttribute.Buffer(), 0, termAttribute.Length);
-		int endOffset = offsetAttribute.EndOffset();
-
-		ClearAttributes();
-
-		int length = buffer.Length;
-		char[] termText = termAttribute.Buffer();
-		if (length > termText.Length)
-		{
-		  termText = termAttribute.ResizeBuffer(length);
-		}
-
-		buffer.GetChars(0, length, termText, 0);
-		termAttribute.Length = length;
-		posIncAttribute.PositionIncrement = 0;
-		posLenAttribute.PositionLength = 2; // bigram
-		offsetAttribute.SetOffset(lastStartOffset, endOffset);
-		typeAttribute.Type = GRAM_TYPE;
-		buffer.Length = 0;
-	  }
-	}
+    /// <summary>
+    /// Construct bigrams for frequently occurring terms while indexing. Single terms
+    /// are still indexed too, with bigrams overlaid. This is achieved through the
+    /// use of <seealso cref="PositionIncrementAttribute#setPositionIncrement(int)"/>. Bigrams have a type
+    /// of <seealso cref="#GRAM_TYPE"/> Example:
+    /// <ul>
+    /// <li>input:"the quick brown fox"</li>
+    /// <li>output:|"the","the-quick"|"brown"|"fox"|</li>
+    /// <li>"the-quick" has a position increment of 0 so it is in the same position
+    /// as "the" "the-quick" has a term.type() of "gram"</li>
+    /// 
+    /// </ul>
+    /// </summary>
+
+    /*
+     * Constructors and makeCommonSet based on similar code in StopFilter
+     */
+    public sealed class CommonGramsFilter : TokenFilter
+    {
+
+        public const string GRAM_TYPE = "gram";
+        private const char SEPARATOR = '_';
+
+        private readonly CharArraySet commonWords;
+
+        private readonly StringBuilder buffer = new StringBuilder();
+
+        private readonly ICharTermAttribute termAttribute;
+        private readonly IOffsetAttribute offsetAttribute;
+        private readonly ITypeAttribute typeAttribute;
+        private readonly IPositionIncrementAttribute posIncAttribute;
+        private readonly IPositionLengthAttribute posLenAttribute;
+
+        private int lastStartOffset;
+        private bool lastWasCommon;
+        private State savedState;
+
+        /// <summary>
+        /// Construct a token stream filtering the given input using a Set of common
+        /// words to create bigrams. Outputs both unigrams with position increment and
+        /// bigrams with position increment 0 type=gram where one or both of the words
+        /// in a potential bigram are in the set of common words .
+        /// </summary>
+        /// <param name="input"> TokenStream input in filter chain </param>
+        /// <param name="commonWords"> The set of common words. </param>
+        public CommonGramsFilter(Version matchVersion, TokenStream input, CharArraySet commonWords)
+            : base(input)
+        {
+            termAttribute = AddAttribute<ICharTermAttribute>();
+            offsetAttribute = AddAttribute<IOffsetAttribute>();
+            typeAttribute = AddAttribute<ITypeAttribute>();
+            posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
+            posLenAttribute = AddAttribute<IPositionLengthAttribute>();
+            this.commonWords = commonWords;
+        }
+
+        /// <summary>
+        /// Inserts bigrams for common words into a token stream. For each input token,
+        /// output the token. If the token and/or the following token are in the list
+        /// of common words also output a bigram with position increment 0 and
+        /// type="gram"
+        /// 
+        /// TODO:Consider adding an option to not emit unigram stopwords
+        /// as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be
+        /// changed to work with this.
+        /// 
+        /// TODO: Consider optimizing for the case of three
+        /// commongrams i.e "man of the year" normally produces 3 bigrams: "man-of",
+        /// "of-the", "the-year" but with proper management of positions we could
+        /// eliminate the middle bigram "of-the"and save a disk seek and a whole set of
+        /// position lookups.
+        /// </summary>
+        public override bool IncrementToken()
+        {
+            // get the next piece of input
+            if (savedState != null)
+            {
+                RestoreState(savedState);
+                savedState = null;
+                SaveTermBuffer();
+                return true;
+            }
+            else if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            /* We build n-grams before and after stopwords. 
+             * When valid, the buffer always contains at least the separator.
+             * If its empty, there is nothing before this stopword.
+             */
+            if (lastWasCommon || (Common && buffer.Length > 0))
+            {
+                savedState = CaptureState();
+                GramToken();
+                return true;
+            }
+
+            SaveTermBuffer();
+            return true;
+        }
+
+        /// <summary>
+        /// {@inheritDoc}
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            lastWasCommon = false;
+            savedState = null;
+            buffer.Length = 0;
+        }
+
+        // ================================================= Helper Methods ================================================
+
+        /// <summary>
+        /// Determines if the current token is a common term
+        /// </summary>
+        /// <returns> {@code true} if the current token is a common term, {@code false} otherwise </returns>
+        private bool Common
+        {
+            get
+            {
+                return commonWords != null && commonWords.Contains(termAttribute.Buffer(), 0, termAttribute.Length);
+            }
+        }
+
+        /// <summary>
+        /// Saves this information to form the left part of a gram
+        /// </summary>
+        private void SaveTermBuffer()
+        {
+            buffer.Length = 0;
+            buffer.Append(termAttribute.Buffer(), 0, termAttribute.Length);
+            buffer.Append(SEPARATOR);
+            lastStartOffset = offsetAttribute.StartOffset();
+            lastWasCommon = Common;
+        }
+
+        /// <summary>
+        /// Constructs a compound token.
+        /// </summary>
+        private void GramToken()
+        {
+            buffer.Append(termAttribute.Buffer(), 0, termAttribute.Length);
+            int endOffset = offsetAttribute.EndOffset();
+
+            ClearAttributes();
+
+            var length = buffer.Length;
+            var termText = termAttribute.Buffer();
+            if (length > termText.Length)
+            {
+                termText = termAttribute.ResizeBuffer(length);
+            }
+
+            buffer.GetChars(0, length, termText, 0);
+            termAttribute.Length = length;
+            posIncAttribute.PositionIncrement = 0;
+            posLenAttribute.PositionLength = 2; // bigram
+            offsetAttribute.SetOffset(lastStartOffset, endOffset);
+            typeAttribute.Type = GRAM_TYPE;
+            buffer.Length = 0;
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/67f47ad1/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
index fe531cb..fc151d8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs
@@ -2,103 +2,97 @@
 using System.Collections.Generic;
 using Lucene.Net.Analysis.Core;
 using Lucene.Net.Analysis.Util;
-using org.apache.lucene.analysis.commongrams;
-using org.apache.lucene.analysis.util;
 
 namespace Lucene.Net.Analysis.CommonGrams
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
     /// <summary>
-	/// Constructs a <seealso cref="CommonGramsFilter"/>.
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class CommonGramsFilterFactory : TokenFilterFactory, ResourceLoaderAware
-	{
-	  // TODO: shared base class for Stop/Keep/CommonGrams? 
-	  private CharArraySet commonWords;
-	  private readonly string commonWordFiles;
-	  private readonly string format;
-	  private readonly bool ignoreCase;
+    /// Constructs a <seealso cref="CommonGramsFilter"/>.
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_cmmngrms" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.CommonGramsFilterFactory" words="commongramsstopwords.txt" ignoreCase="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class CommonGramsFilterFactory : TokenFilterFactory, ResourceLoaderAware
+    {
+        // TODO: shared base class for Stop/Keep/CommonGrams? 
+        private CharArraySet commonWords;
+        private readonly string commonWordFiles;
+        private readonly string format;
+        private readonly bool ignoreCase;
 
-	  /// <summary>
-	  /// Creates a new CommonGramsFilterFactory </summary>
-	  public CommonGramsFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		commonWordFiles = get(args, "words");
-		format = get(args, "format");
-		ignoreCase = getBoolean(args, "ignoreCase", false);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
+        /// <summary>
+        /// Creates a new CommonGramsFilterFactory </summary>
+        public CommonGramsFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+            commonWordFiles = get(args, "words");
+            format = get(args, "format");
+            ignoreCase = getBoolean(args, "ignoreCase", false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
 
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(ResourceLoader loader) throws java.io.IOException
-	  public virtual void inform(ResourceLoader loader)
-	  {
-		if (commonWordFiles != null)
-		{
-		  if ("snowball".Equals(format, StringComparison.CurrentCultureIgnoreCase))
-		  {
-			commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
-		  }
-		  else
-		  {
-			commonWords = GetWordSet(loader, commonWordFiles, ignoreCase);
-		  }
-		}
-		else
-		{
-		  commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-		}
-	  }
-
-	  public virtual bool IgnoreCase
-	  {
-		  get
-		  {
-			return ignoreCase;
-		  }
-	  }
-
-	  public virtual CharArraySet CommonWords
-	  {
-		  get
-		  {
-			return commonWords;
-		  }
-	  }
-
-	  public override TokenStream Create(TokenStream input)
-	  {
-		var commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
-		return commonGrams;
-	  }
-	}
+        public virtual void Inform(ResourceLoader loader)
+        {
+            if (commonWordFiles != null)
+            {
+                if ("snowball".Equals(format, StringComparison.CurrentCultureIgnoreCase))
+                {
+                    commonWords = getSnowballWordSet(loader, commonWordFiles, ignoreCase);
+                }
+                else
+                {
+                    commonWords = GetWordSet(loader, commonWordFiles, ignoreCase);
+                }
+            }
+            else
+            {
+                commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+            }
+        }
 
+        public virtual bool IgnoreCase
+        {
+            get
+            {
+                return ignoreCase;
+            }
+        }
 
+        public virtual CharArraySet CommonWords
+        {
+            get
+            {
+                return commonWords;
+            }
+        }
 
+        public override TokenStream Create(TokenStream input)
+        {
+            var commonGrams = new CommonGramsFilter(luceneMatchVersion, input, commonWords);
+            return commonGrams;
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/67f47ad1/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
index e708697..ee7a2c9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilter.cs
@@ -19,113 +19,115 @@ using Lucene.Net.Analysis.Tokenattributes;
 
 namespace Lucene.Net.Analysis.CommonGrams
 {
-	/// <summary>
-	/// Wrap a CommonGramsFilter optimizing phrase queries by only returning single
-	/// words when they are not a member of a bigram.
-	/// 
-	/// Example:
-	/// <ul>
-	/// <li>query input to CommonGramsFilter: "the rain in spain falls mainly"
-	/// <li>output of CommomGramsFilter/input to CommonGramsQueryFilter:
-	/// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly"
-	/// <li>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain",
-	/// "falls", "mainly"
-	/// </ul>
-	/// </summary>
-
-	/*
-	 * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
-	 * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
-	 */
-	public sealed class CommonGramsQueryFilter : TokenFilter
-	{
-
-	  private readonly TypeAttribute typeAttribute = addAttribute(typeof(TypeAttribute));
-	  private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
-
-	  private State previous;
-	  private string previousType;
-	  private bool exhausted;
-
-	  /// <summary>
-	  /// Constructs a new CommonGramsQueryFilter based on the provided CommomGramsFilter 
-	  /// </summary>
-	  /// <param name="input"> CommonGramsFilter the QueryFilter will use </param>
-	  public CommonGramsQueryFilter(CommonGramsFilter input) : base(input)
-	  {
-	  }
-
-	  /// <summary>
-	  /// {@inheritDoc}
-	  /// </summary>
-	  public override void Reset()
-	  {
-		base.Reset();
-		previous = null;
-		previousType = null;
-		exhausted = false;
-	  }
-
-	  /// <summary>
-	  /// Output bigrams whenever possible to optimize queries. Only output unigrams
-	  /// when they are not a member of a bigram. Example:
-	  /// <ul>
-	  /// <li>input: "the rain in spain falls mainly"
-	  /// <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
-	  /// </ul>
-	  /// </summary>
-	  public override bool IncrementToken()
-	  {
-		while (!exhausted && input.incrementToken())
-		{
-		  State current = CaptureState();
-
-		  if (previous != null && !GramType)
-		  {
-			RestoreState(previous);
-			previous = current;
-			previousType = typeAttribute.type();
-
-			if (GramType)
-			{
-			  posIncAttribute.PositionIncrement = 1;
-			}
-			return true;
-		  }
-
-		  previous = current;
-		}
-
-		exhausted = true;
-
-		if (previous == null || GRAM_TYPE.Equals(previousType))
-		{
-		  return false;
-		}
-
-		RestoreState(previous);
-		previous = null;
-
-		if (GramType)
-		{
-		  posIncAttribute.PositionIncrement = 1;
-		}
-		return true;
-	  }
-
-	  // ================================================= Helper Methods ================================================
-
-	  /// <summary>
-	  /// Convenience method to check if the current type is a gram type
-	  /// </summary>
-	  /// <returns> {@code true} if the current type is a gram type, {@code false} otherwise </returns>
-	  public bool GramType
-	  {
-		  get
-		  {
-			return GRAM_TYPE.Equals(typeAttribute.type());
-		  }
-	  }
-	}
-
+    /// <summary>
+    /// Wrap a CommonGramsFilter optimizing phrase queries by only returning single
+    /// words when they are not a member of a bigram.
+    /// 
+    /// Example:
+    /// <ul>
+    /// <li>query input to CommonGramsFilter: "the rain in spain falls mainly"
+    /// <li>output of CommomGramsFilter/input to CommonGramsQueryFilter:
+    /// |"the, "the-rain"|"rain" "rain-in"|"in, "in-spain"|"spain"|"falls"|"mainly"
+    /// <li>output of CommonGramsQueryFilter:"the-rain", "rain-in" ,"in-spain",
+    /// "falls", "mainly"
+    /// </ul>
+    /// </summary>
+
+    /*
+     * See:http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/TokenStream.html and
+     * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/package.html?revision=718798
+     */
+    public sealed class CommonGramsQueryFilter : TokenFilter
+    {
+
+        private readonly ITypeAttribute typeAttribute;
+        private readonly IPositionIncrementAttribute posIncAttribute;
+
+        private State previous;
+        private string previousType;
+        private bool exhausted;
+
+        /// <summary>
+        /// Constructs a new CommonGramsQueryFilter based on the provided CommomGramsFilter
+        /// </summary>
+        /// <param name="input"> CommonGramsFilter the QueryFilter will use </param>
+        public CommonGramsQueryFilter(CommonGramsFilter input)
+            : base(input)
+        {
+            typeAttribute = AddAttribute<ITypeAttribute>();
+            posIncAttribute = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        /// <summary>
+        /// {@inheritDoc}
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            previous = null;
+            previousType = null;
+            exhausted = false;
+        }
+
+        /// <summary>
+        /// Output bigrams whenever possible to optimize queries. Only output unigrams
+        /// when they are not a member of a bigram. Example:
+        /// <ul>
+        /// <li>input: "the rain in spain falls mainly"
+        /// <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
+        /// </ul>
+        /// </summary>
+        public override bool IncrementToken()
+        {
+            while (!exhausted && input.IncrementToken())
+            {
+                State current = CaptureState();
+
+                if (previous != null && !GramType)
+                {
+                    RestoreState(previous);
+                    previous = current;
+                    previousType = typeAttribute.Type;
+
+                    if (GramType)
+                    {
+                        posIncAttribute.PositionIncrement = 1;
+                    }
+                    return true;
+                }
+
+                previous = current;
+            }
+
+            exhausted = true;
+
+            if (previous == null || CommonGramsFilter.GRAM_TYPE.Equals(previousType))
+            {
+                return false;
+            }
+
+            RestoreState(previous);
+            previous = null;
+
+            if (GramType)
+            {
+                posIncAttribute.PositionIncrement = 1;
+            }
+            return true;
+        }
+
+        // ================================================= Helper Methods ================================================
+
+        /// <summary>
+        /// Convenience method to check if the current type is a gram type
+        /// </summary>
+        /// <returns> {@code true} if the current type is a gram type, {@code false} otherwise </returns>
+        public bool GramType
+        {
+            get
+            {
+                return CommonGramsFilter.GRAM_TYPE.Equals(typeAttribute.Type);
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/67f47ad1/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
index ddee353..58299cd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsQueryFilterFactory.cs
@@ -1,55 +1,54 @@
 using System.Collections.Generic;
-using Lucene.Net.Analysis.CommonGrams;
 
-namespace org.apache.lucene.analysis.commongrams
+namespace Lucene.Net.Analysis.CommonGrams
 {
 
-	/*
-	 * Licensed to the Apache Software Foundation (ASF) under one or more
-	 * contributor license agreements.  See the NOTICE file distributed with
-	 * this work for additional information regarding copyright ownership.
-	 * The ASF licenses this file to You under the Apache License, Version 2.0
-	 * (the "License"); you may not use this file except in compliance with
-	 * the License.  You may obtain a copy of the License at
-	 *
-	 *     http://www.apache.org/licenses/LICENSE-2.0
-	 *
-	 * Unless required by applicable law or agreed to in writing, software
-	 * distributed under the License is distributed on an "AS IS" BASIS,
-	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-	 * See the License for the specific language governing permissions and
-	 * limitations under the License.
-	 */
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
 
-	/// <summary>
-	/// Construct <seealso cref="CommonGramsQueryFilter"/>.
-	/// 
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// </summary>
-	public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory
-	{
+    /// <summary>
+    /// Construct <seealso cref="CommonGramsQueryFilter"/>.
+    /// 
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_cmmngrmsqry" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.CommonGramsQueryFilterFactory" words="commongramsquerystopwords.txt" ignoreCase="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// </summary>
+    public class CommonGramsQueryFilterFactory : CommonGramsFilterFactory
+    {
 
-	  /// <summary>
-	  /// Creates a new CommonGramsQueryFilterFactory </summary>
-	  public CommonGramsQueryFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-	  }
-
-	  /// <summary>
-	  /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
-	  /// </summary>
-	  public override TokenFilter create(TokenStream input)
-	  {
-		CommonGramsFilter commonGrams = (CommonGramsFilter) base.create(input);
-		return new CommonGramsQueryFilter(commonGrams);
-	  }
-	}
+        /// <summary>
+        /// Creates a new CommonGramsQueryFilterFactory </summary>
+        public CommonGramsQueryFilterFactory(IDictionary<string, string> args)
+            : base(args)
+        {
+        }
 
+        /// <summary>
+        /// Create a CommonGramsFilter and wrap it with a CommonGramsQueryFilter
+        /// </summary>
+        public override TokenStream Create(TokenStream input)
+        {
+            var commonGrams = (CommonGramsFilter)base.Create(input);
+            return new CommonGramsQueryFilter(commonGrams);
+        }
+    }
 }
\ No newline at end of file


Mime
View raw message