lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [08/11] lucenenet git commit: Lucene.Net.Analysis.Ngram refactor: member accessibility and documentation comments
Date Fri, 03 Feb 2017 17:51:14 GMT
Lucene.Net.Analysis.Ngram refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/269da1ef
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/269da1ef
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/269da1ef

Branch: refs/heads/api-work
Commit: 269da1ef4ecb679c0e13c914fab3f60c175d9466
Parents: d4b9c00
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Sat Feb 4 00:01:15 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Sat Feb 4 00:01:15 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Ngram/EdgeNGramFilterFactory.cs    |   4 +-
 .../Analysis/Ngram/EdgeNGramTokenFilter.cs      |  29 ++---
 .../Analysis/Ngram/EdgeNGramTokenizer.cs        |  31 +++---
 .../Analysis/Ngram/EdgeNGramTokenizerFactory.cs |   4 +-
 .../Ngram/Lucene43EdgeNGramTokenizer.cs         |  47 ++++----
 .../Analysis/Ngram/Lucene43NGramTokenizer.cs    |  12 +--
 .../Analysis/Ngram/NGramFilterFactory.cs        |   4 +-
 .../Analysis/Ngram/NGramTokenFilter.cs          |  33 +++---
 .../Analysis/Ngram/NGramTokenizer.cs            | 107 +++++++++++++------
 .../Analysis/Ngram/NGramTokenizerFactory.cs     |   4 +-
 .../Analysis/Ngram/NGramTokenizerTest.cs        |   2 +-
 11 files changed, 161 insertions(+), 116 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
index 2efb5fc..2e3e0ed 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
@@ -1,7 +1,7 @@
 using Lucene.Net.Analysis.Util;
 using System.Collections.Generic;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,7 +37,7 @@ namespace Lucene.Net.Analysis.Ngram
         private readonly string side;
 
         /// <summary>
-        /// Creates a new EdgeNGramFilterFactory </summary>
+        /// Creates a new <see cref="EdgeNGramFilterFactory"/> </summary>
         public EdgeNGramFilterFactory(IDictionary<string, string> args)
             : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
index 01677cf..4c1fff1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
@@ -1,9 +1,9 @@
-using System;
-using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
+using System;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,8 +27,8 @@ namespace Lucene.Net.Analysis.Ngram
     /// <para>
     /// This <see cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
     /// </para>
-    /// <para><a name="version"/>As of Lucene 4.4, this filter does not support
-    /// <see cref="Side#BACK"/> (you can use <see cref="ReverseStringFilter"/>
up-front and
+    /// <para>As of Lucene 4.4, this filter does not support
+    /// <see cref="Side.BACK"/> (you can use <see cref="Reverse.ReverseStringFilter"/> up-front and
     /// afterward to get the same behavior), handles supplementary characters
     /// correctly and does not update offsets anymore.
     /// </para>
@@ -43,7 +43,6 @@ namespace Lucene.Net.Analysis.Ngram
         /// Specifies which side of the input the n-gram should be generated from </summary>
         public enum Side
         {
-
             /// <summary>
             /// Get the n-gram from the front of the input </summary>
             FRONT,
@@ -54,7 +53,9 @@ namespace Lucene.Net.Analysis.Ngram
             BACK,
         }
 
-        // Get the appropriate Side from a string
+        /// <summary>
+        /// Get the appropriate <see cref="Side"/> from a string
+        /// </summary>
         public static Side GetSide(string sideName)
         {
             Side result;
@@ -86,9 +87,9 @@ namespace Lucene.Net.Analysis.Ngram
         private readonly IPositionLengthAttribute posLenAtt;
 
         /// <summary>
-        /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in
the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
         /// <param name="input"> <see cref="TokenStream"/> holding the input
to be tokenized </param>
         /// <param name="side"> the <see cref="Side"/> from which to chop off
an n-gram </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
@@ -136,9 +137,9 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in
the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
         /// <param name="input"> <see cref="TokenStream"/> holding the input
to be tokenized </param>
         /// <param name="sideLabel"> the name of the <see cref="Side"/> from
which to chop off an n-gram </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
@@ -150,9 +151,9 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="EdgeNGramTokenFilter"/> that can generate n-grams in
the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
         /// <param name="input"> <see cref="TokenStream"/> holding the input
to be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -163,7 +164,7 @@ namespace Lucene.Net.Analysis.Ngram
         {
         }
 
-        public override bool IncrementToken()
+        public override sealed bool IncrementToken()
         {
             while (true)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
index 09ad7f8..9eba29f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizer.cs
@@ -1,7 +1,7 @@
 using Lucene.Net.Util;
 using System.IO;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,14 +25,15 @@ namespace Lucene.Net.Analysis.Ngram
     /// <para>
     /// This <see cref="Tokenizer"/> create n-grams from the beginning edge or ending edge of a input token.
     /// </para>
-    /// <para><a name="version" /> As of Lucene 4.4, this tokenizer<ul>
-    /// <li>can handle <code>maxGram</code> larger than 1024 chars, but
beware that this will result in increased memory usage
-    /// <li>doesn't trim the input,
-    /// <li>sets position increments equal to 1 instead of 1 for the first token and
0 for all other ones
-    /// <li>doesn't support backward n-grams anymore.
-    /// <li>supports <see cref="#isTokenChar(int) pre-tokenization"/>,
-    /// <li>correctly handles supplementary characters.
-    /// </ul>
+    /// <para>As of Lucene 4.4, this tokenizer
+    /// <list type="bullet">
+    ///     <item>can handle <code>maxGram</code> larger than 1024 chars,
but beware that this will result in increased memory usage</item>
+    ///     <item>doesn't trim the input,</item>
+    ///     <item>sets position increments equal to 1 instead of 1 for the first token
and 0 for all other ones</item>
+    ///     <item>doesn't support backward n-grams anymore.</item>
+    ///     <item>supports <see cref="Util.CharTokenizer.IsTokenChar(int)"/> pre-tokenization,</item>
+    ///     <item>correctly handles supplementary characters.</item>
+    /// </list>
     /// </para>
     /// <para>Although <b style="color:red">highly</b> discouraged, it
is still possible
     /// to use the old behavior through <see cref="Lucene43EdgeNGramTokenizer"/>.
@@ -44,10 +45,10 @@ namespace Lucene.Net.Analysis.Ngram
         public const int DEFAULT_MIN_GRAM_SIZE = 1;
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="EdgeNGramTokenizer"/> that can generate n-grams in the
sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
         public EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
@@ -58,9 +59,9 @@ namespace Lucene.Net.Analysis.Ngram
         /// <summary>
         /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
to use </param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/>
to use </param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
         public EdgeNGramTokenizer(LuceneVersion version, AttributeSource.AttributeFactory factory, TextReader input, int minGram, int maxGram)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
index 5273ae4..d3f2bb6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
@@ -4,7 +4,7 @@ using System;
 using System.Collections.Generic;
 using System.IO;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -39,7 +39,7 @@ namespace Lucene.Net.Analysis.Ngram
         private readonly string side;
 
         /// <summary>
-        /// Creates a new EdgeNGramTokenizerFactory </summary>
+        /// Creates a new <see cref="EdgeNGramTokenizerFactory"/> </summary>
         public EdgeNGramTokenizerFactory(IDictionary<string, string> args) : base(args)
         {
             minGramSize = GetInt(args, "minGramSize", EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
index 3ed7187..eb09a94 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43EdgeNGramTokenizer.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Util;
 using System;
 using System.IO;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -41,7 +41,6 @@ namespace Lucene.Net.Analysis.Ngram
         /// Specifies which side of the input the n-gram should be generated from </summary>
         public enum Side
         {
-
             /// <summary>
             /// Get the n-gram from the front of the input </summary>
             FRONT,
@@ -52,7 +51,7 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         // Get the appropriate Side from a string
-        internal static Side GetSide(string sideName)
+        public static Side GetSide(string sideName)
         {
             Side result;
             if (!Enum.TryParse(sideName, true, out result))
@@ -73,10 +72,10 @@ namespace Lucene.Net.Analysis.Ngram
 
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams
in the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="side"> the <see cref="Side"/> from which to chop off
an n-gram </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -88,11 +87,11 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams
in the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
to use </param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/>
to use </param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="side"> the <see cref="Side"/> from which to chop off
an n-gram </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -104,10 +103,10 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams
in the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="sideLabel"> the name of the <see cref="Side"/> from
which to chop off an n-gram </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -118,11 +117,11 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams
in the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
to use </param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/>
to use </param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="sideLabel"> the name of the <see cref="Side"/> from
which to chop off an n-gram </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -133,10 +132,10 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams
in the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
         public Lucene43EdgeNGramTokenizer(LuceneVersion version, TextReader input, int minGram, int maxGram)
@@ -145,11 +144,11 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given
range
+        /// Creates <see cref="Lucene43EdgeNGramTokenizer"/> that can generate n-grams
in the sizes of the given range
         /// </summary>
-        /// <param name="version"> the <a href="#version">Lucene match version</a>
</param>
-        /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
to use </param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// <param name="version"> the Lucene match version - See <see cref="LuceneVersion"/>
</param>
+        /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/>
to use </param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
         public Lucene43EdgeNGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
index a0f210a..a79ffba 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/Lucene43NGramTokenizer.cs
@@ -2,7 +2,7 @@
 using System;
 using System.IO;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -42,7 +42,7 @@ namespace Lucene.Net.Analysis.Ngram
         private IOffsetAttribute offsetAtt;
 
         /// <summary>
-        /// Creates NGramTokenizer with given min and max n-grams. </summary>
+        /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams.
</summary>
         /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -53,9 +53,9 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates NGramTokenizer with given min and max n-grams. </summary>
-        /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
to use </param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be
tokenized </param>
+        /// Creates <see cref="Lucene43NGramTokenizer"/> with given min and max n-grams.
</summary>
+        /// <param name="factory"> <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/>
to use </param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
         public Lucene43NGramTokenizer(AttributeFactory factory, TextReader input, int minGram, int maxGram)
@@ -65,7 +65,7 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates NGramTokenizer with default min and max n-grams. </summary>
+        /// Creates <see cref="Lucene43NGramTokenizer"/> with default min and max n-grams.
</summary>
         /// <param name="input"> <see cref="TextReader"/> holding the input to
be tokenized </param>
         public Lucene43NGramTokenizer(TextReader input)
             : this(input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
index 3c9f738..8b9b726 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramFilterFactory.cs
@@ -1,7 +1,7 @@
 using Lucene.Net.Analysis.Util;
 using System.Collections.Generic;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.Ngram
         private readonly int minGramSize;
 
         /// <summary>
-        /// Creates a new NGramFilterFactory </summary>
+        /// Creates a new <see cref="NGramFilterFactory"/> </summary>
         public NGramFilterFactory(IDictionary<string, string> args)
             : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
index 561e575..26cc8d5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,25 +24,26 @@ namespace Lucene.Net.Analysis.Ngram
 
     /// <summary>
     /// Tokenizes the input into n-grams of the given size(s).
-    /// <a name="version"/>
     /// <para>You must specify the required <see cref="LuceneVersion"/> compatibility when
-    /// creating a <see cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filters:<ul>
-    /// <li>handles supplementary characters correctly,</li>
-    /// <li>emits all n-grams for the same token at the same position,</li>
-    /// <li>does not modify offsets,</li>
-    /// <li>sorts n-grams by their offset in the original token first, then
-    /// increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
-    /// "c").</li></ul>
+    /// creating a <see cref="NGramTokenFilter"/>. As of Lucene 4.4, this token filters:
+    /// <list type="bullet">
+    ///     <item>handles supplementary characters correctly,</item>
+    ///     <item>emits all n-grams for the same token at the same position,</item>
+    ///     <item>does not modify offsets,</item>
+    ///     <item>sorts n-grams by their offset in the original token first, then
+    ///         increasing length (meaning that "abc" will give "a", "ab", "abc", "b", "bc",
+    ///         "c").</item>
+    /// </list>
     /// </para>
     /// <para>You can make this filter use the old behavior by providing a version &lt;
-    /// <see cref="Version#LUCENE_44"/> in the constructor but this is not recommended
as
+    /// <see cref="LuceneVersion.LUCENE_44"/> in the constructor but this is not recommended as
     /// it will lead to broken <see cref="TokenStream"/>s that will cause highlighting
     /// bugs.
     /// </para>
     /// <para>If you were using this <see cref="TokenFilter"/> to perform partial highlighting,
     /// this won't work anymore since this filter doesn't update offsets. You should
     /// modify your analysis chain to use <see cref="NGramTokenizer"/>, and potentially
-    /// override <see cref="NGramTokenizer#isTokenChar(int)"/> to perform pre-tokenization.
+    /// override <see cref="NGramTokenizer.IsTokenChar(int)"/> to perform pre-tokenization.
     /// </para>
     /// </summary>
     public sealed class NGramTokenFilter : TokenFilter
@@ -70,9 +71,9 @@ namespace Lucene.Net.Analysis.Ngram
         private readonly IOffsetAttribute offsetAtt;
 
         /// <summary>
-        /// Creates NGramTokenFilter with given min and max n-grams. </summary>
+        /// Creates <see cref="NGramTokenFilter"/> with given min and max n-grams.
</summary>
         /// <param name="version"> Lucene version to enable correct position increments.
-        ///                See <a href="#version">above</a> for details. </param>
+        ///                See <see cref="NGramTokenFilter"/> for details. </param>
         /// <param name="input"> <see cref="TokenStream"/> holding the input
to be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -154,9 +155,9 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates NGramTokenFilter with default min and max n-grams. </summary>
+        /// Creates <see cref="NGramTokenFilter"/> with default min and max n-grams.
</summary>
         /// <param name="version"> Lucene version to enable correct position increments.
-        ///                See <a href="#version">above</a> for details. </param>
+        ///                See <see cref="NGramTokenFilter"/> for details. </param>
         /// <param name="input"> <see cref="TokenStream"/> holding the input
to be tokenized </param>
         public NGramTokenFilter(LuceneVersion version, TokenStream input)
             : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
@@ -166,7 +167,7 @@ namespace Lucene.Net.Analysis.Ngram
         /// <summary>
         /// Returns the next token in the stream, or null at EOS.
         /// </summary>
-        public override bool IncrementToken()
+        public override sealed bool IncrementToken()
         {
             while (true)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
index acc42c3..a6ce01d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
@@ -6,7 +6,7 @@ using System;
 using System.Diagnostics;
 using System.IO;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -32,21 +32,66 @@ namespace Lucene.Net.Analysis.Ngram
     /// the same as the term chars.
     /// </para>
     /// <para>For example, "abcde" would be tokenized as (minGram=2, maxGram=3):
-    /// <table>
-    /// <tr><th>Term</th><td>ab</td><td>abc</td><td>bc</td><td>bcd</td><td>cd</td><td>cde</td><td>de</td></tr>
-    /// <tr><th>Position increment</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
-    /// <tr><th>Position length</th><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr>
-    /// <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
-    /// </table>
-    /// <a name="version"/>
+    /// <list type="table">
+    ///     <listheader>
+    ///         <term>Term</term>
+    ///         <term>Position increment</term>
+    ///         <term>Position length</term>
+    ///         <term>Offsets</term>
+    ///     </listheader>
+    ///     <item>
+    ///         <term>ab</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[0,2[</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>abc</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[0,3[</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>bc</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[1,3[</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>bcd</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[1,4[</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>cd</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[2,4[</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>cde</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[2,5[</term>
+    ///     </item>
+    ///     <item>
+    ///         <term>de</term>
+    ///         <term>1</term>
+    ///         <term>1</term>
+    ///         <term>[3,5[</term>
+    ///     </item>
+    /// </list>
     /// </para>
-    /// <para>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
-    /// <li>tokenize in a streaming fashion to support streams which are larger
-    /// than 1024 chars (limit of the previous version),
-    /// <li>count grams based on unicode code points instead of java chars (and
-    /// never split in the middle of surrogate pairs),
-    /// <li>give the ability to <see cref="#isTokenChar(int) pre-tokenize"/> the stream
-    /// before computing n-grams.</ul>
+    /// <para>This tokenizer changed a lot in Lucene 4.4 in order to:
+    /// <list type="bullet">
+    ///     <item>tokenize in a streaming fashion to support streams which are larger
+    ///         than 1024 chars (limit of the previous version),</item>
+    ///     <item>count grams based on unicode code points instead of java chars (and
+    ///         never split in the middle of surrogate pairs),</item>
+    ///     <item>give the ability to pre-tokenize the stream (<see cref="IsTokenChar(int)"/>)
+    ///         before computing n-grams.</item>
+    /// </list>
     /// </para>
     /// <para>Additionally, this class doesn't trim trailing whitespaces and emits
     /// tokens in a different order, tokens are now emitted by increasing start
@@ -57,7 +102,7 @@ namespace Lucene.Net.Analysis.Ngram
     /// to use the old behavior through <see cref="Lucene43NGramTokenizer"/>.
     /// </para>
     /// </summary>
-    // non-final to allow for overriding isTokenChar, but all other methods should be final
+    // non-sealed to allow for overriding IsTokenChar, but all other methods should be sealed
     public class NGramTokenizer : Tokenizer
     {
         public const int DEFAULT_MIN_NGRAM_SIZE = 1;
@@ -87,8 +132,8 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates NGramTokenizer with given min and max n-grams. </summary>
-        /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+        /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
+        /// <param name="version"> the lucene compatibility version </param>
         /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
@@ -104,10 +149,10 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates NGramTokenizer with given min and max n-grams. </summary>
-        /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
-        /// <param name="factory"> <see cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/> to use </param>
-        /// <param name="input"> <see cref="Reader"/> holding the input to be tokenized </param>
+        /// Creates <see cref="NGramTokenizer"/> with given min and max n-grams. </summary>
+        /// <param name="version"> the lucene compatibility version </param>
+        /// <param name="factory"> <see cref="AttributeSource.AttributeFactory"/> to use </param>
+        /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
         /// <param name="minGram"> the smallest n-gram to generate </param>
         /// <param name="maxGram"> the largest n-gram to generate </param>
         public NGramTokenizer(LuceneVersion version, AttributeFactory factory, TextReader input, int minGram, int maxGram)
@@ -116,8 +161,8 @@ namespace Lucene.Net.Analysis.Ngram
         }
 
         /// <summary>
-        /// Creates NGramTokenizer with default min and max n-grams. </summary>
-        /// <param name="version"> the lucene compatibility <a href="#version">version</a> </param>
+        /// Creates <see cref="NGramTokenizer"/> with default min and max n-grams. </summary>
+        /// <param name="version"> the lucene compatibility version </param>
         /// <param name="input"> <see cref="TextReader"/> holding the input to be tokenized </param>
         public NGramTokenizer(LuceneVersion version, TextReader input)
               : this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE)
@@ -154,8 +199,6 @@ namespace Lucene.Net.Analysis.Ngram
             charBuffer = CharacterUtils.NewCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
             buffer = new int[charBuffer.Buffer.Length];
 
-
-
             // Make the term att large enough
             termAtt.ResizeBuffer(2 * maxGram);
         }
@@ -191,7 +234,7 @@ namespace Lucene.Net.Analysis.Ngram
                         Debug.Assert(exhausted);
                         return false;
                     }
-                    consume();
+                    Consume();
                     gramSize = minGram;
                 }
 
@@ -202,7 +245,7 @@ namespace Lucene.Net.Analysis.Ngram
                 bool isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
                 if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar)
                 {
-                    consume();
+                    Consume();
                     gramSize = minGram;
                     continue;
                 }
@@ -236,19 +279,19 @@ namespace Lucene.Net.Analysis.Ngram
 
         /// <summary>
         /// Consume one code point. </summary>
-        private void consume()
+        private void Consume()
         {
             offset += Character.CharCount(buffer[bufferStart++]);
         }
 
         /// <summary>
         /// Only collect characters which satisfy this condition. </summary>
-        protected internal virtual bool IsTokenChar(int chr)
+        protected virtual bool IsTokenChar(int chr)
         {
             return true;
         }
 
-        public override void End()
+        public override sealed void End()
         {
             base.End();
             Debug.Assert(bufferStart <= bufferEnd);
@@ -262,7 +305,7 @@ namespace Lucene.Net.Analysis.Ngram
             offsetAtt.SetOffset(endOffset, endOffset);
         }
 
-        public override void Reset()
+        public override sealed void Reset()
         {
             base.Reset();
             bufferStart = bufferEnd = buffer.Length;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
index 73865fb..33a81b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizerFactory.cs
@@ -3,7 +3,7 @@ using Lucene.Net.Util;
 using System.Collections.Generic;
 using System.IO;
 
-namespace Lucene.Net.Analysis.Ngram
+namespace Lucene.Net.Analysis.Ngram // LUCENENET TODO: Change namespace, directory, and Git to NGram
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,7 +37,7 @@ namespace Lucene.Net.Analysis.Ngram
         private readonly int minGramSize;
 
         /// <summary>
-        /// Creates a new NGramTokenizerFactory </summary>
+        /// Creates a new <see cref="NGramTokenizerFactory"/> </summary>
         public NGramTokenizerFactory(IDictionary<string, string> args)
             : base(args)
         {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/269da1ef/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
index b4aac99..d72f4c5 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Ngram/NGramTokenizerTest.cs
@@ -233,7 +233,7 @@ namespace Lucene.Net.Analysis.Ngram
                 this.nonTokenChars = nonTokenChars;
             }
 
-            protected internal override bool IsTokenChar(int chr)
+            protected override bool IsTokenChar(int chr)
             {
                 return nonTokenChars.IndexOf((char)chr) < 0;
             }


Mime
View raw message