lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [2/6] lucenenet git commit: Lucene.Net.Analysis.Cn refactor: member accessibility and documentation comments
Date Thu, 02 Feb 2017 20:15:54 GMT
Lucene.Net.Analysis.Cn refactor: member accessibility and documentation comments


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3e97f31e
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3e97f31e
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3e97f31e

Branch: refs/heads/api-work
Commit: 3e97f31e190f7c3a7781a45e9807e609a1e06393
Parents: 0986545
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Thu Feb 2 23:22:53 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Fri Feb 3 01:13:42 2017 +0700

----------------------------------------------------------------------
 .../Analysis/Cn/ChineseAnalyzer.cs              | 23 +++++-----
 .../Analysis/Cn/ChineseFilter.cs                | 37 ++++++++--------
 .../Analysis/Cn/ChineseFilterFactory.cs         |  6 +--
 .../Analysis/Cn/ChineseTokenizer.cs             | 45 ++++++++++----------
 .../Analysis/Cn/ChineseTokenizerFactory.cs      |  8 ++--
 5 files changed, 61 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
index 5dc0aa6..de0b5e7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseAnalyzer.cs
@@ -21,21 +21,22 @@ namespace Lucene.Net.Analysis.Cn
 	 */
 
     /// <summary>
-    /// An <seealso cref="Analyzer"/> that tokenizes text with <seealso cref="ChineseTokenizer"/> and
-    /// filters with <seealso cref="ChineseFilter"/> </summary>
-    /// @deprecated (3.1) Use <seealso cref="StandardAnalyzer"/> instead, which has the same functionality.
+    /// An <see cref="Analyzer"/> that tokenizes text with <see cref="ChineseTokenizer"/> and
+    /// filters with <see cref="ChineseFilter"/>
+    /// </summary>
+    /// @deprecated (3.1) Use <see cref="Standard.StandardAnalyzer"/> instead, which has the same functionality.
     /// This analyzer will be removed in Lucene 5.0 
     [Obsolete("(3.1) Use StandardAnalyzer instead, which has the same functionality.")]
     public sealed class ChineseAnalyzer : Analyzer
-    /// <summary>
-    /// Creates
-    /// <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-    /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
-    /// </summary>
-    /// <returns> <seealso cref="org.apache.lucene.analysis.Analyzer.TokenStreamComponents"/>
-    ///         built from a <seealso cref="ChineseTokenizer"/> filtered with
-    ///         <seealso cref="ChineseFilter"/> </returns>
     {
+        /// <summary>
+        /// Creates
+        /// <see cref="Analyzer.TokenStreamComponents"/>
+        /// used to tokenize all the text in the provided <see cref="TextReader"/>.
+        /// </summary>
+        /// <returns> <see cref="Analyzer.TokenStreamComponents"/>
+        ///         built from a <see cref="ChineseTokenizer"/> filtered with
+        ///         <see cref="ChineseFilter"/> </returns>
         protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
             Tokenizer source = new ChineseTokenizer(reader);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
index 9b3b95a..61e6576 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilter.cs
@@ -25,28 +25,32 @@ namespace Lucene.Net.Analysis.Cn
 	 */
 
     /// <summary>
-    /// A <seealso cref="TokenFilter"/> with a stop word table.  
-    /// <ul>
-    /// <li>Numeric tokens are removed.
-    /// <li>English tokens must be larger than 1 character.
-    /// <li>One Chinese character as one Chinese word.
-    /// </ul>
+    /// A <see cref="TokenFilter"/> with a stop word table.  
+    /// <list type="bullet">
+    ///     <item>Numeric tokens are removed.</item>
+    ///     <item>English tokens must be larger than 1 character.</item>
+    ///     <item>One Chinese character as one Chinese word.</item>
+    /// </list>
     /// TO DO:
-    /// <ol>
-    /// <li>Add Chinese stop words, such as \ue400
-    /// <li>Dictionary based Chinese word extraction
-    /// <li>Intelligent Chinese word extraction
-    /// </ol>
+    /// <list type="number">
+    ///     <item>Add Chinese stop words, such as \ue400</item>
+    ///     <item>Dictionary based Chinese word extraction</item>
+    ///     <item>Intelligent Chinese word extraction</item>
+    /// </list>
     /// </summary>
-    /// @deprecated (3.1) Use <seealso cref="StopFilter"/> instead, which has the same functionality.
+    /// @deprecated (3.1) Use <see cref="Core.StopFilter"/> instead, which has the same functionality.
     /// This filter will be removed in Lucene 5.0 
     [Obsolete("(3.1) Use StopFilter instead, which has the same functionality.")]
     public sealed class ChineseFilter : TokenFilter
     {
-
         // Only English now, Chinese to be added later.
-        public static readonly string[] STOP_WORDS = new string[] { "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" };
-
+        public static readonly string[] STOP_WORDS = new string[] {
+            "and", "are", "as", "at", "be", "but", "by",
+            "for", "if", "in", "into", "is", "it",
+            "no", "not", "of", "on", "or", "such",
+            "that", "the", "their", "then", "there", "these",
+            "they", "this", "to", "was", "will", "with"
+        };
 
         private CharArraySet stopTable;
 
@@ -55,13 +59,12 @@ namespace Lucene.Net.Analysis.Cn
         public ChineseFilter(TokenStream @in)
             : base(@in)
         {
-
             stopTable = new CharArraySet(LuceneVersion.LUCENE_CURRENT, Arrays.AsList(STOP_WORDS), false);
             termAtt = AddAttribute<ICharTermAttribute>();
         }
+
         public override bool IncrementToken()
         {
-
             while (m_input.IncrementToken())
             {
                 char[] text = termAtt.Buffer;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
index d3e30e5..98ddee9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseFilterFactory.cs
@@ -22,14 +22,14 @@ namespace Lucene.Net.Analysis.Cn
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="ChineseFilter"/> </summary>
-    /// @deprecated Use <seealso cref="StopFilterFactory"/> instead. 
+    /// Factory for <see cref="ChineseFilter"/> </summary>
+    /// @deprecated Use <see cref="Core.StopFilterFactory"/> instead. 
     [Obsolete("Use StopFilterFactory instead.")]
     public class ChineseFilterFactory : TokenFilterFactory
     {
 
         /// <summary>
-        /// Creates a new ChineseFilterFactory </summary>
+        /// Creates a new <see cref="ChineseFilterFactory"/> </summary>
         public ChineseFilterFactory(IDictionary<string, string> args) : base(args)
         {
             if (args.Count > 0)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
index 4ae7ff8..eb500bb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
@@ -26,27 +26,28 @@ namespace Lucene.Net.Analysis.Cn
     /// Tokenize Chinese text as individual chinese characters.
     /// 
     /// <para>
-    /// The difference between ChineseTokenizer and
-    /// CJKTokenizer is that they have different
+    /// The difference between <see cref="ChineseTokenizer"/> and
+    /// <see cref="Cjk.CJKTokenizer"/> is that they have different
     /// token parsing logic.
     /// </para>
     /// <para>
     /// For example, if the Chinese text
     /// "C1C2C3C4" is to be indexed:
-    /// <ul>
-    /// <li>The tokens returned from ChineseTokenizer are C1, C2, C3, C4. 
-    /// <li>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.
-    /// </ul>
+    /// <list type="bullet">
+    ///     <item>The tokens returned from ChineseTokenizer are C1, C2, C3, C4.</item>
+    ///     <item>The tokens returned from the CJKTokenizer are C1C2, C2C3, C3C4.</item>
+    /// </list>
     /// </para>
     /// <para>
-    /// Therefore the index created by CJKTokenizer is much larger.
+    /// Therefore the index created by <see cref="CJKTokenizer"/> is much larger.
     /// </para>
     /// <para>
     /// The problem is that when searching for C1, C1C2, C1C3,
-    /// C4C2, C1C2C3 ... the ChineseTokenizer works, but the
-    /// CJKTokenizer will not work.
-    /// </para> </summary>
-    /// @deprecated (3.1) Use <seealso cref="StandardTokenizer"/> instead, which has the same functionality.
+    /// C4C2, C1C2C3 ... the <see cref="ChineseTokenizer"/> works, but the
+    /// <see cref="Cjk.CJKTokenizer"/> will not work.
+    /// </para> 
+    /// </summary>
+    /// @deprecated (3.1) Use <see cref="Standard.StandardTokenizer"/> instead, which has the same functionality.
     /// This filter will be removed in Lucene 5.0 
     [Obsolete("(3.1) Use StandardTokenizer instead, which has the same functionality.")]
     public sealed class ChineseTokenizer : Tokenizer
@@ -82,9 +83,8 @@ namespace Lucene.Net.Analysis.Cn
         private ICharTermAttribute termAtt;
         private IOffsetAttribute offsetAtt;
 
-        private void push(char c)
+        private void Push(char c)
         {
-
             if (length == 0) // start of token
             {
                 start = offset - 1;
@@ -93,9 +93,8 @@ namespace Lucene.Net.Analysis.Cn
 
         }
 
-        private bool flush()
+        private bool Flush()
         {
-
             if (length > 0)
             {
                 //System.out.println(new String(buffer, 0,
@@ -132,7 +131,7 @@ namespace Lucene.Net.Analysis.Cn
                 if (dataLen <= 0)
                 {
                     offset--;
-                    return flush();
+                    return Flush();
                 }
                 else
                 {
@@ -145,10 +144,10 @@ namespace Lucene.Net.Analysis.Cn
                     case UnicodeCategory.DecimalDigitNumber:
                     case UnicodeCategory.LowercaseLetter:
                     case UnicodeCategory.UppercaseLetter:
-                        push(c);
+                        Push(c);
                         if (length == MAX_WORD_LEN)
                         {
-                            return flush();
+                            return Flush();
                         }
                         break;
 
@@ -157,22 +156,22 @@ namespace Lucene.Net.Analysis.Cn
                         {
                             bufferIndex--;
                             offset--;
-                            return flush();
+                            return Flush();
                         }
-                        push(c);
-                        return flush();
+                        Push(c);
+                        return Flush();
 
                     default:
                         if (length > 0)
                         {
-                            return flush();
+                            return Flush();
                         }
                         break;
                 }
             }
         }
 
-        public override void End()
+        public override sealed void End()
         {
             base.End();
             // set final offset

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3e97f31e/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
index 2eef7be..b71906e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizerFactory.cs
@@ -24,14 +24,14 @@ namespace Lucene.Net.Analysis.Cn
 	 */
 
     /// <summary>
-    /// Factory for <seealso cref="ChineseTokenizer"/> </summary>
-    /// @deprecated Use <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizerFactory"/> instead. 
+    /// Factory for <see cref="ChineseTokenizer"/>
+    /// </summary>
+    /// @deprecated Use <see cref="Standard.StandardTokenizerFactory"/> instead. 
     [Obsolete("Use StandardTokenizerFactory instead.")]
     public class ChineseTokenizerFactory : TokenizerFactory
     {
-
         /// <summary>
-        /// Creates a new ChineseTokenizerFactory </summary>
+        /// Creates a new <see cref="ChineseTokenizerFactory"/> </summary>
         public ChineseTokenizerFactory(IDictionary<string, string> args)
               : base(args)
         {


Mime
View raw message