lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [13/14] lucenenet git commit: IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default. They are only used for Highlighter.
Date Thu, 02 Feb 2017 12:42:44 GMT
IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for
Analysis.Common, so they are disabled by default. They are only used for Highlighter.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc7b5b52
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc7b5b52
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc7b5b52

Branch: refs/heads/api-work
Commit: fc7b5b52dd64877d5d63498b3d2df4e54c569bd8
Parents: 506f55a
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Thu Feb 2 18:02:48 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Thu Feb 2 18:30:37 2017 +0700

----------------------------------------------------------------------
 src/IcuBreakIterator.cs                         | 23 +++++++++++++++-----
 .../PostingsHighlight/PostingsHighlighter.cs    |  5 ++++-
 2 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
index 0bf6007..6028ba1 100644
--- a/src/IcuBreakIterator.cs
+++ b/src/IcuBreakIterator.cs
@@ -50,6 +50,8 @@ namespace Lucene.Net
         /// </summary>
         protected int m_end;
 
+        private bool enableHacks = false;
+
         public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
             : this(type, CultureInfo.CurrentCulture)
         {
@@ -63,6 +65,13 @@ namespace Lucene.Net
             this.type = type;
         }
 
+        
+        public virtual bool EnableHacks
+        {
+            get { return enableHacks; }
+            set { enableHacks = value; }
+        }
+
         /// <summary>
         /// Sets the current iteration position to the beginning of the text.
         /// </summary>
@@ -280,20 +289,22 @@ namespace Lucene.Net
 
         private void LoadBoundaries(int start, int end)
         {
-            //boundaries = new List<int>();
-
             IEnumerable<Icu.Boundary> icuBoundaries;
             string offsetText = text.Substring(start, end - start);
 
-
             if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
             {
-                // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
-                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+                if (enableHacks)
+                {
+                    // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+                    offsetText = offsetText.Replace("-", "a");
+                }
+                
+                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
             }
             else
             {
-                if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+                if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
                 {
                     // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
                     offsetText = offsetText.Replace("\n", " ");

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
index 63c48bc..db04ee1 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
@@ -131,7 +131,10 @@ namespace Lucene.Net.Search.PostingsHighlight
         /// </summary>
         protected virtual BreakIterator GetBreakIterator(string field)
         {
-            return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture);
+            return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)
+            {
+                EnableHacks = true
+            };
         }
 
         /// <summary>


Mime
View raw message