lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [2/3] lucenenet git commit: Lucene.Net.Analysis.ICU: Renamed ICU directory to Icu to match namespace conventions
Date Sun, 10 Sep 2017 22:02:33 GMT
Lucene.Net.Analysis.ICU: Renamed ICU directory to Icu to match namespace conventions


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1191c20d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1191c20d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1191c20d

Branch: refs/heads/master
Commit: 1191c20dfb3761f7fd5205b3708f3479fbfa8b01
Parents: 60e8125
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Mon Sep 11 03:56:06 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Mon Sep 11 05:00:23 2017 +0700

----------------------------------------------------------------------
 .../Analysis/ICU/ICUFoldingFilter.cs            |  32 ---
 .../Analysis/ICU/ICUFoldingFilterFactory.cs     |  31 ---
 .../Analysis/ICU/ICUNormalizer2CharFilter.cs    | 235 -------------------
 .../ICU/ICUNormalizer2CharFilterFactory.cs      |  60 -----
 .../Analysis/ICU/ICUNormalizer2Filter.cs        |  60 -----
 .../Analysis/ICU/ICUNormalizer2FilterFactory.cs |  59 -----
 .../Analysis/ICU/ICUTransformFilter.cs          | 152 ------------
 .../Analysis/ICU/ICUTransformFilterFactory.cs   |  38 ---
 .../ICU/Segmentation/BreakIteratorWrapper.cs    | 166 -------------
 .../ICU/Segmentation/CharArrayIterator.cs       | 134 -----------
 .../ICU/Segmentation/CompositeBreakIterator.cs  | 132 -----------
 .../Segmentation/DefaultICUTokenizerConfig.cs   | 127 ----------
 .../Analysis/ICU/Segmentation/ICUTokenizer.cs   | 229 ------------------
 .../ICU/Segmentation/ICUTokenizerConfig.cs      |  33 ---
 .../ICU/Segmentation/ICUTokenizerFactory.cs     | 139 -----------
 .../Analysis/ICU/Segmentation/ScriptIterator.cs | 206 ----------------
 .../ICU/TokenAttributes/ScriptAttribute.cs      |  42 ----
 .../ICU/TokenAttributes/ScriptAttributeImpl.cs  |  80 -------
 .../Analysis/Icu/ICUFoldingFilter.cs            |  32 +++
 .../Analysis/Icu/ICUFoldingFilterFactory.cs     |  31 +++
 .../Analysis/Icu/ICUNormalizer2CharFilter.cs    | 235 +++++++++++++++++++
 .../Icu/ICUNormalizer2CharFilterFactory.cs      |  60 +++++
 .../Analysis/Icu/ICUNormalizer2Filter.cs        |  60 +++++
 .../Analysis/Icu/ICUNormalizer2FilterFactory.cs |  59 +++++
 .../Analysis/Icu/ICUTransformFilter.cs          | 152 ++++++++++++
 .../Analysis/Icu/ICUTransformFilterFactory.cs   |  38 +++
 .../Icu/Segmentation/BreakIteratorWrapper.cs    | 166 +++++++++++++
 .../Icu/Segmentation/CharArrayIterator.cs       | 134 +++++++++++
 .../Icu/Segmentation/CompositeBreakIterator.cs  | 132 +++++++++++
 .../Segmentation/DefaultICUTokenizerConfig.cs   | 127 ++++++++++
 .../Analysis/Icu/Segmentation/ICUTokenizer.cs   | 229 ++++++++++++++++++
 .../Icu/Segmentation/ICUTokenizerConfig.cs      |  33 +++
 .../Icu/Segmentation/ICUTokenizerFactory.cs     | 139 +++++++++++
 .../Analysis/Icu/Segmentation/ScriptIterator.cs | 206 ++++++++++++++++
 .../Icu/TokenAttributes/ScriptAttribute.cs      |  42 ++++
 .../Icu/TokenAttributes/ScriptAttributeImpl.cs  |  80 +++++++
 36 files changed, 1955 insertions(+), 1955 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs
deleted file mode 100644
index 4ca8278..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilter.cs
+++ /dev/null
@@ -1,32 +0,0 @@
-// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
-
-//using Icu;
-//using Lucene.Net.Support;
-//using System;
-//using System.Collections.Generic;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public sealed class ICUFoldingFilter : ICUNormalizer2Filter
-//    {
-//        private static readonly Normalizer2 normalizer;
-
-//        /// <summary>
-//        /// Create a new ICUFoldingFilter on the specified input
-//        /// </summary>
-//        public ICUFoldingFilter(TokenStream input)
-//            : base(input, normalizer)
-//        {
-//        }
-
-//        static ICUFoldingFilter()
-//        {
-//            normalizer = Normalizer2.GetInstance(
-//                typeof(ICUFoldingFilter).Assembly.FindAndGetManifestResourceStream(typeof(ICUFoldingFilter), "utr30.nrm"),
-//                "utr30", Normalizer2.Mode.COMPOSE);
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs
deleted file mode 100644
index c25cf93..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUFoldingFilterFactory.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
-
-//using Lucene.Net.Analysis.Util;
-//using System;
-//using System.Collections.Generic;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public class ICUFoldingFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
-//    {
-//        /// <summary>Creates a new ICUFoldingFilterFactory</summary>
-//        public ICUFoldingFilterFactory(IDictionary<string, string> args)
-//            : base(args)
-//        {
-//            if (args.Count != 0)
-//            {
-//                throw new ArgumentException("Unknown parameters: " + args);
-//            }
-//        }
-
-//        public override TokenStream Create(TokenStream input)
-//        {
-//            return new ICUFoldingFilter(input);
-//        }
-
-//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
-//        {
-//            return this;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs
deleted file mode 100644
index 4254298..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilter.cs
+++ /dev/null
@@ -1,235 +0,0 @@
-// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
-
-//using Lucene.Net.Analysis.CharFilters;
-//using Lucene.Net.Support;
-//using System;
-//using System.Collections.Generic;
-//using System.IO;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    /// <summary>
-//    /// Normalize token text with ICU's <see cref="Normalizer2"/>.
-//    /// </summary>
-//    public sealed class ICUNormalizer2CharFilter : BaseCharFilter
-//    {
-//        private static readonly int IO_BUFFER_SIZE = 128;
-
-//        private readonly Normalizer2 normalizer;
-//        private readonly StringBuilder inputBuffer = new StringBuilder();
-//        private readonly StringBuilder resultBuffer = new StringBuilder();
-
-//        private bool inputFinished;
-//        private bool afterQuickCheckYes;
-//        private int checkedInputBoundary;
-//        private int charCount;
-
-
-//        /**
-//         * Create a new Normalizer2CharFilter that combines NFKC normalization, Case
-//         * Folding, and removes Default Ignorables (NFKC_Casefold)
-//         */
-//        public ICUNormalizer2CharFilter(TextReader input)
-//            : this(input, new Normalizer2(Icu.Normalizer.UNormalizationMode.UNORM_NFKC) /*Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)*/)
-//        {
-//        }
-
-//        /**
-//         * Create a new Normalizer2CharFilter with the specified Normalizer2
-//         * @param in text
-//         * @param normalizer normalizer to use
-//         */
-//        public ICUNormalizer2CharFilter(TextReader input, Normalizer2 normalizer)
-//            : base(input)
-//        {
-//            if (normalizer == null)
-//            {
-//                throw new ArgumentNullException("normalizer");
-//            }
-//            this.normalizer = normalizer;
-//        }
-
-//        public override int Read(char[] cbuf, int off, int len)
-//        {
-//            if (off < 0) throw new ArgumentException("off < 0");
-//            if (off >= cbuf.Length) throw new ArgumentException("off >= cbuf.length");
-//            if (len <= 0) throw new ArgumentException("len <= 0");
-
-//            while (!inputFinished || inputBuffer.Length > 0 || resultBuffer.Length > 0)
-//            {
-//                int retLen;
-
-//                if (resultBuffer.Length > 0)
-//                {
-//                    retLen = OutputFromResultBuffer(cbuf, off, len);
-//                    if (retLen > 0)
-//                    {
-//                        return retLen;
-//                    }
-//                }
-
-//                int resLen = ReadAndNormalizeFromInput();
-//                if (resLen > 0)
-//                {
-//                    retLen = OutputFromResultBuffer(cbuf, off, len);
-//                    if (retLen > 0)
-//                    {
-//                        return retLen;
-//                    }
-//                }
-
-//                ReadInputToBuffer();
-//            }
-
-//            return -1;
-//        }
-
-//        private readonly char[] tmpBuffer = new char[IO_BUFFER_SIZE];
-
-//        private int ReadInputToBuffer()
-//        {
-//            int len = m_input.Read(tmpBuffer, 0, tmpBuffer.Length);
-//            if (len == -1)
-//            {
-//                inputFinished = true;
-//                return 0;
-//            }
-//            inputBuffer.Append(tmpBuffer, 0, len);
-
-//            // if checkedInputBoundary was at the end of a buffer, we need to check that char again
-//            checkedInputBoundary = Math.Max(checkedInputBoundary - 1, 0);
-//            // this loop depends on 'isInert' (changes under normalization) but looks only at characters.
-//            // so we treat all surrogates as non-inert for simplicity
-//            if (normalizer.IsInert(tmpBuffer[len - 1]) && !char.IsSurrogate(tmpBuffer[len - 1]))
-//            {
-//                return len;
-//            }
-//            else return len + ReadInputToBuffer();
-//        }
-
-//        private int ReadAndNormalizeFromInput()
-//        {
-//            if (inputBuffer.Length <= 0)
-//            {
-//                afterQuickCheckYes = false;
-//                return 0;
-//            }
-//            if (!afterQuickCheckYes)
-//            {
-//                int resLen2 = ReadFromInputWhileSpanQuickCheckYes();
-//                afterQuickCheckYes = true;
-//                if (resLen2 > 0) return resLen2;
-//            }
-//            int resLen = ReadFromIoNormalizeUptoBoundary();
-//            if (resLen > 0)
-//            {
-//                afterQuickCheckYes = false;
-//            }
-//            return resLen;
-//        }
-
-//        private int ReadFromInputWhileSpanQuickCheckYes()
-//        {
-//            int end = normalizer.spanQuickCheckYes(inputBuffer);
-//            if (end > 0)
-//            {
-//                //resultBuffer.Append(inputBuffer.subSequence(0, end));
-//                resultBuffer.Append(inputBuffer.ToString(0, end));
-//                //inputBuffer.delete(0, end);
-//                inputBuffer.Remove(0, end);
-//                checkedInputBoundary = Math.Max(checkedInputBoundary - end, 0);
-//                charCount += end;
-//            }
-//            return end;
-//        }
-
-//        private int ReadFromIoNormalizeUptoBoundary()
-//        {
-//            // if there's no buffer to normalize, return 0
-//            if (inputBuffer.Length <= 0)
-//            {
-//                return 0;
-//            }
-
-//            bool foundBoundary = false;
-//            int bufLen = inputBuffer.Length;
-
-//            while (checkedInputBoundary <= bufLen - 1)
-//            {
-//                int charLen = Character.CharCount(inputBuffer.CodePointAt(checkedInputBoundary));
-//                checkedInputBoundary += charLen;
-//                if (checkedInputBoundary < bufLen && normalizer.HasBoundaryBefore(inputBuffer
-//                  .CodePointAt(checkedInputBoundary)))
-//                {
-//                    foundBoundary = true;
-//                    break;
-//                }
-//            }
-//            if (!foundBoundary && checkedInputBoundary >= bufLen && inputFinished)
-//            {
-//                foundBoundary = true;
-//                checkedInputBoundary = bufLen;
-//            }
-
-//            if (!foundBoundary)
-//            {
-//                return 0;
-//            }
-
-//            return NormalizeInputUpto(checkedInputBoundary);
-//        }
-
-//        private int NormalizeInputUpto(int length)
-//        {
-//            int destOrigLen = resultBuffer.Length;
-//            normalizer.NormalizeSecondAndAppend(resultBuffer, inputBuffer.ToString(0, length));
-//              //inputBuffer.SubSequence(0, length));
-
-//            //inputBuffer.Delete(0, length);
-//            inputBuffer.Remove(0, length);
-//            checkedInputBoundary = Math.Max(checkedInputBoundary - length, 0);
-//            int resultLength = resultBuffer.Length - destOrigLen;
-//            RecordOffsetDiff(length, resultLength);
-//            return resultLength;
-//        }
-
-//        private void RecordOffsetDiff(int inputLength, int outputLength)
-//        {
-//            if (inputLength == outputLength)
-//            {
-//                charCount += outputLength;
-//                return;
-//            }
-//            int diff = inputLength - outputLength;
-//            int cumuDiff = LastCumulativeDiff;
-//            if (diff < 0)
-//            {
-//                for (int i = 1; i <= -diff; ++i)
-//                {
-//                    AddOffCorrectMap(charCount + i, cumuDiff - i);
-//                }
-//            }
-//            else
-//            {
-//                AddOffCorrectMap(charCount + outputLength, cumuDiff + diff);
-//            }
-//            charCount += outputLength;
-//        }
-
-//        private int OutputFromResultBuffer(char[] cbuf, int begin, int len)
-//        {
-//            len = Math.Min(resultBuffer.Length, len);
-//            //resultBuffer.GetChars(0, len, cbuf, begin);
-//            resultBuffer.CopyTo(0, cbuf, begin, len);
-//            if (len > 0)
-//            {
-//                //resultBuffer.delete(0, len);
-//                resultBuffer.Remove(0, len);
-//            }
-//            return len;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs
deleted file mode 100644
index bd4cbe5..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2CharFilterFactory.cs
+++ /dev/null
@@ -1,60 +0,0 @@
-// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
-
-//using Icu;
-//using Lucene.Net.Analysis.Util;
-//using Lucene.Net.Support;
-//using System;
-//using System.Collections.Generic;
-//using System.IO;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public class ICUNormalizer2CharFilterFactory : CharFilterFactory, IMultiTermAwareComponent
-//    {
-//        private readonly Normalizer2 normalizer;
-
-//        /// <summary>Creates a new ICUNormalizer2CharFilterFactory</summary>
-//        public ICUNormalizer2CharFilterFactory(IDictionary<string, string> args)
-//            : base(args)
-//        {
-//            string name = Get(args, "name", "NFKC");
-//            //string name = Get(args, "name", "nfkc_cf");
-//            //string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");
-//            //Normalizer2 normalizer = Normalizer2.getInstance
-//            //    (null, name, "compose".Equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
-
-//            var mode = (Icu.Normalizer.UNormalizationMode)Enum.Parse(typeof(Icu.Normalizer.UNormalizationMode), "UNORM_" + name);
-//            Normalizer2 normalizer = new Normalizer2(mode);
-
-//            string filter = Get(args, "filter");
-//            if (filter != null)
-//            {
-//                //UnicodeSet set = new UnicodeSet(filter);
-//                var set = UnicodeSet.ToCharacters(filter);
-//                if (set.Any())
-//                {
-//                    //set.freeze();
-//                    normalizer = new FilteredNormalizer2(normalizer, set);
-//                }
-//            }
-//            if (args.Count != 0)
-//            {
-//                throw new ArgumentException("Unknown parameters: " + args);
-//            }
-//            this.normalizer = normalizer;
-//        }
-
-//        public override TextReader Create(TextReader input)
-//        {
-//            return new ICUNormalizer2CharFilter(input, normalizer);
-//        }
-
-//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
-//        {
-//            return this;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs
deleted file mode 100644
index bca3d24..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2Filter.cs
+++ /dev/null
@@ -1,60 +0,0 @@
-// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
-
-//using Icu;
-//using Lucene.Net.Analysis.TokenAttributes;
-//using Lucene.Net.Support;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public class ICUNormalizer2Filter : TokenFilter
-//    {
-//        private readonly ICharTermAttribute termAtt;
-//        private readonly Normalizer2 normalizer;
-
-//        /// <summary>
-//        /// Create a new <see cref="Normalizer2Filter"/> that combines NFKC normalization, Case
-//        /// Folding, and removes Default Ignorables (NFKC_Casefold)
-//        /// </summary>
-//        /// <param name="input"></param>
-//        public ICUNormalizer2Filter(TokenStream input)
-//            : this(input, new Normalizer2(Normalizer.UNormalizationMode.UNORM_NFKC) /*Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)*/)
-//        {
-//        }
-
-//        /// <summary>
-//        /// Create a new <see cref="Normalizer2Filter"/> with the specified <see cref="Normalizer2"/>
-//        /// </summary>
-//        /// <param name="input">stream</param>
-//        /// <param name="normalizer">normalizer to use</param>
-//        public ICUNormalizer2Filter(TokenStream input, Normalizer2 normalizer)
-//            : base(input)
-//        {
-//            this.normalizer = normalizer;
-//            this.termAtt = AddAttribute<ICharTermAttribute>();
-//        }
-
-//        public override sealed bool IncrementToken()
-//        {
-//            if (m_input.IncrementToken())
-//            {
-//                var term = termAtt.ToString();
-//                try
-//                {
-//                    if (!normalizer.IsNormalized(term))
-//                    {
-//                        termAtt.SetEmpty().Append(normalizer.Normalize(term));
-//                    }
-//                }
-//                catch (System.Exception ex)
-//                {
-
-//                }
-//                return true;
-//            }
-//            else
-//            {
-//                return false;
-//            }
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs
deleted file mode 100644
index c17fb7f..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUNormalizer2FilterFactory.cs
+++ /dev/null
@@ -1,59 +0,0 @@
-// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
-
-//using Icu;
-//using Lucene.Net.Analysis.Util;
-//using Lucene.Net.Support;
-//using System;
-//using System.Collections.Generic;
-//using System.Linq;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public class ICUNormalizer2FilterFactory : TokenFilterFactory, IMultiTermAwareComponent
-//    {
-//        private readonly Normalizer2 normalizer;
-
-//        /// <summary>Creates a new ICUNormalizer2FilterFactory</summary>
-//        public ICUNormalizer2FilterFactory(IDictionary<string, string> args)
-//            : base(args)
-//        {
-//            string name = Get(args, "name", "NFKC");
-//            //string name = Get(args, "name", "nfkc_cf");
-//            //string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");
-
-//            var mode = (Normalizer.UNormalizationMode)Enum.Parse(typeof(Normalizer.UNormalizationMode), "UNORM_" + name);
-//            Normalizer2 normalizer = new Normalizer2(mode);
-
-//            //Normalizer2 normalizer = Normalizer2.getInstance
-//            //    (null, name, "compose".Equals(mode) ? Normalizer2.Mode.COMPOSE : Normalizer2.Mode.DECOMPOSE);
-
-//            string filter = Get(args, "filter");
-//            if (filter != null)
-//            {
-//                //UnicodeSet set = new UnicodeSet(filter);
-//                var set = UnicodeSet.ToCharacters(filter);
-//                if (set.Any())
-//                {
-//                    //set.freeze();
-//                    normalizer = new FilteredNormalizer2(normalizer, set);
-//                }
-//            }
-//            if (args.Count != 0)
-//            {
-//                throw new ArgumentException("Unknown parameters: " + args);
-//            }
-//            this.normalizer = normalizer;
-//        }
-
-//        // TODO: support custom normalization
-//        public override TokenStream Create(TokenStream input)
-//        {
-//            return new ICUNormalizer2Filter(input, normalizer);
-//        }
-
-//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
-//        {
-//            return this;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs
deleted file mode 100644
index 7f22c3d..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilter.cs
+++ /dev/null
@@ -1,152 +0,0 @@
-// LUCENENET TODO: Port issues - missing Transliterator dependency from icu.net
-
-//using Lucene.Net.Analysis.TokenAttributes;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public sealed class ICUTransformFilter : TokenFilter
-//    {
-//        // Transliterator to transform the text
-//        private readonly Transliterator transform;
-
-//        // Reusable position object
-//        private readonly Transliterator.Position position = new Transliterator.Position();
-
-//        // term attribute, will be updated with transformed text.
-//        private readonly ICharTermAttribute termAtt;
-
-//        // Wraps a termAttribute around the replaceable interface.
-//        private readonly ReplaceableTermAttribute replaceableAttribute = new ReplaceableTermAttribute();
-
-//        /// <summary>
-//        /// Create a new ICUTransformFilter that transforms text on the given stream.
-//        /// </summary>
-//        /// <param name="input"><see cref="TokenStream"/> to filter.</param>
-//        /// <param name="transform">Transliterator to transform the text.</param>
-//        public ICUTransformFilter(TokenStream input, Transliterator transform)
-//            : base(input)
-//        {
-//            this.transform = transform;
-//            this.termAtt = AddAttribute<ICharTermAttribute>();
-
-//            /* 
-//             * This is cheating, but speeds things up a lot.
-//             * If we wanted to use pkg-private APIs we could probably do better.
-//             */
-//            if (transform.getFilter() == null && transform is com.ibm.icu.text.RuleBasedTransliterator)
-//            {
-//                UnicodeSet sourceSet = transform.getSourceSet();
-//                if (sourceSet != null && !sourceSet.isEmpty())
-//                    transform.setFilter(sourceSet);
-//            }
-//        }
-
-//        public override bool IncrementToken()
-//        {
-//            /*
-//             * Wrap around replaceable. clear the positions, and transliterate.
-//             */
-//            if (m_input.IncrementToken())
-//            {
-//                replaceableAttribute.SetText(termAtt);
-
-//                int length = termAtt.Length;
-//                position.start = 0;
-//                position.limit = length;
-//                position.contextStart = 0;
-//                position.contextLimit = length;
-
-//                transform.FilteredTransliterate(replaceableAttribute, position, false);
-//                return true;
-//            }
-//            else
-//            {
-//                return false;
-//            }
-//        }
-
-//        /// <summary>
-//        /// Wrap a <see cref="ICharTermAttribute"/> with the Replaceable API.
-//        /// </summary>
-//        private sealed class ReplaceableTermAttribute //: IReplaceable
-//        {
-//            private char[] buffer;
-//            private int length;
-//            private ICharTermAttribute token;
-
-//            public void SetText(ICharTermAttribute token)
-//            {
-//                this.token = token;
-//                this.buffer = token.Buffer;
-//                this.length = token.Length;
-//            }
-
-//            public int Char32At(int pos)
-//            {
-//                return UTF16.charAt(buffer, 0, length, pos);
-//            }
-
-//            public char CharAt(int pos)
-//            {
-//                return buffer[pos];
-//            }
-
-//            public void Copy(int start, int limit, int dest)
-//            {
-//                char[] text = new char[limit - start];
-//                GetChars(start, limit, text, 0);
-//                Replace(dest, dest, text, 0, limit - start);
-//            }
-
-//            public void GetChars(int srcStart, int srcLimit, char[] dst, int dstStart)
-//            {
-//                System.Array.Copy(buffer, srcStart, dst, dstStart, srcLimit - srcStart);
-//            }
-
-//            public bool HasMetaData
-//            {
-//                get { return false; }
-//            }
-
-//            public int Length
-//            {
-//                get { return length; }
-//            }
-
-//            public void Replace(int start, int limit, string text)
-//            {
-//                int charsLen = text.Length;
-//                int newLength = ShiftForReplace(start, limit, charsLen);
-//                // insert the replacement text
-//                //text.getChars(0, charsLen, buffer, start);
-//                text.CopyTo(0, buffer, start, charsLen);
-//                token.Length = (length = newLength);
-//            }
-
-//            public void Replace(int start, int limit, char[] text, int charsStart,
-//                int charsLen)
-//            {
-//                // shift text if necessary for the replacement
-//                int newLength = ShiftForReplace(start, limit, charsLen);
-//                // insert the replacement text
-//                System.Array.Copy(text, charsStart, buffer, start, charsLen);
-//                token.Length = (length = newLength);
-//            }
-
-//            /// <summary>shift text (if necessary) for a replacement operation</summary>
-//            private int ShiftForReplace(int start, int limit, int charsLen)
-//            {
-//                int replacementLength = limit - start;
-//                int newLength = length - replacementLength + charsLen;
-//                // resize if necessary
-//                if (newLength > length)
-//                    buffer = token.ResizeBuffer(newLength);
-//                // if the substring being replaced is longer or shorter than the
-//                // replacement, need to shift things around
-//                if (replacementLength != charsLen && limit < length)
-//                    System.Array.Copy(buffer, limit, buffer, start + charsLen, length - limit);
-//                return newLength;
-//            }
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs
deleted file mode 100644
index 081ebf5..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/ICUTransformFilterFactory.cs
+++ /dev/null
@@ -1,38 +0,0 @@
-// LUCENENET TODO: Port issues - missing Transliterator dependency from icu.net
-
-//using Lucene.Net.Analysis.Util;
-//using System;
-//using System.Collections.Generic;
-
-//namespace Lucene.Net.Analysis.ICU
-//{
-//    public class ICUTransformFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
-//    {
-//        private readonly Transliterator transliterator;
-
-//        // TODO: add support for custom rules
-//        /// <summary>Creates a new ICUTransformFilterFactory</summary>
-//        public ICUTransformFilterFactory(IDictionary<string, string> args)
-//            : base(args)
-//        {
-//            string id = Require(args, "id");
-//            string direction = Get(args, "direction", new string[] { "forward", "reverse" }, "forward", false);
-//            int dir = "forward".Equals(direction) ? Transliterator.FORWARD : Transliterator.REVERSE;
-//            transliterator = Transliterator.getInstance(id, dir);
-//            if (args.Count != 0)
-//            {
-//                throw new ArgumentException("Unknown parameters: " + args);
-//            }
-//        }
-
-//        public override TokenStream Create(TokenStream input)
-//        {
-//            return new ICUTransformFilter(input, transliterator);
-//        }
-
-//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
-//        {
-//            return this;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs
deleted file mode 100644
index c124a88..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/BreakIteratorWrapper.cs
+++ /dev/null
@@ -1,166 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Icu;
-//using Lucene.Net.Analysis.Util;
-//using Lucene.Net.Support;
-//using System;
-//using System.Collections.Generic;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-//    /// <summary>
-//    /// Contain all the issues surrounding BreakIterators in ICU in one place.
-//    /// Basically this boils down to the fact that they aren't very friendly to any
-//    /// sort of OO design.
-//    /// <para/>
-//    /// http://bugs.icu-project.org/trac/ticket/5901: RBBI.getRuleStatus(), hoist to
-//    /// BreakIterator from RuleBasedBreakIterator
-//    /// <para/>
-//    /// DictionaryBasedBreakIterator is a subclass of RuleBasedBreakIterator, but
-//    /// doesn't actually behave as a subclass: it always returns 0 for
-//    /// getRuleStatus(): 
-//    /// http://bugs.icu-project.org/trac/ticket/4730: Thai RBBI, no boundary type
-//    /// tags
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </summary>
-//    internal abstract class BreakIteratorWrapper
-//    {
-//        protected readonly CharArrayIterator textIterator = new CharArrayIterator();
-//        protected char[] text;
-//        protected int start;
-//        protected int length;
-
-//        public abstract int Next();
-//        public abstract int Current { get; }
-//        public abstract int GetRuleStatus();
-//        public abstract void SetText(CharacterIterator text);
-
-//        public void SetText(char[] text, int start, int length)
-//        {
-//            this.text = text;
-//            this.start = start;
-//            this.length = length;
-//            textIterator.SetText(text, start, length);
-//            SetText(textIterator);
-//        }
-
-//        /**
-//         * If its a RuleBasedBreakIterator, the rule status can be used for token type. If its
-//         * any other BreakIterator, the rulestatus method is not available, so treat
-//         * it like a generic BreakIterator.
-//         */
-//        public static BreakIteratorWrapper Wrap(Icu.BreakIterator breakIterator)
-//        {
-//            if (breakIterator is Icu.RuleBasedBreakIterator)
-//                return new RBBIWrapper((Icu.RuleBasedBreakIterator)breakIterator);
-//            else
-//                return new BIWrapper(breakIterator);
-//        }
-
-//        /**
-//         * RuleBasedBreakIterator wrapper: RuleBasedBreakIterator (as long as its not
-//         * a DictionaryBasedBreakIterator) behaves correctly.
-//         */
-//        private sealed class RBBIWrapper : BreakIteratorWrapper
-//        {
-//            private readonly Icu.RuleBasedBreakIterator rbbi;
-
-//            internal RBBIWrapper(Icu.RuleBasedBreakIterator rbbi)
-//            {
-//                this.rbbi = rbbi;
-//            }
-
-//            public override int Current
-//            {
-//                get { return rbbi.Current; }
-//            }
-
-//            public override int GetRuleStatus()
-//            {
-//                return rbbi.GetRuleStatus();
-//            }
-
-//            public override int Next()
-//            {
-//                return rbbi.Next();
-//            }
-
-//            public override void SetText(CharacterIterator text)
-//            {
-//                rbbi.SetText(text);
-//            }
-//        }
-
-//        /**
-//         * Generic BreakIterator wrapper: Either the rulestatus method is not
-//         * available or always returns 0. Calculate a rulestatus here so it behaves
-//         * like RuleBasedBreakIterator.
-//         * 
-//         * Note: This is slower than RuleBasedBreakIterator.
-//         */
-//        private sealed class BIWrapper : BreakIteratorWrapper
-//        {
-//            private readonly Support.BreakIterator bi;
-//            private int status;
-
-//            internal BIWrapper(Support.BreakIterator bi)
-//            {
-//                this.bi = bi;
-//            }
-
-//            public override int Current
-//            {
-//                get { return bi.Current; }
-//            }
-
-//            public override int GetRuleStatus()
-//            {
-//                return status;
-//            }
-
-//            public override int Next()
-//            {
-//                int current = bi.Current;
-//                int next = bi.Next();
-//                status = CalcStatus(current, next);
-//                return next;
-//            }
-
-//            private int CalcStatus(int current, int next)
-//            {
-//                if (current == Support.BreakIterator.DONE || next == Support.BreakIterator.DONE)
-//                    return RuleBasedBreakIterator.WORD_NONE;
-
-//                int begin = start + current;
-//                int end = start + next;
-
-//                int codepoint;
-//                for (int i = begin; i < end; i += UTF16.getCharCount(codepoint))
-//                {
-//                    codepoint = UTF16.charAt(text, 0, end, begin);
-
-//                    if (UCharacter.isDigit(codepoint))
-//                        return RuleBasedBreakIterator.WORD_NUMBER;
-//                    else if (UCharacter.isLetter(codepoint))
-//                    {
-//                        // TODO: try to separately specify ideographic, kana? 
-//                        // [currently all bundled as letter for this case]
-//                        return RuleBasedBreakIterator.WORD_LETTER;
-//                    }
-//                }
-
-//                return RuleBasedBreakIterator.WORD_NONE;
-//            }
-
-//            public override void SetText(CharacterIterator text)
-//            {
-//                bi.SetText(text);
-//                status = RuleBasedBreakIterator.WORD_NONE;
-//            }
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs
deleted file mode 100644
index 209d583..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CharArrayIterator.cs
+++ /dev/null
@@ -1,134 +0,0 @@
-using Lucene.Net.Support;
-using System;
-using System.Diagnostics.CodeAnalysis;
-
-namespace Lucene.Net.Analysis.Icu.Segmentation
-{
-    /// <summary>
-    /// Wraps a char[] as CharacterIterator for processing with a BreakIterator
-    /// <para/>
-    /// @lucene.experimental
-    /// </summary>
-    internal sealed class CharArrayIterator : CharacterIterator
-    {
-        private char[] array;
-        private int start;
-        private int index;
-        private int length;
-        private int limit;
-
-        [WritableArray]
-        [SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
-        public char[] Text
-        {
-            get
-            {
-                return array;
-            }
-        }
-
-        public int Start
-        {
-            get { return start; }
-        }
-
-        public int Length
-        {
-            get { return length; }
-        }
-
-        /// <summary>
-        /// Set a new region of text to be examined by this iterator
-        /// </summary>
-        /// <param name="array">text buffer to examine</param>
-        /// <param name="start">offset into buffer</param>
-        /// <param name="length"> maximum length to examine</param>
-        public void SetText(char[] array, int start, int length)
-        {
-            this.array = array;
-            this.start = start;
-            this.index = start;
-            this.length = length;
-            this.limit = start + length;
-        }
-
-        public override char Current
-        {
-            get { return (index == limit) ? DONE : array[index]; }
-        }
-
-        public override char First()
-        {
-            index = start;
-            return Current;
-        }
-
-        public override int BeginIndex
-        {
-            get { return 0; }
-        }
-
-        public override int EndIndex
-        {
-            get { return length; }
-        }
-
-        public override int Index
-        {
-            get { return index - start; }
-        }
-
-        public override char Last()
-        {
-            index = (limit == start) ? limit : limit - 1;
-            return Current;
-        }
-
-        public override char Next()
-        {
-            if (++index >= limit)
-            {
-                index = limit;
-                return DONE;
-            }
-            else
-            {
-                return Current;
-            }
-        }
-
-        public override char Previous()
-        {
-            if (--index < start)
-            {
-                index = start;
-                return DONE;
-            }
-            else
-            {
-                return Current;
-            }
-        }
-
-        public override char SetIndex(int position)
-        {
-            if (position < BeginIndex || position > EndIndex)
-                throw new ArgumentException("Illegal Position: " + position);
-            index = start + position;
-            return Current;
-        }
-
-        public override string GetTextAsString()
-        {
-            return new string(array);
-        }
-
-        public override object Clone()
-        {
-            CharArrayIterator clone = new CharArrayIterator();
-            clone.SetText(array, start, length);
-            clone.index = index;
-            return clone;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs
deleted file mode 100644
index a004193..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/CompositeBreakIterator.cs
+++ /dev/null
@@ -1,132 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using System;
-//using System.Collections.Generic;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-//    /// <summary>
-//    /// An internal BreakIterator for multilingual text, following recommendations
-//    /// from: UAX #29: Unicode Text Segmentation. (http://unicode.org/reports/tr29/)
-//    /// <para/>
-//    /// See http://unicode.org/reports/tr29/#Tailoring for the motivation of this
-//    /// design.
-//    /// <para/>
-//    /// Text is first divided into script boundaries. The processing is then
-//    /// delegated to the appropriate break iterator for that specific script.
-//    /// <para/>
-//    /// This break iterator also allows you to retrieve the ISO 15924 script code
-//    /// associated with a piece of text.
-//    /// <para/>
-//    /// See also UAX #29, UTR #24
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </summary>
-//    internal sealed class CompositeBreakIterator
-//    {
-//        private readonly ICUTokenizerConfig config;
-//        private readonly BreakIteratorWrapper[] wordBreakers = new BreakIteratorWrapper[UScript.CODE_LIMIT];
-
-//        private BreakIteratorWrapper rbbi;
-//        private readonly ScriptIterator scriptIterator;
-
-//        private char[] text;
-
-//        public CompositeBreakIterator(ICUTokenizerConfig config)
-//        {
-//            this.config = config;
-//            this.scriptIterator = new ScriptIterator(config.CombineCJ);
-//        }
-
-//        /**
-//         * Retrieve the next break position. If the RBBI range is exhausted within the
-//         * script boundary, examine the next script boundary.
-//         * 
-//         * @return the next break position or BreakIterator.DONE
-//         */
-//        public int Next()
-//        {
-//            int next = rbbi.Next();
-//            while (next == Support.BreakIterator.DONE && scriptIterator.Next())
-//            {
-//                rbbi = GetBreakIterator(scriptIterator.GetScriptCode());
-//                rbbi.SetText(text, scriptIterator.GetScriptStart(),
-//                    scriptIterator.GetScriptLimit() - scriptIterator.GetScriptStart());
-//                next = rbbi.Next();
-//            }
-//            return (next == Support.BreakIterator.DONE) ? Support.BreakIterator.DONE : next
-//                + scriptIterator.GetScriptStart();
-//        }
-
-//        /**
-//         * Retrieve the current break position.
-//         * 
-//         * @return the current break position or BreakIterator.DONE
-//         */
-//        public int Current
-//        {
-//            get
-//            {
-//                int current = rbbi.Current;
-//                return (current == Support.BreakIterator.DONE) ? Support.BreakIterator.DONE : current
-//                    + scriptIterator.GetScriptStart();
-//            }
-//        }
-
-//        /**
-//         * Retrieve the rule status code (token type) from the underlying break
-//         * iterator
-//         * 
-//         * @return rule status code (see RuleBasedBreakIterator constants)
-//         */
-//        public int GetRuleStatus()
-//        {
-//            return rbbi.GetRuleStatus();
-//        }
-
-//        /**
-//         * Retrieve the UScript script code for the current token. This code can be
-//         * decoded with UScript into a name or ISO 15924 code.
-//         * 
-//         * @return UScript script code for the current token.
-//         */
-//        public int GetScriptCode()
-//        {
-//            return scriptIterator.GetScriptCode();
-//        }
-
-//        /**
-//         * Set a new region of text to be examined by this iterator
-//         * 
-//         * @param text buffer of text
-//         * @param start offset into buffer
-//         * @param length maximum length to examine
-//         */
-//        public void SetText(char[] text, int start, int length)
-//        {
-//            this.text = text;
-//            scriptIterator.SetText(text, start, length);
-//            if (scriptIterator.Next())
-//            {
-//                rbbi = GetBreakIterator(scriptIterator.GetScriptCode());
-//                rbbi.SetText(text, scriptIterator.GetScriptStart(),
-//                    scriptIterator.GetScriptLimit() - scriptIterator.GetScriptStart());
-//            }
-//            else
-//            {
-//                rbbi = GetBreakIterator(UScript.COMMON);
-//                rbbi.SetText(text, 0, 0);
-//            }
-//        }
-
-//        private BreakIteratorWrapper GetBreakIterator(int scriptCode)
-//        {
-//            if (wordBreakers[scriptCode] == null)
-//                wordBreakers[scriptCode] = BreakIteratorWrapper.Wrap(config.GetBreakIterator(scriptCode));
-//            return wordBreakers[scriptCode];
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs
deleted file mode 100644
index fc2a989..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/DefaultICUTokenizerConfig.cs
+++ /dev/null
@@ -1,127 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Icu;
-//using Lucene.Net.Analysis.Standard;
-//using Lucene.Net.Support;
-//using System;
-//using System.Collections.Generic;
-//using System.Globalization;
-//using System.IO;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-//    /// <summary>
-//    /// Default <see cref="ICUTokenizerConfig"/> that is generally applicable
-//    /// to many languages.
-//    /// </summary>
-//    /// <remarks>
-//    /// Generally tokenizes Unicode text according to UAX#29 
-//    /// ({@link BreakIterator#getWordInstance(ULocale) BreakIterator.getWordInstance(ULocale.ROOT)}), 
-//    /// but with the following tailorings:
-//    /// <list type="bullet">
-//    ///     <item><description>Thai, Lao, and CJK text is broken into words with a dictionary.</description></item>
-//    ///     <item><description>Myanmar, and Khmer text is broken into syllables based on custom BreakIterator rules.</description></item>
-//    /// </list>
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </remarks>
-//    public class DefaultICUTokenizerConfig : ICUTokenizerConfig
-//    {
-//        /** Token type for words containing ideographic characters */
-//        public static readonly string WORD_IDEO = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
-//        /** Token type for words containing Japanese hiragana */
-//        public static readonly string WORD_HIRAGANA = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
-//        /** Token type for words containing Japanese katakana */
-//        public static readonly string WORD_KATAKANA = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
-//        /** Token type for words containing Korean hangul  */
-//        public static readonly string WORD_HANGUL = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
-//        /** Token type for words that contain letters */
-//        public static readonly string WORD_LETTER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
-//        /** Token type for words that appear to be numbers */
-//        public static readonly string WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
-
-//        /*
-//         * the default breakiterators in use. these can be expensive to
-//         * instantiate, cheap to clone.
-//         */
-//        // we keep the cjk breaking separate, thats because it cannot be customized (because dictionary
-//        // is only triggered when kind = WORD, but kind = LINE by default and we have no non-evil way to change it)
-//        private static readonly Icu.BreakIterator cjkBreakIterator = new Icu.RuleBasedBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, new Locale()); //BreakIterator.getWordInstance(ULocale.ROOT);
-//                                                                                                                                                                                // the same as ROOT, except no dictionary segmentation for cjk
-//        private static readonly Icu.BreakIterator defaultBreakIterator =
-//            ReadBreakIterator("Default.brk");
-//        private static readonly Icu.BreakIterator khmerBreakIterator =
-//            ReadBreakIterator("Khmer.brk");
-//        private static readonly Icu.BreakIterator myanmarBreakIterator =
-//            ReadBreakIterator("Myanmar.brk");
-
-//        // TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
-//        private readonly bool cjkAsWords;
-
-//        /** 
-//         * Creates a new config. This object is lightweight, but the first
-//         * time the class is referenced, breakiterators will be initialized.
-//         * @param cjkAsWords true if cjk text should undergo dictionary-based segmentation, 
-//         *                   otherwise text will be segmented according to UAX#29 defaults.
-//         *                   If this is true, all Han+Hiragana+Katakana words will be tagged as
-//         *                   IDEOGRAPHIC.
-//         */
-//        public DefaultICUTokenizerConfig(bool cjkAsWords)
-//        {
-//            this.cjkAsWords = cjkAsWords;
-//        }
-
-//        public override bool CombineCJ
-//        {
-//            get { return cjkAsWords; }
-//        }
-
-//        public override Icu.BreakIterator GetBreakIterator(int script)
-//        {
-//            switch (script)
-//            {
-//                case UScript.KHMER: return (Icu.BreakIterator)khmerBreakIterator.Clone();
-//                case UScript.MYANMAR: return (Icu.BreakIterator)myanmarBreakIterator.Clone();
-//                case UScript.JAPANESE: return (Icu.BreakIterator)cjkBreakIterator.Clone();
-//                default: return (Icu.BreakIterator)defaultBreakIterator.Clone();
-//            }
-//        }
-
-//        public override string GetType(int script, int ruleStatus)
-//        {
-//            switch (ruleStatus)
-//            {
-//                case RuleBasedBreakIterator.WORD_IDEO:
-//                    return WORD_IDEO;
-//                case RuleBasedBreakIterator.WORD_KANA:
-//                    return script == UScript.HIRAGANA ? WORD_HIRAGANA : WORD_KATAKANA;
-//                case RuleBasedBreakIterator.WORD_LETTER:
-//                    return script == UScript.HANGUL ? WORD_HANGUL : WORD_LETTER;
-//                case RuleBasedBreakIterator.WORD_NUMBER:
-//                    return WORD_NUMBER;
-//                default: /* some other custom code */
-//                    return "<OTHER>";
-//            }
-//        }
-
-//        private static RuleBasedBreakIterator ReadBreakIterator(string filename)
-//        {
-//            Stream @is =
-//              typeof(DefaultICUTokenizerConfig).Assembly.FindAndGetManifestResourceStream(typeof(DefaultICUTokenizerConfig), filename);
-//            try
-//            {
-//                RuleBasedBreakIterator bi =
-//                    RuleBasedBreakIterator.GetInstanceFromCompiledRules(@is);
-//                @is.Dispose();
-//                return bi;
-//            }
-//            catch (IOException e)
-//            {
-//                throw new Exception(e.ToString(), e);
-//            }
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs
deleted file mode 100644
index 7677c0c..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizer.cs
+++ /dev/null
@@ -1,229 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Lucene.Net.Analysis.ICU.TokenAttributes;
-//using Lucene.Net.Analysis.TokenAttributes;
-//using System;
-//using System.Collections.Generic;
-//using System.Diagnostics;
-//using System.IO;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-//    /// <summary>
-//    /// Breaks text into words according to UAX #29: Unicode Text Segmentation
-//    /// (http://www.unicode.org/reports/tr29/)
-//    /// <para/>
-//    /// Words are broken across script boundaries, then segmented according to
-//    /// the BreakIterator and typing provided by the <see cref="ICUTokenizerConfig"/>
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </summary>
-//    /// <seealso cref="ICUTokenizerConfig"/>
-//    public sealed class ICUTokenizer : Tokenizer
-//    {
-//        private static readonly int IOBUFFER = 4096;
-//        private readonly char[] buffer = new char[IOBUFFER];
-//        /** true length of text in the buffer */
-//        private int length = 0;
-//        /** length in buffer that can be evaluated safely, up to a safe end point */
-//        private int usableLength = 0;
-//        /** accumulated offset of previous buffers for this reader, for offsetAtt */
-//        private int offset = 0;
-
-//        private readonly CompositeBreakIterator breaker; /* tokenizes a char[] of text */
-//        private readonly ICUTokenizerConfig config;
-//        private readonly IOffsetAttribute offsetAtt;
-//        private readonly ICharTermAttribute termAtt;
-//        private readonly ITypeAttribute typeAtt;
-//        private readonly IScriptAttribute scriptAtt;
-
-//        /**
-//        * Construct a new ICUTokenizer that breaks text into words from the given
-//        * Reader.
-//        * <p>
-//        * The default script-specific handling is used.
-//        * <p>
-//        * The default attribute factory is used.
-//        * 
-//        * @param input Reader containing text to tokenize.
-//        * @see DefaultICUTokenizerConfig
-//        */
-//        public ICUTokenizer(TextReader input)
-//            : this(input, new DefaultICUTokenizerConfig(true))
-//        {
-//        }
-
-//        /**
-//         * Construct a new ICUTokenizer that breaks text into words from the given
-//         * Reader, using a tailored BreakIterator configuration.
-//         * <p>
-//         * The default attribute factory is used.
-//         *
-//         * @param input Reader containing text to tokenize.
-//         * @param config Tailored BreakIterator configuration 
-//         */
-//        public ICUTokenizer(TextReader input, ICUTokenizerConfig config)
-//            : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, config)
-//        {
-//        }
-
-//        /**
-//         * Construct a new ICUTokenizer that breaks text into words from the given
-//         * Reader, using a tailored BreakIterator configuration.
-//         *
-//         * @param factory AttributeFactory to use
-//         * @param input Reader containing text to tokenize.
-//         * @param config Tailored BreakIterator configuration 
-//         */
-//        public ICUTokenizer(AttributeFactory factory, TextReader input, ICUTokenizerConfig config)
-//            : base(factory, input)
-//        {
-//            this.config = config;
-//            breaker = new CompositeBreakIterator(config);
-
-//            this.offsetAtt = AddAttribute<IOffsetAttribute>();
-//            this.termAtt = AddAttribute<ICharTermAttribute>();
-//            this.typeAtt = AddAttribute<ITypeAttribute>();
-//            this.scriptAtt = AddAttribute<IScriptAttribute>();
-//        }
-
-
-//        public override bool IncrementToken()
-//        {
-//            ClearAttributes();
-//            if (length == 0)
-//                Refill();
-//            while (!IncrementTokenBuffer())
-//            {
-//                Refill();
-//                if (length <= 0) // no more bytes to read;
-//                    return false;
-//            }
-//            return true;
-//        }
-
-
-//        public override void Reset()
-//        {
-//            base.Reset();
-//            breaker.SetText(buffer, 0, 0);
-//            length = usableLength = offset = 0;
-//        }
-
-//        public override void End()
-//        {
-//            base.End();
-//            int finalOffset = (length < 0) ? offset : offset + length;
-//            offsetAtt.SetOffset(CorrectOffset(finalOffset), CorrectOffset(finalOffset));
-//        }
-
-//        /*
-//         * This tokenizes text based upon the longest matching rule, and because of 
-//         * this, isn't friendly to a Reader.
-//         * 
-//         * Text is read from the input stream in 4kB chunks. Within a 4kB chunk of
-//         * text, the last unambiguous break point is found (in this implementation:
-//         * white space character) Any remaining characters represent possible partial
-//         * words, so are appended to the front of the next chunk.
-//         * 
-//         * There is the possibility that there are no unambiguous break points within
-//         * an entire 4kB chunk of text (binary data). So there is a maximum word limit
-//         * of 4kB since it will not try to grow the buffer in this case.
-//         */
-
-//        /**
-//         * Returns the last unambiguous break position in the text.
-//         * 
-//         * @return position of character, or -1 if one does not exist
-//         */
-//        private int FindSafeEnd()
-//        {
-//            for (int i = length - 1; i >= 0; i--)
-//                if (char.IsWhiteSpace(buffer[i]))
-//                    return i + 1;
-//            return -1;
-//        }
-
-//        /**
-//         * Refill the buffer, accumulating the offset and setting usableLength to the
-//         * last unambiguous break position
-//         * 
-//         * @throws IOException If there is a low-level I/O error.
-//         */
-//        private void Refill()
-//        {
-//            offset += usableLength;
-//            int leftover = length - usableLength;
-//            System.Array.Copy(buffer, usableLength, buffer, 0, leftover);
-//            int requested = buffer.Length - leftover;
-//            int returned = Read(m_input, buffer, leftover, requested);
-//            length = returned + leftover;
-//            if (returned < requested) /* reader has been emptied, process the rest */
-//                usableLength = length;
-//            else
-//            { /* still more data to be read, find a safe-stopping place */
-//                usableLength = FindSafeEnd();
-//                if (usableLength < 0)
-//                    usableLength = length; /*
-//                                * more than IOBUFFER of text without space,
-//                                * gonna possibly truncate tokens
-//                                */
-//            }
-
-//            breaker.SetText(buffer, 0, Math.Max(0, usableLength));
-//        }
-
-//        // TODO: refactor to a shared readFully somewhere
-//        // (NGramTokenizer does this too):
-//        /** commons-io's readFully, but without bugs if offset != 0 */
-//        private static int Read(TextReader input, char[] buffer, int offset, int length)
-//        {
-//            Debug.Assert(length >= 0, "length must not be negative: " + length);
-
-//            int remaining = length;
-//            while (remaining > 0)
-//            {
-//                int location = length - remaining;
-//                int count = input.Read(buffer, offset + location, remaining);
-//                if (-1 == count)
-//                { // EOF
-//                    break;
-//                }
-//                remaining -= count;
-//            }
-//            return length - remaining;
-//        }
-
-//        /*
-//         * return true if there is a token from the buffer, or null if it is
-//         * exhausted.
-//         */
-//        private bool IncrementTokenBuffer()
-//        {
-//            int start = breaker.Current;
-//            if (start == Support.BreakIterator.DONE)
-//                return false; // BreakIterator exhausted
-
-//            // find the next set of boundaries, skipping over non-tokens (rule status 0)
-//            int end = breaker.Next();
-//            while (start != Support.BreakIterator.DONE && breaker.GetRuleStatus() == 0)
-//            {
-//                start = end;
-//                end = breaker.Next();
-//            }
-
-//            if (start == Support.BreakIterator.DONE)
-//                return false; // BreakIterator exhausted
-
-//            termAtt.CopyBuffer(buffer, start, end - start);
-//            offsetAtt.SetOffset(CorrectOffset(offset + start), CorrectOffset(offset + end));
-//            typeAtt.Type = config.GetType(breaker.GetScriptCode(), breaker.GetRuleStatus());
-//            scriptAtt.Code = breaker.GetScriptCode();
-
-//            return true;
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs
deleted file mode 100644
index 0c13316..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerConfig.cs
+++ /dev/null
@@ -1,33 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Lucene.Net.Support;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-//    /// <summary>
-//    /// Class that allows for tailored Unicode Text Segmentation on
-//    /// a per-writing system basis.
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </summary>
-//    public abstract class ICUTokenizerConfig
-//    {
-//        /// <summary>
-//        /// Sole constructor. (For invocation by subclass 
-//        /// constructors, typically implicit.)
-//        /// </summary>
-//        public ICUTokenizerConfig() { }
-//        /// <summary>
-//        /// Return a breakiterator capable of processing a given script.
-//        /// </summary>
-//        public abstract Icu.BreakIterator GetBreakIterator(int script);
-//        /// <summary>
-//        /// Return a token type value for a given script and BreakIterator rule status.
-//        /// </summary>
-//        public abstract string GetType(int script, int ruleStatus);
-//        /// <summary>
-//        /// true if Han, Hiragana, and Katakana scripts should all be returned as Japanese
-//        /// </summary>
-//        public abstract bool CombineCJ { get; }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs
deleted file mode 100644
index 14aa9c0..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ICUTokenizerFactory.cs
+++ /dev/null
@@ -1,139 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Icu;
-//using Lucene.Net.Analysis.Util;
-//using Lucene.Net.Support;
-//using Lucene.Net.Util;
-//using System;
-//using System.Collections.Generic;
-//using System.Diagnostics;
-//using System.IO;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-
-//    public class ICUTokenizerFactory : TokenizerFactory, IResourceLoaderAware
-//    {
-//        internal static readonly string RULEFILES = "rulefiles";
-//        private readonly IDictionary<int, string> tailored;
-//        private ICUTokenizerConfig config;
-//        private readonly bool cjkAsWords;
-
-//        /// <summary>Creates a new ICUTokenizerFactory</summary>
-//        public ICUTokenizerFactory(IDictionary<string, string> args)
-//            : base(args)
-//        {
-//            tailored = new Dictionary<int, string>();
-//            string rulefilesArg = Get(args, RULEFILES);
-//            if (rulefilesArg != null)
-//            {
-//                IList<string> scriptAndResourcePaths = SplitFileNames(rulefilesArg);
-//                foreach (string scriptAndResourcePath in scriptAndResourcePaths)
-//                {
-//                    int colonPos = scriptAndResourcePath.IndexOf(":");
-//                    string scriptCode = scriptAndResourcePath.Substring(0, colonPos - 0).Trim();
-//                    string resourcePath = scriptAndResourcePath.Substring(colonPos + 1).Trim();
-//                    tailored[UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode)] = resourcePath;
-//                }
-//            }
-//            cjkAsWords = GetBoolean(args, "cjkAsWords", true);
-//            if (args.Count != 0)
-//            {
-//                throw new ArgumentException("Unknown parameters: " + args);
-//            }
-//        }
-
-//        public virtual void Inform(IResourceLoader loader)
-//        {
-//            Debug.Assert(tailored != null, "init must be called first!");
-//            if (tailored.Count == 0)
-//            {
-//                config = new DefaultICUTokenizerConfig(cjkAsWords);
-//            }
-//            else
-//            {
-//                config = new DefaultICUTokenizerConfigAnonymousHelper(cjkAsWords, tailored, loader);
-
-//                //BreakIterator[] breakers = new BreakIterator[UScript.CODE_LIMIT];
-//                //foreach (var entry in tailored)
-//                //{
-//                //    int code = entry.Key;
-//                //    string resourcePath = entry.Value;
-//                //    breakers[code] = ParseRules(resourcePath, loader);
-//                //}
-//                //            config = new DefaultICUTokenizerConfig(cjkAsWords)
-//                //            {
-
-//                //    public override BreakIterator GetBreakIterator(int script)
-//                //    {
-//                //        if (breakers[script] != null)
-//                //        {
-//                //            return (BreakIterator)breakers[script].clone();
-//                //        }
-//                //        else
-//                //        {
-//                //            return base.GetBreakIterator(script);
-//                //        }
-//                //    }
-//                //    // TODO: we could also allow codes->types mapping
-//                //};
-//            }
-//        }
-
-//        private class DefaultICUTokenizerConfigAnonymousHelper : DefaultICUTokenizerConfig
-//        {
-//            private readonly Icu.BreakIterator[] breakers;
-//            public DefaultICUTokenizerConfigAnonymousHelper(bool cjkAsWords, IDictionary<int, string> tailored, IResourceLoader loader)
-//                : base(cjkAsWords)
-//            {
-//                breakers = new Icu.BreakIterator[UScript.CODE_LIMIT];
-//                foreach (var entry in tailored)
-//                {
-//                    int code = entry.Key;
-//                    string resourcePath = entry.Value;
-//                    breakers[code] = ParseRules(resourcePath, loader);
-//                }
-//            }
-
-//            public override Icu.BreakIterator GetBreakIterator(int script)
-//            {
-//                if (breakers[script] != null)
-//                {
-//                    return (Icu.BreakIterator)breakers[script].Clone();
-//                }
-//                else
-//                {
-//                    return base.GetBreakIterator(script);
-//                }
-//            }
-
-//            private Icu.BreakIterator ParseRules(string filename, IResourceLoader loader)
-//            {
-//                StringBuilder rules = new StringBuilder();
-//                Stream rulesStream = loader.OpenResource(filename);
-//                using (TextReader reader = IOUtils.GetDecodingReader(rulesStream, Encoding.UTF8))
-//                {
-//                    string line = null;
-//                    while ((line = reader.ReadLine()) != null)
-//                    {
-//                        if (!line.StartsWith("#", StringComparison.Ordinal))
-//                        {
-//                            rules.Append(line);
-//                        }
-//                        rules.Append('\n');
-//                    }
-//                }
-//                return new RuleBasedBreakIterator(rules.ToString());
-//            }
-//        }
-
-//        public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
-//        {
-//            Debug.Assert(config != null, "inform must be called first!");
-//            return new ICUTokenizer(factory, input, config);
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs
deleted file mode 100644
index f328851..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/Segmentation/ScriptIterator.cs
+++ /dev/null
@@ -1,206 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using System;
-//using System.Collections.Generic;
-//using System.Linq;
-//using System.Text;
-//using System.Text.RegularExpressions;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.Segmentation
-//{
-//    /// <summary>
-//    /// An iterator that locates ISO 15924 script boundaries in text. 
-//    /// </summary>
-//    /// <remarks>
-//    /// This is not the same as simply looking at the Unicode block, or even the 
-//    /// Script property. Some characters are 'common' across multiple scripts, and
-//    /// some 'inherit' the script value of text surrounding them.
-//    /// <para/>
-//    /// This is similar to ICU (internal-only) UScriptRun, with the following
-//    /// differences:
-//    /// <list type="bullet">
-//    ///     <item><description>
-//    ///         Doesn't attempt to match paired punctuation. For tokenization purposes, this
-//    ///         is not necessary. It's also quite expensive. 
-//    ///     </description></item>
-//    ///     <item><description>
-//    ///         Non-spacing marks inherit the script of their base character, following 
-//    ///         recommendations from UTR #24.
-//    ///     </description></item>
-//    /// </list>
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </remarks>
-//    internal sealed class ScriptIterator
-//    {
-//        private char[] text;
-//        private int start;
-//        private int limit;
-//        private int index;
-
-//        private int scriptStart;
-//        private int scriptLimit;
-//        private int scriptCode;
-
-//        private readonly bool combineCJ;
-
-//        /**
-//         * @param combineCJ if true: Han,Hiragana,Katakana will all return as {@link UScript#JAPANESE}
-//         */
-//        internal ScriptIterator(bool combineCJ)
-//        {
-//            this.combineCJ = combineCJ;
-//        }
-
-//        /**
-//         * Get the start of this script run
-//         * 
-//         * @return start position of script run
-//         */
-//        public int ScriptStart
-//        {
-//            get { return scriptStart; }
-//        }
-
-//        /**
-//         * Get the index of the first character after the end of this script run
-//         * 
-//         * @return position of the first character after this script run
-//         */
-//        public int ScriptLimit
-//        {
-//            get { return scriptLimit; }
-//        }
-
-//        /**
-//         * Get the UScript script code for this script run
-//         * 
-//         * @return code for the script of the current run
-//         */
-//        public int ScriptCode
-//        {
-//            get { return scriptCode; }
-//        }
-
-//        /**
-//         * Iterates to the next script run, returning true if one exists.
-//         * 
-//         * @return true if there is another script run, false otherwise.
-//         */
-//        public bool Next()
-//        {
-//            if (scriptLimit >= limit)
-//                return false;
-
-//            scriptCode = UScript.COMMON;
-//            scriptStart = scriptLimit;
-
-//            while (index < limit)
-//            {
-//                //int ch = UTF16.charAt(text, start, limit, index - start);
-//                int ch = Encoding.Unicode.(text, start, limit);
-//                int sc = GetScript(ch);
-
-//                /*
-//                 * From UTR #24: Implementations that determine the boundaries between
-//                 * characters of given scripts should never break between a non-spacing
-//                 * mark and its base character. Thus for boundary determinations and
-//                 * similar sorts of processing, a non-spacing mark — whatever its script
-//                 * value — should inherit the script value of its base character.
-//                 */
-//                if (isSameScript(scriptCode, sc)
-//                    || UCharacter.getType(ch) == ECharacterCategory.NON_SPACING_MARK)
-//                {
-//                    //index += UTF16.getCharCount(ch);
-//                    index += Encoding.Unicode.GetCharCount()
-
-//                    /*
-//                     * Inherited or Common becomes the script code of the surrounding text.
-//                     */
-//                    if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED)
-//                    {
-//                        scriptCode = sc;
-//                    }
-
-//                }
-//                else
-//                {
-//                    break;
-//                }
-//            }
-
-//            scriptLimit = index;
-//            return true;
-//        }
-
-//        /** Determine if two scripts are compatible. */
-//        private static bool IsSameScript(int scriptOne, int scriptTwo)
-//        {
-//            return scriptOne <= UScript.INHERITED || scriptTwo <= UScript.INHERITED
-//                || scriptOne == scriptTwo;
-//        }
-
-//        /**
-//         * Set a new region of text to be examined by this iterator
-//         * 
-//         * @param text text buffer to examine
-//         * @param start offset into buffer
-//         * @param length maximum length to examine
-//         */
-//        public void SetText(char[] text, int start, int length)
-//        {
-//            this.text = text;
-//            this.start = start;
-//            this.index = start;
-//            this.limit = start + length;
-//            this.scriptStart = start;
-//            this.scriptLimit = start;
-//            this.scriptCode = UScript.INVALID_CODE;
-//        }
-
-//        /** linear fast-path for basic latin case */
-//        private static readonly int[] basicLatin = new int[128];
-
-//        static ScriptIterator()
-//        {
-//            for (int i = 0; i < basicLatin.Length; i++)
-//                basicLatin[i] = UScript.GetScript(i);
-//        }
-
-//        /** fast version of UScript.getScript(). Basic Latin is an array lookup */
-//        private int GetScript(int codepoint)
-//        {
-//            if (0 <= codepoint && codepoint < basicLatin.Length)
-//            {
-//                return basicLatin[codepoint];
-//            }
-//            else
-//            {
-//                //int script = UScript.GetScript(codepoint);
-//                if (combineCJ)
-//                {
-//                    if (Regex.IsMatch(new string(Support.Character.ToChars(codepoint)), @"\p{IsHangulCompatibilityJamo}+|\p{IsHiragana}+|\p{IsKatakana}+"))
-//                    //if (script == UScript.HAN || script == UScript.HIRAGANA || script == UScript.KATAKANA)
-//                    {
-//                        return UScript.JAPANESE;
-//                    }
-//                    else if (codepoint >= 0xFF10 && codepoint <= 0xFF19)
-//                    {
-//                        // when using CJK dictionary breaking, don't let full width numbers go to it, otherwise
-//                        // they are treated as punctuation. we currently have no cleaner way to fix this!
-//                        return UScript.LATIN;
-//                    }
-//                    else
-//                    {
-//                        return script;
-//                    }
-//                }
-//                else
-//                {
-//                    return script;
-//                }
-//            }
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs
deleted file mode 100644
index abc1ae2..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttribute.cs
+++ /dev/null
@@ -1,42 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Lucene.Net.Util;
-//using System;
-//using System.Collections.Generic;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.TokenAttributes
-//{
-//    /// <summary>
-//    /// This attribute stores the UTR #24 script value for a token of text.
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </summary>
-//    public interface IScriptAttribute : IAttribute
-//    {
-//        /**
-//   * Get the numeric code for this script value.
-//   * This is the constant value from {@link UScript}.
-//   * @return numeric code
-//   */
-//        int Code { get; set; }
-//        ///**
-//        // * Set the numeric code for this script value.
-//        // * This is the constant value from {@link UScript}.
-//        // * @param code numeric code
-//        // */
-//        //public void setCode(int code);
-//        /**
-//         * Get the full name.
-//         * @return UTR #24 full name.
-//         */
-//        string GetName();
-//        /**
-//         * Get the abbreviated name.
-//         * @return UTR #24 abbreviated name.
-//         */
-//        string GetShortName();
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs b/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs
deleted file mode 100644
index f97ccf1..0000000
--- a/src/Lucene.Net.Analysis.ICU/Analysis/ICU/TokenAttributes/ScriptAttributeImpl.cs
+++ /dev/null
@@ -1,80 +0,0 @@
-// LUCENENET TODO: Port issues - missing dependencies
-
-//using Lucene.Net.Util;
-//using System.Collections.Generic;
-//using System.Linq;
-//using System.Text;
-//using System.Threading.Tasks;
-
-//namespace Lucene.Net.Analysis.ICU.TokenAttributes
-//{
-//    /// <summary>
-//    /// Implementation of <see cref="IScriptAttribute"/> that stores the script
-//    /// as an integer.
-//    /// <para/>
-//    /// @lucene.experimental
-//    /// </summary>
-//    public class ScriptAttribute : Attribute, IScriptAttribute, System.ICloneable
-//    {
-//        private int code = UScript.COMMON;
-
-//        /** Initializes this attribute with <code>UScript.COMMON</code> */
-//        public ScriptAttribute() { }
-
-//        public virtual int Code
-//        {
-//            get { return code; }
-//            set { code = value; }
-//        }
-
-//        public virtual string GetName()
-//        {
-//            return UScript.GetName(code);
-//        }
-
-//        public virtual string GetShortName()
-//        {
-//            return UScript.GetShortName(code);
-//        }
-
-//        public override void Clear()
-//        {
-//            code = UScript.COMMON;
-//        }
-
-//        public override void CopyTo(IAttribute target)
-//        {
-//            ScriptAttribute t = (ScriptAttribute)target;
-//            t.Code = code;
-//        }
-
-//        public override bool Equals(object other)
-//        {
-//            if (this == other)
-//            {
-//                return true;
-//            }
-
-//            if (other is ScriptAttribute)
-//            {
-//                return ((ScriptAttribute)other).code == code;
-//            }
-
-//            return false;
-//        }
-
-//        public override int GetHashCode()
-//        {
-//            return code;
-//        }
-
-//        public override void ReflectWith(IAttributeReflector reflector)
-//        {
-//            // when wordbreaking CJK, we use the 15924 code Japanese (Han+Hiragana+Katakana) to 
-//            // mark runs of Chinese/Japanese. our use is correct (as for chinese Han is a subset), 
-//            // but this is just to help prevent confusion.
-//            string name = code == UScript.JAPANESE ? "Chinese/Japanese" : GetName();
-//            reflector.Reflect<IScriptAttribute>("script", name);
-//        }
-//    }
-//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilter.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilter.cs
new file mode 100644
index 0000000..4ca8278
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilter.cs
@@ -0,0 +1,32 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Icu;
+//using Lucene.Net.Support;
+//using System;
+//using System.Collections.Generic;
+//using System.Linq;
+//using System.Text;
+//using System.Threading.Tasks;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public sealed class ICUFoldingFilter : ICUNormalizer2Filter
+//    {
+//        private static readonly Normalizer2 normalizer;
+
+//        /// <summary>
+//        /// Create a new ICUFoldingFilter on the specified input
+//        /// </summary>
+//        public ICUFoldingFilter(TokenStream input)
+//            : base(input, normalizer)
+//        {
+//        }
+
+//        static ICUFoldingFilter()
+//        {
+//            normalizer = Normalizer2.GetInstance(
+//                typeof(ICUFoldingFilter).Assembly.FindAndGetManifestResourceStream(typeof(ICUFoldingFilter), "utr30.nrm"),
+//                "utr30", Normalizer2.Mode.COMPOSE);
+//        }
+//    }
+//}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1191c20d/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilterFactory.cs
new file mode 100644
index 0000000..c25cf93
--- /dev/null
+++ b/src/Lucene.Net.Analysis.ICU/Analysis/Icu/ICUFoldingFilterFactory.cs
@@ -0,0 +1,31 @@
+// LUCENENET TODO: Port issues - missing Normalizer2 dependency from icu.net
+
+//using Lucene.Net.Analysis.Util;
+//using System;
+//using System.Collections.Generic;
+
+//namespace Lucene.Net.Analysis.ICU
+//{
+//    public class ICUFoldingFilterFactory : TokenFilterFactory, IMultiTermAwareComponent
+//    {
+//        /// <summary>Creates a new ICUFoldingFilterFactory</summary>
+//        public ICUFoldingFilterFactory(IDictionary<string, string> args)
+//            : base(args)
+//        {
+//            if (args.Count != 0)
+//            {
+//                throw new ArgumentException("Unknown parameters: " + args);
+//            }
+//        }
+
+//        public override TokenStream Create(TokenStream input)
+//        {
+//            return new ICUFoldingFilter(input);
+//        }
+
+//        public virtual AbstractAnalysisFactory GetMultiTermComponent()
+//        {
+//            return this;
+//        }
+//    }
+//}


Mime
View raw message