lucenenet-commits mailing list archives

From nightowl...@apache.org
Subject [37/50] [abbrv] lucenenet git commit: Lucene.Net.Analysis.SmartCn: Renamed HHMM namespace to Hhmm to follow .NET conventions better
Date Sat, 09 Sep 2017 00:31:55 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegGraph.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegGraph.cs b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegGraph.cs
new file mode 100644
index 0000000..af28a90
--- /dev/null
+++ b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegGraph.cs
@@ -0,0 +1,161 @@
+// lucene version compatibility level: 4.8.1
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Graph representing possible tokens at each start offset in the sentence.
+    /// <para>
+    /// For each start offset, a list of possible tokens is stored.
+    /// </para>
+    /// @lucene.experimental
+    /// </summary>
+    internal class SegGraph
+    {
+        /// <summary>
+        /// Map of start offsets to <see cref="T:IList{SegToken}"/> of tokens at that position
+        /// </summary>
+        private IDictionary<int, IList<SegToken>> tokenListTable = new Dictionary<int, IList<SegToken>>();
+
+        private int maxStart = -1;
+
+        /// <summary>
+        /// Returns <c>true</c> if a mapping for the specified start offset exists
+        /// </summary>
+        /// <param name="s">startOffset</param>
+        /// <returns><c>true</c> if there are tokens for the startOffset</returns>
+        public virtual bool IsStartExist(int s)
+        {
+            //return tokenListTable.get(s) != null;
+            IList<SegToken> result;
+            return tokenListTable.TryGetValue(s, out result) && result != null;
+        }
+
+        /// <summary>
+        /// Get the list of tokens at the specified start offset
+        /// </summary>
+        /// <param name="s">startOffset</param>
+        /// <returns><see cref="T:IList{SegToken}"/> of tokens at the specified start offset.</returns>
+        public virtual IList<SegToken> GetStartList(int s)
+        {
+            IList<SegToken> result;
+            tokenListTable.TryGetValue(s, out result);
+            return result;
+        }
+
+        /// <summary>
+        /// Get the highest start offset in the map. Returns maximum start offset, or -1 if the map is empty.
+        /// </summary>
+        public virtual int MaxStart
+        {
+            get { return maxStart; }
+        }
+
+        /// <summary>
+        /// Set the <see cref="SegToken.Index"/> for each token, based upon its order by startOffset. 
+        /// </summary>
+        /// <returns>a <see cref="T:IList{SegToken}"/> of these ordered tokens.</returns>
+        public virtual IList<SegToken> MakeIndex()
+        {
+            IList<SegToken> result = new List<SegToken>();
+            int s = -1, count = 0, size = tokenListTable.Count;
+            IList<SegToken> tokenList;
+            int index = 0;
+            while (count < size)
+            {
+                if (IsStartExist(s))
+                {
+                    tokenList = tokenListTable[s];
+                    foreach (SegToken st in tokenList)
+                    {
+                        st.Index = index;
+                        result.Add(st);
+                        index++;
+                    }
+                    count++;
+                }
+                s++;
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Add a <see cref="SegToken"/> to the mapping, creating a new mapping at the token's startOffset if one does not exist. 
+        /// </summary>
+        /// <param name="token">token <see cref="SegToken"/>.</param>
+        public virtual void AddToken(SegToken token)
+        {
+            int s = token.StartOffset;
+            if (!IsStartExist(s))
+            {
+                List<SegToken> newlist = new List<SegToken>();
+                newlist.Add(token);
+                tokenListTable[s] = newlist;
+            }
+            else
+            {
+                IList<SegToken> tokenList = tokenListTable[s];
+                tokenList.Add(token);
+            }
+            if (s > maxStart)
+            {
+                maxStart = s;
+            }
+        }
+
+        /// <summary>
+        /// Return a <see cref="T:IList{SegToken}"/> of all tokens in the map, ordered by startOffset.
+        /// </summary>
+        /// <returns><see cref="T:IList{SegToken}"/> of all tokens in the map.</returns>
+        public virtual IList<SegToken> ToTokenList()
+        {
+            IList<SegToken> result = new List<SegToken>();
+            int s = -1, count = 0, size = tokenListTable.Count;
+            IList<SegToken> tokenList;
+
+            while (count < size)
+            {
+                if (IsStartExist(s))
+                {
+                    tokenList = tokenListTable[s];
+                    foreach (SegToken st in tokenList)
+                    {
+                        result.Add(st);
+                    }
+                    count++;
+                }
+                s++;
+            }
+            return result;
+        }
+
+        public override string ToString()
+        {
+            IList<SegToken> tokenList = this.ToTokenList();
+            StringBuilder sb = new StringBuilder();
+            foreach (SegToken t in tokenList)
+            {
+                sb.Append(t + "\n");
+            }
+            return sb.ToString();
+        }
+    }
+}
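
A minimal standalone sketch, not part of this commit, of the bucketing-and-flattening
idea behind SegGraph: tokens are grouped by start offset as in AddToken, then emitted
in offset order with a running index as in MakeIndex. All names below are hypothetical.

    using System;
    using System.Collections.Generic;

    internal static class SegGraphSketch
    {
        public static void Main()
        {
            // Hypothetical (startOffset, text) pairs standing in for SegTokens.
            var tokens = new (int Start, string Text)[]
            {
                (0, "AB"), (0, "A"), (2, "C"), (1, "BC")
            };

            // Bucket tokens by start offset, as SegGraph.AddToken does.
            var table = new Dictionary<int, List<string>>();
            int maxStart = -1;
            foreach (var (start, text) in tokens)
            {
                if (!table.TryGetValue(start, out var list))
                    table[start] = list = new List<string>();
                list.Add(text);
                if (start > maxStart) maxStart = start;
            }

            // Flatten in offset order with a running index, as MakeIndex does.
            int index = 0;
            for (int s = 0; s <= maxStart; s++)
            {
                if (!table.TryGetValue(s, out var list)) continue;
                foreach (var text in list)
                    Console.WriteLine($"index {index++}: start {s}, \"{text}\"");
            }
        }
    }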

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegToken.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegToken.cs b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegToken.cs
new file mode 100644
index 0000000..f0bdea4
--- /dev/null
+++ b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegToken.cs
@@ -0,0 +1,124 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// SmartChineseAnalyzer internal token
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    public class SegToken
+    {
+        /// <summary>
+        /// Character array containing token text
+        /// </summary>
+        [WritableArray]
+        public char[] CharArray { get; set; }
+
+        /// <summary>
+        /// start offset into original sentence
+        /// </summary>
+        public int StartOffset { get; set; }
+
+        /// <summary>
+        /// end offset into original sentence
+        /// </summary>
+        public int EndOffset { get; set; }
+
+        /// <summary>
+        /// <see cref="Smart.WordType"/> of the text
+        /// </summary>
+        public WordType WordType { get; set; }
+
+        /// <summary>
+        /// word frequency
+        /// </summary>
+        public int Weight { get; set; }
+
+        /// <summary>
+        /// during segmentation, this is used to store the index of the token in the token list table
+        /// </summary>
+        public int Index { get; set; }
+
+        /// <summary>
+        /// Create a new <see cref="SegToken"/> from a character array.
+        /// </summary>
+        /// <param name="idArray">character array containing text</param>
+        /// <param name="start">start offset of <see cref="SegToken"/> in original sentence</param>
+        /// <param name="end">end offset of <see cref="SegToken"/> in original sentence</param>
+        /// <param name="wordType"><see cref="Smart.WordType"/> of the text</param>
+        /// <param name="weight">word frequency</param>
+        public SegToken(char[] idArray, int start, int end, WordType wordType, int weight)
+        {
+            this.CharArray = idArray;
+            this.StartOffset = start;
+            this.EndOffset = end;
+            this.WordType = wordType;
+            this.Weight = weight;
+        }
+
+        /// <summary>
+        /// <see cref="object.GetHashCode()"/>
+        /// </summary>
+        public override int GetHashCode()
+        {
+            int prime = 31;
+            int result = 1;
+            for (int i = 0; i < CharArray.Length; i++)
+            {
+                result = prime * result + CharArray[i];
+            }
+            result = prime * result + EndOffset;
+            result = prime * result + Index;
+            result = prime * result + StartOffset;
+            result = prime * result + Weight;
+            result = prime * result + (int)WordType;
+            return result;
+        }
+
+        /// <summary>
+        /// <see cref="object.Equals(object)"/>
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (this == obj)
+                return true;
+            if (obj == null)
+                return false;
+            if (GetType() != obj.GetType())
+                return false;
+            SegToken other = (SegToken)obj;
+            if (!Arrays.Equals(CharArray, other.CharArray))
+                return false;
+            if (EndOffset != other.EndOffset)
+                return false;
+            if (Index != other.Index)
+                return false;
+            if (StartOffset != other.StartOffset)
+                return false;
+            if (Weight != other.Weight)
+                return false;
+            if (WordType != other.WordType)
+                return false;
+            return true;
+        }
+    }
+}
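
A note on the GetHashCode override above: it follows the Java convention of
accumulating fields with a prime multiplier of 31. A minimal standalone sketch of
that accumulation (hypothetical names; arithmetic is left to wrap on overflow,
which the port relies on):

    using System;

    internal static class HashSketch
    {
        // Java-style 31-multiplier accumulation over a char array plus two
        // int fields, mirroring the shape of SegToken.GetHashCode().
        public static int Hash(char[] chars, int start, int end)
        {
            const int prime = 31;
            int result = 1;
            unchecked
            {
                foreach (char c in chars)
                    result = prime * result + c;
                result = prime * result + end;
                result = prime * result + start;
            }
            return result;
        }

        public static void Main()
        {
            Console.WriteLine(Hash("中文".ToCharArray(), 0, 2));
        }
    }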

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenFilter.cs b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenFilter.cs
new file mode 100644
index 0000000..a518833
--- /dev/null
+++ b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenFilter.cs
@@ -0,0 +1,76 @@
+// lucene version compatibility level: 4.8.1
+namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// <para>
+    /// Filters a <see cref="SegToken"/> by converting full-width Latin to half-width, then lowercasing Latin.
+    /// Additionally, all punctuation is converted into <see cref="Utility.COMMON_DELIMITER"/>.
+    /// </para>
+    /// @lucene.experimental
+    /// </summary>
+    public class SegTokenFilter
+    {
+        /// <summary>
+        /// Filter an input <see cref="SegToken"/>
+        /// <para>
+        /// Full-width Latin will be converted to half-width, then all Latin will be lowercased.
+        /// All punctuation is converted into <see cref="Utility.COMMON_DELIMITER"/>.
+        /// </para>
+        /// </summary>
+        /// <param name="token">Input <see cref="SegToken"/>.</param>
+        /// <returns>Normalized <see cref="SegToken"/>.</returns>
+        public virtual SegToken Filter(SegToken token)
+        {
+            switch (token.WordType)
+            {
+                case WordType.FULLWIDTH_NUMBER:
+                case WordType.FULLWIDTH_STRING: /* first convert full-width -> half-width */
+                    for (int i = 0; i < token.CharArray.Length; i++)
+                    {
+                        if (token.CharArray[i] >= 0xFF10)
+                        {
+                            token.CharArray[i] = (char)(token.CharArray[i] - 0xFEE0);
+                        }
+
+                        if (token.CharArray[i] >= 0x0041 && token.CharArray[i] <= 0x005A) /* lowercase latin */
+                        {
+                            token.CharArray[i] = (char)(token.CharArray[i] + 0x0020);
+                        }
+                    }
+                    break;
+                case WordType.STRING:
+                    for (int i = 0; i < token.CharArray.Length; i++)
+                    {
+                        if (token.CharArray[i] >= 0x0041 && token.CharArray[i] <= 0x005A) /* lowercase latin */
+                        {
+                            token.CharArray[i] = (char)(token.CharArray[i] + 0x0020);
+                        }
+                    }
+                    break;
+                case WordType.DELIMITER: /* convert all punctuation to Utility.COMMON_DELIMITER */
+                    token.CharArray = Utility.COMMON_DELIMITER;
+                    break;
+                default:
+                    break;
+            }
+            return token;
+        }
+    }
+}
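
To illustrate the arithmetic in Filter above: full-width forms sit 0xFEE0 above
their ASCII counterparts (U+FF10 '０' - 0xFEE0 = U+0030 '0'), and ASCII uppercase
letters sit 0x20 below lowercase. A standalone sketch, not part of this commit:

    using System;

    internal static class NormalizeSketch
    {
        // Normalize one char the way SegTokenFilter treats FULLWIDTH tokens:
        // shift full-width forms (U+FF10 and above) down by 0xFEE0, then
        // lowercase ASCII 'A'..'Z' by adding 0x20.
        private static char Normalize(char c)
        {
            if (c >= 0xFF10) c = (char)(c - 0xFEE0);
            if (c >= 'A' && c <= 'Z') c = (char)(c + 0x20);
            return c;
        }

        public static void Main()
        {
            Console.WriteLine(Normalize('\uFF21')); // full-width 'Ａ' -> 'a'
            Console.WriteLine(Normalize('\uFF15')); // full-width '５' -> '5'
            Console.WriteLine(Normalize('X'));      // plain 'X' -> 'x'
        }
    }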

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenPair.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenPair.cs b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenPair.cs
new file mode 100644
index 0000000..f454ba9
--- /dev/null
+++ b/src/Lucene.Net.Analysis.SmartCn/Hhmm/SegTokenPair.cs
@@ -0,0 +1,96 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// A pair of tokens in <see cref="SegGraph"/>
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    internal class SegTokenPair
+    {
+        [WritableArray]
+        public char[] CharArray { get; set; }
+
+        /// <summary>
+        /// index of the first token in <see cref="SegGraph"/>
+        /// </summary>
+        public int From { get; set; }
+
+        /// <summary>
+        /// index of the second token in <see cref="SegGraph"/>
+        /// </summary>
+        public int To { get; set; }
+
+        public double Weight { get; set; }
+
+        public SegTokenPair(char[] idArray, int from, int to, double weight)
+        {
+            this.CharArray = idArray;
+            this.From = from;
+            this.To = to;
+            this.Weight = weight;
+        }
+
+        /// <summary>
+        /// <see cref="object.GetHashCode()"/>
+        /// </summary>
+        public override int GetHashCode()
+        {
+            int prime = 31;
+            int result = 1;
+            for (int i = 0; i < CharArray.Length; i++)
+            {
+                result = prime * result + CharArray[i];
+            }
+            result = prime * result + From;
+            result = prime * result + To;
+            long temp = Number.DoubleToInt64Bits(Weight);
+            // Fold the high 32 bits into the low 32 bits (Java's Double.hashCode() convention).
+            result = prime * result + (int)(temp ^ (long)((ulong)temp >> 32));
+            return result;
+        }
+
+        /// <summary>
+        /// <see cref="object.Equals(object)"/>
+        /// </summary>
+        public override bool Equals(object obj)
+        {
+            if (this == obj)
+                return true;
+            if (obj == null)
+                return false;
+            if (GetType() != obj.GetType())
+                return false;
+            SegTokenPair other = (SegTokenPair)obj;
+            if (!Arrays.Equals(CharArray, other.CharArray))
+                return false;
+            if (From != other.From)
+                return false;
+            if (To != other.To)
+                return false;
+            if (Number.DoubleToInt64Bits(Weight) != Number
+                .DoubleToInt64Bits(other.Weight))
+                return false;
+            return true;
+        }
+    }
+}
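
The Weight hash above uses Java's Double.hashCode() convention: take the IEEE-754
bit pattern of the double and XOR-fold the high 32 bits into the low 32. A
standalone sketch using the BCL's BitConverter.DoubleToInt64Bits in place of the
Lucene.Net.Support Number helper:

    using System;

    internal static class DoubleHashSketch
    {
        // Hash a double by XOR-folding the two 32-bit halves of its
        // IEEE-754 representation, as Java's Double.hashCode() does.
        private static int HashDouble(double d)
        {
            long bits = BitConverter.DoubleToInt64Bits(d);
            return (int)(bits ^ (long)((ulong)bits >> 32));
        }

        public static void Main()
        {
            Console.WriteLine(HashDouble(0.5));
            Console.WriteLine(HashDouble(-0.5)); // differs only in the sign bit
        }
    }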

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/Hhmm/WordDictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Hhmm/WordDictionary.cs b/src/Lucene.Net.Analysis.SmartCn/Hhmm/WordDictionary.cs
new file mode 100644
index 0000000..0f5d3db
--- /dev/null
+++ b/src/Lucene.Net.Analysis.SmartCn/Hhmm/WordDictionary.cs
@@ -0,0 +1,779 @@
+// lucene version compatibility level: 4.8.1
+using Lucene.Net.Support;
+using Lucene.Net.Support.IO;
+using System;
+using System.IO;
+using System.Reflection;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Cn.Smart.Hhmm
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// SmartChineseAnalyzer Word Dictionary
+    /// <para/>
+    /// @lucene.experimental
+    /// </summary>
+    internal class WordDictionary : AbstractDictionary
+    {
+        private WordDictionary()
+        {
+        }
+
+        private static WordDictionary singleInstance;
+
+        /// <summary>
+        /// Large prime number for hash function
+        /// </summary>
+        public static readonly int PRIME_INDEX_LENGTH = 12071;
+
+        /// <summary>
+        /// wordIndexTable hashes all Chinese characters in Unicode into an
+        /// array of PRIME_INDEX_LENGTH elements. Collisions will occur, but in
+        /// practice this program only handles the 6768 characters found in
+        /// GB2312 plus some ASCII characters. Therefore, to guarantee better
+        /// precision, the original character is retained in the charIndexTable.
+        /// </summary>
+        private short[] wordIndexTable;
+
+        private char[] charIndexTable;
+
+        /// <summary>
+        /// To avoid taking too much space, the lexicon is stored in two
+        /// multidimensional arrays: one for words and one for frequencies.
+        /// Each word is placed in a char[], where each char represents a
+        /// Chinese character or other symbol. Each frequency is stored as an
+        /// int. The two arrays correspond one-to-one, so
+        /// wordItem_charArrayTable[i][j] looks up a word in the lexicon and
+        /// wordItem_frequencyTable[i][j] looks up its corresponding frequency.
+        private char[][][] wordItem_charArrayTable;
+
+        private int[][] wordItem_frequencyTable;
+
+        // static Logger log = Logger.getLogger(WordDictionary.class);
+
+        private static object syncLock = new object();
+
+        /// <summary>
+        /// Get the singleton dictionary instance.
+        /// </summary>
+        /// <returns>singleton</returns>
+        public static WordDictionary GetInstance()
+        {
+            lock (syncLock)
+            {
+                if (singleInstance == null)
+                {
+                    singleInstance = new WordDictionary();
+
+                    // LUCENENET specific
+                    // LUCENE-1817: https://issues.apache.org/jira/browse/LUCENE-1817
+                    // This issue still existed as of 4.8.0. Here is the fix - we only
+                    // load from a directory if the actual directory exists (AnalyzerProfile
+                    // ensures it is an empty string if it is not available).
+                    string dictRoot = AnalyzerProfile.ANALYSIS_DATA_DIR;
+                    if (string.IsNullOrEmpty(dictRoot))
+                    {
+                        singleInstance.Load();
+                    }
+                    else
+                    {
+                        singleInstance.Load(dictRoot);
+                    }
+
+                    //try
+                    //{
+                    //    singleInstance.Load();
+                    //}
+                    //catch (IOException e)
+                    //{
+                    //    string wordDictRoot = AnalyzerProfile.ANALYSIS_DATA_DIR;
+                    //    singleInstance.Load(wordDictRoot);
+                    //}
+                    //catch (TypeLoadException e)
+                    //{
+                    //    throw new Exception(e.ToString(), e);
+                    //}
+                }
+                return singleInstance;
+            }
+        }
+
+        /// <summary>
+        /// Attempt to load the dictionary from the provided directory, first trying coredict.mem and falling back to coredict.dct.
+        /// </summary>
+        /// <param name="dctFileRoot">path to dictionary directory</param>
+        public virtual void Load(string dctFileRoot)
+        {
+            string dctFilePath = System.IO.Path.Combine(dctFileRoot, "coredict.dct");
+            FileInfo serialObj = new FileInfo(System.IO.Path.Combine(dctFileRoot, "coredict.mem"));
+
+            // Prefer the pre-built coredict.mem; otherwise build from coredict.dct below.
+            if (!serialObj.Exists || !LoadFromObj(serialObj))
+            {
+                try
+                {
+                    wordIndexTable = new short[PRIME_INDEX_LENGTH];
+                    charIndexTable = new char[PRIME_INDEX_LENGTH];
+                    for (int i = 0; i < PRIME_INDEX_LENGTH; i++)
+                    {
+                        charIndexTable[i] = (char)0;
+                        wordIndexTable[i] = -1;
+                    }
+                    wordItem_charArrayTable = new char[GB2312_CHAR_NUM][][];
+                    wordItem_frequencyTable = new int[GB2312_CHAR_NUM][];
+                    // int total =
+                    LoadMainDataFromFile(dctFilePath);
+                    ExpandDelimiterData();
+                    MergeSameWords();
+                    SortEachItems();
+                    // log.info("load dictionary: " + dctFilePath + " total:" + total);
+                }
+                catch (IOException e)
+                {
+                    throw new Exception(e.ToString(), e);
+                }
+
+                SaveToObj(serialObj);
+            }
+
+        }
+
+        /// <summary>
+        /// Load coredict.mem internally from the jar file.
+        /// </summary>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        public virtual void Load()
+        {
+            using (Stream input = this.GetType().GetTypeInfo().Assembly.FindAndGetManifestResourceStream(this.GetType(), "coredict.mem"))
+            {
+                LoadFromObjectInputStream(input);
+            }
+        }
+
+        private bool LoadFromObj(FileInfo serialObj)
+        {
+            try
+            {
+                using (Stream input = new FileStream(serialObj.FullName, FileMode.Open, FileAccess.Read))
+                    LoadFromObjectInputStream(input);
+                return true;
+            }
+            catch (Exception e)
+            {
+                throw new Exception(e.ToString(), e);
+            }
+        }
+
+        // LUCENENET conversion note:
+        // The data in Lucene is stored in a proprietary binary format (similar to
+        // .NET's BinarySerializer) that cannot be read back in .NET. Therefore, the
+        // data was extracted using Java's DataOutputStream using the following Java code.
+        // It can then be read in using the LoadFromInputStream method below 
+        // (using a DataInputStream instead of a BinaryReader), and saved
+        // in the correct (BinaryWriter) format by calling the SaveToObj method.
+        // Alternatively, the data can be loaded from disk using the files
+        // here(https://issues.apache.org/jira/browse/LUCENE-1629) in the analysis.data.zip file, 
+        // which will automatically produce the .mem files.
+
+        //public void saveToOutputStream(java.io.DataOutputStream stream) throws IOException
+        //{
+        //    // save bigramHashTable
+        //    int bhLen = bigramHashTable.length;
+        //    stream.writeInt(bhLen);
+        //    for (int i = 0; i<bhLen; i++)
+        //    {
+        //        stream.writeLong(bigramHashTable[i]);
+        //    }
+
+        //    // save frequencyTable
+        //    int fLen = frequencyTable.length;
+        //    stream.writeInt(fLen);
+        //    for (int i = 0; i<fLen; i++)
+        //    {
+        //        stream.writeInt(frequencyTable[i]);
+        //    }
+        //}
+
+        private void LoadFromObjectInputStream(Stream serialObjectInputStream)
+        {
+            //ObjectInputStream input = new ObjectInputStream(serialObjectInputStream);
+            //wordIndexTable = (short[])input.ReadObject();
+            //charIndexTable = (char[])input.ReadObject();
+            //wordItem_charArrayTable = (char[][][])input.ReadObject();
+            //wordItem_frequencyTable = (int[][])input.ReadObject();
+            //// log.info("load core dict from serialization.");
+            //input.close();
+
+            using (var reader = new BinaryReader(serialObjectInputStream))
+            //using (var reader = new DataInputStream(serialObjectInputStream))
+            {
+
+                // Read wordIndexTable
+                int wiLen = reader.ReadInt32();
+                wordIndexTable = new short[wiLen];
+                for (int i = 0; i < wiLen; i++)
+                {
+                    wordIndexTable[i] = reader.ReadInt16();
+                }
+
+                // Read charIndexTable
+                int ciLen = reader.ReadInt32();
+                charIndexTable = new char[ciLen];
+                for (int i = 0; i < ciLen; i++)
+                {
+                    charIndexTable[i] = reader.ReadChar();
+                }
+
+                // Read wordItem_charArrayTable
+                int caDim1 = reader.ReadInt32();
+                if (caDim1 > -1)
+                {
+                    wordItem_charArrayTable = new char[caDim1][][];
+                    for (int i = 0; i < caDim1; i++)
+                    {
+                        int caDim2 = reader.ReadInt32();
+                        if (caDim2 > -1)
+                        {
+                            wordItem_charArrayTable[i] = new char[caDim2][];
+                            for (int j = 0; j < caDim2; j++)
+                            {
+                                int caDim3 = reader.ReadInt32();
+                                if (caDim3 > -1)
+                                {
+                                    wordItem_charArrayTable[i][j] = new char[caDim3];
+                                    for (int k = 0; k < caDim3; k++)
+                                    {
+                                        wordItem_charArrayTable[i][j][k] = reader.ReadChar();
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // Read wordItem_frequencyTable
+                int fDim1 = reader.ReadInt32();
+                if (fDim1 > -1)
+                {
+                    wordItem_frequencyTable = new int[fDim1][];
+                    for (int i = 0; i < fDim1; i++)
+                    {
+                        int fDim2 = reader.ReadInt32();
+                        if (fDim2 > -1)
+                        {
+                            wordItem_frequencyTable[i] = new int[fDim2];
+                            for (int j = 0; j < fDim2; j++)
+                            {
+                                wordItem_frequencyTable[i][j] = reader.ReadInt32();
+                            }
+                        }
+                    }
+                }
+            }
+
+            // log.info("load core dict from serialization.");
+        }
+
+        private void SaveToObj(FileInfo serialObj)
+        {
+            try
+            {
+                //ObjectOutputStream output = new ObjectOutputStream(new FileStream(
+                //    serialObj.FullName, FileMode.Create, FileAccess.Write));
+                //output.writeObject(wordIndexTable);
+                //output.writeObject(charIndexTable);
+                //output.writeObject(wordItem_charArrayTable);
+                //output.writeObject(wordItem_frequencyTable);
+                //output.close();
+                //// log.info("serialize core dict.");
+
+                using (Stream stream = new FileStream(serialObj.FullName, FileMode.Create, FileAccess.Write))
+                {
+                    using (var writer = new BinaryWriter(stream))
+                    {
+                        // Write wordIndexTable
+                        int wiLen = wordIndexTable.Length;
+                        writer.Write(wiLen);
+                        for (int i = 0; i < wiLen; i++)
+                        {
+                            writer.Write(wordIndexTable[i]);
+                        }
+
+                        // Write charIndexTable
+                        int ciLen = charIndexTable.Length;
+                        writer.Write(ciLen);
+                        for (int i = 0; i < ciLen; i++)
+                        {
+                            writer.Write(charIndexTable[i]);
+                        }
+
+                        // Write wordItem_charArrayTable
+                        int caDim1 = wordItem_charArrayTable == null ? -1 : wordItem_charArrayTable.Length;
+                        writer.Write(caDim1);
+                        for (int i = 0; i < caDim1; i++)
+                        {
+                            int caDim2 = wordItem_charArrayTable[i] == null ? -1 : wordItem_charArrayTable[i].Length;
+                            writer.Write(caDim2);
+                            for (int j = 0; j < caDim2; j++)
+                            {
+                                int caDim3 = wordItem_charArrayTable[i][j] == null ? -1 : wordItem_charArrayTable[i][j].Length;
+                                writer.Write(caDim3);
+                                for (int k = 0; k < caDim3; k++)
+                                {
+                                    writer.Write(wordItem_charArrayTable[i][j][k]);
+                                }
+                            }
+                        }
+
+                        // Write wordItem_frequencyTable
+                        int fDim1 = wordItem_frequencyTable == null ? -1 : wordItem_frequencyTable.Length;
+                        writer.Write(fDim1);
+                        for (int i = 0; i < fDim1; i++)
+                        {
+                            int fDim2 = wordItem_frequencyTable[i] == null ? -1 : wordItem_frequencyTable[i].Length;
+                            writer.Write(fDim2);
+                            for (int j = 0; j < fDim2; j++)
+                            {
+                                writer.Write(wordItem_frequencyTable[i][j]);
+                            }
+                        }
+                    }
+                }
+
+                // log.info("serialize core dict.");
+            }
+#pragma warning disable 168
+            catch (Exception e)
+#pragma warning restore 168
+            {
+                // log.warn(e.getMessage());
+            }
+        }
+
+        /// <summary>
+        /// Load the datafile into this <see cref="WordDictionary"/>
+        /// </summary>
+        /// <param name="dctFilePath">path to word dictionary (coredict.dct)</param>
+        /// <returns>number of words read</returns>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        private int LoadMainDataFromFile(string dctFilePath)
+        {
+            int i, cnt, length, total = 0;
+            // The file contains only 6763 Chinese characters plus 5 reserved
+            // slots (3756~3760); slot 3756 is used (as a header) to store information.
+            int[] buffer = new int[3];
+            byte[] intBuffer = new byte[4];
+            string tmpword;
+            //using (RandomAccessFile dctFile = new RandomAccessFile(dctFilePath, "r"))
+            using (var dctFile = new FileStream(dctFilePath, FileMode.Open, FileAccess.Read))
+            {
+
+                // GB2312 characters 0 - 6768
+                for (i = GB2312_FIRST_CHAR; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++)
+                {
+                    // if (i == 5231)
+                    // System.out.println(i);
+
+                    dctFile.Read(intBuffer, 0, intBuffer.Length);
+                    // the dictionary was developed for C, so byte order must be converted to work with .NET
+                    cnt = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LITTLE_ENDIAN).GetInt32();
+                    if (cnt <= 0)
+                    {
+                        wordItem_charArrayTable[i] = null;
+                        wordItem_frequencyTable[i] = null;
+                        continue;
+                    }
+                    wordItem_charArrayTable[i] = new char[cnt][];
+                    wordItem_frequencyTable[i] = new int[cnt];
+                    total += cnt;
+                    int j = 0;
+                    while (j < cnt)
+                    {
+                        // wordItemTable[i][j] = new WordItem();
+                        dctFile.Read(intBuffer, 0, intBuffer.Length);
+                        buffer[0] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LITTLE_ENDIAN)
+                            .GetInt32();// frequency
+                        dctFile.Read(intBuffer, 0, intBuffer.Length);
+                        buffer[1] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LITTLE_ENDIAN)
+                            .GetInt32();// length
+                        dctFile.Read(intBuffer, 0, intBuffer.Length);
+                        buffer[2] = ByteBuffer.Wrap(intBuffer).SetOrder(ByteOrder.LITTLE_ENDIAN)
+                            .GetInt32();// handle
+
+                        // wordItemTable[i][j].frequency = buffer[0];
+                        wordItem_frequencyTable[i][j] = buffer[0];
+
+                        length = buffer[1];
+                        if (length > 0)
+                        {
+                            byte[] lchBuffer = new byte[length];
+                            dctFile.Read(lchBuffer, 0, lchBuffer.Length);
+                            //tmpword = new String(lchBuffer, "GB2312");
+                            tmpword = Encoding.GetEncoding("GB2312").GetString(lchBuffer);
+                            //tmpword = Encoding.GetEncoding("hz-gb-2312").GetString(lchBuffer);
+                            // indexTable[i].wordItems[j].word = tmpword;
+                            // wordItemTable[i][j].charArray = tmpword.toCharArray();
+                            wordItem_charArrayTable[i][j] = tmpword.ToCharArray();
+                        }
+                        else
+                        {
+                            // wordItemTable[i][j].charArray = null;
+                            wordItem_charArrayTable[i][j] = null;
+                        }
+                        // System.out.println(indexTable[i].wordItems[j]);
+                        j++;
+                    }
+
+                    string str = GetCCByGB2312Id(i);
+                    SetTableIndex(str[0], i);
+                }
+            }
+            return total;
+        }
+
+        /// <summary>
+        /// The original lexicon puts all punctuation-related entries into a
+        /// single chart (index 3755). Here they are expanded and placed
+        /// separately into the chart for each corresponding symbol.
+        /// </summary>
+        private void ExpandDelimiterData()
+        {
+            int i;
+            int cnt;
+            // Treat punctuation chart 3755 as the source: distribute its
+            // entries into the charts for their corresponding symbols.
+            int delimiterIndex = 3755 + GB2312_FIRST_CHAR;
+            i = 0;
+            while (i < wordItem_charArrayTable[delimiterIndex].Length)
+            {
+                char c = wordItem_charArrayTable[delimiterIndex][i][0];
+                int j = GetGB2312Id(c);// the id value of the punctuation
+                if (wordItem_charArrayTable[j] == null)
+                {
+
+                    int k = i;
+                    // Starting from i, count how many subsequent word items begin with the same symbol c
+                    while (k < wordItem_charArrayTable[delimiterIndex].Length
+                        && wordItem_charArrayTable[delimiterIndex][k][0] == c)
+                    {
+                        k++;
+                    }
+                    // c is the punctuation character, j is the id value of c
+                    // k-1 represents the index of the last punctuation character
+                    cnt = k - i;
+                    if (cnt != 0)
+                    {
+                        wordItem_charArrayTable[j] = new char[cnt][];
+                        wordItem_frequencyTable[j] = new int[cnt];
+                    }
+
+                    // Assign value for each wordItem.
+                    for (k = 0; k < cnt; k++, i++)
+                    {
+                        // wordItemTable[j][k] = new WordItem();
+                        wordItem_frequencyTable[j][k] = wordItem_frequencyTable[delimiterIndex][i];
+                        wordItem_charArrayTable[j][k] = new char[wordItem_charArrayTable[delimiterIndex][i].Length - 1];
+                        System.Array.Copy(wordItem_charArrayTable[delimiterIndex][i], 1,
+                            wordItem_charArrayTable[j][k], 0,
+                            wordItem_charArrayTable[j][k].Length);
+                    }
+                    SetTableIndex(c, j);
+                }
+            }
+            // Delete the original corresponding symbol array.
+            wordItem_charArrayTable[delimiterIndex] = null;
+            wordItem_frequencyTable[delimiterIndex] = null;
+        }
+
+        /// <summary>
+        /// Since we aren't doing POS tagging, merge the frequencies of entries for the same word (which differ only in POS).
+        /// </summary>
+        private void MergeSameWords()
+        {
+            int i;
+            for (i = 0; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++)
+            {
+                if (wordItem_charArrayTable[i] == null)
+                    continue;
+                int len = 1;
+                for (int j = 1; j < wordItem_charArrayTable[i].Length; j++)
+                {
+                    if (Utility.CompareArray(wordItem_charArrayTable[i][j], 0,
+                        wordItem_charArrayTable[i][j - 1], 0) != 0)
+                        len++;
+
+                }
+                if (len < wordItem_charArrayTable[i].Length)
+                {
+                    char[][] tempArray = new char[len][];
+                    int[] tempFreq = new int[len];
+                    int k = 0;
+                    tempArray[0] = wordItem_charArrayTable[i][0];
+                    tempFreq[0] = wordItem_frequencyTable[i][0];
+                    for (int j = 1; j < wordItem_charArrayTable[i].Length; j++)
+                    {
+                        if (Utility.CompareArray(wordItem_charArrayTable[i][j], 0,
+                            tempArray[k], 0) != 0)
+                        {
+                            k++;
+                            // temp[k] = wordItemTable[i][j];
+                            tempArray[k] = wordItem_charArrayTable[i][j];
+                            tempFreq[k] = wordItem_frequencyTable[i][j];
+                        }
+                        else
+                        {
+                            // temp[k].frequency += wordItemTable[i][j].frequency;
+                            tempFreq[k] += wordItem_frequencyTable[i][j];
+                        }
+                    }
+                    // wordItemTable[i] = temp;
+                    wordItem_charArrayTable[i] = tempArray;
+                    wordItem_frequencyTable[i] = tempFreq;
+                }
+            }
+        }
+
+        private void SortEachItems()
+        {
+            char[] tmpArray;
+            int tmpFreq;
+            for (int i = 0; i < wordItem_charArrayTable.Length; i++)
+            {
+                if (wordItem_charArrayTable[i] != null
+                    && wordItem_charArrayTable[i].Length > 1)
+                {
+                    for (int j = 0; j < wordItem_charArrayTable[i].Length - 1; j++)
+                    {
+                        for (int j2 = j + 1; j2 < wordItem_charArrayTable[i].Length; j2++)
+                        {
+                            if (Utility.CompareArray(wordItem_charArrayTable[i][j], 0,
+                                wordItem_charArrayTable[i][j2], 0) > 0)
+                            {
+                                tmpArray = wordItem_charArrayTable[i][j];
+                                tmpFreq = wordItem_frequencyTable[i][j];
+                                wordItem_charArrayTable[i][j] = wordItem_charArrayTable[i][j2];
+                                wordItem_frequencyTable[i][j] = wordItem_frequencyTable[i][j2];
+                                wordItem_charArrayTable[i][j2] = tmpArray;
+                                wordItem_frequencyTable[i][j2] = tmpFreq;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Calculate character <paramref name="c"/>'s position in the hash table,
+        /// then initialize that position in the address table.
+        /// </summary>
+        private bool SetTableIndex(char c, int j)
+        {
+            int index = GetAvaliableTableIndex(c);
+            if (index != -1)
+            {
+                charIndexTable[index] = c;
+                wordIndexTable[index] = (short)j;
+                return true;
+            }
+            else
+                return false;
+        }
+
+        private short GetAvaliableTableIndex(char c)
+        {
+            int hash1 = (int)(Hash1(c) % PRIME_INDEX_LENGTH);
+            int hash2 = Hash2(c) % PRIME_INDEX_LENGTH;
+            if (hash1 < 0)
+                hash1 = PRIME_INDEX_LENGTH + hash1;
+            if (hash2 < 0)
+                hash2 = PRIME_INDEX_LENGTH + hash2;
+            int index = hash1;
+            int i = 1;
+            while (charIndexTable[index] != 0 && charIndexTable[index] != c
+                && i < PRIME_INDEX_LENGTH)
+            {
+                index = (hash1 + i * hash2) % PRIME_INDEX_LENGTH;
+                i++;
+            }
+            // System.out.println(i - 1);
+
+            if (i < PRIME_INDEX_LENGTH
+                && (charIndexTable[index] == 0 || charIndexTable[index] == c))
+            {
+                return (short)index;
+            }
+            else
+            {
+                return -1;
+            }
+        }
+
+        private short GetWordItemTableIndex(char c)
+        {
+            int hash1 = (int)(Hash1(c) % PRIME_INDEX_LENGTH);
+            int hash2 = Hash2(c) % PRIME_INDEX_LENGTH;
+            if (hash1 < 0)
+                hash1 = PRIME_INDEX_LENGTH + hash1;
+            if (hash2 < 0)
+                hash2 = PRIME_INDEX_LENGTH + hash2;
+            int index = hash1;
+            int i = 1;
+            while (charIndexTable[index] != 0 && charIndexTable[index] != c
+                && i < PRIME_INDEX_LENGTH)
+            {
+                index = (hash1 + i * hash2) % PRIME_INDEX_LENGTH;
+                i++;
+            }
+
+            if (i < PRIME_INDEX_LENGTH && charIndexTable[index] == c)
+            {
+                return (short)index;
+            }
+            else
+                return -1;
+        }
+
+        /// <summary>
+        /// Look up the word represented by the given char array and return its
+        /// position in the word list.
+        /// </summary>
+        /// <param name="knownHashIndex">
+        /// the already-computed hash-table position of the first character,
+        /// charArray[0]. If it has not been calculated yet, it can be obtained
+        /// via GetWordItemTableIndex(charArray[0]).
+        /// </param>
+        /// <param name="charArray">the char array of the word to look up.</param>
+        /// <returns>position of the word in the word array, or -1 if not found.</returns>
+        private int FindInTable(short knownHashIndex, char[] charArray)
+        {
+            if (charArray == null || charArray.Length == 0)
+                return -1;
+
+            char[][] items = wordItem_charArrayTable[wordIndexTable[knownHashIndex]];
+            int start = 0, end = items.Length - 1;
+            int mid = (start + end) / 2, cmpResult;
+
+            // Binary search for the index of idArray
+            while (start <= end)
+            {
+                cmpResult = Utility.CompareArray(items[mid], 0, charArray, 1);
+
+                if (cmpResult == 0)
+                    return mid;// find it
+                else if (cmpResult < 0)
+                    start = mid + 1;
+                else if (cmpResult > 0)
+                    end = mid - 1;
+
+                mid = (start + end) / 2;
+            }
+            return -1;
+        }
+
+        /// <summary>
+        /// Find the first word in the dictionary that starts with the supplied prefix
+        /// </summary>
+        /// <param name="charArray">input prefix</param>
+        /// <returns>index of word, or -1 if not found</returns>
+        /// <seealso cref="GetPrefixMatch(char[], int)"/>
+        public virtual int GetPrefixMatch(char[] charArray)
+        {
+            return GetPrefixMatch(charArray, 0);
+        }
+
+        /// <summary>
+        /// Find the nth word in the dictionary that starts with the supplied prefix
+        /// </summary>
+        /// <param name="charArray">input prefix</param>
+        /// <param name="knownStart">relative position in the dictionary to start</param>
+        /// <returns>index of word, or -1 if not found</returns>
+        /// <seealso cref="GetPrefixMatch(char[])"/>
+        public virtual int GetPrefixMatch(char[] charArray, int knownStart)
+        {
+            short index = GetWordItemTableIndex(charArray[0]);
+            if (index == -1)
+                return -1;
+            char[][] items = wordItem_charArrayTable[wordIndexTable[index]];
+            int start = knownStart, end = items.Length - 1;
+
+            int mid = (start + end) / 2, cmpResult;
+
+            // Binary search for the index of idArray
+            while (start <= end)
+            {
+                cmpResult = Utility.CompareArrayByPrefix(charArray, 1, items[mid], 0);
+                if (cmpResult == 0)
+                {
+                    // Get the first item that matches the current word
+                    while (mid >= 0
+                        && Utility.CompareArrayByPrefix(charArray, 1, items[mid], 0) == 0)
+                        mid--;
+                    mid++;
+                    return mid;// Find the first word that uses charArray as prefix.
+                }
+                else if (cmpResult < 0)
+                    end = mid - 1;
+                else
+                    start = mid + 1;
+                mid = (start + end) / 2;
+            }
+            return -1;
+        }
+
+        /// <summary>
+        /// Get the frequency of a word from the dictionary
+        /// </summary>
+        /// <param name="charArray">input word</param>
+        /// <returns>word frequency, or zero if the word is not found</returns>
+        public virtual int GetFrequency(char[] charArray)
+        {
+            short hashIndex = GetWordItemTableIndex(charArray[0]);
+            if (hashIndex == -1)
+            {
+                return 0;
+            }
+            int itemIndex = FindInTable(hashIndex, charArray);
+            if (itemIndex != -1)
+            {
+                return wordItem_frequencyTable[wordIndexTable[hashIndex]][itemIndex];
+            }
+            return 0;
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> if the dictionary entry at itemIndex in the table for charArray[0] equals charArray
+        /// </summary>
+        /// <param name="charArray">input word</param>
+        /// <param name="itemIndex">item index for table charArray[0]</param>
+        /// <returns><c>true</c> if the entry exists</returns>
+        public virtual bool IsEqual(char[] charArray, int itemIndex)
+        {
+            short hashIndex = GetWordItemTableIndex(charArray[0]);
+            return Utility.CompareArray(charArray, 1,
+                wordItem_charArrayTable[wordIndexTable[hashIndex]][itemIndex], 0) == 0;
+        }
+    }
+}
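
The GetAvaliableTableIndex / GetWordItemTableIndex pair above implements open
addressing with double hashing: probe slot (hash1 + i * hash2) mod
PRIME_INDEX_LENGTH until an empty or matching slot turns up, giving up after
PRIME_INDEX_LENGTH probes. A minimal standalone sketch of the probe loop (the
hash functions below are stand-ins, not the ones in AbstractDictionary):

    using System;

    internal static class ProbeSketch
    {
        private const int PRIME = 13; // small stand-in for 12071

        // Double-hashing probe in the style of GetAvaliableTableIndex:
        // start at hash1 and step by hash2, both reduced modulo PRIME.
        private static int FindSlot(char[] table, char c)
        {
            int hash1 = (c * 31) % PRIME;     // stand-in for Hash1(c)
            int hash2 = (c * 17 + 1) % PRIME; // stand-in for Hash2(c)

            int index = hash1;
            int i = 1;
            while (table[index] != '\0' && table[index] != c && i < PRIME)
            {
                index = (hash1 + i * hash2) % PRIME;
                i++;
            }
            return (i < PRIME && (table[index] == '\0' || table[index] == c))
                ? index : -1;
        }

        public static void Main()
        {
            var table = new char[PRIME];
            foreach (char c in new[] { '中', '文', '分', '词' })
            {
                int slot = FindSlot(table, c);
                if (slot >= 0) table[slot] = c;
                Console.WriteLine($"'{c}' -> slot {slot}");
            }
        }
    }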

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/Lucene.Net.Analysis.SmartCn.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/Lucene.Net.Analysis.SmartCn.csproj b/src/Lucene.Net.Analysis.SmartCn/Lucene.Net.Analysis.SmartCn.csproj
index 37604c0..c735c33 100644
--- a/src/Lucene.Net.Analysis.SmartCn/Lucene.Net.Analysis.SmartCn.csproj
+++ b/src/Lucene.Net.Analysis.SmartCn/Lucene.Net.Analysis.SmartCn.csproj
@@ -46,7 +46,9 @@
 
   <ItemGroup>
     <Compile Include="..\CommonAssemblyInfo.cs" Link="Properties\CommonAssemblyInfo.cs" />
-    <EmbeddedResource Include="stopwords.txt;HHMM\bigramdict.mem;HHMM\coredict.mem" />
+    <EmbeddedResource Include="stopwords.txt" />
+    <EmbeddedResource Include="Hhmm\bigramdict.mem" />
+    <EmbeddedResource Include="Hhmm\coredict.mem" />
   </ItemGroup>
 
   <ItemGroup>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs b/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
index 9917509..6ad831d 100644
--- a/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/WordSegmenter.cs
@@ -1,5 +1,5 @@
 // lucene version compatibility level: 4.8.1
-using Lucene.Net.Analysis.Cn.Smart.HHMM;
+using Lucene.Net.Analysis.Cn.Smart.Hhmm;
 using Lucene.Net.Support;
 using System.Collections.Generic;
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs b/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
index c8a706c..5af9a4e 100644
--- a/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/WordTokenFilter.cs
@@ -1,5 +1,5 @@
 // lucene version compatibility level: 4.8.1
-using Lucene.Net.Analysis.Cn.Smart.HHMM;
+using Lucene.Net.Analysis.Cn.Smart.Hhmm;
 using Lucene.Net.Analysis.TokenAttributes;
 using System;
 using System.Collections.Generic;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/056353d4/src/Lucene.Net.Tests.Analysis.SmartCn/TestHMMChineseTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.SmartCn/TestHMMChineseTokenizerFactory.cs b/src/Lucene.Net.Tests.Analysis.SmartCn/TestHMMChineseTokenizerFactory.cs
index 264ab38..60f24c9 100644
--- a/src/Lucene.Net.Tests.Analysis.SmartCn/TestHMMChineseTokenizerFactory.cs
+++ b/src/Lucene.Net.Tests.Analysis.SmartCn/TestHMMChineseTokenizerFactory.cs
@@ -31,7 +31,7 @@ namespace Lucene.Net.Analysis.Cn.Smart
         [Test]
         public void TestHHMMSegmenter()
         {
-            var x = new HHMM.HHMMSegmenter();
+            var x = new Hhmm.HHMMSegmenter();
         }
 
         /// <summary>

