lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mhern...@apache.org
Subject [1/7] lucenenet git commit: adding array extension methods, refactoring unicode related classes to match recent changes, adding bytesrefbuilder.
Date Sun, 09 Nov 2014 22:07:45 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/pcl b31804212 -> ecf03e102


adding array extension methods, refactoring unicode related classes to match recent changes,
adding bytesrefbuilder.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8e7e1043
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8e7e1043
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8e7e1043

Branch: refs/heads/pcl
Commit: 8e7e10437db4cc82ed46f9d5968f640cd94d66d0
Parents: b318042
Author: Michael Herndon <mherndon@michaelherndon.com>
Authored: Wed Aug 20 21:20:29 2014 -0400
Committer: Michael Herndon <mherndon@michaelherndon.com>
Committed: Wed Aug 20 21:20:29 2014 -0400

----------------------------------------------------------------------
 src/Lucene.Net.Core/Lucene.Net.Core.csproj      |   2 +
 .../Support/ArrayExtensionMethods.cs            |  21 ++
 src/Lucene.Net.Core/Util/ArrayUtil.cs           |  14 +-
 src/Lucene.Net.Core/Util/BytesRef.cs            |  67 ++++-
 src/Lucene.Net.Core/Util/BytesRefArray.cs       |  28 +-
 src/Lucene.Net.Core/Util/BytesRefBuilder.cs     | 263 +++++++++++++++++++
 src/Lucene.Net.Core/Util/CharsRef.cs            |  23 +-
 src/Lucene.Net.Core/Util/CharsRefBuilder.cs     |   6 +-
 src/Lucene.Net.Core/Util/UnicodeUtil.cs         | 231 ++++++++++------
 .../Support/TestNumberExtensionMethods.cs       |  21 +-
 .../Util/TestByteArrayRef.cs                    |  14 +-
 test/Lucene.Net.Core.Tests/Util/TestCharsRef.cs |  12 +-
 12 files changed, 559 insertions(+), 143 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Lucene.Net.Core.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Lucene.Net.Core.csproj b/src/Lucene.Net.Core/Lucene.Net.Core.csproj
index 9265b37..5ad93e0 100644
--- a/src/Lucene.Net.Core/Lucene.Net.Core.csproj
+++ b/src/Lucene.Net.Core/Lucene.Net.Core.csproj
@@ -58,6 +58,7 @@
   <ItemGroup>
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Check.cs" />
+    <Compile Include="Support\ArrayExtensionMethods.cs" />
     <Compile Include="Support\AtomicReferenceArray.cs" />
     <Compile Include="Support\DeepCloneNotSupportedException.cs" />
     <Compile Include="Support\EnumUtil.cs" />
@@ -75,6 +76,7 @@
     <Compile Include="Util\BitUtil.cs" />
     <Compile Include="Util\BroadWord.cs" />
     <Compile Include="Util\ByteBlockPool.cs" />
+    <Compile Include="Util\BytesRefBuilder.cs" />
     <Compile Include="Util\BytesRef.cs" />
     <Compile Include="Util\BytesRefArray.cs" />
     <Compile Include="Util\CharsRefBuilder.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs b/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
new file mode 100644
index 0000000..d7cc737
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/ArrayExtensionMethods.cs
@@ -0,0 +1,21 @@
+
+
+namespace Lucene.Net.Support
+{
+    using System;
+
+    /// <summary>
+    ///     Extension methods for arrays.
+    /// </summary>
+    public static class ArrayExtensionMethods
+    {
+        /// <summary>
+        ///     Creates a new array of <paramref name="length" /> elements that starts with the
+        ///     leading elements of <paramref name="array" />.
+        /// </summary>
+        /// <typeparam name="T">The element type of the array.</typeparam>
+        /// <param name="array">The array to copy from.</param>
+        /// <param name="length">
+        ///     The size of the returned array. <c>-1</c> (the default) means "same size as
+        ///     <paramref name="array" />". When larger than the source, the extra trailing
+        ///     elements keep their default value; when smaller, the copy is truncated.
+        /// </param>
+        /// <returns>A newly allocated array; never the same instance as <paramref name="array" />.</returns>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="array" /> is null.</exception>
+        public static T[] Copy<T>(this T[] array, int length = -1)
+        {
+            if (array == null)
+                throw new ArgumentNullException("array");
+
+            if (length == -1)
+                length = array.Length;
+
+            var result = new T[length];
+
+            // Never ask Array.Copy for more elements than the source holds; the
+            // remainder of a longer result stays at default(T).
+            Array.Copy(array, result, Math.Min(length, array.Length));
+            return result;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/ArrayUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/ArrayUtil.cs b/src/Lucene.Net.Core/Util/ArrayUtil.cs
index 078aaf3..cd3d7cc 100644
--- a/src/Lucene.Net.Core/Util/ArrayUtil.cs
+++ b/src/Lucene.Net.Core/Util/ArrayUtil.cs
@@ -37,19 +37,19 @@ namespace Lucene.Net.Util
         /// </summary>
         /// <typeparam name="T">The element type for the array.</typeparam>
         /// <param name="array">The array to base the resize on.</param>
-        /// <param name="minSize">The minimum size to grow the array.</param>
+        /// <param name="capacity">The minimum size to grow the array.</param>
         /// <returns>The resized array.</returns>
-        /// <exception cref="System.ArgumentException">Throws when <paramref name="minSize"/>
is less than zero.</exception>
-        public static T[] Grow<T>(this T[] array, int minSize = 1)
+        /// <exception cref="System.ArgumentException">Throws when <paramref name="capacity"/>
is less than zero.</exception>
+        public static T[] Grow<T>(this T[] array, int capacity = 1)
         {
             Debug.Assert(typeof(T).GetTypeInfo().IsPrimitive, "Type T must be primitive");
-            Debug.Assert(minSize >= 0, "targetSize must be positive");
+            Debug.Assert(capacity >= 0, "targetSize must be positive");
 
-            if (array.Length >= minSize) 
+            if (array.Length >= capacity) 
                 return array;
             
-            var capacity = Oversize(minSize, RamUsageEstimator.PRIMITIVE_SIZES[typeof(T)]);
-            var oversizedArray = new T[capacity];
+            var length = Oversize(capacity, RamUsageEstimator.PRIMITIVE_SIZES[typeof(T)]);
+            var oversizedArray = new T[length];
             Array.Copy(array, 0, oversizedArray, 0, array.Length);
 
             return oversizedArray;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/BytesRef.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/BytesRef.cs b/src/Lucene.Net.Core/Util/BytesRef.cs
index 5631a7e..18dbf9f 100644
--- a/src/Lucene.Net.Core/Util/BytesRef.cs
+++ b/src/Lucene.Net.Core/Util/BytesRef.cs
@@ -20,6 +20,7 @@ namespace Lucene.Net.Util
     using System;
     using System.Collections.Generic;
     using System.Diagnostics;
+    using System.Linq;
     using System.Text;
 
     /// <summary>
@@ -45,6 +46,7 @@ namespace Lucene.Net.Util
         Support.ICloneable,
         IEnumerable<Byte>
     {
+        private int length;
         /// <summary>
         ///     An empty byte array for convenience
         /// </summary>
@@ -63,7 +65,16 @@ namespace Lucene.Net.Util
         /// <summary>
         ///     Length of used bytes.
         /// </summary>
-        public virtual int Length { get; internal protected set; }
+        public virtual int Length
+        {
+            get
+            {
+                return this.length;
+            }
+            set
+            {
+                // Record the requested length, then make sure the backing
+                // array is large enough to hold that many bytes.
+                this.length = value;
+
+                var needsMoreRoom = value > this.Bytes.Length;
+                if (needsMoreRoom)
+                {
+                    this.Grow(value);
+                }
+            }
+        }
 
         /// <summary>
         ///     Create a BytesRef with <seealso cref="EMPTY_BYTES" />
@@ -139,6 +150,36 @@ namespace Lucene.Net.Util
         }
 
         /// <summary>
+        ///     Initializes a new instance of <see cref="BytesRef" /> from the UTF8 bytes
+        ///     of the given character array.
+        /// </summary>
+        /// <param name="text">
+        ///     this must be well-formed
+        ///     unicode text, with no unpaired surrogates.
+        /// </param>
+        public BytesRef(char[] text)
+            : this()
+        {
+            // Start from the parameterless constructor's state, then load the characters.
+            this.CopyChars(text);
+        }
+
+
+        /// <summary>
+        ///     Converts <paramref name="text" /> with <c>UnicodeUtil.Utf16ToUtf8</c> into
+        ///     <c>Bytes</c> and sets <c>Length</c> to the value that call returns.
+        /// </summary>
+        /// <param name="text">The characters to convert.</param>
+        public void CopyChars(char[] text)
+        {
+            Debug.Assert(this.Offset == 0);
+
+            // Reserve the worst case up front so the conversion never runs out of room.
+            var charCount = text.Length;
+            var worstCaseBytes = charCount * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
+            this.Grow(worstCaseBytes);
+
+            var written = UnicodeUtil.Utf16ToUtf8(text, 0, charCount, this.Bytes);
+            this.Length = written;
+        }
+
+        /// <summary>
+        ///     Copies the characters of <paramref name="text" /> by delegating to the
+        ///     <c>char[]</c> overload. A sequence that already is a <c>char[]</c> is passed
+        ///     through as-is; anything else is first materialized with <c>ToArray()</c>.
+        /// </summary>
+        /// <param name="text">The characters to convert.</param>
+        public void CopyChars(IEnumerable<char> text)
+        {
+            Debug.Assert(this.Offset == 0);
+
+            char[] chars = text as char[];
+            if (chars == null)
+            {
+                chars = text.ToArray();
+            }
+
+            this.CopyChars(chars);
+        }
+
+        /// <summary>
         ///     Copies the UTF8 bytes for this string.
         /// </summary>
         /// <param name="text">
@@ -148,7 +189,8 @@ namespace Lucene.Net.Util
         public void CopyChars(CharsRef text)
         {
             Debug.Assert(this.Offset == 0);
-            UnicodeUtil.Utf16ToUtf8(text, 0, text.Length, this);
+            this.Grow(text.Length * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR);
+            this.Length = UnicodeUtil.Utf16ToUtf8(text.Chars, 0, text.Length, this.Bytes);
         }
 
         /// <summary>
@@ -160,8 +202,7 @@ namespace Lucene.Net.Util
         /// </param>
         public void CopyChars(string text)
         {
-            Debug.Assert(this.Offset == 0);
-            UnicodeUtil.Utf16ToUtf8(text.ToCharArray(), 0, text.Length, this);
+            this.CopyChars(text.ToCharArray());
         }
 
         /// <summary>
@@ -233,11 +274,9 @@ namespace Lucene.Net.Util
             {
                 return false;
             }
-            if (other is BytesRef)
-            {
-                return this.BytesEquals((BytesRef) other);
-            }
-            return false;
+
+            var bytesRef = other as BytesRef;
+            return bytesRef != null && this.BytesEquals(bytesRef);
         }
 
         /// <summary>
@@ -246,9 +285,9 @@ namespace Lucene.Net.Util
         /// <returns>A utf16 string.</returns>
         public string Utf8ToString()
         {
-            var @ref = new CharsRef(Length);
-            UnicodeUtil.Utf8ToUtf16(this.Bytes, this.Offset, this.Length, @ref);
-            return @ref.ToString();
+            var charsRef = new CharsRef(this.Length);
+            charsRef.Length = UnicodeUtil.Utf8ToUtf16(this.Bytes, this.Offset, this.Length,
charsRef.Chars);
+            return charsRef.ToString();
         }
 
         /// <summary>
@@ -318,10 +357,10 @@ namespace Lucene.Net.Util
         /// <summary>
         ///     Used to grow the reference array.
         /// </summary>
-        internal protected virtual void Grow(int newLength)
+        internal protected virtual void Grow(int capacity)
         {
             Debug.Assert(this.Offset == 0); // NOTE: senseless if offset != 0
-            this.Bytes = ArrayUtil.Grow(this.Bytes, newLength);
+            this.Bytes = ArrayUtil.Grow(this.Bytes, capacity);
         }
 
         /// <summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/BytesRefArray.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/BytesRefArray.cs b/src/Lucene.Net.Core/Util/BytesRefArray.cs
index d6ebed8..10707b5 100644
--- a/src/Lucene.Net.Core/Util/BytesRefArray.cs
+++ b/src/Lucene.Net.Core/Util/BytesRefArray.cs
@@ -110,10 +110,10 @@ namespace Lucene.Net.Util
         /// <param name="spare"> a spare <seealso cref="BytesRef" /> instance
</param>
         /// <param name="index"> the elements index to retrieve </param>
         /// <returns> the <i>n'th</i> element of this <seealso cref="BytesRefArray"
/> </returns>
-        public BytesRef Retrieve(BytesRef spare, int index)
+        public BytesRef Retrieve(BytesRefBuilder spare, int index)
         {
             Debug.Assert(spare != null, "spare must never be null");
-            Debug.Assert(spare.Offset == 0);
+            //Debug.Assert(spare.Offset == 0);
 
             if (index > this.lastElement)
                 throw new IndexOutOfRangeException("index " + index + " must be less than
the size: " +
@@ -121,10 +121,10 @@ namespace Lucene.Net.Util
             var offset = this.offsets[index];
             var length = index == this.lastElement - 1 ? this.currentOffset - offset : this.offsets[index
+ 1] - offset;
 
-            spare.Grow(length);
+      
             spare.Length = length;
-            this.pool.ReadBytes(offset, spare.Bytes, spare.Offset, spare.Length);
-            return spare;
+            this.pool.ReadBytes(offset, spare.Bytes, 0, spare.Length);
+            return spare.ToBytesRef();
         }
 
         /// <summary>
@@ -181,9 +181,9 @@ namespace Lucene.Net.Util
             private BytesRefArray bytesRefArray;
             private IComparer<BytesRef> comparer;
             private int[] orderedEntries;
-            private BytesRef pivot;
-            private BytesRef scratch1;
-            private BytesRef scratch2;
+            private BytesRefBuilder pivot;
+            private BytesRefBuilder scratch1;
+            private BytesRefBuilder scratch2;
 
             /// <summary>
             /// Initializes a new instance of the <see cref="ByteRefArraySorter"/>
class.
@@ -196,9 +196,9 @@ namespace Lucene.Net.Util
                 this.bytesRefArray = outerInstance;
                 this.comparer = comp;
                 this.orderedEntries = orderedEntries;
-                pivot = new BytesRef();
-                scratch1 = new BytesRef();
-                scratch2 = new BytesRef();
+                pivot = new BytesRefBuilder();
+                scratch1 = new BytesRefBuilder();
+                scratch2 = new BytesRefBuilder();
             }
 
             /// <summary>
@@ -260,7 +260,7 @@ namespace Lucene.Net.Util
             protected internal override int ComparePivot(int j)
             {
                 var index = orderedEntries[j];
-                return this.comparer.Compare(this.pivot, bytesRefArray.Retrieve(this.scratch2,
index));
+                return this.comparer.Compare(this.pivot.ToBytesRef(), bytesRefArray.Retrieve(this.scratch2,
index));
             }
 
 
@@ -297,6 +297,7 @@ namespace Lucene.Net.Util
             private readonly int size;
             private BytesRefArray bytesRefArray;
             private int position;
+            private BytesRefBuilder builder;
 
             /// <summary>
             ///     Initializes a new instance of the <see cref="BytesRefEnumerator" />
class.
@@ -306,6 +307,7 @@ namespace Lucene.Net.Util
             /// <param name="indices">The indices.</param>
             public BytesRefEnumerator(BytesRefArray bytesRefArray,  int size, int[] indices)
             {
+                this.builder = new BytesRefBuilder();
                 this.bytesRefArray = bytesRefArray;
                 this.size = size;
                 this.indices = indices;
@@ -344,7 +346,7 @@ namespace Lucene.Net.Util
 
                 // return a new instance for each loop. 
                 var bytesRef = new BytesRef();
-                this.Current = bytesRefArray.Retrieve(bytesRef, indices == null ? position
: indices[position]);
+                this.Current = bytesRefArray.Retrieve(this.builder, indices == null ? position
: indices[position]);
                 return true;
             }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/BytesRefBuilder.cs b/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
new file mode 100644
index 0000000..23ca12c
--- /dev/null
+++ b/src/Lucene.Net.Core/Util/BytesRefBuilder.cs
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Util
+{
+    using System;
+    using System.Linq;
+    using Support;
+    using System.Collections.Generic;
+    using System.Diagnostics.CodeAnalysis;
+
+
+    /// <summary>
+    ///     Mutable builder that accumulates bytes in an internal <see cref="BytesRef" />,
+    ///     growing the backing array on demand.
+    /// </summary>
+    // ReSharper disable CSharpWarnings::CS1574
+    public class BytesRefBuilder
+    {
+        private readonly BytesRef bytesRef;
+
+        /// <summary>
+        ///     Initializes a new instance of the <see cref="BytesRefBuilder" /> class backed
+        ///     by a new, default-constructed <see cref="BytesRef" />.
+        /// </summary>
+        public BytesRefBuilder()
+        {
+            this.bytesRef = new BytesRef();
+        }
+
+        /// <summary>
+        ///     Gets the backing byte array. It may be longer than <see cref="Length" /> and
+        ///     may be replaced by a different array instance when the builder grows.
+        /// </summary>
+        /// <value>The bytes.</value>
+        public byte[] Bytes
+        {
+            get { return this.bytesRef.Bytes; }
+        }
+
+        /// <summary>
+        ///     Gets or sets the number of bytes in use. Setting a value larger than the
+        ///     backing array grows the array first.
+        /// </summary>
+        /// <value>The length.</value>
+        public int Length
+        {
+            get { return this.bytesRef.Length; }
+            set
+            {
+                if (this.bytesRef.Bytes.Length < value)
+                    this.Grow(value);
+
+                this.bytesRef.Length = value;
+            }
+        }
+
+        /// <summary>
+        ///     Appends a single byte after the bytes currently in use.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        public void Append(byte value)
+        {
+            // Capture the write position BEFORE the length changes: the new byte
+            // belongs at the old length, i.e. the first unused slot.
+            var index = this.bytesRef.Length;
+            this.Length = index + 1;
+            this.Bytes[index] = value;
+        }
+
+        /// <summary>
+        ///     Appends <paramref name="length" /> bytes of <paramref name="bytes" />, starting
+        ///     at <paramref name="offset" />, after the bytes currently in use.
+        /// </summary>
+        /// <param name="bytes">The source bytes.</param>
+        /// <param name="offset">The index in <paramref name="bytes" /> of the first byte to append.</param>
+        /// <param name="length">The number of bytes to append.</param>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="bytes" /> is null.</exception>
+        public void Append(byte[] bytes, int offset, int length)
+        {
+            // Fail before touching any state.
+            if (bytes == null)
+                throw new ArgumentNullException("bytes");
+
+            // The destination is the old length; setting Length grows the backing
+            // array when required, so it must happen before the copy.
+            var start = this.bytesRef.Length;
+            this.Length = start + length;
+            Array.Copy(bytes, offset, this.bytesRef.Bytes, start, length);
+        }
+
+        /// <summary>
+        ///     Appends the bytes referenced by <paramref name="value" />.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        public void Append(BytesRef value)
+        {
+            this.Append(value.Bytes, value.Offset, value.Length);
+        }
+
+        /// <summary>
+        ///     Appends the bytes held by another builder.
+        /// </summary>
+        /// <param name="builder">The builder.</param>
+        public void Append(BytesRefBuilder builder)
+        {
+            this.Append(builder.bytesRef);
+        }
+
+        /// <summary>
+        ///     Returns the byte stored at <paramref name="offset" /> in the backing array.
+        /// </summary>
+        /// <param name="offset">The index into the backing array.</param>
+        /// <returns>The byte at that index.</returns>
+        public byte ByteAt(int offset)
+        {
+            return this.Bytes[offset];
+        }
+
+        /// <summary>
+        ///     Resets <see cref="Length" /> to zero. The backing array is kept.
+        /// </summary>
+        public void Clear()
+        {
+            this.Length = 0;
+        }
+
+        /// <summary>
+        ///     Clears and replaces the internal bytes. It is shorthand for calling
+        ///     <see cref="Clear" /> and <see cref="Append(byte[],int,int)" />.
+        /// </summary>
+        /// <param name="bytes">The source bytes.</param>
+        /// <param name="offset">The index in <paramref name="bytes" /> of the first byte to copy.</param>
+        /// <param name="length">The number of bytes to copy.</param>
+        public void CopyBytes(byte[] bytes, int offset, int length)
+        {
+            this.Clear();
+            this.Append(bytes, offset, length);
+        }
+
+        /// <summary>
+        ///     Clears and replaces the internal bytes. It is shorthand for calling
+        ///     <see cref="Clear" /> and <see cref="Append(BytesRef)" />.
+        /// </summary>
+        /// <param name="value">The value.</param>
+        public void CopyBytes(BytesRef value)
+        {
+            this.Clear();
+            this.Append(value);
+        }
+
+        /// <summary>
+        ///     Clears and replaces the internal bytes. It is shorthand for calling
+        ///     <see cref="Clear" /> and <see cref="Append(BytesRefBuilder)" />.
+        /// </summary>
+        /// <param name="builder">The builder.</param>
+        public void CopyBytes(BytesRefBuilder builder)
+        {
+            this.Clear();
+            this.Append(builder);
+        }
+
+        /// <summary>
+        ///     Replaces the content with the result of <c>UnicodeUtil.Utf16ToUtf8</c> applied
+        ///     to a range of <paramref name="text" />.
+        /// </summary>
+        /// <param name="text">The text.</param>
+        /// <param name="offset">The index of the first character to convert.</param>
+        /// <param name="length">
+        ///     The number of characters to convert; <c>-1</c> (the default) means everything
+        ///     from <paramref name="offset" /> to the end of the text.
+        /// </param>
+        public void CopyChars(ICharSequence text, int offset = 0, int length = -1)
+        {
+            // The converter reads [offset, offset + length), so the default must
+            // exclude the skipped prefix.
+            if (length == -1)
+                length = text.Length - offset;
+
+            // Reserve the worst case first; the second assignment trims to what was written.
+            this.Length = length * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
+            this.Length = UnicodeUtil.Utf16ToUtf8(text, offset, length, this.bytesRef.Bytes);
+        }
+
+        /// <summary>
+        ///     Replaces the content with the result of <c>UnicodeUtil.Utf16ToUtf8</c> applied
+        ///     to a range of <paramref name="text" />.
+        /// </summary>
+        /// <param name="text">The text.</param>
+        /// <param name="offset">The index of the first character to convert.</param>
+        /// <param name="length">
+        ///     The number of characters to convert; <c>-1</c> (the default) means everything
+        ///     from <paramref name="offset" /> to the end of the text.
+        /// </param>
+        public void CopyChars(string text, int offset = 0, int length = -1)
+        {
+            if (length == -1)
+                length = text.Length - offset;
+
+            this.Length = length * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
+            this.Length = UnicodeUtil.Utf16ToUtf8(text.ToCharArray(), offset, length, this.bytesRef.Bytes);
+        }
+
+        /// <summary>
+        ///     Replaces the content with the result of <c>UnicodeUtil.Utf16ToUtf8</c> applied
+        ///     to a range of <paramref name="text" />.
+        /// </summary>
+        /// <param name="text">The text.</param>
+        /// <param name="offset">The index of the first character to convert.</param>
+        /// <param name="length">
+        ///     The number of characters to convert; <c>-1</c> (the default) means everything
+        ///     from <paramref name="offset" /> to the end of the text.
+        /// </param>
+        public void CopyChars(char[] text, int offset = 0, int length = -1)
+        {
+            if (length == -1)
+                length = text.Length - offset;
+
+            this.Length = length * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
+            this.Length = UnicodeUtil.Utf16ToUtf8(text, offset, length, this.bytesRef.Bytes);
+        }
+
+        /// <summary>
+        ///     Replaces the content with the result of <c>UnicodeUtil.Utf16ToUtf8</c> applied
+        ///     to a range of <paramref name="text" />.
+        /// </summary>
+        /// <param name="text">The text. It is enumerated more than once when <paramref name="length" /> is defaulted.</param>
+        /// <param name="offset">The index of the first character to convert.</param>
+        /// <param name="length">
+        ///     The number of characters to convert; <c>-1</c> (the default) means everything
+        ///     from <paramref name="offset" /> to the end of the text.
+        /// </param>
+        public void CopyChars(IEnumerable<char> text, int offset = 0, int length = -1)
+        {
+            // ReSharper disable PossibleMultipleEnumeration
+            if (length == -1)
+                length = text.Count() - offset;
+
+            this.Length = length * UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR;
+            this.Length = UnicodeUtil.Utf16ToUtf8(text, offset, length, this.bytesRef.Bytes);
+        }
+
+        /// <inheritdoc />
+        /// <exception cref="NotSupportedException">Throws when called.</exception>
+        [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations", Justification = "Java Port Consistency")]
+        public override bool Equals(object obj)
+        {
+            throw new NotSupportedException();
+        }
+
+        /// <inheritdoc />
+        /// <exception cref="NotSupportedException">Throws when called.</exception>
+        [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations", Justification = "Java Port Consistency")]
+        public override int GetHashCode()
+        {
+            throw new NotSupportedException();
+        }
+
+        /// <summary>
+        ///     Replaces the backing array with the result of growing it to hold at least
+        ///     <paramref name="capacity" /> bytes. <see cref="Length" /> is not changed.
+        /// </summary>
+        /// <param name="capacity">The minimum number of bytes the backing array must hold.</param>
+        protected void Grow(int capacity)
+        {
+            this.bytesRef.Bytes = this.bytesRef.Bytes.Grow(capacity);
+        }
+
+        /// <summary>
+        ///     Creates a new <see cref="BytesRef" /> from a copy of the first
+        ///     <see cref="Length" /> bytes of the backing array.
+        /// </summary>
+        /// <returns>A <see cref="BytesRef" /> built from the copied bytes.</returns>
+        public BytesRef ToBytesRef()
+        {
+            var copy = this.Bytes.Copy(this.Length);
+            return new BytesRef(copy);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/CharsRef.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/CharsRef.cs b/src/Lucene.Net.Core/Util/CharsRef.cs
index 878bca1..aed89dc 100644
--- a/src/Lucene.Net.Core/Util/CharsRef.cs
+++ b/src/Lucene.Net.Core/Util/CharsRef.cs
@@ -20,6 +20,7 @@ namespace Lucene.Net.Util
     using System;
     using System.Collections.Generic;
     using System.Diagnostics;
+    using System.Linq;
     using Lucene.Net.Support;
     using ICloneable = Lucene.Net.Support.ICloneable; 
 
@@ -40,6 +41,8 @@ namespace Lucene.Net.Util
         ICloneable,
         IEnumerable<char>
     {
+        private int length;
+
         /// <summary>
         ///     An empty character array for convenience
         /// </summary>
@@ -58,7 +61,16 @@ namespace Lucene.Net.Util
         /// <summary>
         ///     Length of used characters.
         /// </summary>
-        public int Length { get; internal set; }
+        public int Length
+        {
+            get
+            {
+                return this.length;
+            }
+            set
+            {
+                // Store the requested length, then enlarge the character
+                // array when it cannot hold that many characters.
+                this.length = value;
+
+                var shortBy = value - this.Chars.Length;
+                if (shortBy > 0)
+                {
+                    this.Grow(value);
+                }
+            }
+        }
 
 
         /// <summary>
@@ -77,6 +89,7 @@ namespace Lucene.Net.Util
         public CharsRef(int capacity)
         {
             this.Chars = new char[capacity];
+            
         }
 
         /// <summary>
@@ -227,13 +240,13 @@ namespace Lucene.Net.Util
         /// <summary>
         ///     Used to grow the reference array.
         /// </summary>
-        /// <param name="newLength">The minimum length to grow the internal array.</param>
-        internal void Grow(int newLength)
+        /// <param name="capacity">The minimum length to grow the internal array.</param>
+        internal void Grow(int capacity)
         {
             Debug.Assert(Offset == 0);
-            if (this.Chars.Length < newLength)
+            if (this.Chars.Length < capacity)
             {
-                this.Chars = ArrayUtil.Grow(this.Chars, newLength);
+                this.Chars = ArrayUtil.Grow(this.Chars, capacity);
             }
         }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/CharsRefBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/CharsRefBuilder.cs b/src/Lucene.Net.Core/Util/CharsRefBuilder.cs
index 16a96aa..eef8922 100644
--- a/src/Lucene.Net.Core/Util/CharsRefBuilder.cs
+++ b/src/Lucene.Net.Core/Util/CharsRefBuilder.cs
@@ -188,7 +188,7 @@ namespace Lucene.Net.Util
 
             this.Grow(length);
 
-            this.charsRef.Length = UnicodeUtil.Utf8ToUtf16(bytes, offset, length, this.charsRef);
+            this.charsRef.Length = UnicodeUtil.Utf8ToUtf16(bytes, offset, length, this.Chars);
         }
 
         /// <inherits />
@@ -203,9 +203,9 @@ namespace Lucene.Net.Util
         /// Resizes and increases the length of the reference array.
         /// </summary>
-        /// <param name="minimumSize">The minimum size to grow the array.</param>
+        /// <param name="capacity">The minimum size to grow the array.</param>
-        public void Grow(int minimumSize)
+        public void Grow(int capacity)
         {
-            this.charsRef.Chars = ArrayUtil.Grow(this.charsRef.Chars, minimumSize);
+            this.charsRef.Chars = ArrayUtil.Grow(this.charsRef.Chars, capacity);
         }
 
         /// <inherits />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/src/Lucene.Net.Core/Util/UnicodeUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/UnicodeUtil.cs b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
index 4e46ef9..363232f 100644
--- a/src/Lucene.Net.Core/Util/UnicodeUtil.cs
+++ b/src/Lucene.Net.Core/Util/UnicodeUtil.cs
@@ -10,16 +10,18 @@
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 namespace Lucene.Net.Util
 {
+    using System;
     using System.Collections.Generic;
     using System.Diagnostics;
     using System.Linq;
+    using Lucene.Net.Support;
 
     /// <summary>
     ///     Utility methods for dealing with unicode.
@@ -31,6 +33,7 @@ namespace Lucene.Net.Util
         public const int UNI_SUR_LOW_START = 0xDC00;
         public const int UNI_SUR_LOW_END = 0xDFFF;
         public const int UNI_REPLACEMENT_CHAR = 0xFFFD;
+        public const int MAX_UTF8_BYTES_PER_CHAR = 4;
         private const int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
 
         private const long UNI_MAX_BMP = 0x0000FFFF;
@@ -39,89 +42,153 @@ namespace Lucene.Net.Util
         private const long HALF_MASK = 0x3FFL;
 
         private const int SURROGATE_OFFSET =
-            MIN_SUPPLEMENTARY_CODE_POINT - (UNI_SUR_HIGH_START << (int) HALF_SHIFT)
- UNI_SUR_LOW_START;
+            MIN_SUPPLEMENTARY_CODE_POINT - (UNI_SUR_HIGH_START << (int)HALF_SHIFT)
- UNI_SUR_LOW_START;
+
 
         /// <summary>
+        /// UTF16s to UTF8.
         /// </summary>
-        /// <param name="chars"></param>
-        /// <param name="offset"></param>
-        /// <param name="length"></param>
-        /// <param name="result"></param>
-        public static void Utf16ToUtf8(IEnumerable<char> chars, int offset, int length,
BytesRef result)
+        /// <param name="sequence">The chars.</param>
+        /// <param name="offset">The offset.</param>
+        /// <param name="length">The length.</param>
+        /// <param name="bytes">The bytes.</param>
+        /// <returns>System.Int32.</returns>
+        public static int Utf16ToUtf8(ICharSequence sequence, int offset, int length, byte[]
bytes)
         {
-            var end = offset + length;
+            int position = 0,
+                i = offset,
+                end = offset + length;
 
-            var @out = result.Bytes;
-            result.Offset = 0;
-            // Pre-allocate for worst case 4-for-1
-            var maxLen = length*4;
-            if (@out.Length < maxLen)
-            {
-                @out = result.Bytes = new byte[maxLen];
-            }
 
-            var currentOffset = 0;
+            // cast or convert. 
+            var source = sequence;
 
-            var move = offset < end;
+            if (bytes == null)
+                bytes = new byte[length];
 
-            if (move)
+            while (i < end)
             {
-                var list = chars.ToList();
-         
-                for (int i = offset; i < end; i++)
-                {
-                    var code = list[i];
-                    
+                int code = source.CharAt(i++);
 
-                    if (code < 0x80)
-                    {
-                        @out[currentOffset++] = (byte)code;
-                    }
-                    else if (code < 0x800)
-                    {
-                        @out[currentOffset++] = unchecked((byte)(0xC0 | (code >> 6)));
-                        @out[currentOffset++] = unchecked((byte)(0x80 | (code & 0x3F)));
-                    }
-                    else if (code < 0xD800 || code > 0xDFFF)
+                if (code < 0x80)
+                {
+                    bytes[position++] = (byte)code;
+                }
+                else if (code < 0x800)
+                {
+                    bytes[position++] = (byte)(0xC0 | (code >> 6));
+                    bytes[position++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else if (code < 0xD800 || code > 0xDFFF)
+                {
+                    bytes[position++] = (byte)(0xE0 | (code >> 12));
+                    bytes[position++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+                    bytes[position++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else
+                {
+                    // surrogate pair
+                    // confirm valid high surrogate
+                    if (code < 0xDC00 && i < end)
                     {
-                        @out[currentOffset++] = unchecked((byte)(0xE0 | (code >> 12)));
-                        @out[currentOffset++] = unchecked((byte)(0x80 | ((code >> 6)
& 0x3F)));
-                        @out[currentOffset++] = unchecked((byte)(0x80 | (code & 0x3F)));
+                        int utf32 = source.CharAt(i);
+                        // confirm valid low surrogate and write pair
+                        if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
+                        {
+                            utf32 = (code << 10) + utf32 + SURROGATE_OFFSET;
+                            i++;
+                            bytes[position++] = (byte)(0xF0 | (utf32 >> 18));
+                            bytes[position++] = (byte)(0x80 | ((utf32 >> 12) &
0x3F));
+                            bytes[position++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+                            bytes[position++] = (byte)(0x80 | (utf32 & 0x3F));
+                            continue;
+                        }
                     }
-                    else
+                    // replace unpaired surrogate or out-of-order low surrogate
+                    // with substitution character
+                    bytes[position++] = (byte)0xEF;
+                    bytes[position++] = (byte)0xBF;
+                    bytes[position++] = (byte)0xBD;
+                }
+            }
+            //assert matches(source, offset, length, bytes, position);
+            return position;
+        }
+
+        /// <summary>
+        /// UTF16s to UTF8.
+        /// </summary>
+        /// <param name="chars">The chars.</param>
+        /// <param name="offset">The offset.</param>
+        /// <param name="length">The length.</param>
+        /// <param name="bytes">The bytes.</param>
+        /// <returns>System.Int32.</returns>
+        public static int Utf16ToUtf8(IEnumerable<char> chars, int offset, int length,
byte[] bytes)
+        {
+            int position = 0,
+                i = offset,
+                end = offset + length;
+
+
+            // cast or convert. 
+            var c = chars as char[];
+            var source = c ?? chars.ToArray();
+
+            if (bytes == null)
+                bytes = new byte[length];
+
+            while (i < end)
+            {
+                int code = source[i++];
+
+                if (code < 0x80)
+                {
+                    bytes[position++] = (byte)code;
+                }
+                else if (code < 0x800)
+                {
+                    bytes[position++] = (byte)(0xC0 | (code >> 6));
+                    bytes[position++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else if (code < 0xD800 || code > 0xDFFF)
+                {
+                    bytes[position++] = (byte)(0xE0 | (code >> 12));
+                    bytes[position++] = (byte)(0x80 | ((code >> 6) & 0x3F));
+                    bytes[position++] = (byte)(0x80 | (code & 0x3F));
+                }
+                else
+                {
+                    // surrogate pair
+                    // confirm valid high surrogate
+                    if (code < 0xDC00 && i < end)
                     {
-                        // surrogate pair
-                        // confirm valid high surrogate
-                        if (code < 0xDC00 && (i < end - 1))
+                        int utf32 = (int)source[i];
+                        // confirm valid low surrogate and write pair
+                        if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
                         {
-                            int utf32 = list[i + 1];
-                            // confirm valid low surrogate and write pair
-                            if (utf32 >= 0xDC00 && utf32 <= 0xDFFF)
-                            {
-                                utf32 = (code << 10) + utf32 + SURROGATE_OFFSET;
-                                i++;
-                                @out[currentOffset++] = unchecked((byte)(0xF0 | (utf32 >>
18)));
-                                @out[currentOffset++] = unchecked((byte)(0x80 | ((utf32 >>
12) & 0x3F)));
-                                @out[currentOffset++] = unchecked((byte)(0x80 | ((utf32 >>
6) & 0x3F)));
-                                @out[currentOffset++] = unchecked((byte)(0x80 | (utf32 &
0x3F)));
-                                continue;
-                            }
+                            utf32 = (code << 10) + utf32 + SURROGATE_OFFSET;
+                            i++;
+                            bytes[position++] = (byte)(0xF0 | (utf32 >> 18));
+                            bytes[position++] = (byte)(0x80 | ((utf32 >> 12) &
0x3F));
+                            bytes[position++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F));
+                            bytes[position++] = (byte)(0x80 | (utf32 & 0x3F));
+                            continue;
                         }
-                        // replace unpaired surrogate or out-of-order low surrogate
-                        // with substitution character
-                        @out[currentOffset++] = 0xEF;
-                        @out[currentOffset++] = 0xBF;
-                        @out[currentOffset++] = 0xBD;
                     }
+                    // replace unpaired surrogate or out-of-order low surrogate
+                    // with substitution character
+                    bytes[position++] = (byte)0xEF;
+                    bytes[position++] = (byte)0xBF;
+                    bytes[position++] = (byte)0xBD;
                 }
             }
-
-            
-            //assert matches(s, offset, length, out, otheroffset);
-            result.Length = currentOffset;
+            //assert matches(source, offset, length, bytes, position);
+            return position;
         }
 
 
+
+
         /// <summary>
         ///     Interprets the given byte array as UTF-8 and converts to UTF-16. The <seealso
cref="CharsRef" /> will be extended
         ///     if
@@ -130,58 +197,54 @@ namespace Lucene.Net.Util
         /// <remarks>
         ///     <para>
         ///         NOTE: Full characters are read, even if this reads past the length passed
(and
-        ///         can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed).
+        ///         can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed).
         ///         Explicit checks for valid UTF-8 are not performed.
         ///     </para>
         /// </remarks>
         // TODO: broken if chars.offset != 0
-        public static int Utf8ToUtf16(byte[] utf8, int offset, int length, CharsRef chars)
+        public static int Utf8ToUtf16(byte[] utf8Bytes, int offset, int length, char[] chars)
         {
-            int outOffset = chars.Offset = 0,
-                limit = offset + length;
-
-            var @out = chars.Chars = chars.Chars.Grow(length);
-
+            int charsOffset = 0,
+               limit = offset + length;
             while (offset < limit)
             {
-                var b = utf8[offset++] & 0xff;
+                int b = utf8Bytes[offset++] & 0xff;
                 if (b < 0xc0)
                 {
                     Debug.Assert(b < 0x80);
-                    @out[outOffset++] = (char) b;
+                    chars[charsOffset++] = (char)b;
                 }
                 else if (b < 0xe0)
                 {
-                    @out[outOffset++] = (char) (((b & 0x1f) << 6) + (utf8[offset++]
& 0x3f));
+                    chars[charsOffset++] = (char)(((b & 0x1f) << 6) + (utf8Bytes[offset++]
& 0x3f));
                 }
                 else if (b < 0xf0)
                 {
-                    @out[outOffset++] =
-                        (char) (((b & 0xf) << 12) + ((utf8[offset] & 0x3f)
<< 6) + (utf8[offset + 1] & 0x3f));
+                    chars[charsOffset++] = (char)(((b & 0xf) << 12) + ((utf8Bytes[offset]
& 0x3f) << 6) + (utf8Bytes[offset + 1] & 0x3f));
                     offset += 2;
                 }
                 else
                 {
-                    Debug.Assert(b < 0xf8, "b = 0x" + b.ToString("x"));
+                    Debug.Assert(b < 0xf8, "b = 0x" + BitConverter.ToString(new[] { (byte)b
}));
 
-                    var ch = ((b & 0x7) << 18) + ((utf8[offset] & 0x3f) <<
12) + ((utf8[offset + 1] & 0x3f) << 6) +
-                             (utf8[offset + 2] & 0x3f);
+
+                    var ch = ((b & 0x7) << 18) + ((utf8Bytes[offset] & 0x3f)
<< 12) + ((utf8Bytes[offset + 1] & 0x3f) << 6) + (utf8Bytes[offset + 2] &
0x3f);
                     offset += 3;
                     if (ch < UNI_MAX_BMP)
                     {
-                        @out[outOffset++] = (char) ch;
+                        chars[charsOffset++] = (char)ch;
                     }
                     else
                     {
-                        var chHalf = ch - 0x0010000;
-                        @out[outOffset++] = (char) ((chHalf >> 10) + 0xD800);
-                        @out[outOffset++] = (char) ((chHalf & HALF_MASK) + 0xDC00);
+                        int chHalf = ch - 0x0010000;
+                        chars[charsOffset++] = (char)((chHalf >> 10) + 0xD800);
+                        chars[charsOffset++] = (char)((chHalf & HALF_MASK) + 0xDC00);
                     }
                 }
             }
-            chars.Length = outOffset - chars.Offset;
+            return charsOffset;
+
 
-            return chars.Length;
         }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/test/Lucene.Net.Core.Tests/Support/TestNumberExtensionMethods.cs
----------------------------------------------------------------------
diff --git a/test/Lucene.Net.Core.Tests/Support/TestNumberExtensionMethods.cs b/test/Lucene.Net.Core.Tests/Support/TestNumberExtensionMethods.cs
index d931fb0..86341e5 100644
--- a/test/Lucene.Net.Core.Tests/Support/TestNumberExtensionMethods.cs
+++ b/test/Lucene.Net.Core.Tests/Support/TestNumberExtensionMethods.cs
@@ -1,8 +1,19 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 namespace Lucene.Net.Support
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/test/Lucene.Net.Core.Tests/Util/TestByteArrayRef.cs
----------------------------------------------------------------------
diff --git a/test/Lucene.Net.Core.Tests/Util/TestByteArrayRef.cs b/test/Lucene.Net.Core.Tests/Util/TestByteArrayRef.cs
index 88993dc..f7b6897 100644
--- a/test/Lucene.Net.Core.Tests/Util/TestByteArrayRef.cs
+++ b/test/Lucene.Net.Core.Tests/Util/TestByteArrayRef.cs
@@ -39,27 +39,29 @@ namespace Lucene.Net.Util
                     stringList.Clear();
                 }
                 int entries = this.AtLeast(500);
-                BytesRef spare = new BytesRef();
+                var spare = new BytesRefBuilder();
                 int initSize = list.Length;
                 for (int i = 0; i < entries; i++)
                 {
                     string randomRealisticUnicodeString = random.RandomRealisticUnicodeString();
                     spare.CopyChars(randomRealisticUnicodeString);
-                    Equal(i + initSize, list.Append(spare));
+                    Equal(i + initSize, list.Append(spare.ToBytesRef()));
                     stringList.Add(randomRealisticUnicodeString);
                 }
                 for (int i = 0; i < entries; i++)
                 {
-                    NotNull(list.Retrieve(spare, i));
-                    Equal(stringList[i], spare.Utf8ToString(), "entry " + i + " doesn't match");
+                    var bytesRef = list.Retrieve(spare, i);
+                    NotNull(bytesRef);
+                    Equal(stringList[i], bytesRef.Utf8ToString(), "entry " + i + " doesn't
match");
                 }
 
                 // check random
                 for (int i = 0; i < entries; i++)
                 {
                     int e = random.Next(entries);
-                    NotNull(list.Retrieve(spare, e));
-                    Equal(stringList[e], spare.Utf8ToString(), "entry " + i + " doesn't match");
+                    var bytesRef = list.Retrieve(spare, e);
+                    NotNull(bytesRef);
+                    Equal(stringList[e], bytesRef.Utf8ToString(), "entry " + i + " doesn't
match");
                 }
                 for (int i = 0; i < 2; i++)
                 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e7e1043/test/Lucene.Net.Core.Tests/Util/TestCharsRef.cs
----------------------------------------------------------------------
diff --git a/test/Lucene.Net.Core.Tests/Util/TestCharsRef.cs b/test/Lucene.Net.Core.Tests/Util/TestCharsRef.cs
index 02f5b34..35b789c 100644
--- a/test/Lucene.Net.Core.Tests/Util/TestCharsRef.cs
+++ b/test/Lucene.Net.Core.Tests/Util/TestCharsRef.cs
@@ -29,11 +29,11 @@ namespace Lucene.Net.Util
     public class TestCharsRef : LuceneTestCase
     {
         [Test]
-        public void testUTF16InUTF8Order()
+        public void TestUtf16InUtf8Order()
         {
-            int iterations = this.AtLeast(1000);
-            BytesRef[] utf8 = new BytesRef[iterations];
-            CharsRef[] utf16 = new CharsRef[iterations];
+            var iterations = this.AtLeast(1000);
+            var utf8 = new BytesRef[iterations];
+            var utf16 = new CharsRef[iterations];
 
             iterations.Times((i) =>
             {
@@ -42,12 +42,12 @@ namespace Lucene.Net.Util
                 utf16[i] = new CharsRef(s);
             });
 
-            Array.Sort(utf8);
+            Array.Sort(utf8, BytesRef.Utf8SortedAsUnicodeComparer);
 #pragma warning disable 0612, 0618
             Array.Sort(utf16, CharsRef.Utf16SortedAsUtf8Comparer);
 #pragma warning restore 0612, 0618
 
-            iterations.Times((i) => Equal(utf8[i].Utf8ToString(), utf16[i].ToString()));
+            iterations.Times(i => Equal(utf8[i].Utf8ToString(), utf16[i].ToString()));
         }
 
 


Mime
View raw message