lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mhern...@apache.org
Subject git commit: finishing implementing Broadword with tests. Ommiting BroadWord methods and constants that are not used throughout core or in external dependencies.
Date Sat, 09 Aug 2014 05:42:22 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/pcl 35e495187 -> 8e514a42b


finishing implementing Broadword with tests. Ommiting BroadWord methods and constants that
are not used throughout core or in external dependencies.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/8e514a42
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/8e514a42
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/8e514a42

Branch: refs/heads/pcl
Commit: 8e514a42b899e46fc9e28e46aa310ffae27842e9
Parents: 35e4951
Author: Michael Herndon <mherndon@michaelherndon.com>
Authored: Sat Aug 9 01:41:24 2014 -0400
Committer: Michael Herndon <mherndon@michaelherndon.com>
Committed: Sat Aug 9 01:41:24 2014 -0400

----------------------------------------------------------------------
 .../Support/NumberExtensionMethods.cs           |  29 ++-
 src/Lucene.Net.Core/Util/BroadWord.cs           | 123 ++++++++++---
 .../Lucene.Net.Core.Tests/Util/TestBroadWord.cs | 175 ++++++++++++++++++-
 .../Util/TestAttribute.cs                       |  48 +++++
 4 files changed, 343 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e514a42/src/Lucene.Net.Core/Support/NumberExtensionMethods.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/NumberExtensionMethods.cs b/src/Lucene.Net.Core/Support/NumberExtensionMethods.cs
index 7d610fb..c115bdc 100644
--- a/src/Lucene.Net.Core/Support/NumberExtensionMethods.cs
+++ b/src/Lucene.Net.Core/Support/NumberExtensionMethods.cs
@@ -1,11 +1,30 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
 
 namespace Lucene.Net.Support
 {
+    using System;
+
+    /// <summary>
+    /// Extension methods for numeric types to enable the same functionality that
+    /// currently exists in the standard java libraries. 
+    /// </summary>
     public static class NumberExtensionMethods
     {
         /// <summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e514a42/src/Lucene.Net.Core/Util/BroadWord.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/BroadWord.cs b/src/Lucene.Net.Core/Util/BroadWord.cs
index 89e0c72..de2553b 100644
--- a/src/Lucene.Net.Core/Util/BroadWord.cs
+++ b/src/Lucene.Net.Core/Util/BroadWord.cs
@@ -1,19 +1,19 @@
 /*
-    * Licensed to the Apache Software Foundation (ASF) under one or more
-    * contributor license agreements.  See the NOTICE file distributed with
-    * this work for additional information regarding copyright ownership.
-    * The ASF licenses this file to You under the Apache License, Version 2.0
-    * (the "License"); you may not use this file except in compliance with
-    * the License.  You may obtain a copy of the License at
-    *
-    *     http://www.apache.org/licenses/LICENSE-2.0
-    *
-    * Unless required by applicable law or agreed to in writing, software
-    * distributed under the License is distributed on an "AS IS" BASIS,
-    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    * See the License for the specific language governing permissions and
-    * limitations under the License.
-    */
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 
 
@@ -35,43 +35,47 @@ namespace Lucene.Net.Util
     ///         Experimental Algorithms: 7th International Workshop, WEA 2008 Provincetown,
MA, USA, May 30 - June 1, 2008 Proceedings (Lecture Notes in Computer ... Computer Science
and General Issues) 
     ///     </see>
     ///     </para>
+    ///     
     /// </remarks>
-    public sealed class BroadWord
+    public static class BroadWord
     {
+        // NOTES:
+        // DO NOT PORT: smalleru_8, notEquals0_8, smallerUpto15_16, 
+        // smalleru8 & notEquals0_8 are used for testing, smallerUpto15_16 is not used
at all.
 
-        private BroadWord() // no instance
-        {
-        }
+        // DO NOT MAKE CONSTS PUBLIC. ulongs are not CLSCompliant.
+        // The constants in the Java Lucene version are only used in the test case. 
+        // Thus, there is not a valid reason to make them public.  
 
         /// <summary>
         ///  L8  denotes the constant of 8-byte-counts or 8k.
         ///  _L denotes that the number is an long format. 
         /// </summary>
-        public const long L8_L = 0x0101010101010101L;
+        private const ulong L8_L = 0x0101010101010101L;
 
         /// <summary>
         ///  L9  denotes the constant of 8-byte-counts or 9k.
         ///  _L denotes that the number is an long format. 
         /// </summary>
-        public const long L9_L = unchecked((long)0x8040201008040201L);
+        private const ulong L9_L = 0x8040201008040201L;
 
         /// <summary>
         ///  L16  denotes the constant of 16-byte-counts or 16k.
         ///  _L denotes that the number is an long format. 
         /// </summary>
-        public const long L16_L = 0x0001000100010001L;
+        private const ulong L16_L = 0x0001000100010001L;
 
         /// <summary>
         /// H8 = L8 << (8-1) .
         ///  These contain the high bit of each group of k bits.
         ///  The suffix _L indicates the long implementation.
         /// </summary>
-        public static readonly long H8_L = L8_L << 7;
+        private static readonly ulong H8_L = L8_L << 7;
 
         /// H16 = L16 << (16-1) .
         ///  These contain the high bit of each group of k bits.
         ///  The suffix _L indicates the long implementation.
-        public static readonly long H16_L = L16_L << 15;
+        private static readonly ulong H16_L = L16_L << 15;
 
         /// <summary>
         /// Bit count of a long.
@@ -92,5 +96,76 @@ namespace Lucene.Net.Util
         }
 
        
+         /// <summary>
+        /// Select a 1-bit from a long. </summary>
+        /// <returns> The index of the right most 1 bit in x, or if no such bit exists,
72. </returns>
+        public static int Select(long value, int rank)
+        {
+            
+            ulong x = (ulong)value;
+            ulong s = x;
+            uint r = (uint)rank;
+            s =  s - ((s & 0xAAAAAAAAAAAAAAAAL) >> 1); // Step 0, pairwise bitsums
+
+            // Correct a small mistake in algorithm 2:
+            // Use s instead of x the second time in right shift 2, compare to Algorithm
1 in rank9 above.
+            s =  (s & 0x3333333333333333L) + ((s >> 2) & 0x3333333333333333L);
// Step 1, nibblewise bitsums
+
+            s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L;; // Step 2, bytewise
bitsums
+
+            // second argument of  << must be an int.  
+            var b = (ulong)((SmallerUpTo7_8(s, (r * L8_L)) >> 7) * L8_L) >> 53;
// & (~7L); // Step 3, side ways addition for byte number times 8
+
+            ulong l = r - (((s << 8) >> (int)b) & 0xFFL); // Step 4, byte
wise rank, subtract the rank with byte at b-8, or zero for b=0;
+            Debug.Assert(0L <= 1);
+            //assert l < 8 : l; //fails when bit r is not available.
+
+            // Select bit l from byte (x >>> b):
+            ulong spr = (((x >> (int)b) & 0xFFL) * L8_L) & L9_L; // spread
the 8 bits of the byte at b over the long at L9 positions
+
+            // long spr_bigger8_zero = smaller8(0L, spr); // inlined smaller8 with 0L argument:
+            // FIXME: replace by biggerequal8_one formula from article page 6, line 9. four
operators instead of five here.
+            ulong spr_bigger8_zero = ((H8_L - (spr & (~H8_L))) ^ (~spr)) & H8_L;
+            s =(spr_bigger8_zero >> 7) * L8_L; // Step 5, sideways byte add the 8 bits
towards the high byte
+
+            int res = (int) (b + (((SmallerUpTo7_8(s, (l * L8_L)) >> 7) * L8_L) >>
56)); // Step 6
+            return res; 
+           
+        }
+
+     
+        /// <summary>
+        /// A signed bytewise smaller &lt;<sub><small>8</small></sub>
operator, for operands 0L<= x, y <=0x7L.
+        /// this uses the following numbers of basic long operations: 1 or, 2 and, 2 xor,
1 minus, 1 not. </summary>
+        /// <returns> A long with bits set in the <seealso cref="#H8_L"/> positions
corresponding to each input signed byte pair that compares smaller. </returns>
+        private static ulong SmallerUpTo7_8(ulong x, ulong y)
+        {
+            // See section 4, page 5, line 14 of the Vigna article:
+            return (((x | H8_L) - (y & (~H8_L))) ^ x ^ ~y) & H8_L;
+        }
+
+     
+       
+
+
+
+        /// <summary>
+        /// Naive implementation of <seealso cref="#select(long,int)"/>, using <seealso
cref="Long#numberOfTrailingZeros"/> repetitively.
+        /// Works relatively fast for low ranks. </summary>
+        /// <returns> The index of the r-th 1 bit in x, or if no such bit exists, 72.
</returns>
+        public static int SelectNaive(long x, int r)
+        {
+            Debug.Assert(r >= 1);
+            int s = -1;
+            while ((x != 0L) && (r > 0))
+            {
+                int ntz = x.NumberOfTrailingZeros();
+                x = (long)((ulong)x >> (ntz + 1));
+                s += (ntz + 1);
+                r -= 1;
+            }
+            int res = (r > 0) ? 72 : s;
+            return res;
+        }
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e514a42/test/Lucene.Net.Core.Tests/Util/TestBroadWord.cs
----------------------------------------------------------------------
diff --git a/test/Lucene.Net.Core.Tests/Util/TestBroadWord.cs b/test/Lucene.Net.Core.Tests/Util/TestBroadWord.cs
index bca2806..db6bbe3 100644
--- a/test/Lucene.Net.Core.Tests/Util/TestBroadWord.cs
+++ b/test/Lucene.Net.Core.Tests/Util/TestBroadWord.cs
@@ -1,15 +1,30 @@
-
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 namespace Lucene.Net.Util
 {
     using System;
-
     using Lucene.Net.Support;
 
 
     public class TestBroadWord : LuceneTestCase
     {
-
+  
+        private const long delta = unchecked((long)0xFFFFFFFFFFFFFFFFL);
 
         [Test]
         public void TestRank()
@@ -22,11 +37,165 @@ namespace Lucene.Net.Util
             AssertRank(unchecked((long)0x8000000000000001L));
         }
 
+        [Test]
+        public void TestSelectFromZero()
+        {
+            AssertSelect(72, 0L, 1);
+        }
+
+        [Test]
+        public void TestSelectSingleBit()
+        {
+            for(var i = 0; i < 64; i++)
+            {
+                AssertSelect(i, (1L << i), 1);
+            }
+        }
 
+        [Test]
+        public void TestSelectTwoBits()
+        {
+            for (int i = 0; i < 64; i++)
+            {
+                for (int j = i + 1; j < 64; j++)
+                {
+                    long x = (1L << i) | (1L << j);
+                    //System.out.println(getName() + " i: " + i + " j: " + j);
+                    AssertSelect(i, x, 1);
+                    AssertSelect(j, x, 2);
+                    AssertSelect(72, x, 3);
+                }
+            }
+        }
+
+        [Test]
+        public void TestSelectThreeBits()
+        {
+            for (int i = 0; i < 64; i++)
+            {
+                for (int j = i + 1; j < 64; j++)
+                {
+                    for (int k = j + 1; k < 64; k++)
+                    {
+                        long x = (1L << i) | (1L << j) | (1L << k);
+                        AssertSelect(i, x, 1);
+                        AssertSelect(j, x, 2);
+                        AssertSelect(k, x, 3);
+                        AssertSelect(72, x, 4);
+                    }
+                }
+            }
+        }
+
+        [Test]
+        public void TestSelectAllBits()
+        {
+            for (int i = 0; i < 64; i++)
+            {
+                var l = delta;
+                AssertSelect(i, l, i + 1);
+            }
+        }
+
+        [Test]
+        [Perf] // TODO: implement a real performance test.
+        public void TestPerfSelectAllBitsBroad()
+        {
+            for (int j = 0; j < 100000; j++)
+            { // 1000000 for real perf test
+                for (int i = 0; i < 64; i++)
+                {
+                    var l = delta;
+                    Equal(i, BroadWord.Select(l, i + 1));
+                }
+            }
+        }
+
+        [Test][Perf]
+        public void TestPerfSelectAllBitsNaive()
+        {
+            for (int j = 0; j < 10000; j++)
+            { // real perftest: 1000000
+                for (int i = 0; i < 64; i++)
+                {
+                    var l = delta;
+                    Equal(i, BroadWord.SelectNaive(l, i + 1));
+                }
+            }
+        }
+
+        private void AssertSelect(int expected, long value, int rank)
+        { 
+            var actual = BroadWord.Select(value, rank);
+            Ok(expected == actual, "Expected {0} does not equal {3} = Select({1}, {2})",
expected, value, rank, actual);
+        }
 
         private void AssertRank(long value)
         {
             Equal(BitUtil.BitCount(value), BroadWord.BitCount(value));
         }
+
+        #region these methods exist, but are not used in the Lucene Code Base
+
+        /// <summary>
+        ///  L8  denotes the constant of 8-byte-counts or 8k.
+        ///  _L denotes that the number is an long format. 
+        /// </summary>
+        private const ulong L8_L = 0x0101010101010101L;
+
+        /// <summary>
+        ///  L9  denotes the constant of 8-byte-counts or 9k.
+        ///  _L denotes that the number is an long format. 
+        /// </summary>
+        private const ulong L9_L = 0x8040201008040201L;
+
+        /// <summary>
+        ///  L16  denotes the constant of 16-byte-counts or 16k.
+        ///  _L denotes that the number is an long format. 
+        /// </summary>
+        private const ulong L16_L = 0x0001000100010001L;
+
+        /// <summary>
+        /// H8 = L8 << (8-1) .
+        ///  These contain the high bit of each group of k bits.
+        ///  The suffix _L indicates the long implementation.
+        /// </summary>
+        private static readonly ulong H8_L = L8_L << 7;
+
+        /// H16 = L16 << (16-1) .
+        ///  These contain the high bit of each group of k bits.
+        ///  The suffix _L indicates the long implementation.
+        private static readonly ulong H16_L = L16_L << 15;
+
+
+        /// <summary>
+        /// An unsigned bytewise smaller &lt;<sub><small>8</small></sub>
operator.
+        /// this uses the following numbers of basic long operations: 3 or, 2 and, 2 xor,
1 minus, 1 not. </summary>
+        /// <returns> A long with bits set in the <seealso cref="#H8_L"/> positions
corresponding to each input unsigned byte pair that compares smaller. </returns>
+        private static ulong Smalleru_8(ulong x, ulong y)
+        {
+            // See section 4, 8th line from the bottom of the page 5, of the Vigna article:
+            return ((((x | H8_L) - (y & ~H8_L)) | x ^ y) ^ (x | ~y)) & H8_L;
+        }
+
+        /// <summary>
+        /// An unsigned bytewise not equals 0 operator.
+        /// this uses the following numbers of basic long operations: 2 or, 1 and, 1 minus.
</summary>
+        /// <returns> A long with bits set in the <seealso cref="#H8_L"/> positions
corresponding to each unsigned byte that does not equal 0. </returns>
+        private static ulong NotEquals0_8(ulong x)
+        {
+            // See section 4, line 6-8 on page 6, of the Vigna article:
+            return (((x | H8_L) - L8_L) | x) & H8_L;
+        }
+
+        /// <summary>
+        /// A bytewise smaller &lt;<sub><small>16</small></sub>
operator.
+        /// this uses the following numbers of basic long operations: 1 or, 2 and, 2 xor,
1 minus, 1 not. </summary>
+        /// <returns> A long with bits set in the <seealso cref="#H16_L"/> positions
corresponding to each input signed short pair that compares smaller. </returns>
+        private static ulong SmallerUpto15_16(ulong x, ulong y)
+        {
+            return (((x | H16_L) - (y & (~H16_L))) ^ x ^ ~y) & H16_L;
+        }
+        #endregion
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8e514a42/test/Lucene.Net.TestFramework/Util/TestAttribute.cs
----------------------------------------------------------------------
diff --git a/test/Lucene.Net.TestFramework/Util/TestAttribute.cs b/test/Lucene.Net.TestFramework/Util/TestAttribute.cs
index 161b075..0d9e0c1 100644
--- a/test/Lucene.Net.TestFramework/Util/TestAttribute.cs
+++ b/test/Lucene.Net.TestFramework/Util/TestAttribute.cs
@@ -19,8 +19,15 @@
  *
  */
 
+
 namespace Lucene.Net.Util
 {
+    using System;
+    using System.Collections.Generic;
+    using System.Linq;
+    using Xunit.Abstractions;
+    using Xunit.Sdk;
+
     /// <summary>
     /// Summary description for TestAttribute
     /// </summary>
@@ -40,4 +47,45 @@ namespace Lucene.Net.Util
 	    {
 	    }
     }
+
+    [AttributeUsage(AttributeTargets.Method, AllowMultiple = true)]
+    public class PerfAttribute : CategoryAttribute
+    {
+
+        public PerfAttribute():base("Performance")
+        {
+
+        }
+    }
+
+    [TraitDiscoverer("CategoryDiscoverer", "TraitExtensibility")]
+    [AttributeUsage(AttributeTargets.Method, AllowMultiple = true)]
+    public class CategoryAttribute : System.Attribute, Xunit.Sdk.ITraitAttribute
+    {
+        public string Name { get; private set; }
+
+        public CategoryAttribute(string category)
+        {
+            this.Name = category;
+        }
+    }
+
+
+    /// <summary>
+    /// This class discovers all of the tests and test classes that have
+    /// applied the Category attribute
+    /// </summary>
+    public class CategoryDiscoverer : ITraitDiscoverer
+    {
+        /// <summary>
+        /// Gets the trait values from the Category attribute.
+        /// </summary>
+        /// <param name="traitAttribute">The trait attribute containing the trait values.</param>
+        /// <returns>The trait values.</returns>
+        public IEnumerable<KeyValuePair<string, string>> GetTraits(IAttributeInfo
traitAttribute)
+        {
+            var ctorArgs = traitAttribute.GetConstructorArguments().ToList();
+            yield return new KeyValuePair<string, string>("Category", ctorArgs[0].ToString());
+        }
+    }
 }
\ No newline at end of file


Mime
View raw message