tuscany-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jsdelf...@apache.org
Subject svn commit: r933115 - in /tuscany/sca-cpp/trunk/kernel: Makefile.am hash-test hash-test.cpp hash.hpp
Date Mon, 12 Apr 2010 06:57:32 GMT
Author: jsdelfino
Date: Mon Apr 12 06:57:32 2010
New Revision: 933115

URL: http://svn.apache.org/viewvc?rev=933115&view=rev
Log:
Add various utility hash functions, useful to hash and retrieve keys from a datastore.

Added:
    tuscany/sca-cpp/trunk/kernel/hash-test   (with props)
    tuscany/sca-cpp/trunk/kernel/hash-test.cpp
    tuscany/sca-cpp/trunk/kernel/hash.hpp
Modified:
    tuscany/sca-cpp/trunk/kernel/Makefile.am

Modified: tuscany/sca-cpp/trunk/kernel/Makefile.am
URL: http://svn.apache.org/viewvc/tuscany/sca-cpp/trunk/kernel/Makefile.am?rev=933115&r1=933114&r2=933115&view=diff
==============================================================================
--- tuscany/sca-cpp/trunk/kernel/Makefile.am (original)
+++ tuscany/sca-cpp/trunk/kernel/Makefile.am Mon Apr 12 06:57:32 2010
@@ -38,6 +38,8 @@ xml_test_LDFLAGS = -lxml2
 xsd_test_SOURCES = xsd-test.cpp
 xsd_test_LDFLAGS = -lxml2
 
-noinst_PROGRAMS = string-test kernel-test mem-test parallel-test xml-test xsd-test
-TESTS = string-test kernel-test mem-test parallel-test xml-test
+hash_test_SOURCES = hash-test.cpp
+
+noinst_PROGRAMS = string-test kernel-test hash-test mem-test parallel-test xml-test xsd-test
+TESTS = string-test kernel-test hash-test mem-test parallel-test xml-test
 

Added: tuscany/sca-cpp/trunk/kernel/hash-test
URL: http://svn.apache.org/viewvc/tuscany/sca-cpp/trunk/kernel/hash-test?rev=933115&view=auto
==============================================================================
Files tuscany/sca-cpp/trunk/kernel/hash-test (added) and tuscany/sca-cpp/trunk/kernel/hash-test
Mon Apr 12 06:57:32 2010 differ

Propchange: tuscany/sca-cpp/trunk/kernel/hash-test
------------------------------------------------------------------------------
    svn:executable = *

Added: tuscany/sca-cpp/trunk/kernel/hash-test.cpp
URL: http://svn.apache.org/viewvc/tuscany/sca-cpp/trunk/kernel/hash-test.cpp?rev=933115&view=auto
==============================================================================
--- tuscany/sca-cpp/trunk/kernel/hash-test.cpp (added)
+++ tuscany/sca-cpp/trunk/kernel/hash-test.cpp Mon Apr 12 06:57:32 2010
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+/**
+ * Test hash functions.
+ */
+
+#include <assert.h>
+#include "string.hpp"
+#include "hash.hpp"
+#include "perf.hpp"
+
+namespace tuscany {
+
+bool testCrc32hash() {
+    const string key("This is a test key");
+    unsigned int h = crc32hash(c_str(key), length(key));
+    assert(h != 0);
+    return true;
+}
+
+bool testTimes33hash() {
+    const string key("This is a test key");
+    unsigned int h = times33hash(c_str(key), length(key));
+    assert(h != 0);
+    return true;
+}
+
+bool testMurmurhash() {
+    const string key("This is a test key");
+    unsigned int h = murmurhash(c_str(key), length(key));
+    assert(h != 0);
+    return true;
+}
+
+bool testPortablemurmurhash() {
+    const string key("This is a test key");
+    unsigned int h = portablemurmurhash(c_str(key), length(key));
+    assert(h != 0);
+    return true;
+}
+
+struct crc32hashTest {
+    const string key;
+    crc32hashTest(const string& key) : key(key) {
+    }
+    bool operator()() const {
+        unsigned int h = crc32hash(c_str(key), length(key));
+        assert(h != 0);
+        return true;
+    }
+};
+
+struct times33hashTest {
+    const string key;
+    times33hashTest(const string& key) : key(key) {
+    }
+    bool operator()() const {
+        unsigned int h = times33hash(c_str(key), length(key));
+        assert(h != 0);
+        return true;
+    }
+};
+
+struct murmurhashTest {
+    const string key;
+    murmurhashTest(const string& key) : key(key) {
+    }
+    bool operator()() const {
+        unsigned int h = murmurhash(c_str(key), length(key));
+        assert(h != 0);
+        return true;
+    }
+};
+
+struct portablemurmurhashTest {
+    const string key;
+    portablemurmurhashTest(const string& key) : key(key) {
+    }
+    bool operator()() const {
+        unsigned int h = portablemurmurhash(c_str(key), length(key));
+        assert(h != 0);
+        return true;
+    }
+};
+
+bool testHashPerf() {
+    const string key("This is a test key");
+    const int count = 100000;
+
+    const lambda<bool()> crc32 = crc32hashTest(key);
+    cout << "crc32hash test " << time(crc32, 5, count) << " ms" <<
endl;
+    const lambda<bool()> times33 = times33hashTest(key);
+    cout << "times33hash test " << time(times33, 5, count) << " ms" <<
endl;
+    const lambda<bool()> murmur = murmurhashTest(key);
+    cout << "murmurhash test " << time(murmur, 5, count) << " ms" <<
endl;
+    const lambda<bool()> portablemurmur = portablemurmurhashTest(key);
+    cout << "portable murmurhash test " << time(portablemurmur, 5, count) <<
" ms" << endl;
+
+    return true;
+}
+
+}
+
+int main() {
+    tuscany::cout << "Testing..." << tuscany::endl;
+
+    tuscany::testCrc32hash();
+    tuscany::testTimes33hash();
+    tuscany::testMurmurhash();
+    tuscany::testPortablemurmurhash();
+    tuscany::testHashPerf();
+
+    tuscany::cout << "OK" << tuscany::endl;
+
+    return 0;
+}

Added: tuscany/sca-cpp/trunk/kernel/hash.hpp
URL: http://svn.apache.org/viewvc/tuscany/sca-cpp/trunk/kernel/hash.hpp?rev=933115&view=auto
==============================================================================
--- tuscany/sca-cpp/trunk/kernel/hash.hpp (added)
+++ tuscany/sca-cpp/trunk/kernel/hash.hpp Mon Apr 12 06:57:32 2010
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+#ifndef tuscany_hash_hpp
+#define tuscany_hash_hpp
+
+/**
+ * Fast hash functions.
+ */
+
+#include <apr_hash.h>
+#include <apr_memcache.h>
+
+namespace tuscany
+{
+
+/**
+ * Apache apr-util CRC32 hash function.
+ *
+ * See srclib/apr-util/memcache/apr_memcache.c from the Apache HTTPD
+ * source tree. Reproducing the comments from apr_memcache.c here:
+ *
+ * The crc32 functions and data were originally written by Spencer
+ * Garrett <srg@quick.com> and were gleaned from the PostgreSQL source
+ * tree at contrib/ltree/crc32.[ch] and from FreeBSD at
+ * src/usr.bin/cksum/crc32.c.
+ */
+const unsigned int crc32hash(const char* data, const unsigned int len) {
+    return apr_memcache_hash_default(NULL, data, len);
+}
+
+/**
+ * Apache apr tables default hash function.
+ *
+ * See srclib/apr/tables/apr_hash.c from the Apache HTTPD source tree.
+ * Reproducing the comments from apr_hash.c here:
+ *
+ * This is the popular `times 33' hash algorithm which is used by
+ * perl and also appears in Berkeley DB. This is one of the best
+ * known hash functions for strings because it is both computed
+ * very fast and distributes very well.
+ *
+ * The originator may be Dan Bernstein but the code in Berkeley DB
+ * cites Chris Torek as the source. The best citation I have found
+ * is "Chris Torek, Hash function for text in C, Usenet message
+ * <27038@mimsy.umd.edu> in comp.lang.c , October, 1990." in Rich
+ * Salz's USENIX 1992 paper about INN which can be found at
+ * <http://citeseer.nj.nec.com/salz92internetnews.html>.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as I did while writing a low-level
+ * data structure library some time ago) one detects that even
+ * numbers are not useable at all. The remaining 128 odd numbers
+ * (except for the number 1) work more or less all equally well.
+ * They all distribute in an acceptable way and this way fill a hash
+ * table with an average percent of approx. 86%.
+ *
+ * If one compares the chi^2 values of the variants (see
+ * Bob Jenkins ``Hashing Frequently Asked Questions'' at
+ * http://burtleburtle.net/bob/hash/hashfaq.html for a description
+ * of chi^2), the number 33 not even has the best value. But the
+ * number 33 and a few other equally good numbers like 17, 31, 63,
+ * 127 and 129 have nevertheless a great advantage to the remaining
+ * numbers in the large set of possible multipliers: their multiply
+ * operation can be replaced by a faster operation based on just one
+ * shift plus either a single addition or subtraction operation. And
+ * because a hash function has to both distribute good _and_ has to
+ * be very fast to compute, those few numbers should be preferred.
+ *
+ *                  -- Ralf S. Engelschall <rse@engelschall.com>
+ */
+const unsigned int times33hash(const char* data, const unsigned int len) {
+    apr_ssize_t l = len;
+    return apr_hashfunc_default(data, &l);
+}
+
+/**
+ * A very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See http://murmurhash.googlepages.com/ for more details.
+ *
+ * Original code by Austin Appleby, released to the public domain and under
+ * the MIT license.
+ *
+ * Compiles down to ~52 instructions on x86.
+ * Passes chi^2 tests for practically all keysets & bucket sizes.
+ * Excellent avalanche behavior. Maximum bias is under 0.5%.
+ * Passes Bob Jenkin's frog.c torture-test. No collisions possible for 4 byte
+ * keys, no small 1 to 7 bit differentials.
+ */
+const unsigned int murmurhash(const char* key, const unsigned int klen) {
+    unsigned int len = klen;
+    const unsigned int seed = 0;
+
+    // 'm' and 'r' are mixing constants generated offline.
+    // They're not really 'magic', they just happen to work well.
+    const unsigned int m = 0x5bd1e995;
+    const int r = 24;
+
+    // Initialize the hash to a 'random' value
+    unsigned int h = seed ^ len;
+
+    // Mix 4 bytes at a time into the hash
+    const unsigned char* data = (const unsigned char*)key;
+    while(len >= 4) {
+        unsigned int k = *(unsigned int*)data;
+        k *= m; 
+        k ^= k >> r; 
+        k *= m; 
+        h *= m; 
+        h ^= k;
+        data += 4;
+        len -= 4;
+    }
+    
+    // Handle the last few bytes of the input array
+    switch(len) {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+            h *= m;
+    };
+
+    // Do a few final mixes of the hash to ensure the last few
+    // bytes are well-incorporated.
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+} 
+
+/**
+ * An endian and alignment neutral, but half the speed, version of
+ * the murmur hash.
+ */
+const unsigned int portablemurmurhash(const char* key, const unsigned int klen) {
+    unsigned int len = klen;
+    const unsigned int seed = 0;
+
+    // 'm' and 'r' are mixing constants generated offline.
+    // They're not really 'magic', they just happen to work well.
+    const unsigned int m = 0x5bd1e995;
+    const int r = 24;
+
+    // Initialize the hash to a 'random' value
+    unsigned int h = seed ^ len;
+
+    // Mix 4 bytes at a time into the hash
+    const unsigned char* data = (const unsigned char *)key;
+    while(len >= 4) {
+        unsigned int k;
+        k  = data[0];
+        k |= data[1] << 8;
+        k |= data[2] << 16;
+        k |= data[3] << 24;
+        k *= m; 
+        k ^= k >> r; 
+        k *= m;
+        h *= m;
+        h ^= k;
+        data += 4;
+        len -= 4;
+    }
+    
+    // Handle the last few bytes of the input array
+    switch(len) {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+            h *= m;
+    };
+
+    // Do a few final mixes of the hash to ensure the last few
+    // bytes are well-incorporated.
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+} 
+
+const unsigned int hashselect(const unsigned int hash, const unsigned int max) {
+    return hash % max;
+}
+
+}
+#endif /* tuscany_hash_hpp */



Mime
View raw message