lucy-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nwelln...@apache.org
Subject [01/14] lucy-clownfish git commit: Implement pointer-to-pointer hash table
Date Sat, 19 Mar 2016 17:27:11 GMT
Repository: lucy-clownfish
Updated Branches:
  refs/heads/master f5139fff5 -> fa009460e


Implement pointer-to-pointer hash table


Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/6c8dbb36
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/6c8dbb36
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/6c8dbb36

Branch: refs/heads/master
Commit: 6c8dbb36de4171950264c54ea813f1bba275bc61
Parents: 20eef8f
Author: Nick Wellnhofer <wellnhofer@aevum.de>
Authored: Tue Mar 8 11:54:30 2016 +0100
Committer: Nick Wellnhofer <wellnhofer@aevum.de>
Committed: Thu Mar 10 14:28:49 2016 +0100

----------------------------------------------------------------------
 runtime/core/Clownfish/PtrHash.c            | 226 +++++++++++++++++++++++
 runtime/core/Clownfish/PtrHash.h            |  53 ++++++
 runtime/core/Clownfish/Test.c               |   2 +
 runtime/core/Clownfish/Test/TestPtrHash.c   | 108 +++++++++++
 runtime/core/Clownfish/Test/TestPtrHash.cfh |  29 +++
 runtime/perl/t/core/050-ptrhash.t           |  23 +++
 6 files changed, 441 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/PtrHash.c
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/PtrHash.c b/runtime/core/Clownfish/PtrHash.c
new file mode 100644
index 0000000..9d42622
--- /dev/null
+++ b/runtime/core/Clownfish/PtrHash.c
@@ -0,0 +1,226 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <limits.h>
+#include <stddef.h>
+
+#include "charmony.h"
+
+#define CFISH_USE_SHORT_NAMES
+#include "Clownfish/PtrHash.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Util/Memory.h"
+
+#undef PTRHASH_STATS
+
+#ifdef PTRHASH_STATS
+  #include <stdio.h>
+#endif
+
+#if CHAR_BIT * CHY_SIZEOF_PTR <= 32
+  #define PTR_BITS 32
+#else
+  #define PTR_BITS 64
+#endif
+
+#define MAX_FILL_FACTOR 0.625
+
+typedef struct PtrHashEntry {
+    void *key;
+    void *value;
+} PtrHashEntry;
+
+struct PtrHash {
+    size_t num_items;
+    size_t cap;
+    int    shift;
+    PtrHashEntry *entries;
+    PtrHashEntry *end;
+};
+
+static CFISH_INLINE size_t
+SI_find_index(void *key, int shift);
+
+static CFISH_INLINE size_t
+SI_get_cap(size_t size);
+
+static void
+S_resize(PtrHash *self);
+
+PtrHash*
+PtrHash_new(size_t min_cap) {
+    PtrHash *self   = (PtrHash*)MALLOCATE(sizeof(PtrHash));
+
+    // Use minimum size of 8.
+    // size == 2 ** (PTR_BITS - shift)
+    size_t size  = 8;
+    int    shift = PTR_BITS - 3;
+    size_t cap   = SI_get_cap(size);
+
+    while (cap < min_cap) {
+        if (size > SIZE_MAX / 2 || shift == 0) {
+            THROW(ERR, "PtrHash size overflow");
+        }
+        size  *= 2;
+        shift -= 1;
+        cap    = SI_get_cap(size);
+    }
+
+    self->num_items = 0;
+    self->cap       = cap;
+    self->shift     = shift;
+    self->entries   = (PtrHashEntry*)CALLOCATE(size, sizeof(PtrHashEntry));
+    self->end       = &self->entries[size];
+
+    return self;
+}
+
+void
+PtrHash_Destroy(PtrHash *self) {
+    FREEMEM(self->entries);
+    FREEMEM(self);
+}
+
+// Multiplicative hash function using the prime nearest to the golden ratio.
+// Reasonably good and very fast.
+static CFISH_INLINE size_t
+SI_find_index(void *key, int shift) {
+#if PTR_BITS == 32
+    uint32_t value = (uint32_t)key * 0x9E3779B1u;
+#else
+    uint64_t value = (uint64_t)key * UINT64_C(0x9E3779B97F4A7C55);
+#endif
+    return (size_t)(value >> shift);
+}
+
+static CFISH_INLINE size_t
+SI_get_cap(size_t size) {
+    return (size_t)(MAX_FILL_FACTOR * size);
+}
+
+void
+PtrHash_Store(PtrHash *self, void *key, void *value) {
+    if (key == NULL) {
+        THROW(ERR, "Can't store NULL key");
+    }
+
+    size_t index = SI_find_index(key, self->shift);
+    PtrHashEntry *entry = &self->entries[index];
+
+    while (entry->key != NULL) {
+        if (entry->key == key) {
+            entry->value = value;
+            return;
+        }
+
+        entry += 1;
+        if (entry >= self->end) { entry = self->entries; }
+    }
+
+    if (self->num_items >= self->cap) {
+        S_resize(self);
+        index = SI_find_index(key, self->shift);
+        entry = &self->entries[index];
+
+        while (entry->key != NULL) {
+            entry += 1;
+            if (entry >= self->end) { entry = self->entries; }
+        }
+    }
+
+    entry->key   = key;
+    entry->value = value;
+    self->num_items += 1;
+}
+
+void*
+PtrHash_Fetch(PtrHash *self, void *key) {
+    if (key == NULL) {
+        THROW(ERR, "Can't fetch NULL key");
+    }
+
+    size_t index = SI_find_index(key, self->shift);
+    PtrHashEntry *entry = &self->entries[index];
+
+    while (entry->key != NULL) {
+        if (entry->key == key) {
+            return entry->value;
+        }
+
+        entry += 1;
+        if (entry >= self->end) { entry = self->entries; }
+    }
+
+    return NULL;
+}
+
+static void
+S_resize(PtrHash *self) {
+    size_t old_size = self->end - self->entries;
+    if (old_size > SIZE_MAX / 2 || self->shift == 0) {
+        THROW(ERR, "PtrHash size overflow");
+    }
+    size_t size  = old_size * 2;
+    int    shift = self->shift - 1;
+
+    PtrHashEntry *entries
+        = (PtrHashEntry*)CALLOCATE(size, sizeof(PtrHashEntry));
+    PtrHashEntry *end = &entries[size];
+
+#ifdef PTRHASH_STATS
+    size_t extra_probes = 0;
+#endif
+
+    for (PtrHashEntry *old_entry = self->entries;
+         old_entry < self->end;
+         ++old_entry
+        ) {
+        void *key = old_entry->key;
+        if (key == NULL) { continue; }
+
+#ifdef PTRHASH_STATS
+        size_t i         = old_entry - self->entries;
+        size_t old_index = SI_find_index(key, self->shift);
+        extra_probes += (i - old_index) & (old_size - 1);
+#endif
+
+        size_t index = SI_find_index(key, shift);
+        PtrHashEntry *entry = &entries[index];
+
+        while (entry->key != NULL) {
+            entry += 1;
+            if (entry >= end) { entry = entries; }
+        }
+
+        entry->key   = key;
+        entry->value = old_entry->value;
+    }
+
+#ifdef PTRHASH_STATS
+    fprintf(stderr, "size: %u, avg probes: %.2f\n",
+            (unsigned)old_size,
+            (double)extra_probes / self->num_items + 1.0);
+#endif
+
+    FREEMEM(self->entries);
+
+    self->cap     = SI_get_cap(size);
+    self->shift   = shift;
+    self->entries = entries;
+    self->end     = end;
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/PtrHash.h
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/PtrHash.h b/runtime/core/Clownfish/PtrHash.h
new file mode 100644
index 0000000..4a88bc4
--- /dev/null
+++ b/runtime/core/Clownfish/PtrHash.h
@@ -0,0 +1,53 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef H_CLOWNFISH_PTRHASH
+#define H_CLOWNFISH_PTRHASH 1
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct cfish_PtrHash cfish_PtrHash;
+
+cfish_PtrHash*
+cfish_PtrHash_new(size_t min_cap);
+
+void
+CFISH_PtrHash_Destroy(cfish_PtrHash *self);
+
+void
+CFISH_PtrHash_Store(cfish_PtrHash *self, void *key, void *value);
+
+void*
+CFISH_PtrHash_Fetch(cfish_PtrHash *self, void *key);
+
+#ifdef CFISH_USE_SHORT_NAMES
+  #define PtrHash           cfish_PtrHash
+  #define PtrHash_new       cfish_PtrHash_new
+  #define PtrHash_Destroy   CFISH_PtrHash_Destroy
+  #define PtrHash_Store     CFISH_PtrHash_Store
+  #define PtrHash_Fetch     CFISH_PtrHash_Fetch
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* H_CLOWNFISH_PTRHASH */
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/Test.c
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/Test.c b/runtime/core/Clownfish/Test.c
index 9f03448..4453853 100644
--- a/runtime/core/Clownfish/Test.c
+++ b/runtime/core/Clownfish/Test.c
@@ -32,6 +32,7 @@
 #include "Clownfish/Test/TestLockFreeRegistry.h"
 #include "Clownfish/Test/TestNum.h"
 #include "Clownfish/Test/TestObj.h"
+#include "Clownfish/Test/TestPtrHash.h"
 #include "Clownfish/Test/TestVector.h"
 #include "Clownfish/Test/Util/TestAtomic.h"
 #include "Clownfish/Test/Util/TestMemory.h"
@@ -55,6 +56,7 @@ Test_create_test_suite() {
     TestSuite_Add_Batch(suite, (TestBatch*)TestAtomic_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestLFReg_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestMemory_new());
+    TestSuite_Add_Batch(suite, (TestBatch*)TestPtrHash_new());
 
     return suite;
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/Test/TestPtrHash.c
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/Test/TestPtrHash.c b/runtime/core/Clownfish/Test/TestPtrHash.c
new file mode 100644
index 0000000..3bf7003
--- /dev/null
+++ b/runtime/core/Clownfish/Test/TestPtrHash.c
@@ -0,0 +1,108 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define CFISH_USE_SHORT_NAMES
+#define TESTCFISH_USE_SHORT_NAMES
+
+#include <stdlib.h>
+#include <time.h>
+
+#include "Clownfish/Test/TestPtrHash.h"
+#include "Clownfish/Class.h"
+#include "Clownfish/PtrHash.h"
+#include "Clownfish/TestHarness/TestBatchRunner.h"
+#include "Clownfish/TestHarness/TestUtils.h"
+#include "Clownfish/Util/Memory.h"
+
+TestPtrHash*
+TestPtrHash_new() {
+    return (TestPtrHash*)Class_Make_Obj(TESTPTRHASH);
+}
+
+static void
+test_Store_and_Fetch(TestBatchRunner *runner) {
+    PtrHash *hash = PtrHash_new(100);
+    char dummy[100];
+
+    for (int i = 0; i < 100; i++) {
+        void *key = &dummy[i];
+        PtrHash_Store(hash, key, key);
+    }
+
+    bool all_equal = true;
+    for (int i = 0; i < 100; i++) {
+        void *key = &dummy[i];
+        void *value = PtrHash_Fetch(hash, key);
+        if (value != key) {
+            all_equal = false;
+            break;
+        }
+    }
+    TEST_TRUE(runner, all_equal, "basic Store and Fetch");
+
+    TEST_TRUE(runner, PtrHash_Fetch(hash, &dummy[100]) == NULL,
+              "Fetch against non-existent key returns NULL");
+
+    PtrHash_Store(hash, &dummy[50], dummy);
+    TEST_TRUE(runner, PtrHash_Fetch(hash, &dummy[50]) == dummy,
+              "Store replaces existing value");
+
+    PtrHash_Destroy(hash);
+}
+
+static void
+test_stress(TestBatchRunner *runner) {
+    PtrHash *hash = PtrHash_new(0); // trigger multiple rebuilds.
+    size_t num_elems = 200000;
+    void **keys = (void**)MALLOCATE(num_elems * sizeof(void*));
+
+    for (size_t i = 0; i < num_elems; i++) {
+        size_t index = (size_t)(TestUtils_random_u64() % num_elems);
+        void *key = &keys[index];
+        PtrHash_Store(hash, key, key);
+        keys[i] = key;
+    }
+
+    // Overwrite for good measure.
+    for (size_t i = 0; i < num_elems; i++) {
+        void *key = keys[i];
+        PtrHash_Store(hash, key, key);
+    }
+
+    bool all_equal = true;
+    for (size_t i = 0; i < num_elems; i++) {
+        void *key = keys[i];
+        void *got = PtrHash_Fetch(hash, key);
+        if (got != key) {
+            all_equal = false;
+            break;
+        }
+    }
+    TEST_TRUE(runner, all_equal, "stress test");
+
+    FREEMEM(keys);
+    PtrHash_Destroy(hash);
+}
+
+void
+TestPtrHash_Run_IMP(TestPtrHash *self, TestBatchRunner *runner) {
+    TestBatchRunner_Plan(runner, (TestBatch*)self, 4);
+    srand((unsigned int)time(NULL));
+    test_Store_and_Fetch(runner);
+    test_stress(runner);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/core/Clownfish/Test/TestPtrHash.cfh
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/Test/TestPtrHash.cfh b/runtime/core/Clownfish/Test/TestPtrHash.cfh
new file mode 100644
index 0000000..83589cb
--- /dev/null
+++ b/runtime/core/Clownfish/Test/TestPtrHash.cfh
@@ -0,0 +1,29 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestClownfish;
+
+class Clownfish::Test::TestPtrHash
+    inherits Clownfish::TestHarness::TestBatch {
+
+    inert incremented TestPtrHash*
+    new();
+
+    void
+    Run(TestPtrHash *self, TestBatchRunner *runner);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/6c8dbb36/runtime/perl/t/core/050-ptrhash.t
----------------------------------------------------------------------
diff --git a/runtime/perl/t/core/050-ptrhash.t b/runtime/perl/t/core/050-ptrhash.t
new file mode 100644
index 0000000..5f03627
--- /dev/null
+++ b/runtime/perl/t/core/050-ptrhash.t
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Clownfish::Test;
+my $success = Clownfish::Test::run_tests("Clownfish::Test::TestPtrHash");
+
+exit($success ? 0 : 1);
+


Mime
View raw message