lucy-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nwelln...@apache.org
Subject [1/3] lucy-clownfish git commit: Fix for duplicate hash entries
Date Wed, 15 Apr 2015 13:24:33 GMT
Repository: lucy-clownfish
Updated Branches:
  refs/heads/master f02be68a5 -> 93f0dee1f


Fix for duplicate hash entries

This fixes a rather serious bug in the hash table implementation.
Hash_do_store would create a new entry as soon as it found a tombstone,
ignoring possibly existing entries after the tombstone. This could lead to
entries being created twice in a hash table. Fetching the entry would at
first return the new entry. But a rehash would overwrite the new entry with
the old entry. Iterating a hash would also return both entries.

Fixes CLOWNFISH-33.


Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/d75438da
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/d75438da
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/d75438da

Branch: refs/heads/master
Commit: d75438dade7c82e94ff17a38b598b1de6fa89ff3
Parents: f02be68
Author: Nick Wellnhofer <wellnhofer@aevum.de>
Authored: Tue Apr 14 17:23:58 2015 +0200
Committer: Nick Wellnhofer <wellnhofer@aevum.de>
Committed: Wed Apr 15 15:08:53 2015 +0200

----------------------------------------------------------------------
 runtime/core/Clownfish/Hash.c          | 14 ++++++------
 runtime/core/Clownfish/Test/TestHash.c | 34 ++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/d75438da/runtime/core/Clownfish/Hash.c
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/Hash.c b/runtime/core/Clownfish/Hash.c
index 9024624..7a4a450 100644
--- a/runtime/core/Clownfish/Hash.c
+++ b/runtime/core/Clownfish/Hash.c
@@ -117,6 +117,13 @@ Hash_Clear_IMP(Hash *self) {
 void
 Hash_do_store(Hash *self, Obj *key, Obj *value,
               int32_t hash_sum, bool use_this_key) {
+    HashEntry *entry = SI_fetch_entry(self, key, hash_sum);
+    if (entry) {
+        DECREF(entry->value);
+        entry->value = value;
+        return;
+    }
+
     HashEntry *entries = self->size >= self->threshold
                          ? SI_rebuild_hash(self)
                          : (HashEntry*)self->entries;
@@ -139,13 +146,6 @@ Hash_do_store(Hash *self, Obj *key, Obj *value,
             self->size++;
             break;
         }
-        else if (entry->hash_sum == hash_sum
-                 && Obj_Equals(key, entry->key)
-                ) {
-            DECREF(entry->value);
-            entry->value = value;
-            break;
-        }
         tick++; // linear scan
     }
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/d75438da/runtime/core/Clownfish/Test/TestHash.c
----------------------------------------------------------------------
diff --git a/runtime/core/Clownfish/Test/TestHash.c b/runtime/core/Clownfish/Test/TestHash.c
index 54ec399..9d14e1d 100644
--- a/runtime/core/Clownfish/Test/TestHash.c
+++ b/runtime/core/Clownfish/Test/TestHash.c
@@ -221,14 +221,46 @@ test_stress(TestBatchRunner *runner) {
     DECREF(hash);
 }
 
+static void
+test_store_skips_tombstone(TestBatchRunner *runner) {
+    Hash *hash = Hash_new(0);
+    uint32_t mask = Hash_Get_Capacity(hash) - 1;
+
+    String *one = Str_newf("one");
+    uint32_t slot = Str_Hash_Sum(one) & mask;
+
+    // Find a colliding key.
+    String *two = NULL;
+    for (int i = 0; i < 100000; i++) {
+        two = Str_newf("%i32", i);
+        if (slot == (Str_Hash_Sum(two) & mask)) {
+            break;
+        }
+        DECREF(two);
+        two = NULL;
+    }
+
+    Hash_Store(hash, (Obj*)one, (Obj*)CFISH_TRUE);
+    Hash_Store(hash, (Obj*)two, (Obj*)CFISH_TRUE);
+    Hash_Delete(hash, (Obj*)one);
+    Hash_Store(hash, (Obj*)two, (Obj*)CFISH_TRUE);
+
+    TEST_INT_EQ(runner, Hash_Get_Size(hash), 1, "Store skips tombstone");
+
+    DECREF(one);
+    DECREF(two);
+    DECREF(hash);
+}
+
 void
 TestHash_Run_IMP(TestHash *self, TestBatchRunner *runner) {
-    TestBatchRunner_Plan(runner, (TestBatch*)self, 27);
+    TestBatchRunner_Plan(runner, (TestBatch*)self, 28);
     srand((unsigned int)time((time_t*)NULL));
     test_Equals(runner);
     test_Store_and_Fetch(runner);
     test_Keys_Values_Iter(runner);
     test_stress(runner);
+    test_store_skips_tombstone(runner);
 }
 
 


Mime
View raw message