lucy-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nwelln...@apache.org
Subject [lucy-commits] [5/6] git commit: refs/heads/sortfieldwriter - Use counting sort to sort doc_ids in SortFieldWriter#Refill
Date Sun, 13 Oct 2013 13:59:08 GMT
Use counting sort to sort doc_ids in SortFieldWriter#Refill

Since we already have the ordinals for each doc_id, we can use a
counting sort. This uses a temporary array of size run_cardinality but
runs in linear time.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/ad178f10
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/ad178f10
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/ad178f10

Branch: refs/heads/sortfieldwriter
Commit: ad178f10692659b4ed8b170ebfa42d13fd3eed20
Parents: 98a960e
Author: Nick Wellnhofer <wellnhofer@aevum.de>
Authored: Thu Sep 26 19:43:42 2013 +0200
Committer: Nick Wellnhofer <wellnhofer@aevum.de>
Committed: Sun Oct 13 15:10:30 2013 +0200

----------------------------------------------------------------------
 core/Lucy/Index/SortFieldWriter.c | 53 +++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/ad178f10/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index 4ab3ff0..efd7427 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -342,30 +342,43 @@ SortFieldWriter_Compare_IMP(SortFieldWriter *self, void *va, void *vb)
{
     return comparison;
 }
 
-static int
-S_compare_doc_ids_by_ord_rev(void *context, const void *va, const void *vb) {
-    SortCache *sort_cache = (SortCache*)context;
-    int32_t a = *(int32_t*)va;
-    int32_t b = *(int32_t*)vb;
-    int32_t ord_a = SortCache_Ordinal(sort_cache, a);
-    int32_t ord_b = SortCache_Ordinal(sort_cache, b);
-    int32_t comparison = ord_a - ord_b;
-    if (comparison == 0) { comparison = a - b; }
-    return comparison;
-}
-
 static void
 S_lazy_init_sorted_ids(SortFieldWriter *self) {
     SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
-    if (!ivars->sorted_ids) {
-        ivars->sorted_ids
-            = (int32_t*)MALLOCATE((ivars->run_max + 1) * sizeof(int32_t));
-        for (int32_t i = 0, max = ivars->run_max; i <= max; i++) {
-            ivars->sorted_ids[i] = i;
-        }
-        Sort_quicksort(ivars->sorted_ids + 1, ivars->run_max, sizeof(int32_t),
-                       S_compare_doc_ids_by_ord_rev, ivars->sort_cache);
+    if (ivars->sorted_ids) { return; }
+
+    // Counting sort. Could be optimized by working directly on the
+    // ordinal arrays.
+
+    SortCache *sort_cache      = ivars->sort_cache;
+    int32_t    run_cardinality = ivars->run_cardinality;
+    int32_t    run_max         = ivars->run_max;
+
+    // Count.
+    int32_t *counts = (int32_t*)CALLOCATE(run_cardinality, sizeof(int32_t));
+    for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+        int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+        ++counts[ord];
+    }
+
+    // Compute partial sums.
+    int32_t sum = 0;
+    for (int32_t ord = 0; ord < run_cardinality; ++ord) {
+        int32_t count = counts[ord];
+        counts[ord] = sum;
+        sum += count;
     }
+
+    // Distribute.
+    int32_t *sorted_ids = (int32_t*)MALLOCATE((run_max + 1) * sizeof(int32_t));
+    for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+        int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+        int32_t pos = counts[ord]++;
+        sorted_ids[pos] = doc_id;
+    }
+
+    ivars->sorted_ids = sorted_ids;
+    FREEMEM(counts);
 }
 
 void


Mime
View raw message