lucy-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nwelln...@apache.org
Subject [lucy-commits] [2/2] git commit: refs/heads/cfish-string-prep1 - Optimize writing of terms
Date Sat, 21 Sep 2013 16:49:50 GMT
Optimize writing of terms

* Make TextTermStepper#Write_* accept either Strings or CharBufs.
* Convert LexiconWriter#Add_Term to accept an Obj.
* Convert S_write_terms_and_postings in PostingPool to use a CharBuf
  in order to avoid repeated String allocations.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/c69fb741
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/c69fb741
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/c69fb741

Branch: refs/heads/cfish-string-prep1
Commit: c69fb741a5d016455b56de8ca3890c33f55ce464
Parents: 1d7ea09
Author: Nick Wellnhofer <wellnhofer@aevum.de>
Authored: Sat Sep 21 15:25:32 2013 +0200
Committer: Nick Wellnhofer <wellnhofer@aevum.de>
Committed: Sat Sep 21 16:16:03 2013 +0200

----------------------------------------------------------------------
 core/Lucy/Index/LexiconWriter.c   |  5 +++--
 core/Lucy/Index/LexiconWriter.cfh |  2 +-
 core/Lucy/Index/PostingPool.c     | 20 ++++++++++----------
 core/Lucy/Plan/TextType.c         | 34 ++++++++++++++++++++++++----------
 core/Lucy/Plan/TextType.cfh       |  2 +-
 5 files changed, 39 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/LexiconWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/LexiconWriter.c b/core/Lucy/Index/LexiconWriter.c
index 5189c49..4253e67 100644
--- a/core/Lucy/Index/LexiconWriter.c
+++ b/core/Lucy/Index/LexiconWriter.c
@@ -18,6 +18,7 @@
 #include "Lucy/Util/ToolSet.h"
 
 #include "Lucy/Index/LexiconWriter.h"
+#include "Clownfish/CharBuf.h"
 #include "Lucy/Plan/FieldType.h"
 #include "Lucy/Plan/Schema.h"
 #include "Lucy/Index/PolyReader.h"
@@ -104,7 +105,7 @@ S_add_last_term_to_ix(LexiconWriter *self) {
 }
 
 void
-LexWriter_Add_Term_IMP(LexiconWriter* self, String* term_text, TermInfo* tinfo) {
+LexWriter_Add_Term_IMP(LexiconWriter* self, Obj* term_text, TermInfo* tinfo) {
     LexiconWriterIVARS *const ivars = LexWriter_IVARS(self);
     OutStream *dat_out = ivars->dat_out;
 
@@ -115,7 +116,7 @@ LexWriter_Add_Term_IMP(LexiconWriter* self, String* term_text, TermInfo* tinfo)
         S_add_last_term_to_ix(self);
     }
 
-    TermStepper_Write_Delta(ivars->term_stepper, dat_out, (Obj*)term_text);
+    TermStepper_Write_Delta(ivars->term_stepper, dat_out, term_text);
     TermStepper_Write_Delta(ivars->tinfo_stepper, dat_out, (Obj*)tinfo);
 
     // Track number of terms.

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/LexiconWriter.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/LexiconWriter.cfh b/core/Lucy/Index/LexiconWriter.cfh
index 340e271..867120c 100644
--- a/core/Lucy/Index/LexiconWriter.cfh
+++ b/core/Lucy/Index/LexiconWriter.cfh
@@ -72,7 +72,7 @@ class Lucy::Index::LexiconWriter cnick LexWriter
      * field number).
      */
     void
-    Add_Term(LexiconWriter* self, String* term_text, TermInfo* tinfo);
+    Add_Term(LexiconWriter* self, Obj* term_text, TermInfo* tinfo);
 
     public void
     Add_Segment(LexiconWriter *self, SegReader *reader,

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/PostingPool.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/PostingPool.c b/core/Lucy/Index/PostingPool.c
index a969fd5..a3e61c0 100644
--- a/core/Lucy/Index/PostingPool.c
+++ b/core/Lucy/Index/PostingPool.c
@@ -22,6 +22,7 @@
 #include "Lucy/Util/ToolSet.h"
 
 #include "Lucy/Index/PostingPool.h"
+#include "Clownfish/CharBuf.h"
 #include "Lucy/Analysis/Inversion.h"
 #include "Lucy/Plan/Architecture.h"
 #include "Lucy/Plan/FieldType.h"
@@ -377,10 +378,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                               (*(RawPosting**)PostPool_Fetch(self)),
                               RAWPOSTING);
     RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
-    String *last_term_text
-        = Str_new_from_utf8(post_ivars->blob, post_ivars->content_len);
-    const char *last_text_buf  = Str_Get_Ptr8(last_term_text);
-    uint32_t    last_text_size = Str_Get_Size(last_term_text);
+    CharBuf *last_term_text
+        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
+    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
+    uint32_t    last_text_size = CB_Get_Size(last_term_text);
     SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);
 
     // Initialize sentinel to be used on the last iter, using an empty string
@@ -413,7 +414,7 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
         // If the term text changes, process the last term.
         if (!same_text_as_last) {
             // Hand off to LexiconWriter.
-            LexWriter_Add_Term(lex_writer, last_term_text, tinfo);
+            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);
 
             // Start each term afresh.
             TInfo_Reset(tinfo);
@@ -426,11 +427,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
             last_skip_filepos     = tinfo_ivars->post_filepos;
 
             // Remember the term_text so we can write string diffs.
-            DECREF(last_term_text);
-            last_term_text
-                = Str_new_from_utf8(post_ivars->blob, post_ivars->content_len);
-            last_text_buf  = Str_Get_Ptr8(last_term_text);
-            last_text_size = Str_Get_Size(last_term_text);
+            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
+                          post_ivars->content_len);
+            last_text_buf  = CB_Get_Ptr8(last_term_text);
+            last_text_size = CB_Get_Size(last_term_text);
         }
 
         // Bail on last iter before writing invalid posting data.

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Plan/TextType.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c
index 487317c..675b51a 100644
--- a/core/Lucy/Plan/TextType.c
+++ b/core/Lucy/Plan/TextType.c
@@ -93,11 +93,12 @@ void
 TextTermStepper_Write_Key_Frame_IMP(TextTermStepper *self,
                                     OutStream *outstream, Obj *value) {
     TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
-    const char *buf  = Str_Get_Ptr8((String*)value);
-    size_t      size = Str_Get_Size((String*)value);
+    CharBuf *charbuf = (CharBuf*)ivars->value;
+    CB_Mimic(charbuf, value);
+    const char *buf  = CB_Get_Ptr8(charbuf);
+    size_t      size = CB_Get_Size(charbuf);
     OutStream_Write_C32(outstream, size);
     OutStream_Write_Bytes(outstream, buf, size);
-    Obj_Mimic(ivars->value, value);
     // Invalidate string.
     DECREF(ivars->string);
     ivars->string = NULL;
@@ -107,12 +108,25 @@ void
 TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                 Obj *value) {
     TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
-    String     *new_value  = (String*)CERTIFY(value, STRING);
-    CharBuf    *last_value = (CharBuf*)ivars->value;
-    const char *new_text   = Str_Get_Ptr8(new_value);
-    size_t      new_size   = Str_Get_Size(new_value);
-    const char *last_text  = CB_Get_Ptr8(last_value);
-    size_t      last_size  = CB_Get_Size(last_value);
+    CharBuf    *charbuf   = (CharBuf*)ivars->value;
+    const char *last_text = CB_Get_Ptr8(charbuf);
+    size_t      last_size = CB_Get_Size(charbuf);
+    const char *new_text  = NULL;
+    size_t      new_size  = 0;
+
+    if (Obj_Is_A(value, STRING)) {
+        String *new_string = (String*)value;
+        new_text = Str_Get_Ptr8(new_string);
+        new_size = Str_Get_Size(new_string);
+    }
+    else if (Obj_Is_A(value, CHARBUF)) {
+        CharBuf *new_charbuf = (CharBuf*)value;
+        new_text = CB_Get_Ptr8(new_charbuf);
+        new_size = CB_Get_Size(new_charbuf);
+    }
+    else {
+        THROW(ERR, "'value' must be a String or CharBuf");
+    }
 
     // Count how many bytes the strings share at the top.
     const int32_t overlap = StrHelp_overlap(last_text, new_text,
@@ -125,7 +139,7 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
     OutStream_Write_String(outstream, diff_start_str, diff_len);
 
     // Update value.
-    Obj_Mimic(ivars->value, value);
+    CB_Mimic_Utf8(charbuf, new_text, new_size);
 
     // Invalidate string.
     DECREF(ivars->string);

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Plan/TextType.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Plan/TextType.cfh b/core/Lucy/Plan/TextType.cfh
index 33b70ec..b2bf014 100644
--- a/core/Lucy/Plan/TextType.cfh
+++ b/core/Lucy/Plan/TextType.cfh
@@ -42,7 +42,7 @@ class Lucy::Index::TermStepper::TextTermStepper
     Reset(TextTermStepper *self);
 
     /**
-     * @param value A String.
+     * @param value A String or CharBuf.
      */
     public void
     Set_Value(TextTermStepper *self, Obj *value = NULL);


Mime
View raw message