lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Jack Krupansky" <j...@basetechnology.com>
Subject Re: translating a character code to an ordinal?
Date Fri, 07 Jun 2013 23:15:22 GMT
This won't help you unless you move to Solr 4.0, but here's an update 
processor script from the book that can take the first character of a string 
field and add it as an integer value for another field:

  <!-- Update request processor chain named "script-add-char-code"; the test
       request selects it with update.chain=script-add-char-code. -->
  <updateRequestProcessorChain name="script-add-char-code">
    <!-- Script processor configured to use the add-char-code.js script. -->
    <processor class="solr.StatelessScriptUpdateProcessorFactory">
      <str name="script">add-char-code.js</str>
      <!-- Values the script looks up with params.get("fieldName") and
           params.get("codeFieldName"). -->
      <lst name="params">
        <str name="fieldName">content</str>
        <str name="codeFieldName">content_code_i</str>
      </lst>
    </processor>
    <processor class="solr.LogUpdateProcessorFactory" />
    <processor class="solr.RunUpdateProcessorFactory" />
  </updateRequestProcessorChain>

Here is the JavaScript script that should be placed in the 
"add-char-code.js" file in the "conf" directory for
the Solr collection:

  /**
   * Update-processor hook for add commands: takes the first value of a source
   * field and stores the character code of its first character in a second
   * field of the same document.
   *
   * Field names are read from the optional global `params` object (entries
   * "fieldName" and "codeFieldName"); when `params` is absent, null, or lacks
   * an entry, the defaults "content" and "content_code_i" are used.
   *
   * The document is left untouched when the source field is missing, when its
   * first value is null, or when that value is an empty string.
   *
   * Relies on the host-provided global `logger` (must expose `info`).
   * `var` is kept deliberately so the script stays loadable on older
   * JavaScript engines.
   *
   * @param cmd add command; must expose getSolrInputDocument(), whose result
   *            must expose getField(name) and addField(name, value)
   */
  function processAdd(cmd) {
    var fieldName = null;
    var codeFieldName = null;

    // `params` may be undeclared or bound to null when nothing is configured.
    if (typeof params !== "undefined" && params != null) {
      fieldName = params.get("fieldName");
      codeFieldName = params.get("codeFieldName");
    }
    if (fieldName == null) {
      fieldName = "content";
    }
    if (codeFieldName == null) {
      codeFieldName = "content_code_i";
    }

    // No-op if the document has no such field
    var doc = cmd.getSolrInputDocument();
    var field = doc.getField(fieldName);
    if (field == null) {
      return;
    }

    // No-op if the field has no usable first value
    var first = field.getFirstValue();
    if (first == null) {
      return;
    }

    // Coerce to a JavaScript string so that `.length` and `charCodeAt` behave
    // the same whether the host hands over a Java string, a JavaScript string,
    // or a non-string value.
    var str = String(first);
    if (str.length === 0) {
      return;
    }

    // Code (UTF-16 code unit) of the first character
    var code = str.charCodeAt(0);
    logger.info("String: \"" + str + "\" len: " + str.length + " code: " + code);

    // Set the character code output field value
    doc.addField(codeFieldName, code);
  }

  /**
   * Placeholder hook with no behavior. Add delete handling here if needed.
   */
  function processDelete() {
    return; // intentionally a no-op
  }

  /**
   * Placeholder hook with no behavior. Add commit handling here if needed.
   */
  function processCommit() {
    return; // intentionally a no-op
  }

  /**
   * Placeholder hook with no behavior. Add rollback handling here if needed.
   */
  function processRollback() {
    return; // intentionally a no-op
  }

  /**
   * Placeholder hook with no behavior. Add merge-indexes handling here if
   * needed.
   */
  function processMergeIndexes() {
    return; // intentionally a no-op
  }

  /**
   * Placeholder hook with no behavior. Add end-of-processing logic here if
   * needed.
   */
  function finish() {
    return; // intentionally a no-op
  }

Test it:

  # Post six test documents through the "script-add-char-code" update chain.
  # The URL stays on the same line as curl, and each JSON string stays on one
  # line, so the shell and the JSON parser both see well-formed input.
  curl "http://localhost:8983/solr/update?commit=true&update.chain=script-add-char-code" \
  -H 'Content-type:application/json' -d '
  [{"id": "doc-1", "content": "abc"},
   {"id": "doc-2", "content": "1"},
   {"id": "doc-3", "content": ""},
   {"id": "doc-4"},
   {"id": "doc-5", "content": "\u0002 abc"},
   {"id": "doc-6", "content": ["And, this", "is the end", "of this test."]}]'

Results:

  "id":"doc-1",
  "content":["abc"],
  "content_code_i":97,

  "id":"doc-2",
  "content":["1"],
  "content_code_i":49,

  "id":"doc-3",
  "content":[""],

  "id":"doc-4",

  "id":"doc-5",
  "content":["\u0002 abc"],
  "content_code_i":2,

  "id":"doc-6",
  "content":["And, this",
    "is the end",
    "of this test."],
  "content_code_i":65,

-- Jack Krupansky

-----Original Message----- 
From: geeky2
Sent: Friday, June 07, 2013 6:27 PM
To: solr-user@lucene.apache.org
Subject: translating a character code to an ordinal?

hello all,

environment: solr 3.5, centos

problem statement:  i have several character codes that i want to translate
to ordinal (integer) values (for sorting), while retaining the original code
field in the document.

i was thinking that i could use a copyField from my "code" field to my "ord"
field - then employ a pattern replace filter factory during indexing.

but won't the copyfield fail because the two field types are different?

ps: i also read the wiki about
http://wiki.apache.org/solr/DataImportHandler#Transformer the script
transformer and regex transformer - but was hoping to avoid this - if i
could.




thx
mark




--
View this message in context: 
http://lucene.472066.n3.nabble.com/translating-a-character-code-to-an-ordinal-tp4068966.html
Sent from the Solr - User mailing list archive at Nabble.com. 


Mime
View raw message