lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Yonik Seeley <ysee...@gmail.com>
Subject score based on field value
Date Sat, 24 Sep 2005 05:02:43 GMT
There has been a lot of interest in generating a score or boosting based on
the value of a particular field.
Here is my first prototype that can handle int and float field values.
I'm not particularly happy with the form of this solution yet, which is why
I'm throwing it out to dev to see if anyone has any good ideas about the
direction to take this.

-Yonik
Now hiring -- http://tinyurl.com/7m67g


package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;

/**
 * A query that matches every non-deleted document of an index and scores each
 * match by applying a user supplied function to the value of one numeric field.
 *
 * <p>The field values are read through {@link FieldCache#DEFAULT} as either
 * <code>int</code>s or <code>float</code>s, depending on which kind of function
 * the query was constructed with. The score of a document is
 * <code>func.getScore(fieldValue) * queryWeight</code>, where the query weight
 * is the query boost multiplied by the query norm.
 *
 * <p>This is a prototype; the open design questions are kept as comments next
 * to the code they concern.
 *
 * @author yonik
 */
public class FieldValueQuery extends Query {

  // have a common base class?

  /** Maps a <code>float</code> field value to an un-weighted score. */
  public interface FloatFunc {
    /**
     * @param fieldVal the field value of the current document
     * @return the score for that value, before the query weight is applied
     */
    public float getScore(float fieldVal);
  }

  /** Maps an <code>int</code> field value to an un-weighted score. */
  public interface IntFunc {
    /**
     * @param fieldVal the field value of the current document
     * @return the score for that value, before the query weight is applied
     */
    public float getScore(int fieldVal);
  }

  /** Linear function <code>fieldVal * slope + intercept</code> over float values. */
  public static class FloatLine implements FloatFunc {
    final float slope, intercept;

    public FloatLine(float slope, float intercept) {
      this.slope = slope;
      this.intercept = intercept;
    }

    public float getScore(float fieldVal) {
      return fieldVal * slope + intercept;
    }

    /** Two lines are equal when they have the same class, slope and intercept. */
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      FloatLine other = (FloatLine) o;
      // compare bit patterns so that equals() stays consistent with hashCode()
      return Float.floatToIntBits(slope) == Float.floatToIntBits(other.slope)
          && Float.floatToIntBits(intercept) == Float.floatToIntBits(other.intercept);
    }

    public int hashCode() {
      return 31 * Float.floatToIntBits(slope) + Float.floatToIntBits(intercept);
    }

    public String toString() {
      return "floatline(" + slope + "," + intercept + ")";
    }
  }

  /** Linear function <code>fieldVal * slope + intercept</code> over int values. */
  public static class IntLine implements IntFunc {
    final float slope, intercept;

    public IntLine(float slope, float intercept) {
      this.slope = slope;
      this.intercept = intercept;
    }

    public float getScore(int fieldVal) {
      return fieldVal * slope + intercept;
    }

    /** Two lines are equal when they have the same class, slope and intercept. */
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      IntLine other = (IntLine) o;
      // compare bit patterns so that equals() stays consistent with hashCode()
      return Float.floatToIntBits(slope) == Float.floatToIntBits(other.slope)
          && Float.floatToIntBits(intercept) == Float.floatToIntBits(other.intercept);
    }

    public int hashCode() {
      return 31 * Float.floatToIntBits(slope) + Float.floatToIntBits(intercept);
    }

    public String toString() {
      return "intline(" + slope + "," + intercept + ")";
    }
  }


  /** Name of the field whose values are fed to the function. */
  String fname;
  /** Set by the {@link FloatFunc} constructor; otherwise null. */
  FloatFunc ffunc;
  /** Set by the {@link IntFunc} constructor; otherwise null. */
  IntFunc ifunc;

  /**
   * Creates a query that scores documents from the float value of a field.
   *
   * @param field name of the field to read
   * @param func function applied to each document's field value
   * @throws IllegalArgumentException if <code>field</code> or <code>func</code> is null
   */
  public FieldValueQuery(String field, FloatFunc func) {
    checkArgs(field, func);
    this.fname = field;
    this.ffunc = func;
  }

  /**
   * Creates a query that scores documents from the int value of a field.
   *
   * @param field name of the field to read
   * @param func function applied to each document's field value
   * @throws IllegalArgumentException if <code>field</code> or <code>func</code> is null
   */
  public FieldValueQuery(String field, IntFunc func) {
    checkArgs(field, func);
    this.fname = field;
    this.ifunc = func;
  }

  /** Rejects null constructor arguments up front instead of failing later while scoring. */
  private static void checkArgs(String field, Object func) {
    if (field == null) {
      throw new IllegalArgumentException("field must not be null");
    }
    if (func == null) {
      throw new IllegalArgumentException("func must not be null");
    }
  }

  /** This query is already in its primitive form, so it is returned unchanged. */
  public Query rewrite(IndexReader reader) throws IOException {
    return this;
  }

  /**
   * Weight for a {@link FieldValueQuery}. The weight value is the query boost,
   * multiplied by the query norm once {@link #normalize(float)} has been called.
   */
  protected class FunctionWeight implements Weight {
    private Searcher searcher;
    private float queryNorm;
    private float queryWeight;

    public FunctionWeight(Searcher searcher) {
      this.searcher = searcher;
    }

    public Query getQuery() {
      return FieldValueQuery.this;
    }

    public float getValue() {
      return queryWeight;
    }

    /** Initializes the weight to the query boost and returns its square. */
    public float sumOfSquaredWeights() throws IOException {
      queryWeight = getBoost();
      return queryWeight * queryWeight;
    }

    /** Remembers the norm and folds it into the weight. */
    public void normalize(float norm) {
      this.queryNorm = norm;
      queryWeight *= this.queryNorm;
    }

    /**
     * Returns a scorer over all non-deleted documents of <code>reader</code>
     * whose score is the function of the cached field value times this weight.
     */
    public Scorer scorer(IndexReader reader) throws IOException {
      if (ifunc != null) {
        return new AllScorer(getSimilarity(searcher), reader, this) {
          // hmmm, what if user needs a custom float reader?
          // should the FieldCache access be encapsulated
          // in the Func object instead? It kind of exposes more
          // lucene internals though...
          // this.reader is the field inherited from AllScorer
          final int[] arr = FieldCache.DEFAULT.getInts(this.reader, fname);

          public float score() throws IOException {
            return ifunc.getScore(arr[doc]) * qWeight;
          }
        };
      } else if (ffunc != null) {
        return new AllScorer(getSimilarity(searcher), reader, this) {
          // this.reader is the field inherited from AllScorer
          final float[] arr = FieldCache.DEFAULT.getFloats(this.reader, fname);

          public float score() throws IOException {
            return ffunc.getScore(arr[doc]) * qWeight;
          }
        };
      } else {
        // only reachable if the package-visible function fields were cleared
        throw new IllegalStateException(
            "FieldValueQuery on field '" + fname + "' has no scoring function");
      }
    }

    /** Delegates to the explanation of a freshly created scorer. */
    public Explanation explain(IndexReader reader, int doc) throws IOException {
      return scorer(reader).explain(doc);
    }
  }

  /**
   * Scorer that matches every non-deleted document of a reader. On its own it
   * scores each document with the query weight; subclasses override
   * {@link #score()} to factor in a field value.
   */
  protected class AllScorer extends Scorer {
    final IndexReader reader;
    final int maxDoc;
    final float qWeight;
    /** Current document number; -1 until the first call to next() or skipTo(). */
    int doc = -1;

    public AllScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
      super(similarity);
      this.qWeight = w.getValue();
      this.reader = reader;
      this.maxDoc = reader.maxDoc();
    }

    // instead of matching all docs, we could also embed a query.
    // the score could either ignore the subscore, or boost it.
    // Containment: floatline(foo:myTerm, "myFloatField", 1.0, 0.0f)
    // Boost: foo:myTerm^floatline("myFloatField",1.0,0.0f)

    /** Advances to the next non-deleted document; returns false when there is none. */
    public boolean next() throws IOException {
      for (;;) {
        ++doc;
        if (doc >= maxDoc) {
          return false;
        }
        if (reader.isDeleted(doc)) continue;
        return true;
      }
    }

    public int doc() {
      return doc;
    }

    public float score() throws IOException {
      return qWeight;
    }

    /**
     * Advances to the first non-deleted document that is beyond the current
     * one and whose number is greater than or equal to <code>target</code>.
     */
    public boolean skipTo(int target) throws IOException {
      // Only ever move forward: a target at or before the current document
      // (including a negative one) must behave like next(), not rewind.
      if (target > doc) {
        doc = target - 1;
      }
      return next();
    }

    /**
     * Positions this scorer on <code>doc</code> and describes its score.
     * The scorer's current document is changed as a side effect.
     */
    public Explanation explain(int doc) throws IOException {
      // TODO: do it right when design is settled
      // NOTE: the function value is recovered as sc/qWeight, which is NaN
      // when qWeight is 0 and the function value is finite.
      this.doc = doc;
      float sc = score();
      return new Explanation(sc,
          "Function " + sc / qWeight + " * queryWeight(" + qWeight + ")");
    }
  }


  protected Weight createWeight(Searcher searcher) {
    return new FunctionWeight(searcher);
  }


  /** Prints a user-readable version of this query. */
  public String toString(String field) {
    if (ifunc != null) return fname + ':' + ifunc;
    else if (ffunc != null) return fname + ':' + ffunc;
    else return fname + ':' + "unknown function";
  }


  /**
   * Returns true if <code>o</code> is equal to this: same class, same boost,
   * same field name and equal functions. Functions that do not override
   * <code>equals</code> are compared by identity.
   */
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    FieldValueQuery other = (FieldValueQuery) o;
    return Float.floatToIntBits(getBoost()) == Float.floatToIntBits(other.getBoost())
        && eq(fname, other.fname)
        && eq(ifunc, other.ifunc)
        && eq(ffunc, other.ffunc);
  }

  /** Returns a hash code value for this object, consistent with {@link #equals(Object)}. */
  public int hashCode() {
    int h = Float.floatToIntBits(getBoost());
    if (fname != null) h ^= fname.hashCode();
    if (ifunc != null) h = 31 * h + ifunc.hashCode();
    if (ffunc != null) h = 31 * h + ffunc.hashCode();
    return h;
  }

  /** Null-safe equality. */
  private static boolean eq(Object a, Object b) {
    return a == null ? b == null : a.equals(b);
  }


}

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message