lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r735517 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/FuzzyQuery.java src/test/org/apache/lucene/search/TestFuzzyQuery.java
Date Sun, 18 Jan 2009 18:24:14 GMT
Author: markrmiller
Date: Sun Jan 18 10:24:14 2009
New Revision: 735517

URL: http://svn.apache.org/viewvc?rev=735517&view=rev
Log:
LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length is small compared to
minSimilarity.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestFuzzyQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=735517&r1=735516&r2=735517&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Jan 18 10:24:14 2009
@@ -1,4 +1,4 @@
-Lucene Change Log
+Lucene Change Log
 $Id$
 
 ======================= Trunk (not yet released) =======================
@@ -141,6 +141,9 @@
  3. LUCENE-1484: Remove synchronization of IndexReader.document() by
     using CloseableThreadLocal internally.  (Jason Rutherglen via Mike
     McCandless).
+    
+ 4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length 
+    is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
 
 Documentation
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java?rev=735517&r1=735516&r2=735517&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FuzzyQuery.java Sun Jan 18 10:24:14 2009
@@ -34,6 +34,7 @@
   
   private float minimumSimilarity;
   private int prefixLength;
+  private boolean termLongEnough = false;
   
   /**
    * Create a new FuzzyQuery that will match terms with a similarity 
@@ -61,6 +62,10 @@
     if (prefixLength < 0)
       throw new IllegalArgumentException("prefixLength < 0");
     
+    if (term.text().length() > 1.0f / (1.0f - minimumSimilarity)) {
+      this.termLongEnough = true;
+    }
+    
     this.minimumSimilarity = minimumSimilarity;
     this.prefixLength = prefixLength;
   }
@@ -105,6 +110,10 @@
   }
   
   public Query rewrite(IndexReader reader) throws IOException {
+    if(!termLongEnough) {  // can't match
+      return new BooleanQuery();
+    }
+
     FilteredTermEnum enumerator = getEnum(reader);
     int maxClauseCount = BooleanQuery.getMaxClauseCount();
     ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=735517&r1=735516&r2=735517&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestFuzzyQuery.java Sun Jan 18 10:24:14 2009
@@ -249,6 +249,38 @@
     directory.close();
   }
   
+  public void testTokenLengthOpt() throws IOException {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
+        true, IndexWriter.MaxFieldLength.LIMITED);
+    addDoc("12345678911", writer);
+    addDoc("segment", writer);
+    writer.optimize();
+    writer.close();
+    IndexSearcher searcher = new IndexSearcher(directory);
+
+    Query query;
+    // term not over 10 chars, so optimization shortcuts
+    query = new FuzzyQuery(new Term("field", "1234569"), 0.9f);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+
+    // 10 chars, so no optimization
+    query = new FuzzyQuery(new Term("field", "1234567891"), 0.9f);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+    
+    // over 10 chars, so no optimization
+    query = new FuzzyQuery(new Term("field", "12345678911"), 0.9f);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(1, hits.length);
+
+    // over 10 chars, no match
+    query = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals(0, hits.length);
+  }
+  
   private void addDoc(String text, IndexWriter writer) throws IOException {
     Document doc = new Document();
     doc.add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));



Mime
View raw message