lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r696056 [2/2] - in /lucene/java/trunk: ./ src/java/org/apache/lucene/queryParser/ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/queryParser/ src/test/org/apache/lucene/search/
Date Tue, 16 Sep 2008 21:03:21 GMT
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeFilter.java Tue Sep 16 14:03:21 2008
@@ -18,9 +18,16 @@
  */
 
 import java.io.IOException;
+import java.text.Collator;
+import java.util.Locale;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.RAMDirectory;
 
 /**
  * A basic 'positive' Unit test class for the RangeFilter class.
@@ -42,7 +49,7 @@
 
     public void testRangeFilterId() throws IOException {
 
-        IndexReader reader = IndexReader.open(index);
+        IndexReader reader = IndexReader.open(signedIndex.index);
 	IndexSearcher search = new IndexSearcher(reader);
 
         int medId = ((maxId - minId) / 2);
@@ -122,13 +129,96 @@
         
     }
 
+    public void testRangeFilterIdCollating() throws IOException {
+
+        IndexReader reader = IndexReader.open(signedIndex.index);
+        IndexSearcher search = new IndexSearcher(reader);
+
+        Collator c = Collator.getInstance(Locale.ENGLISH);
+
+        int medId = ((maxId - minId) / 2);
+
+        String minIP = pad(minId);
+        String maxIP = pad(maxId);
+        String medIP = pad(medId);
+
+        int numDocs = reader.numDocs();
+
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+        Hits result;
+        Query q = new TermQuery(new Term("body","body"));
+
+        // test id, bounded on both ends
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c));
+        assertEquals("find all", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c));
+        assertEquals("all but last", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c));
+        assertEquals("all but first", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c));
+        assertEquals("all but ends", numDocs-2, result.length());
+
+        result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c));
+        assertEquals("med and up", 1+ maxId-medId, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c));
+        assertEquals("up to med", 1+ medId-minId, result.length());
+
+        // unbounded id
+
+        result = search.search(q,new RangeFilter("id",minIP,null,T,F,c));
+        assertEquals("min and up", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c));
+        assertEquals("max and down", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,null,F,F,c));
+        assertEquals("not min, but up", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c));
+        assertEquals("not max, but down", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c));
+        assertEquals("med and up, not max", maxId-medId, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,medIP,F,T,c));
+        assertEquals("not min, up to med", medId-minId, result.length());
+
+        // very small sets
+
+        result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c));
+        assertEquals("min,min,F,F", 0, result.length());
+        result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c));
+        assertEquals("med,med,F,F", 0, result.length());
+        result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c));
+        assertEquals("max,max,F,F", 0, result.length());
+
+        result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c));
+        assertEquals("min,min,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("id",null,minIP,F,T,c));
+        assertEquals("nul,min,F,T", 1, result.length());
+
+        result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c));
+        assertEquals("max,max,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c));
+        assertEquals("max,nul,T,T", 1, result.length());
+
+        result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c));
+        assertEquals("med,med,T,T", 1, result.length());
+    }
+
     public void testRangeFilterRand() throws IOException {
 
-        IndexReader reader = IndexReader.open(index);
+  IndexReader reader = IndexReader.open(signedIndex.index);
 	IndexSearcher search = new IndexSearcher(reader);
 
-        String minRP = pad(minR);
-        String maxRP = pad(maxR);
+        String minRP = pad(signedIndex.minR);
+        String maxRP = pad(signedIndex.maxR);
     
         int numDocs = reader.numDocs();
         
@@ -184,4 +274,106 @@
         
     }
 
+    public void testRangeFilterRandCollating() throws IOException {
+
+        // using the unsigned index because collation seems to ignore hyphens
+        IndexReader reader = IndexReader.open(unsignedIndex.index);
+        IndexSearcher search = new IndexSearcher(reader);
+
+        Collator c = Collator.getInstance(Locale.ENGLISH);
+
+        String minRP = pad(unsignedIndex.minR);
+        String maxRP = pad(unsignedIndex.maxR);
+
+        int numDocs = reader.numDocs();
+
+        assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+        Hits result;
+        Query q = new TermQuery(new Term("body","body"));
+
+        // test extremes, bounded on both ends
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c));
+        assertEquals("find all", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c));
+        assertEquals("all but biggest", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c));
+        assertEquals("all but smallest", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c));
+        assertEquals("all but extremes", numDocs-2, result.length());
+
+        // unbounded
+
+        result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c));
+        assertEquals("smallest and up", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c));
+        assertEquals("biggest and down", numDocs, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c));
+        assertEquals("not smallest, but up", numDocs-1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c));
+        assertEquals("not biggest, but down", numDocs-1, result.length());
+
+        // very small sets
+
+        result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c));
+        assertEquals("min,min,F,F", 0, result.length());
+        result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c));
+        assertEquals("max,max,F,F", 0, result.length());
+
+        result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c));
+        assertEquals("min,min,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c));
+        assertEquals("nul,min,F,T", 1, result.length());
+
+        result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c));
+        assertEquals("max,max,T,T", 1, result.length());
+        result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c));
+        assertEquals("max,nul,T,T", 1, result.length());
+    }
+    
+    public void testFarsi() throws Exception {
+            
+        /* build an index */
+        RAMDirectory farsiIndex = new RAMDirectory();
+        IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, 
+                                             IndexWriter.MaxFieldLength.LIMITED);
+        Document doc = new Document();
+        doc.add(new Field("content","\u0633\u0627\u0628", 
+                          Field.Store.YES, Field.Index.UN_TOKENIZED));
+        doc.add(new Field("body", "body",
+                          Field.Store.YES, Field.Index.UN_TOKENIZED));
+        writer.addDocument(doc);
+            
+        writer.optimize();
+        writer.close();
+
+        IndexReader reader = IndexReader.open(farsiIndex);
+        IndexSearcher search = new IndexSearcher(reader);
+        Query q = new TermQuery(new Term("body","body"));
+
+        // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+        // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+        // characters properly.
+        Collator collator = Collator.getInstance(new Locale("ar"));
+        
+        // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
+        // orders the U+0698 character before the U+0633 character, so the single
+        // index Term below should NOT be returned by a RangeFilter with a Farsi
+        // Collator (or an Arabic one for the case when Farsi is not supported).
+        Hits result = search.search
+            (q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator));
+        assertEquals("The index Term should not be included.", 0, result.length());
+
+        result = search.search
+            (q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator));
+        assertEquals("The index Term should be included.", 1, result.length());
+        search.close();
+    }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java?rev=696056&r1=696055&r2=696056&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestRangeQuery.java Tue Sep 16 14:03:21 2008
@@ -26,6 +26,8 @@
 
 import org.apache.lucene.util.LuceneTestCase;
 import java.io.IOException;
+import java.util.Locale;
+import java.text.Collator;
 
 
 public class TestRangeQuery extends LuceneTestCase {
@@ -130,6 +132,78 @@
     assertFalse("queries with different inclusive are not equal", query.equals(other));
   }
 
+  public void testExclusiveCollating() throws Exception {
+    Query query = new RangeQuery(new Term("content", "A"),
+                                 new Term("content", "C"),
+                                 false, Collator.getInstance(Locale.ENGLISH));
+    initializeIndex(new String[] {"A", "B", "C", "D"});
+    IndexSearcher searcher = new IndexSearcher(dir);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("A,B,C,D, only B in range", 1, hits.length);
+    searcher.close();
+
+    initializeIndex(new String[] {"A", "B", "D"});
+    searcher = new IndexSearcher(dir);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("A,B,D, only B in range", 1, hits.length);
+    searcher.close();
+
+    addDoc("C");
+    searcher = new IndexSearcher(dir);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("C added, still only B in range", 1, hits.length);
+    searcher.close();
+  }
+
+  public void testInclusiveCollating() throws Exception {
+    Query query = new RangeQuery(new Term("content", "A"),
+                                 new Term("content", "C"),
+                                 true, Collator.getInstance(Locale.ENGLISH));
+
+    initializeIndex(new String[]{"A", "B", "C", "D"});
+    IndexSearcher searcher = new IndexSearcher(dir);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
+    searcher.close();
+
+    initializeIndex(new String[]{"A", "B", "D"});
+    searcher = new IndexSearcher(dir);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("A,B,D - A and B in range", 2, hits.length);
+    searcher.close();
+
+    addDoc("C");
+    searcher = new IndexSearcher(dir);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("C added - A, B, C in range", 3, hits.length);
+    searcher.close();
+  }
+
+  public void testFarsi() throws Exception {
+    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+    // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+    // characters properly.
+    Collator collator = Collator.getInstance(new Locale("ar"));
+    Query query = new RangeQuery(new Term("content", "\u062F"),
+                                 new Term("content", "\u0698"),
+                                 true, collator);
+    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
+    // orders the U+0698 character before the U+0633 character, so the single
+    // index Term below should NOT be returned by a RangeQuery with a Farsi
+    // Collator (or an Arabic one for the case when Farsi is not supported).
+    initializeIndex(new String[]{ "\u0633\u0627\u0628"});
+    IndexSearcher searcher = new IndexSearcher(dir);
+    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("The index Term should not be included.", 0, hits.length);
+
+    query = new RangeQuery(new Term("content", "\u0633"),
+                           new Term("content", "\u0638"),
+                           true, collator);
+    hits = searcher.search(query, null, 1000).scoreDocs;
+    assertEquals("The index Term should be included.", 1, hits.length);
+    searcher.close();
+  }
+
   private void initializeIndex(String[] values) throws IOException {
     IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
     for (int i = 0; i < values.length; i++) {
@@ -154,6 +228,3 @@
     docCount++;
   }
 }
-
-
-



Mime
View raw message