lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r713962 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/FieldsReader.java src/test/org/apache/lucene/index/TestIndexReader.java
Date Fri, 14 Nov 2008 10:02:45 GMT
Author: mikemccand
Date: Fri Nov 14 02:02:44 2008
New Revision: 713962

URL: http://svn.apache.org/viewvc?rev=713962&view=rev
Log:
LUCENE-1452: fixes cases during merge and lazy field access where binary field is truncated to 0

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=713962&r1=713961&r2=713962&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Nov 14 02:02:44 2008
@@ -27,6 +27,12 @@
    implementation - Leads to Solr Cache misses. 
    (Todd Feak, Mark Miller via yonik)
 
+2. LUCENE-1452: Fixed silent data-loss case whereby binary fields are
+   truncated to 0 bytes during merging if the segments being merged
+   are non-congruent (same field name maps to different field
+   numbers).  This bug was introduced with LUCENE-1219.  (Andrzej
+   Bialecki via Mike McCandless).
+
 New features
 
  1. LUCENE-1411: Added expert API to open an IndexWriter on a prior

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java?rev=713962&r1=713961&r2=713962&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FieldsReader.java Fri Nov 14 02:02:44 2008
@@ -423,6 +423,8 @@
       this.toRead = toRead;
       this.pointer = pointer;
       this.isBinary = isBinary;
+      if (isBinary)
+        binaryLength = toRead;
       lazy = true;
     }
 
@@ -431,6 +433,8 @@
       this.toRead = toRead;
       this.pointer = pointer;
       this.isBinary = isBinary;
+      if (isBinary)
+        binaryLength = toRead;
       lazy = true;
     }
 
@@ -619,6 +623,9 @@
       this.fieldsData = value;
       this.isCompressed = compressed;
       this.isBinary = binary;
+      if (binary)
+        binaryLength = ((byte[]) value).length;
+
       this.isTokenized = tokenize;
 
       this.name = fi.name.intern();

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java?rev=713962&r1=713961&r2=713962&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexReader.java Fri Nov 14 02:02:44 2008
@@ -26,6 +26,7 @@
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
+import java.util.HashSet;
 
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
@@ -34,6 +35,9 @@
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
@@ -289,6 +293,96 @@
         reader2.close();
         dir.close();
     }
+    
+    public void testBinaryFields() throws IOException
+    {
+        Directory dir = new RAMDirectory();
+        byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+        
+        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+        
+        for (int i = 0; i < 10; i++) {
+          addDoc(writer, "document number " + (i + 1));
+          addDocumentWithFields(writer);
+          addDocumentWithDifferentFields(writer);
+          addDocumentWithTermVectorFields(writer);
+        }
+        writer.close();
+        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
+        Document doc = new Document();
+        doc.add(new Field("bin1", bin, Field.Store.YES));
+        doc.add(new Field("bin2", bin, Field.Store.COMPRESS));
+        doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
+        writer.addDocument(doc);
+        writer.close();
+        IndexReader reader = IndexReader.open(dir);
+        doc = reader.document(reader.maxDoc() - 1);
+        Field[] fields = doc.getFields("bin1");
+        assertNotNull(fields);
+        assertEquals(1, fields.length);
+        Field b1 = fields[0];
+        assertTrue(b1.isBinary());
+        byte[] data1 = b1.getBinaryValue();
+        assertEquals(bin.length, b1.getBinaryLength());
+        for (int i = 0; i < bin.length; i++) {
+          assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
+        }
+        fields = doc.getFields("bin2");
+        assertNotNull(fields);
+        assertEquals(1, fields.length);
+        b1 = fields[0];
+        assertTrue(b1.isBinary());
+        data1 = b1.getBinaryValue();
+        assertEquals(bin.length, b1.getBinaryLength());
+        for (int i = 0; i < bin.length; i++) {
+          assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
+        }
+        Set lazyFields = new HashSet();
+        lazyFields.add("bin1");
+        FieldSelector sel = new SetBasedFieldSelector(new HashSet(), lazyFields);
+        doc = reader.document(reader.maxDoc() - 1, sel);
+        Fieldable[] fieldables = doc.getFieldables("bin1");
+        assertNotNull(fieldables);
+        assertEquals(1, fieldables.length);
+        Fieldable fb1 = fieldables[0];
+        assertTrue(fb1.isBinary());
+        assertEquals(bin.length, fb1.getBinaryLength());
+        data1 = fb1.getBinaryValue();
+        assertEquals(bin.length, fb1.getBinaryLength());
+        for (int i = 0; i < bin.length; i++) {
+          assertEquals(bin[i], data1[i + fb1.getBinaryOffset()]);
+        }
+        reader.close();
+        // force optimize
+
+
+        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
+        writer.optimize();
+        writer.close();
+        reader = IndexReader.open(dir);
+        doc = reader.document(reader.maxDoc() - 1);
+        fields = doc.getFields("bin1");
+        assertNotNull(fields);
+        assertEquals(1, fields.length);
+        b1 = fields[0];
+        assertTrue(b1.isBinary());
+        data1 = b1.getBinaryValue();
+        assertEquals(bin.length, b1.getBinaryLength());
+        for (int i = 0; i < bin.length; i++) {
+          assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
+        }
+        fields = doc.getFields("bin2");
+        assertNotNull(fields);
+        assertEquals(1, fields.length);
+        b1 = fields[0];
+        assertTrue(b1.isBinary());
+        data1 = b1.getBinaryValue();
+        assertEquals(bin.length, b1.getBinaryLength());
+        for (int i = 0; i < bin.length; i++) {
+          assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
+        }
+        reader.close();
+    }
 
     // Make sure attempts to make changes after reader is
     // closed throws IOException:
@@ -1403,9 +1497,8 @@
         w.close();
         assertTrue(new File(indexDir, "_0.fnm").delete());
 
-        IndexReader r = null;
         try {
-          r = IndexReader.open(indexDir);
+          IndexReader.open(indexDir);
           fail("did not hit expected exception");
         } catch (AlreadyClosedException ace) {
           fail("should not have hit AlreadyClosedException");



Mime
View raw message