lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dougs...@apache.org
Subject svn commit: r798995 [7/35] - in /incubator/lucene.net/trunk/C#/src: Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/ Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net/...
Date Wed, 29 Jul 2009 18:04:24 GMT
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriterThreadState.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/DocumentsWriterThreadState.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriterThreadState.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/DocumentsWriterThreadState.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+    /// <summary>
+    /// Per-thread state used by DocumentsWriter. Each instance holds its
+    /// own DocState and per-thread consumer, both set up in the constructor
+    /// from the owning writer. (Original note: postings hashes from all
+    /// threads are merged when writing the segment.)
+    /// </summary>
+    sealed internal class DocumentsWriterThreadState
+    {
+        internal bool isIdle = true;                          // true while no thread is using this state; false while in use
+        internal int numThreads = 1;                             // number of threads that share this instance
+        internal bool doFlushAfter;                           // true if we should flush after processing the current doc
+        internal readonly DocConsumerPerThread consumer;      // result of docWriter.consumer.addThread(this); assigned once in the constructor
+        internal readonly DocumentsWriter.DocState docState;  // state object populated from the writer's settings in the constructor
+
+        internal readonly DocumentsWriter docWriter;          // the writer this state was created for
+
+        public DocumentsWriterThreadState(DocumentsWriter docWriter)
+        {
+            this.docWriter = docWriter;
+            docState = new DocumentsWriter.DocState();
+            docState.maxFieldLength = docWriter.maxFieldLength;   // copy the writer's settings into the new DocState
+            docState.infoStream = docWriter.infoStream;
+            docState.similarity = docWriter.similarity;
+            docState.docWriter = docWriter;                       // back-reference from the DocState to the writer
+            consumer = docWriter.consumer.addThread(this);        // runs last, after docState has been populated
+        }
+
+        internal void doAfterFlush()
+        {
+            numThreads = 0;           // reset the count of threads sharing this instance
+            doFlushAfter = false;     // clear the flush-after-current-doc flag
+        }
+    }
+}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldInfo.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfo.cs Wed Jul 29 18:04:12 2009
@@ -32,15 +32,16 @@
 		public bool storePositionWithTermVector;
 		
 		public bool omitNorms; // omit norms associated with indexed fields
-		
-		public bool IsIndexed()
-		{
-			return isIndexed;
-		}
-		
+        internal bool omitTf;
+
+        public bool omitTf_ForNUnitTest   // read-only public view of the internal omitTf flag; by its name, intended for NUnit tests
+        {
+            get { return omitTf; }
+        }
+
 		internal bool storePayloads; // whether this field stores payloads together with term positions
 		
-		internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads)
+		internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
 		{
 			name = na;
 			isIndexed = tk;
@@ -50,15 +51,81 @@
 			this.storePositionWithTermVector = storePositionWithTermVector;
 			this.omitNorms = omitNorms;
 			this.storePayloads = storePayloads;
+            this.omitTf = omitTf;
 		}
 		
-		public System.Object Clone()
+		public object Clone()
 		{
-			return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+			return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
 		}
 
+        internal void update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector,
+                    bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
+        {   // Merge rule: any flag that disagrees with the incoming value is forced to its sticky value (true, or false for omitNorms).
+            if (this.isIndexed != isIndexed)
+            {
+                this.isIndexed = true;                      // once indexed, always indexed
+            }
+            if (this.storeTermVector != storeTermVector)
+            {
+                this.storeTermVector = true;                // once a term vector is stored, always store it
+            }
+            if (this.storePositionWithTermVector != storePositionWithTermVector)
+            {
+                this.storePositionWithTermVector = true;                // once positions are stored with the vector, always store them
+            }
+            if (this.storeOffsetWithTermVector != storeOffsetWithTermVector)
+            {
+                this.storeOffsetWithTermVector = true;                // once offsets are stored with the vector, always store them
+            }
+            if (this.omitNorms != omitNorms)
+            {
+                this.omitNorms = false;                // once norms are stored, always store them
+            }
+            if (this.omitTf != omitTf)
+            {
+                this.omitTf = true;                // once tf has been omitted for this field, it stays omitted for life
+            }
+            if (this.storePayloads != storePayloads)
+            {
+                this.storePayloads = true;                // once payloads are stored, always store them
+            }
+        }
+
+        internal void update(FieldInfo other)
+        {   // Merge rule: any flag that disagrees with other's value is forced to its sticky value (true, or false for omitNorms).
+            if (isIndexed != other.isIndexed)
+            {
+                isIndexed = true;                      // once indexed, always indexed
+            }
+            if (storeTermVector != other.storeTermVector)
+            {
+                storeTermVector = true;                // once a term vector is stored, always store it
+            }
+            if (storePositionWithTermVector != other.storePositionWithTermVector)
+            {
+                storePositionWithTermVector = true;                // once positions are stored with the vector, always store them
+            }
+            if (storeOffsetWithTermVector != other.storeOffsetWithTermVector)
+            {
+                storeOffsetWithTermVector = true;                // once offsets are stored with the vector, always store them
+            }
+            if (omitNorms != other.omitNorms)
+            {
+                omitNorms = false;                // once norms are stored, always store them
+            }
+            if (omitTf != other.omitTf)
+            {
+                omitTf = true;                // once tf has been omitted for this field, it stays omitted for life
+            }
+            if (storePayloads != other.storePayloads)
+            {
+                storePayloads = true;                // once payloads are stored, always store them
+            }
+        }
+
         // For testing only
-        public System.String Name_ForNUnitTest
+        public string Name_ForNUnitTest
         {
             get { return name; }
         }
@@ -68,5 +135,10 @@
         {
             get { return storePayloads; }
         }
+
+        public bool IsIndexed_ForNUnitTest()   // public accessor for the isIndexed flag; by its name, intended for NUnit tests
+        {
+            return isIndexed;
+        }
     }
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldInfos.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldInfos.cs Wed Jul 29 18:04:12 2009
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-using System;
+using System.Collections.Generic;
 
 using Document = Lucene.Net.Documents.Document;
 using Fieldable = Lucene.Net.Documents.Fieldable;
@@ -27,7 +27,7 @@
 {
 	
 	/// <summary>Access to the Fieldable Info file that describes document fields and whether or
-	/// not they are indexed. Each segment has a separate Fieldable Info file. Objects
+	/// not they are indexed. Each segment has a separate Fieldable Info file. objects
 	/// of this class are thread-safe for multiple readers, but only one thread can
 	/// be adding documents at a time, with no other reader or writer threads
 	/// accessing this object.
@@ -40,10 +40,11 @@
 		internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x4);
 		internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x8);
 		internal const byte OMIT_NORMS = (byte) (0x10);
-		internal const byte STORE_PAYLOADS = (byte) (0x20);
-		
-		private System.Collections.ArrayList byNumber = new System.Collections.ArrayList();
-		private System.Collections.Hashtable byName = new System.Collections.Hashtable();
+        internal const byte STORE_PAYLOADS = (byte)(0x20);
+        internal const byte OMIT_TF = (byte)(0x40);
+
+        private List<FieldInfo> byNumber = new List<FieldInfo>();
+        private Dictionary<string, FieldInfo> byName = new Dictionary<string, FieldInfo>();
 		
 		public FieldInfos()
 		{
@@ -57,7 +58,7 @@
 		/// <param name="name">The name of the file to open the IndexInput from in the Directory
 		/// </param>
 		/// <throws>  IOException </throws>
-		public FieldInfos(Directory d, System.String name)
+		public FieldInfos(Directory d, string name)
 		{
 			IndexInput input = d.OpenInput(name);
 			try
@@ -71,30 +72,49 @@
 		}
 		
 		/// <summary> Returns a deep clone of this FieldInfos instance.</summary>
-		public System.Object Clone()
+		public object Clone()
 		{
-			FieldInfos fis = new FieldInfos();
-			int numField = byNumber.Count;
-			for (int i = 0; i < numField; i++)
-			{
-				FieldInfo fi = (FieldInfo) ((FieldInfo) byNumber[i]).Clone();
-				fis.byNumber.Add(fi);
-				fis.byName[fi.name] = fi;
-			}
-			return fis;
+            lock (this)
+            {
+                FieldInfos fis = new FieldInfos();
+                int numField = byNumber.Count;
+                for (int i = 0; i < numField; i++)
+                {
+                    FieldInfo fi = (FieldInfo)(byNumber[i].Clone());
+                    fis.byNumber.Add(fi);
+                    fis.byName[fi.name] = fi;
+                }
+                return fis;
+            }
 		}
 		
 		/// <summary>Adds field info for a Document. </summary>
 		public void  Add(Document doc)
 		{
-			System.Collections.IList fields = doc.GetFields();
-			System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
-			while (fieldIterator.MoveNext())
-			{
-				Fieldable field = (Fieldable) fieldIterator.Current;
-				Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
-			}
+            lock (this)
+            {
+                System.Collections.IList fields = doc.GetFields();
+                System.Collections.IEnumerator fieldIterator = fields.GetEnumerator();
+                while (fieldIterator.MoveNext())
+                {
+                    Fieldable field = (Fieldable)fieldIterator.Current;
+                    Add(field.Name(), field.IsIndexed(), field.IsTermVectorStored(), field.IsStorePositionWithTermVector(), field.IsStoreOffsetWithTermVector(), field.GetOmitNorms());
+                }
+            }
 		}
+
+        /// <summary>
+        /// Returns true if at least one field does *not* omit tf.
+        /// </summary>
+        /// <returns>true when some FieldInfo has omitTf == false; false otherwise, including when there are no fields.</returns>
+        internal bool HasProx()
+        {
+            int numFields = byNumber.Count;
+            for (int i = 0; i < numFields; i++)   // scan fields by number; stop at the first one that keeps tf
+                if (!FieldInfo(i).omitTf)
+                    return true;
+            return false;                         // every field omits tf, or no fields are registered
+        }
 		
 		/// <summary> Add fields that are indexed. Whether they have termvectors has to be specified.
 		/// 
@@ -109,12 +129,15 @@
 		/// </param>
 		public void  AddIndexed(System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
 		{
-			System.Collections.IEnumerator i = names.GetEnumerator();
-			while (i.MoveNext())
-			{
-				System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry) i.Current;
-				Add((System.String) t.Key, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
-			}
+            lock (this)
+            {
+                System.Collections.IEnumerator i = names.GetEnumerator();
+                while (i.MoveNext())
+                {
+                    System.Collections.DictionaryEntry t = (System.Collections.DictionaryEntry)i.Current;
+                    Add((string)t.Key, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+                }
+            }
 		}
 		
 		/// <summary> Assumes the fields are not storing term vectors.
@@ -125,15 +148,18 @@
 		/// <param name="isIndexed">Whether the fields are indexed or not
 		/// 
 		/// </param>
-		/// <seealso cref="Add(String, boolean)">
+		/// <seealso cref="Add(string, boolean)">
 		/// </seealso>
-		public void  Add(System.Collections.ICollection names, bool isIndexed)
+		public void  Add(ICollection<string> names, bool isIndexed)
 		{
-			System.Collections.IEnumerator i = names.GetEnumerator();
-			while (i.MoveNext())
-			{
-				Add((System.String) i.Current, isIndexed);
-			}
+            lock (this)
+            {
+                IEnumerator<string> i = names.GetEnumerator();
+                while (i.MoveNext())
+                {
+                    Add(i.Current, isIndexed);
+                }
+            }
 		}
 		
 		/// <summary> Calls 5 parameter add with false for all TermVector parameters.
@@ -143,11 +169,14 @@
 		/// </param>
 		/// <param name="isIndexed">true if the field is indexed
 		/// </param>
-		/// <seealso cref="Add(String, boolean, boolean, boolean, boolean)">
+		/// <seealso cref="Add(string, boolean, boolean, boolean, boolean)">
 		/// </seealso>
-		public void  Add(System.String name, bool isIndexed)
+		public void  Add(string name, bool isIndexed)
 		{
-			Add(name, isIndexed, false, false, false, false);
+            lock (this)
+            {
+                Add(name, isIndexed, false, false, false, false);
+            }
 		}
 		
 		/// <summary> Calls 5 parameter add with false for term vector positions and offsets.
@@ -159,9 +188,12 @@
 		/// </param>
 		/// <param name="storeTermVector">true if the term vector should be stored
 		/// </param>
-		public void  Add(System.String name, bool isIndexed, bool storeTermVector)
+		public void  Add(string name, bool isIndexed, bool storeTermVector)
 		{
-			Add(name, isIndexed, storeTermVector, false, false, false);
+            lock (this)
+            {
+                Add(name, isIndexed, storeTermVector, false, false, false);
+            }
 		}
 		
 		/// <summary>If the field is not yet known, adds it. If it is known, checks to make
@@ -180,10 +212,12 @@
 		/// </param>
 		/// <param name="storeOffsetWithTermVector">true if the term vector with offsets should be stored
 		/// </param>
-		public void  Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
+		public void  Add(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
 		{
-			
-			Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+            lock (this)
+            {
+                Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+            }
 		}
 		
 		/// <summary>If the field is not yet known, adds it. If it is known, checks to make
@@ -204,9 +238,12 @@
 		/// </param>
 		/// <param name="omitNorms">true if the norms for the indexed field should be omitted
 		/// </param>
-		public void  Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
+		public void  Add(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms)
 		{
-			Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false);
+            lock (this)
+            {
+                Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false);
+            }
 		}
 		
 		/// <summary>If the field is not yet known, adds it. If it is known, checks to make
@@ -229,60 +266,57 @@
 		/// </param>
 		/// <param name="storePayloads">true if payloads should be stored for this field
 		/// </param>
-		public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads)
-		{
-			FieldInfo fi = FieldInfo(name);
-			if (fi == null)
-			{
-				return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
-			}
-			else
-			{
-				if (fi.isIndexed != isIndexed)
-				{
-					fi.isIndexed = true; // once indexed, always index
-				}
-				if (fi.storeTermVector != storeTermVector)
-				{
-					fi.storeTermVector = true; // once vector, always vector
-				}
-				if (fi.storePositionWithTermVector != storePositionWithTermVector)
-				{
-					fi.storePositionWithTermVector = true; // once vector, always vector
-				}
-				if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector)
-				{
-					fi.storeOffsetWithTermVector = true; // once vector, always vector
-				}
-				if (fi.omitNorms != omitNorms)
-				{
-					fi.omitNorms = false; // once norms are stored, always store
-				}
-				if (fi.storePayloads != storePayloads)
-				{
-					fi.storePayloads = true;
-				}
-			}
-			return fi;
-		}
-		
-		private FieldInfo AddInternal(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads)
+        public FieldInfo Add(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
+        {
+            lock (this)   // the lookup and the add/update below run under this instance's monitor
+            {
+                FieldInfo fi = FieldInfo(name);   // look up any existing entry by name
+                if (fi == null)
+                {
+                    return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);   // unknown name: create and register a new entry
+                }
+                else
+                {
+                    fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);   // known name: merge the requested flags into the existing entry
+                }
+                return fi;   // the existing entry, after the merge
+            }
+        }
+		
+        public FieldInfo Add(FieldInfo fieldInfo)
+        {
+            lock (this)   // the lookup and the add/update below run under this instance's monitor
+            {
+                FieldInfo fi = FieldInfo(fieldInfo.name);   // look up any existing entry with the same name
+                if (fi == null)
+                {
+                    return AddInternal(fieldInfo.name, fieldInfo.isIndexed, fieldInfo.storeTermVector, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector, fieldInfo.omitNorms, fieldInfo.storePayloads, fieldInfo.omitTf);   // unknown name: register a new entry built from fieldInfo's values (fieldInfo itself is not stored)
+                }
+                else
+                {
+                    fi.update(fieldInfo);   // known name: merge fieldInfo's flags into the existing entry
+                }
+                return fi;   // the existing entry, after the merge
+            }
+        }
+
+		private FieldInfo AddInternal(string name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTf)
 		{
-			FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+			FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
 			byNumber.Add(fi);
 			byName[name] = fi;
 			return fi;
 		}
 		
-		public int FieldNumber(System.String fieldName)
+		public int FieldNumber(string fieldName)
 		{
 			FieldInfo fi = FieldInfo(fieldName);
 			return (fi != null) ? fi.number : -1;
 		}
 		
-		public FieldInfo FieldInfo(System.String fieldName)
+		public FieldInfo FieldInfo(string fieldName)
 		{
-			return (FieldInfo) byName[fieldName];
+			return byName.ContainsKey(fieldName) ? byName[fieldName] : null;
 		}
 		
 		/// <summary> Return the fieldName identified by its number.
@@ -293,7 +327,7 @@
 		/// <returns> the fieldName or an empty string when the field
 		/// with the given number doesn't exist.
 		/// </returns>
-		public System.String FieldName(int fieldNumber)
+		public string FieldName(int fieldNumber)
 		{
 			FieldInfo fi = FieldInfo(fieldNumber);
 			return (fi != null) ? fi.name : "";
@@ -307,7 +341,7 @@
 		/// </returns>
 		public FieldInfo FieldInfo(int fieldNumber)
 		{
-			return (fieldNumber >= 0) ? (FieldInfo) byNumber[fieldNumber] : null;
+			return (fieldNumber >= 0) ? byNumber[fieldNumber] : null;
 		}
 		
 		public int Size()
@@ -329,7 +363,7 @@
 			return hasVectors;
 		}
 		
-		public void  Write(Directory d, System.String name)
+		public void  Write(Directory d, string name)
 		{
 			IndexOutput output = d.CreateOutput(name);
 			try
@@ -361,7 +395,10 @@
 					bits |= OMIT_NORMS;
 				if (fi.storePayloads)
 					bits |= STORE_PAYLOADS;
-				output.WriteString(fi.name);
+                if (fi.omitTf)
+                    bits |= OMIT_TF;
+
+                output.WriteString(fi.name);
 				output.WriteByte(bits);
 			}
 		}
@@ -371,7 +408,7 @@
 			int size = input.ReadVInt(); //read in the size
 			for (int i = 0; i < size; i++)
 			{
-				System.String name = String.Intern(input.ReadString());
+				string name = string.Intern(input.ReadString());
 				byte bits = input.ReadByte();
 				bool isIndexed = (bits & IS_INDEXED) != 0;
 				bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
@@ -379,9 +416,10 @@
 				bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
 				bool omitNorms = (bits & OMIT_NORMS) != 0;
 				bool storePayloads = (bits & STORE_PAYLOADS) != 0;
+                bool omitTf = (bits & OMIT_TF) != 0;
 				
-				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads);
+				AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf);
 			}
 		}
 	}
-}
+}
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldSortedTermVectorMapper.cs Wed Jul 29 18:04:12 2009
@@ -27,19 +27,19 @@
 	public class FieldSortedTermVectorMapper : TermVectorMapper
 	{
 		private System.Collections.IDictionary fieldToTerms = new System.Collections.Hashtable();
-		private System.Collections.Generic.SortedDictionary<Object, Object> currentSet;
+		private System.Collections.Generic.SortedDictionary<object, object> currentSet;
 		private System.String currentField;
-		private System.Collections.Generic.IComparer<Object> comparator;
+		private System.Collections.Generic.IComparer<object> comparator;
 
 		/// <summary> </summary>
 		/// <param name="comparator">A Comparator for sorting {@link TermVectorEntry}s
 		/// </param>
-		public FieldSortedTermVectorMapper(System.Collections.Generic.IComparer<Object> comparator) : this(false, false, comparator)
+		public FieldSortedTermVectorMapper(System.Collections.Generic.IComparer<object> comparator) : this(false, false, comparator)
 		{
 		}
 		
 		
-		public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, System.Collections.Generic.IComparer<Object> comparator) : base(ignoringPositions, ignoringOffsets)
+		public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, System.Collections.Generic.IComparer<object> comparator) : base(ignoringPositions, ignoringOffsets)
 		{
 			this.comparator = comparator;
 		}
@@ -52,7 +52,8 @@
 		
 		public override void  SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions)
 		{
-			currentSet = new System.Collections.Generic.SortedDictionary<Object, Object>(comparator);
+			currentSet = new System.Collections.Generic.SortedDictionary<object, object>(comparator);
+
 			currentField = field;
 			fieldToTerms[field] = currentSet;
 		}
@@ -68,7 +69,7 @@
 		}
 		
 		
-		public virtual System.Collections.Generic.IComparer<Object> GetComparator()
+		public virtual System.Collections.Generic.IComparer<object> GetComparator()
 		{
 			return comparator;
 		}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldsReader.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsReader.cs Wed Jul 29 18:04:12 2009
@@ -20,627 +20,704 @@
 using Lucene.Net.Documents;
 using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
 using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using CloseableThreadLocal = Lucene.Net.Util.CloseableThreadLocal;
 using Directory = Lucene.Net.Store.Directory;
 using IndexInput = Lucene.Net.Store.IndexInput;
 using TokenStream = Lucene.Net.Analysis.TokenStream;
 
 namespace Lucene.Net.Index
 {
-	
-	/// <summary> Class responsible for access to stored document fields.
-	/// <p/>
-	/// It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
-	/// 
-	/// </summary>
-	/// <version>  $Id: FieldsReader.java 620759 2008-02-12 11:10:21Z mikemccand $
-	/// </version>
-	public sealed class FieldsReader
-	{
-		private FieldInfos fieldInfos;
-		
-		// The main fieldStream, used only for cloning.
-		private IndexInput cloneableFieldsStream;
-		
-		// This is a clone of cloneableFieldsStream used for reading documents.
-		// It should not be cloned outside of a synchronized context.
-		private IndexInput fieldsStream;
-		
-		private IndexInput indexStream;
-		private int numTotalDocs;
-		private int size;
-		private bool closed;
-		
-		// The docID offset where our docs begin in the index
-		// file.  This will be 0 if we have our own private file.
-		private int docStoreOffset;
-		
-		private System.LocalDataStoreSlot fieldsStreamTL = System.Threading.Thread.AllocateDataSlot();
-		
-		public FieldsReader(Directory d, System.String segment, FieldInfos fn) : this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, - 1, 0)
-		{
-		}
-		
-		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize) : this(d, segment, fn, readBufferSize, - 1, 0)
-		{
-		}
-		
-		internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
-		{
-			bool success = false;
-			
-			try
-			{
-				fieldInfos = fn;
-				
-				cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize);
-				fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
-				indexStream = d.OpenInput(segment + ".fdx", readBufferSize);
-				
-				if (docStoreOffset != - 1)
-				{
-					// We read only a slice out of this shared fields file
-					this.docStoreOffset = docStoreOffset;
-					this.size = size;
-					
-					// Verify the file is long enough to hold all of our
-					// docs
-					System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset);
-				}
-				else
-				{
-					this.docStoreOffset = 0;
-					this.size = (int) (indexStream.Length() >> 3);
-				}
-				
-				numTotalDocs = (int) (indexStream.Length() >> 3);
-				success = true;
-			}
-			finally
-			{
-				// With lock-less commits, it's entirely possible (and
-				// fine) to hit a FileNotFound exception above. In
-				// this case, we want to explicitly close any subset
-				// of things that were opened so that we don't have to
-				// wait for a GC to do so.
-				if (!success)
-				{
-					Close();
-				}
-			}
-		}
-		
-		/// <throws>  AlreadyClosedException if this FieldsReader is closed </throws>
-		internal void  EnsureOpen()
-		{
-			if (closed)
-			{
-				throw new AlreadyClosedException("this FieldsReader is closed");
-			}
-		}
-		
-		/// <summary> Closes the underlying {@link Lucene.Net.Store.IndexInput} streams, including any ones associated with a
-		/// lazy implementation of a Field.  This means that the Fields values will not be accessible.
-		/// 
-		/// </summary>
-		/// <throws>  IOException </throws>
-		public void  Close()
-		{
-			if (!closed)
-			{
-				if (fieldsStream != null)
-				{
-					fieldsStream.Close();
-				}
-				if (cloneableFieldsStream != null)
-				{
-					cloneableFieldsStream.Close();
-				}
-				if (indexStream != null)
-				{
-					indexStream.Close();
-				}
-				IndexInput localFieldsStream = (IndexInput) System.Threading.Thread.GetData(fieldsStreamTL);
-				if (localFieldsStream != null)
-				{
-					localFieldsStream.Close();
-					System.Threading.Thread.SetData(fieldsStreamTL, null);
-				}
-				closed = true;
-			}
-		}
-		
-		public int Size()
-		{
-			return size;
-		}
-		
-		public Document Doc(int n, FieldSelector fieldSelector)
-		{
-			indexStream.Seek((n + docStoreOffset) * 8L);
-			long position = indexStream.ReadLong();
-			fieldsStream.Seek(position);
-			
-			Document doc = new Document();
-			int numFields = fieldsStream.ReadVInt();
-			for (int i = 0; i < numFields; i++)
-			{
-				int fieldNumber = fieldsStream.ReadVInt();
-				FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
-				FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
-				
-				byte bits = fieldsStream.ReadByte();
-				System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);
-
-				bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
-				bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
-				bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
-				//TODO: Find an alternative approach here if this list continues to grow beyond the
-				//list of 5 or 6 currently here.  See Lucene 762 for discussion
-				if (acceptField.Equals(FieldSelectorResult.LOAD))
-				{
-					AddField(doc, fi, binary, compressed, tokenize);
-				}
-				else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
-				{
-					AddFieldForMerge(doc, fi, binary, compressed, tokenize);
-				}
-				else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
-				{
-					AddField(doc, fi, binary, compressed, tokenize);
-					break; //Get out of this loop
-				}
-				else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
-				{
-					AddFieldLazy(doc, fi, binary, compressed, tokenize);
-				}
-				else if (acceptField.Equals(FieldSelectorResult.SIZE))
-				{
-					SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
-				}
-				else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
-				{
-					AddFieldSize(doc, fi, binary, compressed);
-					break;
-				}
-				else
-				{
-					SkipField(binary, compressed);
-				}
-			}
-			
-			return doc;
-		}
-		
-		/// <summary>Returns the length in bytes of each raw document in a
-		/// contiguous range of length numDocs starting with
-		/// startDocID.  Returns the IndexInput (the fieldStream),
-		/// already seeked to the starting point for startDocID.
-		/// </summary>
-		internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
-		{
-			indexStream.Seek((docStoreOffset + startDocID) * 8L);
-			long startOffset = indexStream.ReadLong();
-			long lastOffset = startOffset;
-			int count = 0;
-			while (count < numDocs)
-			{
-				long offset;
-				int docID = docStoreOffset + startDocID + count + 1;
-				System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
-				if (docID < numTotalDocs)
-					offset = indexStream.ReadLong();
-				else
-					offset = fieldsStream.Length();
-				lengths[count++] = (int) (offset - lastOffset);
-				lastOffset = offset;
-			}
-			
-			fieldsStream.Seek(startOffset);
-			
-			return fieldsStream;
-		}
-		
-		/// <summary> Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
-		/// This will have the most payoff on large fields.
-		/// </summary>
-		private void  SkipField(bool binary, bool compressed)
-		{
-			SkipField(binary, compressed, fieldsStream.ReadVInt());
-		}
-		
-		private void  SkipField(bool binary, bool compressed, int toRead)
-		{
-			if (binary || compressed)
-			{
-				long pointer = fieldsStream.GetFilePointer();
-				fieldsStream.Seek(pointer + toRead);
-			}
-			else
-			{
-				//We need to skip chars.  This will slow us down, but still better
-				fieldsStream.SkipChars(toRead);
-			}
-		}
-		
-		private void  AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
-		{
-			if (binary == true)
-			{
-				int toRead = fieldsStream.ReadVInt();
-				long pointer = fieldsStream.GetFilePointer();
-				if (compressed)
-				{
-					//was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
-					doc.Add(new LazyField(this, fi.name, Field.Store.COMPRESS, toRead, pointer));
-				}
-				else
-				{
-					//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-					doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer));
-				}
-				//Need to move the pointer ahead by toRead positions
-				fieldsStream.Seek(pointer + toRead);
-			}
-			else
-			{
-				Field.Store store = Field.Store.YES;
-				Field.Index index = GetIndexType(fi, tokenize);
-				Field.TermVector termVector = GetTermVectorType(fi);
-				
-				Fieldable f;
-				if (compressed)
-				{
-					store = Field.Store.COMPRESS;
-					int toRead = fieldsStream.ReadVInt();
-					long pointer = fieldsStream.GetFilePointer();
-					f = new LazyField(this, fi.name, store, toRead, pointer);
-					//skip over the part that we aren't loading
-					fieldsStream.Seek(pointer + toRead);
-					f.SetOmitNorms(fi.omitNorms);
-				}
-				else
-				{
-					int length = fieldsStream.ReadVInt();
-					long pointer = fieldsStream.GetFilePointer();
-					//Skip ahead of where we are by the length of what is stored
-					fieldsStream.SkipChars(length);
-					f = new LazyField(this, fi.name, store, index, termVector, length, pointer);
-					f.SetOmitNorms(fi.omitNorms);
-				}
-				doc.Add(f);
-			}
-		}
-		
-		// in merge mode we don't uncompress the data of a compressed field
-		private void  AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
-		{
-			System.Object data;
-			
-			if (binary || compressed)
-			{
-				int toRead = fieldsStream.ReadVInt();
-				byte[] b = new byte[toRead];
-				fieldsStream.ReadBytes(b, 0, b.Length);
-				data = b;
-			}
-			else
-			{
-				data = fieldsStream.ReadString();
-			}
-			
-			doc.Add(new FieldForMerge(data, fi, binary, compressed, tokenize));
-		}
-		
-		private void  AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
-		{
-			
-			//we have a binary stored field, and it may be compressed
-			if (binary)
-			{
-				int toRead = fieldsStream.ReadVInt();
-				byte[] b = new byte[toRead];
-				fieldsStream.ReadBytes(b, 0, b.Length);
-				if (compressed)
-					doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
-				else
-					doc.Add(new Field(fi.name, b, Field.Store.YES));
-			}
-			else
-			{
-				Field.Store store = Field.Store.YES;
-				Field.Index index = GetIndexType(fi, tokenize);
-				Field.TermVector termVector = GetTermVectorType(fi);
-				
-				Fieldable f;
-				if (compressed)
-				{
-					store = Field.Store.COMPRESS;
-					int toRead = fieldsStream.ReadVInt();
-					
-					byte[] b = new byte[toRead];
-					fieldsStream.ReadBytes(b, 0, b.Length);
-					f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
-					f.SetOmitNorms(fi.omitNorms);
-				}
-				else
-				{
-					f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
-					f.SetOmitNorms(fi.omitNorms);
-				}
-				doc.Add(f);
-			}
-		}
-		
-		// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
-		// Read just the size -- caller must skip the field content to continue reading fields
-		// Return the size in bytes or chars, depending on field type
-		private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
-		{
-			int size = fieldsStream.ReadVInt(), bytesize = binary || compressed ? size : 2 * size;
-			byte[] sizebytes = new byte[4];
-			sizebytes[0] = (byte) (SupportClass.Number.URShift(bytesize, 24));
-			sizebytes[1] = (byte) (SupportClass.Number.URShift(bytesize, 16));
-			sizebytes[2] = (byte) (SupportClass.Number.URShift(bytesize, 8));
-			sizebytes[3] = (byte) bytesize;
-			doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
-			return size;
-		}
-		
-		private Field.TermVector GetTermVectorType(FieldInfo fi)
-		{
-			Field.TermVector termVector = null;
-			if (fi.storeTermVector)
-			{
-				if (fi.storeOffsetWithTermVector)
-				{
-					if (fi.storePositionWithTermVector)
-					{
-						termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
-					}
-					else
-					{
-						termVector = Field.TermVector.WITH_OFFSETS;
-					}
-				}
-				else if (fi.storePositionWithTermVector)
-				{
-					termVector = Field.TermVector.WITH_POSITIONS;
-				}
-				else
-				{
-					termVector = Field.TermVector.YES;
-				}
-			}
-			else
-			{
-				termVector = Field.TermVector.NO;
-			}
-			return termVector;
-		}
-		
-		private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
-		{
-			Field.Index index;
-			if (fi.isIndexed && tokenize)
-				index = Field.Index.TOKENIZED;
-			else if (fi.isIndexed && !tokenize)
-				index = Field.Index.UN_TOKENIZED;
-			else
-				index = Field.Index.NO;
-			return index;
-		}
-		
-		/// <summary> A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
-		/// loaded.
-		/// </summary>
-		[Serializable]
-		private class LazyField:AbstractField, Fieldable
-		{
-			private void  InitBlock(FieldsReader enclosingInstance)
-			{
-				this.enclosingInstance = enclosingInstance;
-			}
-			private FieldsReader enclosingInstance;
-			public FieldsReader Enclosing_Instance
-			{
-				get
-				{
-					return enclosingInstance;
-				}
-				
-			}
-			private int toRead;
-			private long pointer;
-			
-			public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer):base(name, store, Field.Index.NO, Field.TermVector.NO)
-			{
-				InitBlock(enclosingInstance);
-				this.toRead = toRead;
-				this.pointer = pointer;
-				lazy = true;
-			}
-			
-			public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer):base(name, store, index, termVector)
-			{
-				InitBlock(enclosingInstance);
-				this.toRead = toRead;
-				this.pointer = pointer;
-				lazy = true;
-			}
-			
-			private IndexInput GetFieldStream()
-			{
-				IndexInput localFieldsStream = (IndexInput) System.Threading.Thread.GetData(Enclosing_Instance.fieldsStreamTL);
-				if (localFieldsStream == null)
-				{
-					localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
-					System.Threading.Thread.SetData(Enclosing_Instance.fieldsStreamTL, localFieldsStream);
-				}
-				return localFieldsStream;
-			}
-			
-			/// <summary>The value of the field in Binary, or null.  If null, the Reader value,
-			/// String value, or TokenStream value is used. Exactly one of stringValue(), 
-			/// readerValue(), binaryValue(), and tokenStreamValue() must be set. 
-			/// </summary>
-			public override byte[] BinaryValue()
-			{
-				Enclosing_Instance.EnsureOpen();
-				if (fieldsData == null)
-				{
-					byte[] b = new byte[toRead];
-					IndexInput localFieldsStream = GetFieldStream();
-					//Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people
-					//since they are already handling this exception when getting the document
-					try
-					{
-						localFieldsStream.Seek(pointer);
-						localFieldsStream.ReadBytes(b, 0, b.Length);
-						if (isCompressed == true)
-						{
-							fieldsData = Enclosing_Instance.Uncompress(b);
-						}
-						else
-						{
-							fieldsData = b;
-						}
-					}
-					catch (System.IO.IOException e)
-					{
-						throw new FieldReaderException(e);
-					}
-				}
-				return fieldsData is byte[] ? (byte[]) fieldsData : null;
-			}
-			
-			/// <summary>The value of the field as a Reader, or null.  If null, the String value,
-			/// binary value, or TokenStream value is used.  Exactly one of stringValue(), 
-			/// readerValue(), binaryValue(), and tokenStreamValue() must be set. 
-			/// </summary>
-			public override System.IO.TextReader ReaderValue()
-			{
-				Enclosing_Instance.EnsureOpen();
-				return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null;
-			}
-			
-			/// <summary>The value of the field as a TokesStream, or null.  If null, the Reader value,
-			/// String value, or binary value is used. Exactly one of stringValue(), 
-			/// readerValue(), binaryValue(), and tokenStreamValue() must be set. 
-			/// </summary>
-			public override TokenStream TokenStreamValue()
-			{
-				Enclosing_Instance.EnsureOpen();
-				return fieldsData is TokenStream ? (TokenStream) fieldsData : null;
-			}
-			
-			
-			/// <summary>The value of the field as a String, or null.  If null, the Reader value,
-			/// binary value, or TokenStream value is used.  Exactly one of stringValue(), 
-			/// readerValue(), binaryValue(), and tokenStreamValue() must be set. 
-			/// </summary>
-			public override System.String StringValue()
-			{
-				Enclosing_Instance.EnsureOpen();
-				if (fieldsData == null)
-				{
-					IndexInput localFieldsStream = GetFieldStream();
-					try
-					{
-						localFieldsStream.Seek(pointer);
-						if (isCompressed)
-						{
-							byte[] b = new byte[toRead];
-							localFieldsStream.ReadBytes(b, 0, b.Length);
-							fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
-						}
-						else
-						{
-							//read in chars b/c we already know the length we need to read
-							char[] chars = new char[toRead];
-							localFieldsStream.ReadChars(chars, 0, toRead);
-							fieldsData = new System.String(chars);
-						}
-					}
-					catch (System.IO.IOException e)
-					{
-						throw new FieldReaderException(e);
-					}
-				}
-				return fieldsData is System.String ? (System.String) fieldsData : null;
-			}
-			
-			public long GetPointer()
-			{
-				Enclosing_Instance.EnsureOpen();
-				return pointer;
-			}
-			
-			public void  SetPointer(long pointer)
-			{
-				Enclosing_Instance.EnsureOpen();
-				this.pointer = pointer;
-			}
-			
-			public int GetToRead()
-			{
-				Enclosing_Instance.EnsureOpen();
-				return toRead;
-			}
-			
-			public void  SetToRead(int toRead)
-			{
-				Enclosing_Instance.EnsureOpen();
-				this.toRead = toRead;
-			}
-		}
-		
-		private byte[] Uncompress(byte[] input)
-		{
-			return SupportClass.CompressionSupport.Uncompress(input);
-        }
-		
-		// Instances of this class hold field properties and data
-		// for merge
-		[Serializable]
-		public sealed class FieldForMerge : AbstractField
-		{
-			public override System.String StringValue()
-			{
-				return (System.String) this.fieldsData;
-			}
-			
-			public override System.IO.TextReader ReaderValue()
-			{
-				// not needed for merge
-				return null;
-			}
-			
-			public override byte[] BinaryValue()
-			{
-				return (byte[]) this.fieldsData;
-			}
-			
-			public override TokenStream TokenStreamValue()
-			{
-				// not needed for merge
-				return null;
-			}
-			
-			public FieldForMerge(System.Object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
-			{
-				this.isStored = true;
-				this.fieldsData = value_Renamed;
-				this.isCompressed = compressed;
-				this.isBinary = binary;
-				this.isTokenized = tokenize;
-				
-				this.name = String.Intern(fi.name);
-				this.isIndexed = fi.isIndexed;
-				this.omitNorms = fi.omitNorms;
-				this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
-				this.storePositionWithTermVector = fi.storePositionWithTermVector;
-				this.storeTermVector = fi.storeTermVector;
-			}
-		}
-	}
+
+    /// <summary> Class responsible for access to stored document fields.
+    /// <p/>
+    /// It uses the &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
+    /// </summary>
+    public sealed class FieldsReader
+    {
+        private FieldInfos fieldInfos;
+
+        // The main fieldStream, used only for cloning.
+        private IndexInput cloneableFieldsStream;
+
+        // This is a clone of cloneableFieldsStream used for reading documents.
+        // It should not be cloned outside of a synchronized context.
+        private IndexInput fieldsStream;
+
+        private IndexInput indexStream;
+        private int numTotalDocs;
+        private int size;
+        private bool closed;
+        private readonly int format;
+        private readonly int formatSize;
+
+        // The docID offset where our docs begin in the index
+        // file.  This will be 0 if we have our own private file.
+        private int docStoreOffset;
+
+        //private System.LocalDataStoreSlot fieldsStreamTL = System.Threading.Thread.AllocateDataSlot();
+        private CloseableThreadLocal fieldsStreamTL = new CloseableThreadLocal();
+
+        public FieldsReader(Directory d, System.String segment, FieldInfos fn)
+            : this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0)
+        {
+        }
+
+        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize)
+            : this(d, segment, fn, readBufferSize, -1, 0)
+        {
+        }
+
+        /// <summary> Opens the stored fields data and index files of the given segment and reads the
+        /// index format header.  If docStoreOffset is -1 the whole index file belongs to this reader;
+        /// otherwise the reader covers size documents starting at docStoreOffset in a shared file.
+        /// Any stream already opened is closed again if construction fails. </summary>
+        internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
+        {
+            bool success = false;
+
+            try
+            {
+                fieldInfos = fn;
+
+                cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
+                indexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
+
+                // First version of fdx did not include a format
+                // header, but the first int will always be 0 in that
+                // case, so the int read here is the format either way.
+                format = indexStream.ReadInt();
+
+                if (format > FieldsWriter.FORMAT_CURRENT)
+                    throw new CorruptIndexException("Incompatible format version: " + format + " expected "
+                                                    + FieldsWriter.FORMAT_CURRENT + " or lower");
+
+                if (format > FieldsWriter.FORMAT)
+                    formatSize = 4;
+                else
+                    formatSize = 0;
+
+                if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+                    cloneableFieldsStream.SetModifiedUTF8StringsMode();
+
+                fieldsStream = (IndexInput)cloneableFieldsStream.Clone();
+
+                long indexSize = indexStream.Length() - formatSize;
+
+                if (docStoreOffset != -1)
+                {
+                    // We read only a slice out of this shared fields file
+                    this.docStoreOffset = docStoreOffset;
+                    this.size = size;
+
+                    // Verify the file is long enough to hold all of our
+                    // docs
+                    System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
+                }
+                else
+                {
+                    this.docStoreOffset = 0;
+                    this.size = (int)(indexSize >> 3);
+                }
+
+                numTotalDocs = (int)(indexSize >> 3);
+                success = true;
+            }
+            finally
+            {
+                // With lock-less commits, it's entirely possible (and
+                // fine) to hit a FileNotFound exception above. In
+                // this case, we want to explicitly close any subset
+                // of things that were opened so that we don't have to
+                // wait for a GC to do so.
+                if (!success)
+                {
+                    Close();
+                }
+            }
+        }
+
+        /// <throws>  AlreadyClosedException if this FieldsReader is closed </throws>
+        internal void EnsureOpen()
+        {
+            if (closed)
+            {
+                throw new AlreadyClosedException("this FieldsReader is closed");
+            }
+        }
+
+        /// <summary> Closes the underlying {@link Lucene.Net.Store.IndexInput} streams, including any ones associated with a
+        /// lazy implementation of a Field.  This means that the Fields values will not be accessible.
+        /// Safe to call more than once, and on a partially constructed reader (unopened streams are skipped).
+        /// </summary>
+        /// <throws>  IOException </throws>
+        public void Close()
+        {
+            if (!closed)
+            {
+                if (fieldsStream != null)
+                {
+                    fieldsStream.Close();
+                }
+                if (cloneableFieldsStream != null)
+                {
+                    cloneableFieldsStream.Close();
+                }
+                if (indexStream != null)
+                {
+                    indexStream.Close();
+                }
+                fieldsStreamTL.Close();
+                closed = true;
+            }
+        }
+
+        public int Size()
+        {
+            return size;
+        }
+
+        private void SeekIndex(int docID)
+        {
+            indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L);
+        }
+
+        internal bool CanReadRawDocs()
+        {
+            return format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+        }
+
+        public Document Doc(int n, FieldSelector fieldSelector)
+        {
+            SeekIndex(n);
+            long position = indexStream.ReadLong();
+            fieldsStream.Seek(position);
+
+            Document doc = new Document();
+            int numFields = fieldsStream.ReadVInt();
+            for (int i = 0; i < numFields; i++)
+            {
+                int fieldNumber = fieldsStream.ReadVInt();
+                FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
+                FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
+
+                byte bits = fieldsStream.ReadByte();
+                System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);
+
+                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
+                bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
+                bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
+                //TODO: Find an alternative approach here if this list continues to grow beyond the
+                //list of 5 or 6 currently here.  See Lucene 762 for discussion
+                if (acceptField.Equals(FieldSelectorResult.LOAD))
+                {
+                    AddField(doc, fi, binary, compressed, tokenize);
+                }
+                else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
+                {
+                    AddFieldForMerge(doc, fi, binary, compressed, tokenize);
+                }
+                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
+                {
+                    AddField(doc, fi, binary, compressed, tokenize);
+                    break; //Get out of this loop
+                }
+                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
+                {
+                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
+                }
+                else if (acceptField.Equals(FieldSelectorResult.SIZE))
+                {
+                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
+                }
+                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
+                {
+                    AddFieldSize(doc, fi, binary, compressed);
+                    break;
+                }
+                else
+                {
+                    SkipField(binary, compressed);
+                }
+            }
+
+            return doc;
+        }
+
+        /// <summary>Fills lengths[0..numDocs-1] with the length in bytes of each raw
+        /// document in the contiguous range of numDocs documents starting with startDocID.
+        /// A document with no following index entry is measured against the end of the fields stream.
+        /// </summary>
+        /// <returns>This reader's fieldsStream (not a clone), already seeked to the start of startDocID.</returns>
+        internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
+        {
+            SeekIndex(startDocID);
+            long startOffset = indexStream.ReadLong();
+            long lastOffset = startOffset;
+            int count = 0;
+            while (count < numDocs)
+            {
+                long offset;
+                int docID = docStoreOffset + startDocID + count + 1;
+                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
+                if (docID < numTotalDocs)
+                    offset = indexStream.ReadLong();
+                else
+                    offset = fieldsStream.Length();
+                lengths[count++] = (int)(offset - lastOffset);
+                lastOffset = offset;
+            }
+
+            fieldsStream.Seek(startOffset);
+
+            return fieldsStream;
+        }
+
+        /// <summary> Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
+        /// This will have the most payoff on large fields.
+        /// </summary>
+        private void SkipField(bool binary, bool compressed)
+        {
+            SkipField(binary, compressed, fieldsStream.ReadVInt());
+        }
+
+        private void SkipField(bool binary, bool compressed, int toRead)
+        {
+            if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed)
+            {
+                fieldsStream.Seek(fieldsStream.GetFilePointer() + toRead);
+            }
+            else
+            {
+                // Here toRead is passed to SkipChars as a char count, so we cannot simply seek.  This will slow us down, but is still better than loading the value.
+                fieldsStream.SkipChars(toRead);
+            }
+        }
+
+        private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+        {
+            if (binary)
+            {
+                int toRead = fieldsStream.ReadVInt();
+                long pointer = fieldsStream.GetFilePointer();
+                if (compressed)
+                {
+                    // Compressed binary: hand LazyField only the stored length and file pointer; no value bytes are read here.
+                    doc.Add(new LazyField(this, fi.name, Field.Store.COMPRESS, toRead, pointer, binary));
+                }
+                else
+                {
+                    // Uncompressed binary: same as above, but with Store.YES instead of Store.COMPRESS.
+                    doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer, binary));
+                }
+                // Step over the toRead value bytes so the stream is left just past this field's data.
+                fieldsStream.Seek(pointer + toRead);
+            }
+            else
+            {
+                Field.Store store = Field.Store.YES;
+                Field.Index index = GetIndexType(fi, tokenize);
+                Field.TermVector termVector = GetTermVectorType(fi);
+
+                Fieldable f;
+                if (compressed)
+                {
+                    store = Field.Store.COMPRESS;
+                    int toRead = fieldsStream.ReadVInt();
+                    long pointer = fieldsStream.GetFilePointer();
+                    f = new LazyField(this, fi.name, store, toRead, pointer, binary);
+                    // Skip over the toRead bytes that we aren't loading now.
+                    fieldsStream.Seek(pointer + toRead);
+                    f.SetOmitNorms(fi.omitNorms);
+                }
+                else
+                {
+                    int length = fieldsStream.ReadVInt();
+                    long pointer = fieldsStream.GetFilePointer();
+                    // Skip the stored text: newer formats can seek by length directly, older ones must skip length chars.
+                    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+                        fieldsStream.Seek(pointer + length);
+                    else
+                        fieldsStream.SkipChars(length);
+                    f = new LazyField(this, fi.name, store, index, termVector, length, pointer, binary);
+                    f.SetOmitNorms(fi.omitNorms);
+                }
+                doc.Add(f);
+            }
+        }
+
+        // Merge mode: the data of a compressed field is handed on as stored, without uncompressing it
+        private void AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+        {
+            object payload;
+            if (!binary && !compressed)
+            {
+                // Plain text is the only case that is read back as a string.
+                payload = fieldsStream.ReadString();
+            }
+            else
+            {
+                // Binary and compressed values: a VInt byte count, then the raw bytes.
+                byte[] raw = new byte[fieldsStream.ReadVInt()];
+                fieldsStream.ReadBytes(raw, 0, raw.Length);
+                payload = raw;
+            }
+
+            doc.Add(new FieldForMerge(payload, fi, binary, compressed, tokenize));
+        }
+
+        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+        {
+            // Binary stored field (possibly compressed): a VInt byte count, then the bytes.
+            if (binary)
+            {
+                byte[] raw = new byte[fieldsStream.ReadVInt()];
+                fieldsStream.ReadBytes(raw, 0, raw.Length);
+                Field binaryField = compressed
+                    ? new Field(fi.name, Uncompress(raw), Field.Store.COMPRESS)
+                    : new Field(fi.name, raw, Field.Store.YES);
+                doc.Add(binaryField);
+                return;
+            }
+
+            // Text field: index and term-vector settings are rebuilt from the field metadata.
+            Field.Index indexSetting = GetIndexType(fi, tokenize);
+            Field.TermVector vectorSetting = GetTermVectorType(fi);
+            Field.Store storeSetting;
+            string text;
+
+            if (compressed)
+            {
+                // Compressed text: inflate the stored bytes, then decode them as UTF-8.
+                storeSetting = Field.Store.COMPRESS;
+                byte[] packed = new byte[fieldsStream.ReadVInt()];
+                fieldsStream.ReadBytes(packed, 0, packed.Length);
+                text = System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(packed));
+            }
+            else
+            {
+                // Uncompressed text is read back directly as a string.
+                storeSetting = Field.Store.YES;
+                text = fieldsStream.ReadString();
+            }
+
+            Fieldable textField = new Field(fi.name, text, storeSetting, indexSetting, vectorSetting);
+            textField.SetOmitNorms(fi.omitNorms);
+            doc.Add(textField);
+        }
+
+        // Adds the size of the field as a byte[] holding the 4 bytes of the integer byte size (high-order byte first;
+        // a char counts as 2 bytes).  Only the size is read -- the caller must skip the field content to go on reading.
+        // Returns the size as read from the stream, which is in bytes or in chars depending on the field type.
+        private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
+        {
+            int storedSize = fieldsStream.ReadVInt();
+            int byteCount = (binary || compressed) ? storedSize : 2 * storedSize;
+            byte[] encoded = new byte[4];
+            for (int i = 0, shift = 24; i < 3; i++, shift -= 8)
+                encoded[i] = (byte)(SupportClass.Number.URShift(byteCount, shift));
+            encoded[3] = (byte)byteCount;
+            doc.Add(new Field(fi.name, encoded, Field.Store.YES));
+            return storedSize;
+        }
+
+        /// <summary>
+        /// Maps the term-vector flags recorded on a FieldInfo to the matching
+        /// Field.TermVector setting.
+        /// </summary>
+        /// <param name="fi">field metadata whose storeTermVector, storeOffsetWithTermVector
+        /// and storePositionWithTermVector flags are read</param>
+        /// <returns>NO when term vectors are not stored; otherwise YES, WITH_POSITIONS,
+        /// WITH_OFFSETS or WITH_POSITIONS_OFFSETS according to the two other flags</returns>
+        private Field.TermVector GetTermVectorType(FieldInfo fi)
+        {
+            if (!fi.storeTermVector)
+            {
+                return Field.TermVector.NO;
+            }
+
+            bool withOffsets = fi.storeOffsetWithTermVector;
+            bool withPositions = fi.storePositionWithTermVector;
+            if (withOffsets && withPositions)
+            {
+                return Field.TermVector.WITH_POSITIONS_OFFSETS;
+            }
+            if (withOffsets)
+            {
+                return Field.TermVector.WITH_OFFSETS;
+            }
+            if (withPositions)
+            {
+                return Field.TermVector.WITH_POSITIONS;
+            }
+            return Field.TermVector.YES;
+        }
+
+        private Field.Index GetIndexType(FieldInfo fi, bool tokenize)
+        {
+            // A field that is not indexed has no analysis setting to choose.
+            if (!fi.isIndexed)
+            {
+                return Field.Index.NO;
+            }
+
+            // Indexed: the tokenize flag selects analyzed or not-analyzed indexing.
+            return tokenize ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
+        }
+
+        /// <summary> A lazy implementation of Fieldable that defers loading of the field's value until it is asked for,
+        /// instead of loading it when the Document is loaded.
+        /// </summary>
+        [Serializable]
+        private class LazyField : AbstractField, Fieldable
+        {
+            // Reader that created this field; accessors use its EnsureOpen(), format, Uncompress() and streams.
+            private FieldsReader enclosingInstance;
+            public FieldsReader Enclosing_Instance
+            {
+                get
+                {
+                    return enclosingInstance;
+                }
+            }
+
+            // Stored length of the value as read from the fields stream: a byte count for binary, compressed
+            // and UTF-8-format text values, a char count for text stored in the older format.
+            private int toRead;
+            // Position in the fields stream at which the stored value starts.
+            private long pointer;
+
+            public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary)
+                : base(name, store, Field.Index.NO, Field.TermVector.NO)
+            {
+                this.enclosingInstance = enclosingInstance;
+                this.toRead = toRead;
+                this.pointer = pointer;
+                this.isBinary = isBinary;
+                if (isBinary)
+                    binaryLength = toRead;
+                lazy = true;
+            }
+
+            public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary)
+                : base(name, store, index, termVector)
+            {
+                this.enclosingInstance = enclosingInstance;
+                this.toRead = toRead;
+                this.pointer = pointer;
+                this.isBinary = isBinary;
+                if (isBinary)
+                    binaryLength = toRead;
+                lazy = true;
+            }
+
+            private IndexInput GetFieldStream()
+            {
+                IndexInput localFieldsStream = (IndexInput)Enclosing_Instance.fieldsStreamTL.Get();
+                if (localFieldsStream == null) // nothing cached in fieldsStreamTL yet: clone the reader's stream and cache it
+                {
+                    localFieldsStream = (IndexInput)Enclosing_Instance.cloneableFieldsStream.Clone();
+                    Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream);
+                }
+                return localFieldsStream;
+            }
+
+            /// <summary>The binary value of the field, or null if the field is not binary.  Same as
+            /// GetBinaryValue(null): the value is loaded from the fields stream on first use
+            /// into a newly allocated buffer.
+            /// </summary>
+            public override byte[] BinaryValue()
+            {
+                return GetBinaryValue(null);
+            }
+
+            /// <summary>Always null: a LazyField never holds a Reader value.  The call still
+            /// checks that the owning FieldsReader is open, like every other accessor
+            /// of this class.
+            /// </summary>
+            public override System.IO.TextReader ReaderValue()
+            {
+                Enclosing_Instance.EnsureOpen();
+                return null;
+            }
+
+            /// <summary>Always null: a LazyField never holds a TokenStream value.  The call still
+            /// checks that the owning FieldsReader is open, like every other accessor
+            /// of this class.
+            /// </summary>
+            public override TokenStream TokenStreamValue()
+            {
+                Enclosing_Instance.EnsureOpen();
+                return null;
+            }
+
+            /// <summary>The string value of the field, or null for a binary field.  The value is read from the
+            /// fields stream on the first call and cached in fieldsData: compressed values are inflated and decoded
+            /// as UTF-8, uncompressed values are read as UTF-8 bytes or, in the pre-UTF-8 format, as chars.
+            /// An IOException during the read is rethrown as a FieldReaderException.
+            /// </summary>
+            public override System.String StringValue()
+            {
+                Enclosing_Instance.EnsureOpen();
+                if (isBinary)
+                    return null;
+                else
+                {
+                    if (fieldsData == null)
+                    {
+                        IndexInput localFieldsStream = GetFieldStream();
+                        try
+                        {
+                            localFieldsStream.Seek(pointer);
+                            if (isCompressed)
+                            {
+                                byte[] b = new byte[toRead];
+                                localFieldsStream.ReadBytes(b, 0, b.Length);
+                                fieldsData = System.Text.Encoding.UTF8.GetString(Enclosing_Instance.Uncompress(b));
+                            }
+                            else
+                            {
+                                if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+                                {
+                                    byte[] bytes = new byte[toRead];
+                                    localFieldsStream.ReadBytes(bytes, 0, toRead);
+                                    fieldsData = System.Text.Encoding.UTF8.GetString(bytes);
+                                }
+                                else
+                                {
+                                    //read in chars b/c we already know the length we need to read
+                                    char[] chars = new char[toRead];
+                                    localFieldsStream.ReadChars(chars, 0, toRead);
+                                    fieldsData = new System.String(chars);
+                                }
+                            }
+                        }
+                        catch (System.IO.IOException e)
+                        {
+                            throw new FieldReaderException(e);
+                        }
+                    }
+                }
+                return (string)fieldsData;
+            }
+
+            public long GetPointer()
+            {
+                Enclosing_Instance.EnsureOpen();
+                return pointer;
+            }
+
+            public void SetPointer(long pointer)
+            {
+                Enclosing_Instance.EnsureOpen();
+                this.pointer = pointer;
+            }
+
+            public int GetToRead()
+            {
+                Enclosing_Instance.EnsureOpen();
+                return toRead;
+            }
+
+            public void SetToRead(int toRead)
+            {
+                Enclosing_Instance.EnsureOpen();
+                this.toRead = toRead;
+            }
+
+            /// <summary>The binary value of the field, or null if the field is not binary.  The value is loaded on the
+            /// first call and cached; result is used as the read buffer when it is non-null and large enough, and for
+            /// an uncompressed field that same array is then what gets cached and returned.</summary>
+            public override byte[] GetBinaryValue(byte[] result)
+            {
+                Enclosing_Instance.EnsureOpen();
+                if (!isBinary)
+                    return null;
+
+                if (fieldsData == null)
+                {
+                    // Reuse result only if it can hold the stored bytes; a compressed value needs an exact-size
+                    // buffer, because the whole array (not just the first toRead bytes) is handed to Uncompress()
+                    byte[] b;
+                    if (result == null || result.Length < toRead || (isCompressed && result.Length != toRead))
+                        b = new byte[toRead];
+                    else
+                        b = result;
+
+                    IndexInput localFieldsStream = GetFieldStream();
+                    // An IOException is rethrown as a FieldReaderException.  IndexReader.document throws as well, so
+                    // callers that already handle failures when getting the document are probably not much affected
+                    try
+                    {
+                        localFieldsStream.Seek(pointer);
+                        localFieldsStream.ReadBytes(b, 0, toRead);
+                        if (isCompressed)
+                            fieldsData = Enclosing_Instance.Uncompress(b);
+                        else
+                            fieldsData = b;
+                    }
+                    catch (System.IO.IOException e)
+                    {
+                        throw new FieldReaderException(e);
+                    }
+                    // The cached value starts at offset 0; once inflated it no longer has the on-disk length toRead
+                    binaryOffset = 0;
+                    binaryLength = isCompressed ? ((byte[])fieldsData).Length : toRead;
+                }
+
+                return (byte[])fieldsData;
+            }
+        }
+
+        private byte[] Uncompress(byte[] input)
+        {
+            return SupportClass.CompressionSupport.Uncompress(input); // decompression is delegated entirely to the shared helper
+        }
+
+        // Holds the properties and the data of a single field
+        // for merge
+        [Serializable]
+        public sealed class FieldForMerge : AbstractField
+        {
+            public FieldForMerge(object value_Renamed, FieldInfo fi, bool binary, bool compressed, bool tokenize)
+            {
+                // State describing this particular stored value
+                isStored = true;
+                fieldsData = value_Renamed;
+                isCompressed = compressed;
+                isBinary = binary;
+                isTokenized = tokenize;
+                if (binary)
+                {
+                    binaryLength = ((byte[])value_Renamed).Length;
+                }
+                // State copied from the field metadata
+                name = String.Intern(fi.name);
+                isIndexed = fi.isIndexed;
+                omitNorms = fi.omitNorms;
+                storeTermVector = fi.storeTermVector;
+                storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
+                storePositionWithTermVector = fi.storePositionWithTermVector;
+            }
+
+            // The held data, cast to a string
+            public override System.String StringValue()
+            {
+                System.String text = (System.String)fieldsData;
+                return text;
+            }
+
+            // The held data, cast to a byte array
+            public override byte[] BinaryValue()
+            {
+                byte[] bytes = (byte[])fieldsData;
+                return bytes;
+            }
+
+            // Reader values are not needed for merge
+            public override System.IO.TextReader ReaderValue() { return null; }
+
+            // Token streams are not needed for merge
+            public override TokenStream TokenStreamValue() { return null; }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FieldsWriter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FieldsWriter.cs Wed Jul 29 18:04:12 2009
@@ -33,6 +33,17 @@
 		internal const byte FIELD_IS_BINARY = (byte) (0x2);
 		internal const byte FIELD_IS_COMPRESSED = (byte) (0x4);
 		
+        // Original format: the stored length of a string field is a count of chars
+        internal const int FORMAT = 0;
+
+        // Strings are stored as UTF-8 and their stored length is a count of bytes
+        internal const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;
+
+        // NOTE: if you introduce a new format, make it 1 higher
+        // than the current one, and always change this if you
+        // switch to a new format!
+        internal const int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;
+
 		private FieldInfos fieldInfos;
 		
 		private IndexOutput fieldsStream;
@@ -44,9 +55,77 @@
 		internal FieldsWriter(Directory d, System.String segment, FieldInfos fn)
 		{
 			fieldInfos = fn;
-			fieldsStream = d.CreateOutput(segment + ".fdt");
-			indexStream = d.CreateOutput(segment + ".fdx");
-			doClose = true;
+            // Set first: Close() does nothing while doClose is false, so the failure paths below would leak the streams.
+            doClose = true;
+
+            bool success = false;
+            string fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
+            try
+            {
+                fieldsStream = d.CreateOutput(fieldsName);
+                fieldsStream.WriteInt(FORMAT_CURRENT); // the fields file starts with the format version
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    try
+                    {
+                        Close();
+                    }
+                    catch (System.Exception)
+                    {
+                        // Suppress so we keep throwing the original exception
+                    }
+                    try
+                    {
+                        d.DeleteFile(fieldsName);
+                    }
+                    catch (System.Exception)
+                    {
+                        // Suppress so we keep throwing the original exception
+                    }
+                }
+            }
+
+            success = false;
+            string indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
+            try
+            {
+                indexStream = d.CreateOutput(indexName);
+                indexStream.WriteInt(FORMAT_CURRENT); // the index file starts with the same format version
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    try
+                    {
+                        Close();
+                    }
+                    catch (System.Exception) // suppressed like the others, so the original exception keeps propagating
+                    {
+                    }
+                    try
+                    {
+                        d.DeleteFile(fieldsName);
+                    }
+                    catch (System.Exception)
+                    {
+                        // Suppress so we keep throwing the original exception
+                    }
+                    try
+                    {
+                        d.DeleteFile(indexName);
+                    }
+                    catch (System.Exception)
+                    {
+                        // Suppress so we keep throwing the original exception
+                    }
+                }
+            }
 		}
 		
 		internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn)
@@ -56,6 +135,11 @@
 			indexStream = fdx;
 			doClose = false;
 		}
+
+        internal void SetFieldsStream(IndexOutput stream)
+        {
+            this.fieldsStream = stream; // swaps the reference only; the previous stream is neither flushed nor closed here
+        }
 		
 		// Writes the contents of buffer into the fields stream
 		// and adds a new entry for this document into the index
@@ -67,7 +151,13 @@
 			fieldsStream.WriteVInt(numStoredFields);
 			buffer.WriteTo(fieldsStream);
 		}
-		
+
+        internal void SkipDocument()
+        {
+            indexStream.WriteLong(fieldsStream.GetFilePointer()); // index entry: the current position of the fields stream
+            fieldsStream.WriteVInt(0); // NOTE(review): presumably a stored-field count of zero -- confirm against the reader
+        }
+
 		internal void  Flush()
 		{
 			indexStream.Flush();
@@ -76,12 +166,60 @@
 		
 		internal void  Close()
 		{
-			if (doClose)
-			{
-				fieldsStream.Close();
-				indexStream.Close();
-			}
-		}
+            if (doClose) // streams handed in by the caller (doClose == false) are left for the caller to close
+            {
+                try
+                {
+                    if (fieldsStream != null)
+                    {
+                        try
+                        {
+                            fieldsStream.Close();
+                        }
+                        finally
+                        {
+                            fieldsStream = null; // cleared even when Close() throws, so it is never closed twice
+                        }
+                    }
+                }
+                catch (System.IO.IOException)
+                {
+                    try
+                    {
+                        if (indexStream != null)
+                        {
+                            try
+                            {
+                                indexStream.Close();
+                            }
+                            finally
+                            {
+                                indexStream = null; // leaves nothing for the outer finally to close
+                            }
+                        }
+                    }
+                    catch (System.IO.IOException)
+                    {
+                        // Ignore so we throw only first IOException hit
+                    }
+                    throw; // rethrows the fieldsStream failure; "throw ioe" would reset its stack trace
+                }
+                finally
+                {
+                    if (indexStream != null)
+                    {
+                        try
+                        {
+                            indexStream.Close();
+                        }
+                        finally
+                        {
+                            indexStream = null;
+                        }
+                    }
+                }
+            }
+        }
 		
 		internal void  WriteField(FieldInfo fi, Fieldable field)
 		{
@@ -103,39 +241,46 @@
 			if (field.IsCompressed())
 			{
 				// compression is enabled for the current field
-				byte[] data = null;
+				byte[] data;
+                int len;
+                int offset;
 				
 				if (disableCompression)
 				{
 					// optimized case for merging, the data
 					// is already compressed
-					data = field.BinaryValue();
+					data = field.GetBinaryValue();
+                    System.Diagnostics.Debug.Assert(data != null);
+                    len = field.GetBinaryLength();
+                    offset = field.GetBinaryOffset();
 				}
 				else
 				{
 					// check if it is a binary field
 					if (field.IsBinary())
 					{
-						data = Compress(field.BinaryValue());
+						data = Compress(field.GetBinaryValue(), field.GetBinaryOffset(), field.GetBinaryLength());
 					}
 					else
 					{
-						data = Compress(System.Text.Encoding.GetEncoding("UTF-8").GetBytes(field.StringValue()));
+                        byte[] x = System.Text.Encoding.UTF8.GetBytes(field.StringValue());
+						data = Compress(x, 0, x.Length);
 					}
+                    len = data.Length;
+                    offset = 0;
 				}
-				int len = data.Length;
+
 				fieldsStream.WriteVInt(len);
-				fieldsStream.WriteBytes(data, len);
+				fieldsStream.WriteBytes(data, offset, len);
 			}
 			else
 			{
 				// compression is disabled for the current field
 				if (field.IsBinary())
 				{
-					byte[] data = field.BinaryValue();
-					int len = data.Length;
-					fieldsStream.WriteVInt(len);
-					fieldsStream.WriteBytes(data, len);
+                    int length = field.GetBinaryLength();
+					fieldsStream.WriteVInt(length);
+					fieldsStream.WriteBytes(field.BinaryValue(), field.GetBinaryOffset(), length);
 				}
 				else
 				{
@@ -186,9 +331,9 @@
 			}
 		}
 		
-		private byte[] Compress(byte[] input)
+		private byte[] Compress(byte[] input, int offset, int length)
 		{
-			return SupportClass.CompressionSupport.Compress(input);
+			return SupportClass.CompressionSupport.Compress(input, offset, length); // offset and length are passed straight through to the shared helper
         }
 	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FilterIndexReader.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FilterIndexReader.cs Wed Jul 29 18:04:12 2009
@@ -284,7 +284,7 @@
 		}
 		
 		
-		public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldNames)
+		public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
 		{
 			EnsureOpen();
 			return in_Renamed.GetFieldNames(fieldNames);

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FreqProxFieldMergeState.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/FreqProxFieldMergeState.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FreqProxFieldMergeState.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/FreqProxFieldMergeState.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+    /// <summary>
+    /// Cursor over one field's sorted postings, used by DocumentsWriter to merge
+    /// the postings from multiple ThreadStates when creating a segment
+    /// </summary>
+    internal sealed class FreqProxFieldMergeState
+    {
+        internal readonly FreqProxTermsWriterPerField field;
+        internal readonly int numPostings;
+        internal readonly CharBlockPool charPool;
+        internal readonly RawPostingList[] postings;
+
+        // Current term: its posting, and where its text sits in the char pool
+        private FreqProxTermsWriter.PostingList p;
+        private int postingUpto = -1;
+        internal char[] text;
+        internal int textOffset;
+
+        // Readers over the current term's byte streams 0 and 1
+        internal readonly ByteSliceReader freq = new ByteSliceReader();
+        internal readonly ByteSliceReader prox = new ByteSliceReader();
+
+        // Current document within the current term
+        internal int docID;
+        internal int termFreq;
+
+        public FreqProxFieldMergeState(FreqProxTermsWriterPerField field)
+        {
+            this.field = field;
+            charPool = field.perThread.termsHashPerThread.charPool;
+            numPostings = field.termsHashPerField.numPostings;
+            postings = field.termsHashPerField.sortPostings();
+        }
+
+        // Moves to the next term and onto its first document; false once all postings are used up.
+        internal bool nextTerm()
+        {
+            if (++postingUpto == numPostings)
+                return false;
+
+            p = (FreqProxTermsWriter.PostingList)postings[postingUpto];
+            docID = 0;
+
+            // textStart encodes the char-pool buffer (high bits) and the offset inside it (low bits)
+            text = charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+            textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+            field.termsHashPerField.initReader(freq, p, 0);
+            if (!field.fieldInfo.omitTf)
+                field.termsHashPerField.initReader(prox, p, 1);
+
+            // Expected to succeed for every term
+            bool gotDoc = nextDoc();
+            System.Diagnostics.Debug.Assert(gotDoc);
+            return true;
+        }
+
+        // Moves to the next document of the current term, updating docID and, unless
+        // term frequencies are omitted, termFreq.  Returns false when there is none left.
+        public bool nextDoc()
+        {
+            if (!freq.Eof())
+            {
+                int code = freq.ReadVInt();
+                if (field.omitTf)
+                    docID += code; // the code is the doc delta itself
+                else
+                {
+                    // Doc delta in the upper bits; a set low bit means termFreq == 1,
+                    // otherwise the frequency follows as its own VInt
+                    docID += (int)((uint)code >> 1);
+                    termFreq = ((code & 1) != 0) ? 1 : freq.ReadVInt();
+                }
+
+                System.Diagnostics.Debug.Assert(docID != p.lastDocID);
+                return true;
+            }
+
+            // The byte stream is used up; the last document is held on the posting
+            // itself, and lastDocCode == -1 marks it as already returned
+            if (p.lastDocCode == -1)
+                return false;
+
+            docID = p.lastDocID;
+            if (!field.omitTf)
+                termFreq = p.docFreq;
+            p.lastDocCode = -1;
+            return true;
+        }
+    }
+}



Mime
View raw message