lucenenet-commits mailing list archives

From dougs...@apache.org
Subject svn commit: r798995 [14/35] - in /incubator/lucene.net/trunk/C#/src: Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/ Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net...
Date Wed, 29 Jul 2009 18:04:24 GMT
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorEntryFreqSortedComparator.cs Wed Jul 29 18:04:12 2009
@@ -19,29 +19,29 @@
 
 namespace Lucene.Net.Index
 {
-	
-	/// <summary> Compares {@link Lucene.Net.Index.TermVectorEntry}s first by frequency and then by
-	/// the term (case-sensitive)
-	/// 
-	/// 
-	/// </summary>
-	public class TermVectorEntryFreqSortedComparator : System.Collections.Generic.IComparer<Object>
-	{
-		public virtual int Compare(System.Object object_Renamed, System.Object object1)
-		{
-			int result = 0;
-			TermVectorEntry entry = (TermVectorEntry) object_Renamed;
-			TermVectorEntry entry1 = (TermVectorEntry) object1;
-			result = entry1.GetFrequency() - entry.GetFrequency();
-			if (result == 0)
-			{
-				result = String.CompareOrdinal(entry.GetTerm(), entry1.GetTerm());
-				if (result == 0)
-				{
-					result = String.CompareOrdinal(entry.GetField(), entry1.GetField());
-				}
-			}
-			return result;
-		}
-	}
+
+    /// <summary> Compares {@link Lucene.Net.Index.TermVectorEntry}s first by frequency and then by
+    /// the term (case-sensitive)
+    /// 
+    /// 
+    /// </summary>
+    public class TermVectorEntryFreqSortedComparator : System.Collections.Generic.IComparer<object>
+    {
+        public virtual int Compare(object object_Renamed, object object1)
+        {
+            int result = 0;
+            TermVectorEntry entry = (TermVectorEntry)object_Renamed;
+            TermVectorEntry entry1 = (TermVectorEntry)object1;
+            result = entry1.GetFrequency() - entry.GetFrequency();
+            if (result == 0)
+            {
+                result = String.CompareOrdinal(entry.GetTerm(), entry1.GetTerm());
+                if (result == 0)
+                {
+                    result = String.CompareOrdinal(entry.GetField(), entry1.GetField());
+                }
+            }
+            return result;
+        }
+    }
 }
\ No newline at end of file
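
For context, a minimal usage sketch of the comparator above (an assumption, not part of this commit): entries sort by descending frequency, falling back to ordinal comparison of term text and then field name. The frequency comparison relies on integer subtraction, which is safe here only because term frequencies are non-negative and small.

    // Hypothetical usage sketch; the TermVectorEntry instances are
    // assumed to come from elsewhere (e.g. a term vector mapper).
    using System.Collections.Generic;
    using Lucene.Net.Index;

    static class ComparatorSketch
    {
        static void SortByFreq(List<object> entries)
        {
            // Highest frequency first; ties broken by term, then field,
            // both via ordinal (case-sensitive) comparison.
            entries.Sort(new TermVectorEntryFreqSortedComparator());
        }
    }
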

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorOffsetInfo.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorOffsetInfo.cs Wed Jul 29 18:04:12 2009
@@ -70,11 +70,11 @@
 		}
 		
 		/// <summary> Two TermVectorOffsetInfos are equals if both the start and end offsets are the same</summary>
-		/// <param name="o">The comparison Object
+		/// <param name="o">The comparison object
 		/// </param>
 		/// <returns> true if both {@link #GetStartOffset()} and {@link #GetEndOffset()} are the same for both objects.
 		/// </returns>
-		public  override bool Equals(System.Object o)
+		public  override bool Equals(object o)
 		{
 			if (this == o)
 				return true;
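
The hunk above shows only the start of Equals; per the summary, equality is defined purely by the start and end offsets. A hedged sketch of the full pattern (a standalone illustration with assumed field names, not the committed code):

    // Sketch of offset-based equality with a consistent hash code.
    public class OffsetInfoSketch
    {
        private int startOffset;
        private int endOffset;

        public OffsetInfoSketch(int start, int end)
        {
            startOffset = start;
            endOffset = end;
        }

        public override bool Equals(object o)
        {
            if (this == o)
                return true;
            OffsetInfoSketch other = o as OffsetInfoSketch;
            return other != null
                && startOffset == other.startOffset
                && endOffset == other.endOffset;
        }

        // Whenever Equals is overridden, GetHashCode must agree with it.
        public override int GetHashCode()
        {
            return 29 * startOffset + endOffset;
        }
    }
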

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsReader.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsReader.cs Wed Jul 29 18:04:12 2009
@@ -28,9 +28,19 @@
 	/// </version>
 	public class TermVectorsReader : System.ICloneable
 	{
-		
+		// NOTE: if you make a new format, it must be larger than the current format
 		internal const int FORMAT_VERSION = 2;
-		//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file 
+
+        // changes to speed up bulk merging of term vectors
+        internal const int FORMAT_VERSION2 = 3;
+
+        // change strings to UTF8 with length in bytes (not length in chars)
+        internal const int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
+
+        // NOTE: always change this if you switch to a new format!
+        internal const int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
+
+		// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file 
 		internal const int FORMAT_SIZE = 4;
 		
 		internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1);
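
The four constants above gate every format-dependent branch in this reader. As a hedged summary derived from the reader code later in this diff (not stated in the commit itself): format < FORMAT_VERSION2 stores one 8-byte tvd pointer per doc in tvx and term text as chars; FORMAT_VERSION2 adds a second 8-byte tvf pointer per doc, enabling raw bulk copies while merging; FORMAT_UTF8_LENGTH_IN_BYTES switches term text to UTF-8 bytes with byte-counted lengths.

    // Sketch of the per-format tvx record size implied above; mirrors
    // SeekTvx() further down in this diff.
    static class FormatSketch
    {
        internal static long TvxRecordSize(int format)
        {
            const int FORMAT_VERSION2 = 3;
            return format >= FORMAT_VERSION2 ? 16L : 8L;
        }
    }
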
@@ -42,19 +52,21 @@
 		private IndexInput tvd;
 		private IndexInput tvf;
 		private int size;
+        private int numTotalDocs;
 		
 		// The docID offset where our docs begin in the index
 		// file.  This will be 0 if we have our own private file.
 		private int docStoreOffset;
 		
-		private int tvdFormat;
-		private int tvfFormat;
+		private int format;
 		
-		public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos) : this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
+		public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos)
+            : this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE)
 		{
 		}
 		
-		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize) : this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE, - 1, 0)
+		internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize)
+            : this(d, segment, fieldInfos, readBufferSize, - 1, 0)
 		{
 		}
 		
@@ -64,28 +76,47 @@
 			
 			try
 			{
-				if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
-				{
-					tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
-					CheckValidFormat(tvx);
-					tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
-					tvdFormat = CheckValidFormat(tvd);
-					tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
-					tvfFormat = CheckValidFormat(tvf);
-					if (- 1 == docStoreOffset)
-					{
-						this.docStoreOffset = 0;
-						this.size = (int) (tvx.Length() >> 3);
-					}
-					else
-					{
-						this.docStoreOffset = docStoreOffset;
-						this.size = size;
-						// Verify the file is long enough to hold all of our
-						// docs
-						System.Diagnostics.Debug.Assert(((int) (tvx.Length() / 8)) >= size + docStoreOffset);
-					}
-				}
+                if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
+                {
+                    tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
+                    format = CheckValidFormat(tvx);
+                    tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
+                    int tvdFormat = CheckValidFormat(tvd);
+                    tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
+                    int tvfFormat = CheckValidFormat(tvf);
+
+                    System.Diagnostics.Debug.Assert(format == tvdFormat);
+                    System.Diagnostics.Debug.Assert(format == tvfFormat);
+
+                    if (format >= FORMAT_VERSION2)
+                    {
+                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
+                        numTotalDocs = (int)(tvx.Length() >> 4);
+                    }
+                    else
+                    {
+                        System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
+                        numTotalDocs = (int)(tvx.Length() >> 3);
+                    }
+
+                    if (-1 == docStoreOffset)
+                    {
+                        this.docStoreOffset = 0;
+                        this.size = numTotalDocs;
+                        System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
+                    }
+                    else
+                    {
+                        this.docStoreOffset = docStoreOffset;
+                        this.size = size;
+                        // Verify the file is long enough to hold all of our
+                        // docs
+                        System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
+                    }
+
+                }
+                else
+                    format = 0;
 				
 				this.fieldInfos = fieldInfos;
 				success = true;
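
A note on the numTotalDocs arithmetic above: tvx is a 4-byte format header (FORMAT_SIZE) followed by fixed-width records, so shifting the file length by the record width recovers the doc count; the 4 header bytes vanish in the integer truncation. A hedged sketch:

    // (4 + 16*n) >> 4 == n and (4 + 8*n) >> 3 == n, so the header does
    // not disturb the count.
    static class DocCountSketch
    {
        internal static int TotalDocs(long tvxLength, int format)
        {
            const int FORMAT_VERSION2 = 3;
            return format >= FORMAT_VERSION2
                ? (int)(tvxLength >> 4)   // two longs per doc: tvd + tvf
                : (int)(tvxLength >> 3);  // one long per doc: tvd only
        }
    }
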
@@ -104,12 +135,92 @@
 			}
 		}
 		
-		private int CheckValidFormat(IndexInput in_Renamed)
+        // used for bulk copy when merging
+        internal IndexInput GetTvdStream()
+        {
+            return tvd;
+        }
+
+        // used for bulk copy when merging
+        internal IndexInput GetTvfStream()
+        {
+            return tvf;
+        }
+
+        private void SeekTvx(int docNum)
+        {
+            if (format < FORMAT_VERSION2)
+                tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE);
+            else
+                tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE);
+        }
+
+        internal bool CanReadRawDocs()
+        {
+            return format >= FORMAT_UTF8_LENGTH_IN_BYTES;
+        }
+
+        /** Retrieve the length (in bytes) of the tvd and tvf
+         *  entries for the next numDocs starting with
+         *  startDocID.  This is used for bulk copying when
+         *  merging segments, if the field numbers are
+         *  congruent.  Once this returns, the tvf & tvd streams
+         *  are seeked to the startDocID. */
+        internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
+        {
+            if (tvx == null)
+            {
+                SupportClass.CollectionsSupport.ArrayFill(tvdLengths, 0);
+                SupportClass.CollectionsSupport.ArrayFill(tvfLengths, 0);
+                return;
+            }
+
+            // SegmentMerger calls canReadRawDocs() first and should
+            // not call us if that returns false.
+            if (format < FORMAT_VERSION2)
+                throw new System.Exception("cannot read raw docs with older term vector formats");
+
+            SeekTvx(startDocID);
+
+            long tvdPosition = tvx.ReadLong();
+            tvd.Seek(tvdPosition);
+
+            long tvfPosition = tvx.ReadLong();
+            tvf.Seek(tvfPosition);
+
+            long lastTvdPosition = tvdPosition;
+            long lastTvfPosition = tvfPosition;
+
+            int count = 0;
+            while (count < numDocs)
+            {
+                int docID = docStoreOffset + startDocID + count + 1;
+                System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
+                if (docID < numTotalDocs)
+                {
+                    tvdPosition = tvx.ReadLong();
+                    tvfPosition = tvx.ReadLong();
+                }
+                else
+                {
+                    tvdPosition = tvd.Length();
+                    tvfPosition = tvf.Length();
+                    System.Diagnostics.Debug.Assert(count == numDocs - 1);
+                }
+                tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
+                tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
+                count++;
+                lastTvdPosition = tvdPosition;
+                lastTvfPosition = tvfPosition;
+            }
+        }
+
+        private int CheckValidFormat(IndexInput in_Renamed)
 		{
 			int format = in_Renamed.ReadInt();
-			if (format > FORMAT_VERSION)
+			if (format > FORMAT_CURRENT)
 			{
-				throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_VERSION + " or less");
+				throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less");
 			}
 			return format;
 		}
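
RawDocs pairs with TermVectorsWriter.AddRawDocuments (modified near the end of this commit) so the merger can copy term vectors byte-for-byte instead of decoding and re-encoding them. A hedged sketch of the handshake; the members involved are internal, so this mirrors what SegmentMerger does inside the assembly (per the comment above) rather than a public API:

    namespace Lucene.Net.Index
    {
        // Hypothetical merge-side caller, not SegmentMerger itself;
        // same namespace/assembly assumed so internals are reachable.
        static class MergeSketch
        {
            internal static void BulkCopyVectors(TermVectorsReader reader,
                                                 TermVectorsWriter writer,
                                                 int startDocID, int numDocs)
            {
                // Older formats cannot be copied raw; callers fall back
                // to per-document decode/re-encode.
                if (!reader.CanReadRawDocs())
                    return;

                int[] tvdLengths = new int[numDocs];
                int[] tvfLengths = new int[numDocs];

                // Fills per-doc byte lengths and leaves the reader's
                // tvd/tvf streams positioned at startDocID.
                reader.RawDocs(tvdLengths, tvfLengths, startDocID, numDocs);
                writer.AddRawDocuments(reader, tvdLengths, tvfLengths, numDocs);
            }
        }
    }
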
@@ -172,11 +283,11 @@
 				//We don't need to do this in other seeks because we already have the
 				// file pointer
 				//that was written in another file
-				tvx.Seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE);
-				//System.out.println("TVX Pointer: " + tvx.getFilePointer());
-				long position = tvx.ReadLong();
+                SeekTvx(docNum);
+				//System.out.println("TVX Pointer: " + tvx.GetFilePointer());
+				long tvdPosition = tvx.ReadLong();
 				
-				tvd.Seek(position);
+				tvd.Seek(tvdPosition);
 				int fieldCount = tvd.ReadVInt();
 				//System.out.println("Num Fields: " + fieldCount);
 				// There are only a few fields per document. We opt for a full scan
@@ -186,7 +297,7 @@
 				int found = - 1;
 				for (int i = 0; i < fieldCount; i++)
 				{
-					if (tvdFormat == FORMAT_VERSION)
+					if (format >= FORMAT_VERSION)
 						number = tvd.ReadVInt();
 					else
 						number += tvd.ReadVInt();
@@ -200,8 +311,12 @@
 				if (found != - 1)
 				{
 					// Compute position in the tvf file
-					position = 0;
-					for (int i = 0; i <= found; i++)
+					long position;
+                    if (format >= FORMAT_VERSION2)
+                        position = tvx.ReadLong();
+                    else
+                        position = tvd.ReadVLong();
+					for (int i = 1; i <= found; i++)
 						position += tvd.ReadVLong();
 					
 					mapper.SetDocumentNumber(docNum);
@@ -217,128 +332,127 @@
 				//System.out.println("No tvx file");
 			}
 		}
-		
-		
-		
-		/// <summary> Retrieve the term vector for the given document and field</summary>
-		/// <param name="docNum">The document number to retrieve the vector for
-		/// </param>
-		/// <param name="field">The field within the document to retrieve
-		/// </param>
-		/// <returns> The TermFreqVector for the document and field or null if there is no termVector for this field.
-		/// </returns>
-		/// <throws>  IOException if there is an error reading the term vector files </throws>
-		public /*internal*/ virtual TermFreqVector Get(int docNum, System.String field)
-		{
-			// Check if no term vectors are available for this segment at all
-			ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
-			Get(docNum, field, mapper);
-			
-			return mapper.MaterializeVector();
-		}
-		
-		/// <summary> Return all term vectors stored for this document or null if the could not be read in.
-		/// 
-		/// </summary>
-		/// <param name="docNum">The document number to retrieve the vector for
-		/// </param>
-		/// <returns> All term frequency vectors
-		/// </returns>
-		/// <throws>  IOException if there is an error reading the term vector files  </throws>
-		public /*internal*/ virtual TermFreqVector[] Get(int docNum)
-		{
-			TermFreqVector[] result = null;
-			if (tvx != null)
-			{
-				//We need to offset by
-				tvx.Seek(((docNum + docStoreOffset) * 8L) + FORMAT_SIZE);
-				long position = tvx.ReadLong();
-				
-				tvd.Seek(position);
-				int fieldCount = tvd.ReadVInt();
-				
-				// No fields are vectorized for this document
-				if (fieldCount != 0)
-				{
-					int number = 0;
-					System.String[] fields = new System.String[fieldCount];
-					
-					for (int i = 0; i < fieldCount; i++)
-					{
-						if (tvdFormat == FORMAT_VERSION)
-							number = tvd.ReadVInt();
-						else
-							number += tvd.ReadVInt();
-						
-						fields[i] = fieldInfos.FieldName(number);
-					}
-					
-					// Compute position in the tvf file
-					position = 0;
-					long[] tvfPointers = new long[fieldCount];
-					for (int i = 0; i < fieldCount; i++)
-					{
-						position += tvd.ReadVLong();
-						tvfPointers[i] = position;
-					}
-					
-					result = ReadTermVectors(docNum, fields, tvfPointers);
-				}
-			}
-			else
-			{
-				//System.out.println("No tvx file");
-			}
-			return result;
-		}
-		
-		public virtual void  Get(int docNumber, TermVectorMapper mapper)
-		{
-			// Check if no term vectors are available for this segment at all
-			if (tvx != null)
-			{
-				//We need to offset by
-				tvx.Seek((docNumber * 8L) + FORMAT_SIZE);
-				long position = tvx.ReadLong();
-				
-				tvd.Seek(position);
-				int fieldCount = tvd.ReadVInt();
-				
-				// No fields are vectorized for this document
-				if (fieldCount != 0)
-				{
-					int number = 0;
-					System.String[] fields = new System.String[fieldCount];
-					
-					for (int i = 0; i < fieldCount; i++)
-					{
-						if (tvdFormat == FORMAT_VERSION)
-							number = tvd.ReadVInt();
-						else
-							number += tvd.ReadVInt();
-						
-						fields[i] = fieldInfos.FieldName(number);
-					}
-					
-					// Compute position in the tvf file
-					position = 0;
-					long[] tvfPointers = new long[fieldCount];
-					for (int i = 0; i < fieldCount; i++)
-					{
-						position += tvd.ReadVLong();
-						tvfPointers[i] = position;
-					}
-					
-					mapper.SetDocumentNumber(docNumber);
-					ReadTermVectors(fields, tvfPointers, mapper);
-				}
-			}
-			else
-			{
-				//System.out.println("No tvx file");
-			}
-		}
-		
+
+        /**
+         * Retrieve the term vector for the given document and field
+         * @param docNum The document number to retrieve the vector for
+         * @param field The field within the document to retrieve
+         * @return The TermFreqVector for the document and field or null if there is no termVector for this field.
+         * @throws IOException if there is an error reading the term vector files
+         */
+        public TermFreqVector Get(int docNum, String field)
+        {
+            // Check if no term vectors are available for this segment at all
+            ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
+            Get(docNum, field, mapper);
+
+            return mapper.MaterializeVector();
+        }
+
+        // Reads the String[] fields; you have to pre-seek tvd to
+        // the right point
+        private String[] ReadFields(int fieldCount)
+        {
+            int number = 0;
+            String[] fields = new String[fieldCount];
+
+            for (int i = 0; i < fieldCount; i++)
+            {
+                if (format >= FORMAT_VERSION)
+                    number = tvd.ReadVInt();
+                else
+                    number += tvd.ReadVInt();
+
+                fields[i] = fieldInfos.FieldName(number);
+            }
+
+            return fields;
+        }
+
+        // Reads the long[] offsets into TVF; you have to pre-seek
+        // tvx/tvd to the right point
+        private long[] ReadTvfPointers(int fieldCount)
+        {
+            // Compute position in the tvf file
+            long position;
+            if (format >= FORMAT_VERSION2)
+                position = tvx.ReadLong();
+            else
+                position = tvd.ReadVLong();
+
+            long[] tvfPointers = new long[fieldCount];
+            tvfPointers[0] = position;
+
+            for (int i = 1; i < fieldCount; i++)
+            {
+                position += tvd.ReadVLong();
+                tvfPointers[i] = position;
+            }
+
+            return tvfPointers;
+        }
+
+        /**
+         * Return all term vectors stored for this document or null if they could not be read in.
+         * 
+         * @param docNum The document number to retrieve the vector for
+         * @return All term frequency vectors
+         * @throws IOException if there is an error reading the term vector files 
+         */
+        public TermFreqVector[] Get(int docNum)
+        {
+            TermFreqVector[] result = null;
+            if (tvx != null)
+            {
+                //We need to offset by
+                SeekTvx(docNum);
+                long tvdPosition = tvx.ReadLong();
+
+                tvd.Seek(tvdPosition);
+                int fieldCount = tvd.ReadVInt();
+
+                // No fields are vectorized for this document
+                if (fieldCount != 0)
+                {
+                    string[] fields = ReadFields(fieldCount);
+                    long[] tvfPointers = ReadTvfPointers(fieldCount);
+                    result = ReadTermVectors(docNum, fields, tvfPointers);
+                }
+            }
+            else
+            {
+                //System.out.println("No tvx file");
+            }
+            return result;
+        }
+
+        public void Get(int docNumber, TermVectorMapper mapper)
+        {
+            // Check if no term vectors are available for this segment at all
+            if (tvx != null)
+            {
+                //We need to offset by
+
+                SeekTvx(docNumber);
+                long tvdPosition = tvx.ReadLong();
+
+                tvd.Seek(tvdPosition);
+                int fieldCount = tvd.ReadVInt();
+
+                // No fields are vectorized for this document
+                if (fieldCount != 0)
+                {
+                    string[] fields = ReadFields(fieldCount);
+                    long[] tvfPointers = ReadTvfPointers(fieldCount);
+                    mapper.SetDocumentNumber(docNumber);
+                    ReadTermVectors(fields, tvfPointers, mapper);
+                }
+            }
+            else
+            {
+                //System.out.println("No tvx file");
+            }
+        }
 		
 		private SegmentTermVector[] ReadTermVectors(int docNum, System.String[] fields, long[] tvfPointers)
 		{
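
ReadFields and ReadTvfPointers above centralize decoding that the two Get overloads previously duplicated. The tvf pointers are delta-coded: for FORMAT_VERSION2 and later the first pointer is read as an absolute long from tvx, and each subsequent field contributes a VLong gap read from tvd (hence the loops now starting at 1). A hedged round-trip sketch of that scheme; the matching write side appears in TermVectorsWriter and TermVectorsTermsWriter.finishDocument later in this diff:

    // Gap decoding for per-field tvf pointers (names are assumptions).
    static class PointerSketch
    {
        internal static long[] DecodeTvfPointers(long firstFromTvx,
                                                 long[] gapsFromTvd)
        {
            long[] pointers = new long[1 + gapsFromTvd.Length];
            pointers[0] = firstFromTvx;                       // absolute, from tvx
            for (int i = 0; i < gapsFromTvd.Length; i++)
                pointers[i + 1] = pointers[i] + gapsFromTvd[i]; // VLong gaps
            return pointers;
        }
    }
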
@@ -361,7 +475,6 @@
 			}
 		}
 		
-		
 		/// <summary> </summary>
 		/// <param name="field">The field to read in
 		/// </param>
@@ -369,9 +482,6 @@
 		/// </param>
 		/// <param name="mapper">The mapper used to map the TermVector
 		/// </param>
-		/// <returns> The TermVector located at that position
-		/// </returns>
-		/// <throws>  IOException </throws>
 		private void  ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper)
 		{
 			
@@ -388,7 +498,7 @@
 			bool storePositions;
 			bool storeOffsets;
 			
-			if (tvfFormat == FORMAT_VERSION)
+			if (format >= FORMAT_VERSION)
 			{
 				byte bits = tvf.ReadByte();
 				storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
@@ -404,28 +514,55 @@
 			int start = 0;
 			int deltaLength = 0;
 			int totalLength = 0;
-			char[] buffer = new char[10]; // init the buffer with a length of 10 character
-			char[] previousBuffer = new char[]{};
-			
+            byte[] byteBuffer;
+            char[] charBuffer;
+            bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES;
+
+            // init the buffers
+            if (preUTF8)
+            {
+                charBuffer = new char[10];
+                byteBuffer = null;
+            }
+            else
+            {
+                charBuffer = null;
+                byteBuffer = new byte[20];
+            }
+
 			for (int i = 0; i < numTerms; i++)
 			{
 				start = tvf.ReadVInt();
 				deltaLength = tvf.ReadVInt();
 				totalLength = start + deltaLength;
-				if (buffer.Length < totalLength)
-				{
-					// increase buffer
-					buffer = null; // give a hint to garbage collector
-					buffer = new char[totalLength];
-					
-					if (start > 0)
-					// just copy if necessary
-						Array.Copy(previousBuffer, 0, buffer, 0, start);
-				}
+
+                string term;
+
+                if (preUTF8)
+                {
+                    // term stored as java chars
+                    if (charBuffer.Length < totalLength)
+                    {
+                        char[] newCharBuffer = new char[(int)(1.5 * totalLength)];
+                        Array.Copy(charBuffer, 0, newCharBuffer, 0, start);
+                        charBuffer = newCharBuffer;
+                    }
+                    tvf.ReadChars(charBuffer, start, deltaLength);
+                    term = new String(charBuffer, 0, totalLength);
+                }
+                else
+                {
+                    // term stored as utf8 bytes
+                    if (byteBuffer.Length < totalLength)
+                    {
+                        byte[] newByteBuffer = new byte[(int)(1.5 * totalLength)];
+                        Array.Copy(byteBuffer, 0, newByteBuffer, 0, start);
+                        byteBuffer = newByteBuffer;
+                    }
+                    tvf.ReadBytes(byteBuffer, start, deltaLength);
+                    term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength);
+                }
 				
-				tvf.ReadChars(buffer, start, deltaLength);
-				System.String term = new System.String(buffer, 0, totalLength);
-				previousBuffer = buffer;
 				int freq = tvf.ReadVInt();
 				int[] positions = null;
 				if (storePositions)
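
The term loop above decodes prefix compression: each entry stores how many leading units it shares with the previous term (start), the length of the new suffix (deltaLength), and only the suffix itself, so the buffer must keep the previous term's prefix in place across iterations, which is why the growth path copies the first start units. A hedged standalone sketch of the UTF-8 branch:

    // Decodes one prefix-compressed term; "buffer" still holds the
    // previous term's bytes in [0, sharedPrefix). Assumes the caller
    // ensured buffer.Length >= sharedPrefix + suffix.Length (the real
    // code grows it by 1.5x). File reads replaced by a plain array.
    static class TermDecodeSketch
    {
        internal static string DecodeTerm(byte[] buffer, int sharedPrefix,
                                          byte[] suffix)
        {
            System.Array.Copy(suffix, 0, buffer, sharedPrefix, suffix.Length);
            int totalLength = sharedPrefix + suffix.Length;
            return System.Text.Encoding.UTF8.GetString(buffer, 0, totalLength);
        }
    }
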
@@ -480,34 +617,26 @@
 				mapper.Map(term, freq, offsets, positions);
 			}
 		}
-		
-		
-		
-		public virtual System.Object Clone()
+
+        public virtual object Clone()
 		{
-			
-			if (tvx == null || tvd == null || tvf == null)
-				return null;
-			
-			TermVectorsReader clone = null;
-			try
-			{
-				clone = (TermVectorsReader) base.MemberwiseClone();
-			}
-			catch (System.Exception e)
-			{
-			}
-			
-			clone.tvx = (IndexInput) tvx.Clone();
-			clone.tvd = (IndexInput) tvd.Clone();
-			clone.tvf = (IndexInput) tvf.Clone();
+            TermVectorsReader clone = (TermVectorsReader)base.MemberwiseClone();
+
+            // these are null when a TermVectorsReader was created
+            // on a segment that did not have term vectors saved
+            if (tvx != null && tvd != null && tvf != null)
+            {
+                clone.tvx = (IndexInput)tvx.Clone();
+                clone.tvd = (IndexInput)tvd.Clone();
+                clone.tvf = (IndexInput)tvf.Clone();
+            }
 			
 			return clone;
 		}
 	}
 	
 	/// <summary> Models the existing parallel array structure</summary>
-	class ParallelArrayTermVectorMapper:TermVectorMapper
+	internal class ParallelArrayTermVectorMapper:TermVectorMapper
 	{
 		
 		private System.String[] terms;

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsTermsWriter.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriter.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriter.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,363 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+    internal sealed class TermVectorsTermsWriter : TermsHashConsumer
+    {
+
+        internal readonly DocumentsWriter docWriter;
+        //internal TermVectorsWriter termVectorsWriter;
+        internal PerDoc[] docFreeList = new PerDoc[1];
+        internal int freeCount;
+        internal IndexOutput tvx;
+        internal IndexOutput tvd;
+        internal IndexOutput tvf;
+        internal int lastDocID;
+
+        public TermVectorsTermsWriter(DocumentsWriter docWriter)
+        {
+            this.docWriter = docWriter;
+        }
+
+        internal override TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread)
+        {
+            return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
+        }
+
+        internal override void createPostings(RawPostingList[] postings, int start, int count)
+        {
+            int end = start + count;
+            for (int i = start; i < end; i++)
+                postings[i] = new PostingList();
+        }
+
+        internal override void flush(IDictionary<object, object> threadsAndFields, DocumentsWriter.FlushState state)
+        {
+            lock (this)
+            {
+
+                if (tvx != null)
+                {
+
+                    if (state.numDocsInStore > 0)
+                        // In case there are some documents that we
+                        // didn't see (because they hit a non-aborting exception):
+                        fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
+
+                    tvx.Flush();
+                    tvd.Flush();
+                    tvf.Flush();
+                }
+
+                IEnumerator<KeyValuePair<object, object>> it = threadsAndFields.GetEnumerator();
+                while (it.MoveNext())
+                {
+                    KeyValuePair<object, object> entry = (KeyValuePair<object, object>)it.Current;
+                    IEnumerator<object> it2 = ((ICollection<object>)entry.Value).GetEnumerator();
+                    while (it2.MoveNext())
+                    {
+                        TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)it2.Current;
+                        perField.termsHashPerField.reset();
+                        perField.shrinkHash();
+                    }
+
+                    TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread)entry.Key;
+                    perThread.termsHashPerThread.reset(true);
+                }
+
+            }
+        }
+
+        internal override void closeDocStore(DocumentsWriter.FlushState state)
+        {
+            lock (this)
+            {
+                if (tvx != null)
+                {
+                    // At least one doc in this run had term vectors
+                    // enabled
+                    fill(state.numDocsInStore - docWriter.GetDocStoreOffset());
+                    tvx.Close();
+                    tvf.Close();
+                    tvd.Close();
+                    tvx = null;
+                    System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
+                    if (4 + state.numDocsInStore * 16 != state.directory.FileLength(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
+                        throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) + " length in bytes of " + state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+
+                    string tvxFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
+                    string tvfFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION;
+                    string tvdFile = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION;
+      
+                    state.flushedFiles[tvxFile] = tvxFile;
+                    state.flushedFiles[tvfFile] = tvfFile;
+                    state.flushedFiles[tvdFile] = tvdFile;
+
+                    docWriter.RemoveOpenFile(tvxFile);
+                    docWriter.RemoveOpenFile(tvfFile);
+                    docWriter.RemoveOpenFile(tvdFile);
+
+                    lastDocID = 0;
+                }
+            }
+        }
+
+        internal int allocCount;
+
+        internal PerDoc getPerDoc()
+        {
+            lock (this)
+            {
+                if (freeCount == 0)
+                {
+                    allocCount++;
+                    if (allocCount > docFreeList.Length)
+                    {
+                        // Grow our free list up front to make sure we have
+                        // enough space to recycle all outstanding PerDoc
+                        // instances
+                        System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+                        docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+                    }
+                    return new PerDoc(this);
+                }
+                else
+                    return docFreeList[--freeCount];
+            }
+        }
+
+        /** Fills in no-term-vectors for all docs we haven't seen
+         *  since the last doc that had term vectors. */
+        internal void fill(int docID)
+        {
+            int docStoreOffset = docWriter.GetDocStoreOffset();
+            int end = docID + docStoreOffset;
+            if (lastDocID < end)
+            {
+                long tvfPosition = tvf.GetFilePointer();
+                while (lastDocID < end)
+                {
+                    tvx.WriteLong(tvd.GetFilePointer());
+                    tvd.WriteVInt(0);
+                    tvx.WriteLong(tvfPosition);
+                    lastDocID++;
+                }
+            }
+        }
+
+        internal void initTermVectorsWriter()
+        {
+            lock (this)
+            {
+                if (tvx == null)
+                {
+
+                    string docStoreSegment = docWriter.GetDocStoreSegment();
+
+                    if (docStoreSegment == null)
+                        return;
+
+                    System.Diagnostics.Debug.Assert(docStoreSegment != null);
+
+                    // If we hit an exception while init'ing the term
+                    // vector output files, we must abort this segment
+                    // because those files will be in an unknown
+                    // state:
+                    tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+                    tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+                    tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+
+                    tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+                    tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+                    tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
+
+                    docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+                    docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
+                    docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
+
+                    lastDocID = 0;
+                }
+            }
+        }
+
+        internal void finishDocument(PerDoc perDoc)
+        {
+            lock (this)
+            {
+
+                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));
+
+                initTermVectorsWriter();
+
+                fill(perDoc.docID);
+
+                // Append term vectors to the real outputs:
+                tvx.WriteLong(tvd.GetFilePointer());
+                tvx.WriteLong(tvf.GetFilePointer());
+                tvd.WriteVInt(perDoc.numVectorFields);
+                if (perDoc.numVectorFields > 0)
+                {
+                    for (int i = 0; i < perDoc.numVectorFields; i++)
+                        tvd.WriteVInt(perDoc.fieldNumbers[i]);
+                    System.Diagnostics.Debug.Assert(0 == perDoc.fieldPointers[0]);
+                    long lastPos = perDoc.fieldPointers[0];
+                    for (int i = 1; i < perDoc.numVectorFields; i++)
+                    {
+                        long pos = perDoc.fieldPointers[i];
+                        tvd.WriteVLong(pos - lastPos);
+                        lastPos = pos;
+                    }
+                    perDoc.tvf.WriteTo(tvf);
+                    perDoc.tvf.Reset();
+                    perDoc.numVectorFields = 0;
+                }
+
+                System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.GetDocStoreOffset());
+
+                lastDocID++;
+
+                free(perDoc);
+                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
+            }
+        }
+
+        public bool freeRAM()
+        {
+            // We don't hold any state beyond one doc, so we don't
+            // free persistent RAM here
+            return false;
+        }
+
+        internal override void Abort()
+        {
+            if (tvx != null)
+            {
+                try
+                {
+                    tvx.Close();
+                }
+                catch (System.Exception)
+                {
+                }
+                tvx = null;
+            }
+            if (tvd != null)
+            {
+                try
+                {
+                    tvd.Close();
+                }
+                catch (System.Exception)
+                {
+                }
+                tvd = null;
+            }
+            if (tvf != null)
+            {
+                try
+                {
+                    tvf.Close();
+                }
+                catch (System.Exception)
+                {
+                }
+                tvf = null;
+            }
+            lastDocID = 0;
+        }
+
+        internal void free(PerDoc doc)
+        {
+            lock (this)
+            {
+                System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+                docFreeList[freeCount++] = doc;
+            }
+        }
+
+        internal class PerDoc : DocumentsWriter.DocWriter
+        {
+            // TODO: use something more memory efficient; for small
+            // docs the 1024 buffer size of RAMOutputStream wastes a lot
+            internal RAMOutputStream tvf = new RAMOutputStream();
+            internal int numVectorFields;
+
+            internal int[] fieldNumbers = new int[1];
+            internal long[] fieldPointers = new long[1];
+
+            private TermVectorsTermsWriter enclosing_instance;
+
+            internal PerDoc(TermVectorsTermsWriter enclosing_instance)
+            {
+                this.enclosing_instance = enclosing_instance;
+            }
+
+            internal void reset()
+            {
+                tvf.Reset();
+                numVectorFields = 0;
+            }
+
+            internal override void Abort()
+            {
+                reset();
+                enclosing_instance.free(this);
+            }
+
+            internal void addField(int fieldNumber)
+            {
+                if (numVectorFields == fieldNumbers.Length)
+                {
+                    fieldNumbers = ArrayUtil.Grow(fieldNumbers);
+                    fieldPointers = ArrayUtil.Grow(fieldPointers);
+                }
+                fieldNumbers[numVectorFields] = fieldNumber;
+                fieldPointers[numVectorFields] = tvf.GetFilePointer();
+                numVectorFields++;
+            }
+
+            internal override long SizeInBytes()
+            {
+                return tvf.SizeInBytes();
+            }
+
+            internal override void Finish()
+            {
+                enclosing_instance.finishDocument(this);
+            }
+        }
+
+        internal class PostingList : RawPostingList
+        {
+            internal int freq;                                       // How many times this term occurred in the current doc
+            internal int lastOffset;                                 // Last offset we saw
+            internal int lastPosition;                               // Last position where this term occurred
+        }
+
+        internal override int bytesPerPosting()
+        {
+            return RawPostingList.BYTES_SIZE + 3 * DocumentsWriter.INT_NUM_BYTE;
+        }
+    }
+}
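
One detail worth calling out in the file above: fill() is what keeps tvx dense. Every document gets a 16-byte tvx record even when it stores no vectors; tvd just records a zero field count, and the tvf pointer is repeated unchanged. That invariant is what the "4 + state.numDocsInStore * 16" length check in closeDocStore() verifies. A hedged sketch of the bytes emitted per skipped doc:

    using IndexOutput = Lucene.Net.Store.IndexOutput;

    static class FillSketch
    {
        // One empty entry, as fill() writes it (FORMAT_CURRENT layout).
        internal static void WriteEmptyDoc(IndexOutput tvx, IndexOutput tvd,
                                           long tvfPosition)
        {
            tvx.WriteLong(tvd.GetFilePointer()); // tvd pointer for this doc
            tvd.WriteVInt(0);                    // zero vectorized fields
            tvx.WriteLong(tvfPosition);          // tvf pointer, unchanged
        }
    }
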

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriterPerField.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsTermsWriterPerField.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriterPerField.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriterPerField.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Fieldable = Lucene.Net.Documents.Fieldable;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using Token = Lucene.Net.Analysis.Token;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+    internal sealed class TermVectorsTermsWriterPerField : TermsHashConsumerPerField
+    {
+
+        internal readonly TermVectorsTermsWriterPerThread perThread;
+        internal readonly TermsHashPerField termsHashPerField;
+        internal readonly TermVectorsTermsWriter termsWriter;
+        internal readonly FieldInfo fieldInfo;
+        internal readonly DocumentsWriter.DocState docState;
+        internal readonly DocInverter.FieldInvertState fieldState;
+
+        internal bool doVectors;
+        internal bool doVectorPositions;
+        internal bool doVectorOffsets;
+
+        internal int maxNumPostings;
+
+        public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo)
+        {
+            this.termsHashPerField = termsHashPerField;
+            this.perThread = perThread;
+            this.termsWriter = perThread.termsWriter;
+            this.fieldInfo = fieldInfo;
+            docState = termsHashPerField.docState;
+            fieldState = termsHashPerField.fieldState;
+        }
+
+        internal override int getStreamCount()
+        {
+            return 2;
+        }
+
+        internal override bool start(Fieldable[] fields, int count)
+        {
+            doVectors = false;
+            doVectorPositions = false;
+            doVectorOffsets = false;
+
+            for (int i = 0; i < count; i++)
+            {
+                Fieldable field = fields[i];
+                if (field.IsIndexed() && field.IsTermVectorStored())
+                {
+                    doVectors = true;
+                    doVectorPositions |= field.IsStorePositionWithTermVector();
+                    doVectorOffsets |= field.IsStoreOffsetWithTermVector();
+                }
+            }
+
+            if (doVectors)
+            {
+                if (perThread.doc == null)
+                {
+                    perThread.doc = termsWriter.getPerDoc();
+                    perThread.doc.docID = docState.docID;
+                    System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0);
+                    System.Diagnostics.Debug.Assert(0 == perThread.doc.tvf.Length());
+                    System.Diagnostics.Debug.Assert(0 == perThread.doc.tvf.GetFilePointer());
+                }
+                else
+                {
+                    System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID);
+
+                    if (termsHashPerField.numPostings != 0)
+                        // Only necessary if previous doc hit a
+                        // non-aborting exception while writing vectors in
+                        // this field:
+                        termsHashPerField.reset();
+                }
+            }
+
+            // TODO: only if needed for performance
+            //perThread.postingsCount = 0;
+
+            return doVectors;
+        }
+
+        public void abort() { }
+
+        /** Called once per field per document if term vectors
+         *  are enabled, to write the vectors to
+         *  RAMOutputStream, which is then quickly flushed to
+         *  the real term vectors files in the Directory. */
+        internal override void finish()
+        {
+
+            System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start"));
+
+            int numPostings = termsHashPerField.numPostings;
+
+            System.Diagnostics.Debug.Assert(numPostings >= 0);
+
+            if (!doVectors || numPostings == 0)
+                return;
+
+            if (numPostings > maxNumPostings)
+                maxNumPostings = numPostings;
+
+            IndexOutput tvf = perThread.doc.tvf;
+
+            // This is called once, after inverting all occurrences
+            // of a given field in the doc.  At this point we flush
+            // our hash into the DocWriter.
+
+            System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector);
+            System.Diagnostics.Debug.Assert(perThread.vectorFieldsInOrder(fieldInfo));
+
+            perThread.doc.addField(termsHashPerField.fieldInfo.number);
+
+            RawPostingList[] postings = termsHashPerField.sortPostings();
+
+            tvf.WriteVInt(numPostings);
+            byte bits = 0x0;
+            if (doVectorPositions)
+                bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
+            if (doVectorOffsets)
+                bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
+            tvf.WriteByte(bits);
+
+            int encoderUpto = 0;
+            int lastTermBytesCount = 0;
+
+            ByteSliceReader reader = perThread.vectorSliceReader;
+            char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers;
+            for (int j = 0; j < numPostings; j++)
+            {
+                TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList)postings[j];
+                int freq = posting.freq;
+
+                char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
+                int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
+
+                // We swap between two encoders to save copying
+                // last Term's byte array
+                UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto];
+
+                // TODO: we could do this incrementally
+                UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result);
+                int termBytesCount = utf8Result.length;
+
+                // TODO: UTF16toUTF8 could tell us this prefix
+                // Compute common prefix between last term and
+                // this term
+                int prefix = 0;
+                if (j > 0)
+                {
+                    byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result;
+                    byte[] termBytes = perThread.utf8Results[encoderUpto].result;
+                    while (prefix < lastTermBytesCount && prefix < termBytesCount)
+                    {
+                        if (lastTermBytes[prefix] != termBytes[prefix])
+                            break;
+                        prefix++;
+                    }
+                }
+                encoderUpto = 1 - encoderUpto;
+                lastTermBytesCount = termBytesCount;
+
+                int suffix = termBytesCount - prefix;
+                tvf.WriteVInt(prefix);
+                tvf.WriteVInt(suffix);
+                tvf.WriteBytes(utf8Result.result, prefix, suffix);
+                tvf.WriteVInt(freq);
+
+                if (doVectorPositions)
+                {
+                    termsHashPerField.initReader(reader, posting, 0);
+                    reader.WriteTo(tvf);
+                }
+
+                if (doVectorOffsets)
+                {
+                    termsHashPerField.initReader(reader, posting, 1);
+                    reader.WriteTo(tvf);
+                }
+            }
+
+            termsHashPerField.reset();
+            perThread.termsHashPerThread.reset(false);
+        }
+
+        internal void shrinkHash()
+        {
+            termsHashPerField.shrinkHash(maxNumPostings);
+            maxNumPostings = 0;
+        }
+
+        internal override void newTerm(Token t, RawPostingList p0)
+        {
+
+            System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.newTerm start"));
+
+            TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList)p0;
+
+            p.freq = 1;
+
+            if (doVectorOffsets)
+            {
+                int startOffset = fieldState.offset + t.StartOffset();
+                int endOffset = fieldState.offset + t.EndOffset();
+                termsHashPerField.writeVInt(1, startOffset);
+                termsHashPerField.writeVInt(1, endOffset - startOffset);
+                p.lastOffset = endOffset;
+            }
+
+            if (doVectorPositions)
+            {
+                termsHashPerField.writeVInt(0, fieldState.position);
+                p.lastPosition = fieldState.position;
+            }
+        }
+
+        internal override void addTerm(Token t, RawPostingList p0)
+        {
+
+            System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.addTerm start"));
+
+            TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList)p0;
+            p.freq++;
+
+            if (doVectorOffsets)
+            {
+                int startOffset = fieldState.offset + t.StartOffset();
+                int endOffset = fieldState.offset + t.EndOffset();
+                termsHashPerField.writeVInt(1, startOffset - p.lastOffset);
+                termsHashPerField.writeVInt(1, endOffset - startOffset);
+                p.lastOffset = endOffset;
+            }
+
+            if (doVectorPositions)
+            {
+                termsHashPerField.writeVInt(0, fieldState.position - p.lastPosition);
+                p.lastPosition = fieldState.position;
+            }
+        }
+
+        internal override void skippingLongTerm(Token t) { }
+    }
+}
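
The finish() loop above alternates between two UTF8Result buffers (encoderUpto flips each term) so the previous term's bytes stay available for the common-prefix computation without an extra copy. A hedged sketch of just that comparison:

    // Byte-level common prefix between consecutive terms; the caller
    // then writes prefix, suffix length (curLen - prefix), and the
    // suffix bytes to tvf.
    static class PrefixSketch
    {
        internal static int CommonPrefix(byte[] prev, int prevLen,
                                         byte[] cur, int curLen)
        {
            int prefix = 0;
            int limit = System.Math.Min(prevLen, curLen);
            while (prefix < limit && prev[prefix] == cur[prefix])
                prefix++;
            return prefix;
        }
    }
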

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriterPerThread.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsTermsWriterPerThread.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriterPerThread.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsTermsWriterPerThread.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Index
+{
+    internal sealed class TermVectorsTermsWriterPerThread : TermsHashConsumerPerThread
+    {
+
+        internal readonly TermVectorsTermsWriter termsWriter;
+        internal readonly TermsHashPerThread termsHashPerThread;
+        internal readonly DocumentsWriter.DocState docState;
+
+        internal TermVectorsTermsWriter.PerDoc doc;
+
+        public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter)
+        {
+            this.termsWriter = termsWriter;
+            this.termsHashPerThread = termsHashPerThread;
+            docState = termsHashPerThread.docState;
+        }
+
+        // Used by perField when serializing the term vectors
+        internal readonly ByteSliceReader vectorSliceReader = new ByteSliceReader();
+
+        internal readonly UnicodeUtil.UTF8Result[] utf8Results = { new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result() };
+
+        internal override void startDocument()
+        {
+            System.Diagnostics.Debug.Assert(clearLastVectorFieldName());
+            if (doc != null)
+            {
+                doc.reset();
+                doc.docID = docState.docID;
+            }
+        }
+
+        internal override DocumentsWriter.DocWriter finishDocument()
+        {
+            try
+            {
+                return doc;
+            }
+            finally
+            {
+                doc = null;
+            }
+        }
+
+        public override TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
+        {
+            return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+        }
+
+        public override void abort()
+        {
+            if (doc != null)
+            {
+                doc.Abort();
+                doc = null;
+            }
+        }
+
+        // Called only by assert
+        internal bool clearLastVectorFieldName()
+        {
+            lastVectorFieldName = null;
+            return true;
+        }
+
+        // Called only by assert
+        internal string lastVectorFieldName;
+        internal bool vectorFieldsInOrder(FieldInfo fi)
+        {
+            try
+            {
+                if (lastVectorFieldName != null)
+                    return string.CompareOrdinal(lastVectorFieldName, fi.name) < 0;
+                else
+                    return true;
+            }
+            finally
+            {
+                lastVectorFieldName = fi.name;
+            }
+        }
+    }
+}
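
A note on vectorFieldsInOrder/clearLastVectorFieldName above: both run only inside Debug.Assert, so release builds skip them entirely, and the try/finally updates lastVectorFieldName even as the comparison result is returned. A hedged sketch of the idiom in isolation:

    // Assert-only ordering check: reports whether names arrive in
    // ascending ordinal order, recording the latest name either way.
    internal class OrderCheck
    {
        private string lastName;

        public bool InOrder(string name)
        {
            try
            {
                return lastName == null
                    || string.CompareOrdinal(lastName, name) < 0;
            }
            finally
            {
                lastName = name;
            }
        }
    }
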

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermVectorsWriter.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermVectorsWriter.cs Wed Jul 29 18:04:12 2009
@@ -20,6 +20,7 @@
 using Directory = Lucene.Net.Store.Directory;
 using IndexOutput = Lucene.Net.Store.IndexOutput;
 using StringHelper = Lucene.Net.Util.StringHelper;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
 
 namespace Lucene.Net.Index
 {
@@ -29,16 +30,17 @@
 		
 		private IndexOutput tvx = null, tvd = null, tvf = null;
 		private FieldInfos fieldInfos;
-		
-		public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
+        internal UnicodeUtil.UTF8Result[] utf8Results = new UnicodeUtil.UTF8Result[] { new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result() };
+
+		public TermVectorsWriter(Directory directory, string segment, FieldInfos fieldInfos)
 		{
 			// Open files for TermVector storage
 			tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
-			tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
+			tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
 			tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
-			tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
+			tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
 			tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
-			tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);
+			tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
 			
 			this.fieldInfos = fieldInfos;
 		}
@@ -52,8 +54,9 @@
 		/// <throws>  IOException </throws>
 		public void  AddAllDocVectors(TermFreqVector[] vectors)
 		{
-			
-			tvx.WriteLong(tvd.GetFilePointer());
+
+            tvx.WriteLong(tvd.GetFilePointer());
+            tvx.WriteLong(tvf.GetFilePointer());
 			
 			if (vectors != null)
 			{
@@ -98,19 +101,24 @@
 					
 					tvf.WriteVInt(bits);
 					
-					System.String[] terms = vectors[i].GetTerms();
+					string[] terms = vectors[i].GetTerms();
 					int[] freqs = vectors[i].GetTermFrequencies();
 					
-					System.String lastTermText = "";
+					int utf8Upto = 0;
+                    utf8Results[1].length = 0;
+
 					for (int j = 0; j < numTerms; j++)
 					{
-						System.String termText = terms[j];
-						int start = StringHelper.StringDifference(lastTermText, termText);
-						int length = termText.Length - start;
+                        UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]);
+
+                        int start = StringHelper.bytesDifference(
+                            utf8Results[1 - utf8Upto].result, utf8Results[1 - utf8Upto].length, utf8Results[utf8Upto].result, utf8Results[utf8Upto].length);
+
+                        int length = utf8Results[utf8Upto].length - start;
 						tvf.WriteVInt(start); // write shared prefix length
 						tvf.WriteVInt(length); // write delta length
-						tvf.WriteChars(termText, start, length); // write delta chars
-						lastTermText = termText;
+						tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes
+                        utf8Upto = 1 - utf8Upto;
 						
 						int termFreq = freqs[j];
 						
@@ -155,8 +163,8 @@
 				}
 				
 				// 2nd pass: write field pointers to tvd
-				long lastFieldPointer = 0;
-				for (int i = 0; i < numFields; i++)
+				long lastFieldPointer = fieldPointers[0];
+				for (int i = 1; i < numFields; i++)
 				{
 					long fieldPointer = fieldPointers[i];
 					tvd.WriteVLong(fieldPointer - lastFieldPointer);
@@ -166,7 +174,31 @@
 			else
 				tvd.WriteVInt(0);
 		}
-		
+
+        /// <summary> Do a bulk copy of numDocs documents from reader to our
+        /// streams.  This is used to expedite merging when the
+        /// field numbers match across segments.
+        /// </summary>
+        internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
+        {
+            long tvdPosition = tvd.GetFilePointer();
+            long tvfPosition = tvf.GetFilePointer();
+            long tvdStart = tvdPosition;
+            long tvfStart = tvfPosition;
+            for (int i = 0; i < numDocs; i++)
+            {
+                tvx.WriteLong(tvdPosition);
+                tvdPosition += tvdLengths[i];
+                tvx.WriteLong(tvfPosition);
+                tvfPosition += tvfLengths[i];
+            }
+            tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart);
+            tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart);
+            System.Diagnostics.Debug.Assert(tvd.GetFilePointer() == tvdPosition);
+            System.Diagnostics.Debug.Assert(tvf.GetFilePointer() == tvfPosition);
+        }
+
 		/// <summary>Close all streams. </summary>
 		internal void  Close()
 		{
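
The rewritten term loop in AddAllDocVectors above switches the .tvf term encoding from chars to UTF-8 bytes: each term is converted with UnicodeUtil.UTF16toUTF8, StringHelper.bytesDifference measures the byte prefix shared with the previous term, and only the prefix length, the delta length, and the delta bytes are written; the two-slot utf8Results array double-buffers the current and previous encodings, flipped by utf8Upto = 1 - utf8Upto. A self-contained sketch of the same shared-prefix scheme over plain byte arrays (EncodeDelta and the demo Main are illustrative, not the actual file-format code):

    using System;
    using System.Text;

    internal static class PrefixDeltaSketch
    {
        // Returns the delta bytes of curr relative to prev; start receives
        // the length of the shared byte prefix.
        internal static byte[] EncodeDelta(byte[] prev, byte[] curr, out int start)
        {
            int limit = Math.Min(prev.Length, curr.Length);
            start = 0;
            while (start < limit && prev[start] == curr[start])
                start++;
            byte[] delta = new byte[curr.Length - start];
            Array.Copy(curr, start, delta, 0, delta.Length);
            return delta;
        }

        static void Main()
        {
            byte[] prev = Encoding.UTF8.GetBytes("apple");
            byte[] curr = Encoding.UTF8.GetBytes("applesauce");
            int start;
            byte[] delta = EncodeDelta(prev, curr, out start);
            // Prints start=5 delta=sauce; the writer would emit
            // WriteVInt(5), WriteVInt(5), WriteBytes(delta).
            Console.WriteLine("start=" + start + " delta=" + Encoding.UTF8.GetString(delta));
        }
    }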

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHash.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermsHash.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHash.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHash.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,260 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+    /// <summary> This class implements {@link InvertedDocConsumer}, which
+    /// is passed each token produced by the analyzer on each
+    /// field.  It stores these tokens in a hash table, and
+    /// allocates separate byte streams per token.  Consumers of
+    /// this class, e.g. {@link FreqProxTermsWriter} and {@link
+    /// TermVectorsTermsWriter}, write their own byte streams
+    /// under each term.
+    /// </summary>
+    internal sealed class TermsHash : InvertedDocConsumer
+    {
+
+        internal readonly TermsHashConsumer consumer;
+        internal readonly TermsHash nextTermsHash;
+        internal readonly int bytesPerPosting;
+        internal readonly int postingsFreeChunk;
+        internal readonly DocumentsWriter docWriter;
+
+        //private TermsHash primaryTermsHash;
+
+        private RawPostingList[] postingsFreeList = new RawPostingList[1];
+        private int postingsFreeCount;
+        private int postingsAllocCount;
+        internal bool trackAllocations;
+
+        public TermsHash(DocumentsWriter docWriter, bool trackAllocations, TermsHashConsumer consumer, TermsHash nextTermsHash)
+        {
+            this.docWriter = docWriter;
+            this.consumer = consumer;
+            this.nextTermsHash = nextTermsHash;
+            this.trackAllocations = trackAllocations;
+
+            // Why + 4*POINTER_NUM_BYTE below?
+            //   +1: Posting is referenced by postingsFreeList array
+            //   +3: Posting is referenced by hash, which
+            //       targets 25-50% fill factor; approximate this
+            //       as 3X # pointers
+            bytesPerPosting = consumer.bytesPerPosting() + 4 * DocumentsWriter.POINTER_NUM_BYTE;
+            postingsFreeChunk = (int)(DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting);
+        }
+
+        internal override InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread)
+        {
+            return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
+        }
+
+        internal TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread)
+        {
+            return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
+        }
+
+        internal override void setFieldInfos(FieldInfos fieldInfos)
+        {
+            this.fieldInfos = fieldInfos;
+            consumer.setFieldInfos(fieldInfos);
+        }
+
+        internal override void abort()
+        {
+            lock (this)
+            {
+                consumer.Abort();
+                if (nextTermsHash != null)
+                    nextTermsHash.abort();
+            }
+        }
+
+        internal void shrinkFreePostings(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
+        {
+
+            System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, System.Threading.Thread.CurrentThread.Name + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer);
+
+            int newSize = ArrayUtil.GetShrinkSize(postingsFreeList.Length, postingsAllocCount);
+            if (newSize != postingsFreeList.Length)
+            {
+                RawPostingList[] newArray = new RawPostingList[newSize];
+                System.Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
+                postingsFreeList = newArray;
+            }
+        }
+
+        internal override void closeDocStore(DocumentsWriter.FlushState state)
+        {
+            lock (this)
+            {
+                consumer.closeDocStore(state);
+                if (nextTermsHash != null)
+                    nextTermsHash.closeDocStore(state);
+            }
+        }
+
+        internal override void flush(IDictionary<object, ICollection<object>> threadsAndFields, DocumentsWriter.FlushState state)
+        {
+            lock (this)
+            {
+                IDictionary<object, object> childThreadsAndFields = new Dictionary<object, object>();
+                IDictionary<object, ICollection<object>> nextThreadsAndFields;
+
+                if (nextTermsHash != null)
+                    nextThreadsAndFields = new Dictionary<object, ICollection<object>>();
+                else
+                    nextThreadsAndFields = null;
+
+                IEnumerator<KeyValuePair<object, ICollection<object>>> it = threadsAndFields.GetEnumerator();
+                while (it.MoveNext())
+                {
+
+                    KeyValuePair<object, ICollection<object>> entry = it.Current;
+
+                    TermsHashPerThread perThread = (TermsHashPerThread)entry.Key;
+
+                    ICollection<object> fields = entry.Value;
+
+                    IEnumerator<object> fieldsIt = fields.GetEnumerator();
+                    IDictionary<object, object> childFields = new Dictionary<object, object>();
+                    IDictionary<object, object> nextChildFields;
+
+                    if (nextTermsHash != null)
+                        nextChildFields = new Dictionary<object, object>();
+                    else
+                        nextChildFields = null;
+
+                    while (fieldsIt.MoveNext())
+                    {
+                        TermsHashPerField perField = (TermsHashPerField)fieldsIt.Current;
+                        childFields[perField.consumer] = perField.consumer;
+                        if (nextTermsHash != null)
+                            nextChildFields[perField.nextPerField] = perField.nextPerField;
+                    }
+
+                    childThreadsAndFields[perThread.consumer] = childFields.Keys;
+                    if (nextTermsHash != null)
+                        nextThreadsAndFields[perThread.nextPerThread] = nextChildFields.Keys;
+                }
+
+                consumer.flush(childThreadsAndFields, state);
+
+                shrinkFreePostings(threadsAndFields, state);
+
+                if (nextTermsHash != null)
+                    nextTermsHash.flush(nextThreadsAndFields, state);
+            }
+        }
+
+        internal override bool freeRAM()
+        {
+            lock (this)
+            {
+
+                if (!trackAllocations)
+                    return false;
+
+                int numToFree;
+                if (postingsFreeCount >= postingsFreeChunk)
+                    numToFree = postingsFreeChunk;
+                else
+                    numToFree = postingsFreeCount;
+                bool any = numToFree > 0;
+                if (any)
+                {
+                    SupportClass.CollectionsSupport.ArrayFill(postingsFreeList, postingsFreeCount - numToFree, postingsFreeCount, null);
+                    postingsFreeCount -= numToFree;
+                    postingsAllocCount -= numToFree;
+                    docWriter.BytesAllocated(-numToFree * bytesPerPosting);
+                }
+
+                if (nextTermsHash != null)
+                    any |= nextTermsHash.freeRAM();
+
+                return any;
+            }
+        }
+
+        public void recyclePostings(RawPostingList[] postings, int numPostings)
+        {
+            lock (this)
+            {
+
+                System.Diagnostics.Debug.Assert(postings.Length >= numPostings);
+
+                // Move all Postings from this ThreadState back to our
+                // free list.  We pre-allocated this array while we were
+                // creating Postings to make sure it's large enough
+                System.Diagnostics.Debug.Assert(postingsFreeCount + numPostings <= postingsFreeList.Length);
+                System.Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings);
+                postingsFreeCount += numPostings;
+            }
+        }
+
+        public void getPostings(RawPostingList[] postings)
+        {
+            lock (this)
+            {
+                System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start"));
+
+                System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length);
+                System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount);
+
+                int numToCopy;
+                if (postingsFreeCount < postings.Length)
+                    numToCopy = postingsFreeCount;
+                else
+                    numToCopy = postings.Length;
+                int start = postingsFreeCount - numToCopy;
+                System.Diagnostics.Debug.Assert(start >= 0);
+                System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length);
+                System.Diagnostics.Debug.Assert(numToCopy <= postings.Length);
+                System.Array.Copy(postingsFreeList, start, postings, 0, numToCopy);
+
+                // Directly allocate the remainder if any
+                if (numToCopy != postings.Length)
+                {
+                    int extra = postings.Length - numToCopy;
+                    int newPostingsAllocCount = postingsAllocCount + extra;
+
+                    consumer.createPostings(postings, numToCopy, extra);
+                    System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create"));
+                    postingsAllocCount += extra;
+
+                    if (trackAllocations)
+                        docWriter.BytesAllocated(extra * bytesPerPosting);
+
+                    if (newPostingsAllocCount > postingsFreeList.Length)
+                        // Pre-allocate the postingsFreeList so it's large
+                        // enough to hold all postings we've given out
+                        postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)];
+                }
+
+                postingsFreeCount -= numToCopy;
+
+                if (trackAllocations)
+                    docWriter.BytesUsed(postings.Length * bytesPerPosting);
+            }
+        }
+    }
+}
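
getPostings and recyclePostings above form a synchronized free-list pool: requests are served from the tail of postingsFreeList, any shortfall is freshly allocated through consumer.createPostings, and the free list is pre-grown so everything handed out can later be recycled without a further bounds check; the bytesPerPosting figure from the constructor (the consumer's posting size plus four pointer widths for the free list and the 25-50% full hash table) drives the BytesUsed/BytesAllocated accounting. A stripped-down, single-threaded sketch of the same pooling pattern (PostingPool and its members are illustrative stand-ins, not the Lucene.Net types):

    using System;

    internal sealed class PostingPool<T> where T : class, new()
    {
        private T[] freeList = new T[1];
        private int freeCount;   // recycled instances available
        private int allocCount;  // instances created so far

        // Fill dest from the free list, allocating any shortfall.
        public void Get(T[] dest)
        {
            int numToCopy = Math.Min(freeCount, dest.Length);
            Array.Copy(freeList, freeCount - numToCopy, dest, 0, numToCopy);
            freeCount -= numToCopy;

            int extra = dest.Length - numToCopy;
            if (extra > 0)
            {
                for (int i = numToCopy; i < dest.Length; i++)
                    dest[i] = new T();
                allocCount += extra;
                // Pre-grow so a later Recycle of everything outstanding fits.
                if (allocCount > freeList.Length)
                    Array.Resize(ref freeList, allocCount);
            }
        }

        // Return instances to the tail of the free list.
        public void Recycle(T[] items, int count)
        {
            Array.Copy(items, 0, freeList, freeCount, count);
            freeCount += count;
        }
    }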

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermsHashConsumer.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumer.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumer.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+    internal abstract class TermsHashConsumer
+    {
+        internal abstract int bytesPerPosting();
+        internal abstract void createPostings(RawPostingList[] postings, int start, int count);
+        internal abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread);
+        internal abstract void flush(IDictionary<object, object> threadsAndFields, DocumentsWriter.FlushState state);
+        internal abstract void Abort();
+        internal abstract void closeDocStore(DocumentsWriter.FlushState state);
+
+        internal FieldInfos fieldInfos;
+
+        internal void setFieldInfos(FieldInfos fieldInfos)
+        {
+            this.fieldInfos = fieldInfos;
+        }
+    }
+}
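
The concrete consumers of this contract in the patch are FreqProxTermsWriter (inverted-index postings) and TermVectorsTermsWriter (per-document term vectors); each reports its posting size through bytesPerPosting() and materializes its own RawPostingList subclass in createPostings(). The two run chained rather than independently: the primary TermsHash owns the freq/prox consumer, and a nested, non-RAM-tracking TermsHash owns the vectors consumer. A sketch of that wiring in the style of Lucene's DocumentsWriter (the docWriter variable and the exact constructor arguments are assumptions here, not taken from this patch):

    // Primary hash feeds FreqProxTermsWriter; the nested hash, which sees
    // the same tokens, feeds TermVectorsTermsWriter.
    TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
    TermsHashConsumer vectorsWriter = new TermVectorsTermsWriter(docWriter);
    TermsHash termsHash = new TermsHash(docWriter, /* trackAllocations */ true, freqProxWriter,
        new TermsHash(docWriter, /* trackAllocations */ false, vectorsWriter, null));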

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumerPerField.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermsHashConsumerPerField.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumerPerField.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumerPerField.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Fieldable = Lucene.Net.Documents.Fieldable;
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Index
+{
+    /// <summary> Implement this class to plug into the TermsHash
+    /// processor, which inverts and stores Tokens into a hash
+    /// table and provides an API for writing bytes into
+    /// multiple streams for each unique Token. </summary>
+    internal abstract class TermsHashConsumerPerField
+    {
+        internal abstract bool start(Fieldable[] fields, int count);
+        internal abstract void finish();
+        internal abstract void skippingLongTerm(Token t);
+        internal abstract void newTerm(Token t, RawPostingList p);
+        internal abstract void addTerm(Token t, RawPostingList p);
+        internal abstract int getStreamCount();
+    }
+}
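
The contract above is callback-driven: start() is invoked once per field (returning false skips the field), the hash then calls newTerm() the first time a term appears in the field and addTerm() for each repeat, skippingLongTerm() when a token exceeds the maximum term length, and finish() when the field ends; getStreamCount() tells the hash how many parallel byte streams to reserve per posting. A schematic implementation tracing that call order (the class and its counters are hypothetical):

    using Fieldable = Lucene.Net.Documents.Fieldable;
    using Token = Lucene.Net.Analysis.Token;

    namespace Lucene.Net.Index
    {
        internal sealed class TracingConsumerPerField : TermsHashConsumerPerField
        {
            private int uniqueTerms, totalTokens;

            internal override int getStreamCount() { return 1; }  // one byte stream per posting

            internal override bool start(Fieldable[] fields, int count)
            {
                uniqueTerms = 0;
                totalTokens = 0;
                return true;  // false would skip inverting this field
            }

            internal override void newTerm(Token t, RawPostingList p)
            {
                uniqueTerms++;   // first occurrence of this term in the field
                totalTokens++;
            }

            internal override void addTerm(Token t, RawPostingList p)
            {
                totalTokens++;   // repeat occurrence
            }

            internal override void skippingLongTerm(Token t)
            {
                // token exceeded the maximum term length; ignored here
            }

            internal override void finish()
            {
                // uniqueTerms/totalTokens now describe the field just inverted
            }
        }
    }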

Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumerPerThread.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/TermsHashConsumerPerThread.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumerPerThread.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/TermsHashConsumerPerThread.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+    internal abstract class TermsHashConsumerPerThread
+    {
+        internal abstract void startDocument();
+        internal abstract DocumentsWriter.DocWriter finishDocument();
+        public abstract TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
+        public abstract void abort();
+    }
+}


