lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dougs...@apache.org
Subject svn commit: r798995 [12/35] - in /incubator/lucene.net/trunk/C#/src: Lucene.Net/ Lucene.Net/Analysis/ Lucene.Net/Analysis/Standard/ Lucene.Net/Document/ Lucene.Net/Index/ Lucene.Net/QueryParser/ Lucene.Net/Search/ Lucene.Net/Search/Function/ Lucene.Net...
Date Wed, 29 Jul 2009 18:04:24 GMT
Added: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ReusableStringReader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/ReusableStringReader.cs?rev=798995&view=auto
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ReusableStringReader.cs (added)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/ReusableStringReader.cs Wed Jul 29 18:04:12 2009
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Index
+{
+    /// <summary>
+    /// Used by DocumentsWriter to implement a StringReader
+    /// that can be reset to a new string; we use this when
+    /// tokenizing the string value from a Field.
+    /// </summary>
+    internal sealed class ReusableStringReader : System.IO.TextReader
+    {
+        int upto;
+        int left;
+        string s;
+
+        internal void Init(string s)
+        {
+            this.s = s;
+            left = s.Length;
+            this.upto = 0;
+        }
+
+        public int Read(char[] c)
+        {
+            return Read(c, 0, c.Length);
+        }
+
+        public override int Read(char[] c, int off, int len)
+        {
+            if (left > len)
+            {
+                SupportClass.TextSupport.GetCharsFromString(s, upto, upto + len, c, off);
+                upto += len;
+                left -= len;
+                return len;
+            }
+            else if (0 == left)
+            {
+                return -1;
+            }
+            else
+            {
+                SupportClass.TextSupport.GetCharsFromString(s, upto, upto + left, c, off);
+                int r = left;
+                left = 0;
+                upto = s.Length;
+                return r;
+            }
+        }
+
+        public override void Close() { }
+    }
+}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentInfo.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfo.cs Wed Jul 29 18:04:12 2009
@@ -15,8 +15,9 @@
  * limitations under the License.
  */
 
-using System;
+using System.Collections.Generic;
 
+using BitVector = Lucene.Net.Util.BitVector;
 using Directory = Lucene.Net.Store.Directory;
 using IndexOutput = Lucene.Net.Store.IndexOutput;
 using IndexInput = Lucene.Net.Store.IndexInput;
@@ -32,7 +33,7 @@
 		internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
 		internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it. 
 		
-		public System.String name; // unique name in dir
+		public string name; // unique name in dir
 		public int docCount; // number of docs in seg
 		public Directory dir; // where segment resides
 		
@@ -63,18 +64,23 @@
 		// and true for newly created merged segments (both
 		// compound and non compound).
 		
-		private System.Collections.IList files; // cached list of files that this segment uses
+		private List<string> files; // cached list of files that this segment uses
 		// in the Directory
 		
 		internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand)
 		
 		private int docStoreOffset; // if this segment shares stored fields & vectors, this
 		// offset is where in that file this segment's docs begin
-		private System.String docStoreSegment; // name used to derive fields/vectors file we share with
+		private string docStoreSegment; // name used to derive fields/vectors file we share with
 		// other segments
 		private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
+
+        private int delCount;                           // How many deleted docs in this segment, or -1 if not yet known
+                                                        // (if it's an older index)
+
+        private bool hasProx;                        // True if this segment has any fields with omitTf==false
 		
-		public SegmentInfo(System.String name, int docCount, Directory dir)
+		public SegmentInfo(string name, int docCount, Directory dir)
 		{
 			this.name = name;
 			this.docCount = docCount;
@@ -86,13 +92,17 @@
 			docStoreOffset = - 1;
 			docStoreSegment = name;
 			docStoreIsCompoundFile = false;
+            delCount = 0;
+            hasProx = true;
 		}
 		
-		public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile) : this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false)
+		public SegmentInfo(string name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile)
+            : this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false, true)
 		{
 		}
 		
-		public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile) : this(name, docCount, dir)
+		public SegmentInfo(string name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, string docStoreSegment, bool docStoreIsCompoundFile, bool hasProx)
+            : this(name, docCount, dir)
 		{
 			this.isCompoundFile = (sbyte) (isCompoundFile ? YES : NO);
 			this.hasSingleNormFile = hasSingleNormFile;
@@ -100,7 +110,9 @@
 			this.docStoreOffset = docStoreOffset;
 			this.docStoreSegment = docStoreSegment;
 			this.docStoreIsCompoundFile = docStoreIsCompoundFile;
-			System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null);
+            this.hasProx = hasProx;
+            delCount = 0;
+            System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null);
 		}
 		
 		/// <summary> Copy everything from src SegmentInfo into our instance.</summary>
@@ -121,10 +133,11 @@
 			else
 			{
 				normGen = new long[src.normGen.Length];
-				Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
+				System.Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
 			}
 			isCompoundFile = src.isCompoundFile;
 			hasSingleNormFile = src.hasSingleNormFile;
+            delCount = src.delCount;
 		}
 		
 		/// <summary> Construct a new SegmentInfo instance by reading a
@@ -188,6 +201,19 @@
 				}
 				isCompoundFile = (sbyte) input.ReadByte();
 				preLockless = (isCompoundFile == CHECK_DIR);
+                if (format <= SegmentInfos.FORMAT_DEL_COUNT)
+                {
+                    delCount = input.ReadInt();
+                    System.Diagnostics.Debug.Assert(delCount <= docCount);
+                }
+                else
+                {
+                    delCount = -1;
+                }
+                if (format <= SegmentInfos.FORMAT_HAS_PROX)
+                    hasProx = input.ReadByte() == 1;
+                else
+                    hasProx = true;
 			}
 			else
 			{
@@ -199,6 +225,8 @@
 				docStoreOffset = - 1;
 				docStoreIsCompoundFile = false;
 				docStoreSegment = null;
+                delCount = -1;
+                hasProx = true;
 			}
 		}
 		
@@ -231,16 +259,16 @@
 		/// <summary>Returns total size in bytes of all of files used by
 		/// this segment. 
 		/// </summary>
-		internal long SizeInBytes()
+        public /* changed for zoie 1.3.0: internal */ long SizeInBytes()
 		{
 			if (sizeInBytes == - 1)
 			{
-				System.Collections.IList files = Files();
+				List<string> files = Files();
 				int size = files.Count;
 				sizeInBytes = 0;
 				for (int i = 0; i < size; i++)
 				{
-					System.String fileName = (System.String) files[i];
+					string fileName = files[i];
 					// We don't count bytes used by a shared doc store
 					// against this segment:
 					if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName))
@@ -249,8 +277,8 @@
 			}
 			return sizeInBytes;
 		}
-		
-		internal bool HasDeletions()
+
+        public /* changed for zoie 1.3.0: internal */ bool HasDeletions()
 		{
 			// Cases:
 			//
@@ -300,11 +328,12 @@
 			ClearFiles();
 		}
 		
-		public System.Object Clone()
+		public object Clone()
 		{
 			SegmentInfo si = new SegmentInfo(name, docCount, dir);
 			si.isCompoundFile = isCompoundFile;
-			si.delGen = delGen;
+            si.delGen = delGen;
+            si.delCount = delCount;
 			si.preLockless = preLockless;
 			si.hasSingleNormFile = hasSingleNormFile;
 			if (normGen != null)
@@ -323,7 +352,7 @@
 			return si;
 		}
 		
-		internal System.String GetDelFileName()
+		internal string GetDelFileName()
 		{
 			if (delGen == NO)
 			{
@@ -333,9 +362,28 @@
 			}
 			else
 			{
-				// If delGen is CHECK_DIR, it's the pre-lockless-commit file format
-				return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
-			}
+                string retVal = null;
+                string current = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
+                if (this.dir.FileExists(current))
+                {
+                    retVal = current;
+                }
+                else
+                {
+                    string backwards = (name + "_" + System.Convert.ToString(delGen, 16) + "." + IndexFileNames.DELETES_EXTENSION);
+                    if (this.dir.FileExists(backwards))
+                    {
+                        // we are dealing with the old name
+                        retVal = backwards;
+                    }
+                    else
+                    {
+                        // no file, creating one, so use the new name
+                        retVal = current;
+                    }
+                }
+                return retVal;
+            }
 		}
 		
 		/// <summary> Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
@@ -348,7 +396,7 @@
 			if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR))
 			{
 				// Must fallback to directory file exists check:
-				System.String fileName = name + ".s" + fieldNumber;
+				string fileName = name + ".s" + fieldNumber;
 				return dir.FileExists(fileName);
 			}
 			else if (normGen == null || normGen[fieldNumber] == NO)
@@ -362,7 +410,7 @@
 		}
 		
 		/// <summary> Returns true if any fields in this segment have separate norms.</summary>
-		internal bool HasSeparateNorms()
+        public /* changed for zoie 1.3.0: internal */ bool HasSeparateNorms()
 		{
 			if (normGen == null)
 			{
@@ -377,13 +425,13 @@
 					// This means this segment was saved with pre-LOCKLESS
 					// code.  So we must fallback to the original
 					// directory list check:
-					System.String[] result = dir.List();
+					string[] result = dir.List();
 					if (result == null)
 					{
 						throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
 					}
 					
-					System.String pattern;
+					string pattern;
 					pattern = name + ".s";
 					int patternLength = pattern.Length;
 					for (int i = 0; i < result.Length; i++)
@@ -447,9 +495,9 @@
 		/// </summary>
 		/// <param name="number">field index
 		/// </param>
-		internal System.String GetNormFileName(int number)
+		internal string GetNormFileName(int number)
 		{
-			System.String prefix;
+			string prefix;
 			
 			long gen;
 			if (normGen == null)
@@ -502,7 +550,7 @@
 		/// <summary> Returns true if this segment is stored as a compound
 		/// file; else, false.
 		/// </summary>
-		internal bool GetUseCompoundFile()
+        public /* changed for zoie 1.3.0: internal */ bool GetUseCompoundFile()
 		{
 			if (isCompoundFile == NO)
 			{
@@ -517,7 +565,29 @@
 				return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
 			}
 		}
-		
+
+        public /* changed for zoie 1.3.0: internal */  int GetDelCount()
+        {
+            if (delCount == -1)
+            {
+                if (HasDeletions())
+                {
+                    string delFileName = GetDelFileName();
+                    delCount = new BitVector(dir, delFileName).Count();
+                }
+                else
+                    delCount = 0;
+            }
+            System.Diagnostics.Debug.Assert(delCount <= docCount);
+            return delCount;
+        }
+
+        internal void SetDelCount(int delCount)
+        {
+            this.delCount = delCount;
+            System.Diagnostics.Debug.Assert(delCount <= docCount);
+        }
+
 		internal int GetDocStoreOffset()
 		{
 			return docStoreOffset;
@@ -534,7 +604,7 @@
 			ClearFiles();
 		}
 		
-		internal System.String GetDocStoreSegment()
+		internal string GetDocStoreSegment()
 		{
 			return docStoreSegment;
 		}
@@ -571,10 +641,24 @@
 					output.WriteLong(normGen[j]);
 				}
 			}
-			output.WriteByte((byte) isCompoundFile);
+            output.WriteByte((byte)isCompoundFile);
+            output.WriteInt(delCount);
+            output.WriteByte((byte)(hasProx ? 1 : 0));
 		}
+
+        internal void SetHasProx(bool hasProx)
+        {
+            this.hasProx = hasProx;
+            ClearFiles();
+        }
+
+        internal bool GetHasProx()
+        {
+            return hasProx;
+        }
+
 		
-		private void  AddIfExists(System.Collections.IList files, System.String fileName)
+		private void  AddIfExists(System.Collections.Generic.List<string> files, string fileName)
 		{
 			if (dir.FileExists(fileName))
 				files.Add(fileName);
@@ -586,7 +670,7 @@
 		* modify it.
 		*/
 		
-		public System.Collections.IList Files()
+		public List<string> Files()
 		{
 			
 			if (files != null)
@@ -595,7 +679,7 @@
 				return files;
 			}
 			
-			files = new System.Collections.ArrayList();
+			files = new List<string>();
 			
 			bool useCompoundFile = GetUseCompoundFile();
 			
@@ -605,7 +689,7 @@
 			}
 			else
 			{
-				System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
+				string[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
 				for (int i = 0; i < exts.Length; i++)
 					AddIfExists(files, name + "." + exts[i]);
 			}
@@ -621,7 +705,7 @@
 				}
 				else
 				{
-					System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+					string[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
 					for (int i = 0; i < exts.Length; i++)
 						AddIfExists(files, docStoreSegment + "." + exts[i]);
 				}
@@ -630,12 +714,12 @@
 			{
 				// We are not sharing, and, these files were not
 				// included in the compound file
-				System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
+				string[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
 				for (int i = 0; i < exts.Length; i++)
 					AddIfExists(files, name + "." + exts[i]);
 			}
-			
-			System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
+            string delFileName = this.GetDelFileName();
+
 			if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
 			{
 				files.Add(delFileName);
@@ -658,7 +742,7 @@
 						// in the non compound file case:
 						if (!hasSingleNormFile && !useCompoundFile)
 						{
-							System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
+							string fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
 							if (dir.FileExists(fileName))
 							{
 								files.Add(fileName);
@@ -668,7 +752,7 @@
 					else if (CHECK_DIR == gen)
 					{
 						// Pre-2.1: we have to check file existence
-						System.String fileName = null;
+						string fileName = null;
 						if (useCompoundFile)
 						{
 							fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
@@ -688,20 +772,20 @@
 			{
 				// Pre-2.1: we have to scan the dir to find all
 				// matching _X.sN/_X.fN files for our segment:
-				System.String prefix;
+				string prefix;
 				if (useCompoundFile)
 					prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
 				else
 					prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
 				int prefixLength = prefix.Length;
-				System.String[] allFiles = dir.List();
+				string[] allFiles = dir.List();
 				if (allFiles == null)
 				{
 					throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
 				}
 				for (int i = 0; i < allFiles.Length; i++)
 				{
-					System.String fileName = allFiles[i];
+					string fileName = allFiles[i];
 					if (fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix))
 					{
 						files.Add(fileName);
@@ -720,9 +804,9 @@
 		}
 		
 		/// <summary>Used for debugging </summary>
-		public System.String SegString(Directory dir)
+		public string SegString(Directory dir)
 		{
-			System.String cfs;
+			string cfs;
 			try
 			{
 				if (GetUseCompoundFile())
@@ -730,12 +814,12 @@
 				else
 					cfs = "C";
 			}
-			catch (System.IO.IOException ioe)
+			catch (System.IO.IOException)
 			{
 				cfs = "?";
 			}
 			
-			System.String docStore;
+			string docStore;
 			
 			if (docStoreOffset != - 1)
 				docStore = "->" + docStoreSegment;
@@ -748,14 +832,14 @@
 		/// <summary>We consider another SegmentInfo instance equal if it
 		/// has the same dir and same name. 
 		/// </summary>
-		public  override bool Equals(System.Object obj)
+		public  override bool Equals(object obj)
 		{
 			SegmentInfo other;
 			try
 			{
 				other = (SegmentInfo) obj;
 			}
-			catch (System.InvalidCastException cce)
+			catch (System.InvalidCastException)
 			{
 				return false;
 			}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentInfos.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentInfos.cs Wed Jul 29 18:04:12 2009
@@ -17,6 +17,8 @@
 
 using System;
 
+using ChecksumIndexInput = Lucene.Net.Store.ChecksumIndexInput;
+using ChecksumIndexOutput = Lucene.Net.Store.ChecksumIndexOutput;
 using Directory = Lucene.Net.Store.Directory;
 using IndexInput = Lucene.Net.Store.IndexInput;
 using IndexOutput = Lucene.Net.Store.IndexOutput;
@@ -27,39 +29,12 @@
 	[Serializable]
 	sealed public class SegmentInfos : System.Collections.ArrayList
 	{
-		private class AnonymousClassFindSegmentsFile : FindSegmentsFile
-		{
-			private void  InitBlock(SegmentInfos enclosingInstance)
-			{
-				this.enclosingInstance = enclosingInstance;
-			}
-			private SegmentInfos enclosingInstance;
-			public SegmentInfos Enclosing_Instance
-			{
-				get
-				{
-					return enclosingInstance;
-				}
-				
-			}
-			internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1) : base(Param1)
-			{
-				InitBlock(enclosingInstance);
-			}
-			
-			protected internal override System.Object DoBody(System.String segmentFileName)
-			{
-				Enclosing_Instance.Read(directory, segmentFileName);
-				return null;
-			}
-		}
-
 		private class AnonymousClassFindSegmentsFile1 : FindSegmentsFile
 		{
 			internal AnonymousClassFindSegmentsFile1(Lucene.Net.Store.Directory Param1) : base(Param1)
 			{
 			}
-			protected internal override System.Object DoBody(System.String segmentFileName)
+			protected internal override object DoBody(System.String segmentFileName)
 			{
 				
 				IndexInput input = directory.OpenInput(segmentFileName);
@@ -118,9 +93,22 @@
 		/// vectors and stored fields file. 
 		/// </summary>
 		public const int FORMAT_SHARED_DOC_STORE = - 4;
-		
-		/* This must always point to the most recent file format. */
-		private static readonly int CURRENT_FORMAT = FORMAT_SHARED_DOC_STORE;
+
+        /// <summary> This format adds a checksum at the end of the file to
+        /// ensure all bytes were successfully written.</summary>
+        public const int FORMAT_CHECKSUM = -5;
+
+        /// <summary> This format adds the deletion count for each segment.
+        /// This way IndexWriter can efficiently report numDocs().</summary>
+        public const int FORMAT_DEL_COUNT = -6;
+
+        /// <summary> This format adds the boolean hasProx to record if any
+        /// fields in the segment store prox information (ie, have
+        /// omitTf==false)</summary>
+        public const int FORMAT_HAS_PROX = -7;
+
+        /* This must always point to the most recent file format. */
+        public static readonly int CURRENT_FORMAT = FORMAT_HAS_PROX;
 		
 		public int counter = 0; // used to name new segments
 		/// <summary> counts how often the index has been changed by adding or deleting docs.
@@ -269,7 +257,7 @@
 			// Clear any previous segments:
 			Clear();
 			
-			IndexInput input = directory.OpenInput(segmentFileName);
+			ChecksumIndexInput input = new ChecksumIndexInput(directory.OpenInput(segmentFileName));
 			
 			generation = GenerationFromSegmentsFileName(segmentFileName);
 			
@@ -308,6 +296,14 @@
 					else
 						version = input.ReadLong(); // read version
 				}
+
+                if (format <= FORMAT_CHECKSUM)
+                {
+                    long checksumNow = input.GetChecksum();
+                    long checksumThen = input.ReadLong();
+                    if (checksumNow != checksumThen)
+                        throw new CorruptIndexException("checksum mismatch in segments file");
+                }
 				success = true;
 			}
 			finally
@@ -334,98 +330,121 @@
 			
 			new AnonymousClassFindSegmentsFile(this, directory).Run();
 		}
-		
-		public void  Write(Directory directory)
-		{
-			
-			System.String segmentFileName = GetNextSegmentFileName();
-			
-			// Always advance the generation on write:
-			if (generation == - 1)
-			{
-				generation = 1;
-			}
-			else
-			{
-				generation++;
-			}
-			
-			IndexOutput output = directory.CreateOutput(segmentFileName);
-			
-			bool success = false;
-			
-			try
-			{
-				output.WriteInt(CURRENT_FORMAT); // write FORMAT
-				output.WriteLong(++version); // every write changes
-				// the index
-				output.WriteInt(counter); // write counter
-				output.WriteInt(Count); // write infos
-				for (int i = 0; i < Count; i++)
-				{
-					Info(i).Write(output);
-				}
-			}
-			finally
-			{
-				try
-				{
-					output.Close();
-					success = true;
-				}
-				finally
-				{
-					if (!success)
-					{
-						// Try not to leave a truncated segments_N file in
-						// the index:
-						directory.DeleteFile(segmentFileName);
-					}
-				}
-			}
-			
-			try
-			{
-				output = directory.CreateOutput(IndexFileNames.SEGMENTS_GEN);
-				try
-				{
-					output.WriteInt(FORMAT_LOCKLESS);
-					output.WriteLong(generation);
-					output.WriteLong(generation);
-				}
-				finally
-				{
-					output.Close();
-				}
-			}
-			catch (System.IO.IOException e)
-			{
-				// It's OK if we fail to write this file since it's
-				// used only as one of the retry fallbacks.
-			}
-			
-			lastGeneration = generation;
-		}
+
+        private class AnonymousClassFindSegmentsFile : FindSegmentsFile
+        {
+            private void InitBlock(SegmentInfos enclosingInstance)
+            {
+                this.enclosingInstance = enclosingInstance;
+            }
+            private SegmentInfos enclosingInstance;
+            public SegmentInfos Enclosing_Instance
+            {
+                get
+                {
+                    return enclosingInstance;
+                }
+
+            }
+            internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1)
+                : base(Param1)
+            {
+                InitBlock(enclosingInstance);
+            }
+
+            protected internal override object DoBody(System.String segmentFileName)
+            {
+                Enclosing_Instance.Read(directory, segmentFileName);
+                return null;
+            }
+        }
+
+        // only non-null after PrepareCommit has been called and before FinishCommit is called
+        internal ChecksumIndexOutput pendingOutput;
+
+        private void Write(Directory directory)
+        {
+
+            System.String segmentFileName = GetNextSegmentFileName();
+
+            // Always advance the generation on write:
+            if (generation == -1)
+            {
+                generation = 1;
+            }
+            else
+            {
+                generation++;
+            }
+
+            ChecksumIndexOutput output = new ChecksumIndexOutput(directory.CreateOutput(segmentFileName));
+
+            bool success = false;
+
+            try
+            {
+                output.WriteInt(CURRENT_FORMAT); // write FORMAT
+                output.WriteLong(++version); // every write changes
+                // the index
+                output.WriteInt(counter); // write counter
+                output.WriteInt(Count); // write infos
+                for (int i = 0; i < Count; i++)
+                {
+                    Info(i).Write(output);
+                }
+                output.PrepareCommit();
+                success = true;
+                pendingOutput = output;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    // we hit an exception above; try to close the file but suppress any exception:
+                    try
+                    {
+                        output.Close();
+                    }
+                    catch (System.Exception)
+                    {
+                        // suppress so we keep throwing the original exception
+                    }
+                    try
+                    {
+                        // try not to leave a truncated segments_N file in the index
+                        directory.DeleteFile(segmentFileName);
+                    }
+                    catch (System.Exception)
+                    {
+                        // suppress so we keep throwing the original exception
+                    }
+                }
+            }
+        }
+
 		
 		/// <summary> Returns a copy of this instance, also copying each
 		/// SegmentInfo.
 		/// </summary>
 		
-		public override System.Object Clone()
+		public override object Clone()
 		{
-			SegmentInfos si = new SegmentInfos();
-			for (int i = 0; i < base.Count; i++)
-			{
-				si.Add(((SegmentInfo) base[i]).Clone());
-			}
-			si.generation = this.generation;
-			si.lastGeneration = this.lastGeneration;
-			return si;
+            SegmentInfos si = new SegmentInfos();
+            for (int i = 0; i < base.Count; i++)
+            {
+                si.Add(((SegmentInfo)base[i]).Clone());
+            }
+            si.counter = this.counter;
+            si.version = this.version;
+            si.generation = this.generation;
+            si.lastGeneration = this.lastGeneration;
+            return si;
 		}
 
-        private SegmentInfos(SegmentInfos si) : base(si)
-        {
-        }
+        //private SegmentInfos(SegmentInfos si)
+        //    : base(si)
+        //{
+        //}
 
         public SegmentInfos()
         {
@@ -556,7 +575,7 @@
 				this.directory = directory;
 			}
 			
-			public System.Object Run()
+			public object Run()
 			{
 				System.String segmentFileName = null;
 				long lastGen = - 1;
@@ -657,7 +676,7 @@
 											}
 										}
 									}
-									catch (System.IO.IOException err2)
+									catch (System.IO.IOException)
 									{
 										// will retry
 									}
@@ -670,7 +689,7 @@
 								{
 									System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec));
 								}
-								catch (System.Threading.ThreadInterruptedException e)
+								catch (System.Threading.ThreadInterruptedException)
 								{
 									// will retry
 								}
@@ -739,7 +758,7 @@
 							retry = true;
 						}
 					}
-					else
+					else if (0 == method)
 					{
 						// Segment file has advanced since our last loop, so
 						// reset retry:
@@ -752,7 +771,7 @@
 					
 					try
 					{
-						System.Object v = DoBody(segmentFileName);
+						object v = DoBody(segmentFileName);
 						if (exc != null)
 						{
 							Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName);
@@ -786,53 +805,205 @@
 							else
 							{
 								bool tmpBool;
-                                if (System.IO.File.Exists(new System.IO.FileInfo(fileDirectory.FullName + System.IO.Path.DirectorySeparatorChar + prevSegmentFileName).FullName))
+								if (System.IO.File.Exists(new System.IO.FileInfo(fileDirectory.FullName + "\\" + prevSegmentFileName).FullName))
 									tmpBool = true;
 								else
-                                    tmpBool = System.IO.Directory.Exists(new System.IO.FileInfo(fileDirectory.FullName + System.IO.Path.DirectorySeparatorChar + prevSegmentFileName).FullName);
+									tmpBool = System.IO.Directory.Exists(new System.IO.FileInfo(fileDirectory.FullName + "\\" + prevSegmentFileName).FullName);
 								prevExists = tmpBool;
 							}
 							
 							if (prevExists)
-							{
-								Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
-								try
-								{
-									System.Object v = DoBody(prevSegmentFileName);
-									if (exc != null)
-									{
-										Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
-									}
-									return v;
-								}
-								catch (System.IO.IOException err2)
-								{
-									Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
-								}
-							}
-						}
-					}
-				}
-			}
-			
-			/// <summary> Subclass must implement this.  The assumption is an
-			/// IOException will be thrown if something goes wrong
-			/// during the processing that could have been caused by
-			/// a writer committing.
-			/// </summary>
-			protected internal abstract System.Object DoBody(System.String segmentFileName);
-		}
-		
-		/// <summary> Returns a new SegmentInfos containg the SegmentInfo
-		/// instances in the specified range first (inclusive) to
-		/// last (exclusive), so total number of segments returned
-		/// is last-first.
-		/// </summary>
-		public SegmentInfos Range(int first, int last)
-		{
-			SegmentInfos infos = new SegmentInfos();
-			infos.AddRange((System.Collections.IList) ((System.Collections.ArrayList) this).GetRange(first, last - first));
-			return infos;
-		}
-	}
+                            {
+                                Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'");
+                                try
+                                {
+                                    object v = DoBody(prevSegmentFileName);
+                                    if (exc != null)
+                                    {
+                                        Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName);
+                                    }
+                                    return v;
+                                }
+                                catch (System.IO.IOException err2)
+                                {
+                                    Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            /// <summary> Subclass must implement this.  The assumption is an
+            /// IOException will be thrown if something goes wrong
+            /// during the processing that could have been caused by
+            /// a writer committing.
+            /// </summary>
+            protected internal abstract object DoBody(System.String segmentFileName);
+        }
+
+        /// <summary> Returns a new SegmentInfos containing the SegmentInfo
+        /// instances in the specified range first (inclusive) to
+        /// last (exclusive), so total number of segments returned
+        /// is last-first.
+        /// </summary>
+        public SegmentInfos Range(int first, int last)
+        {
+            SegmentInfos infos = new SegmentInfos();
+            infos.AddRange((System.Collections.IList)((System.Collections.ArrayList)this).GetRange(first, last - first));
+            return infos;
+        }
+
+        // carry over generation numbers and the version from another SegmentInfos
+        internal void UpdateGeneration(SegmentInfos other)
+        {
+            lastGeneration = other.lastGeneration;
+            generation = other.generation;
+            version = other.version;
+        }
+
+        public void RollbackCommit(Directory dir)
+        {
+            if (pendingOutput != null)
+            {
+                try
+                {
+                    pendingOutput.Close();
+                }
+                catch (System.Exception)
+                {
+                    // Suppress so we keep throwing the original exception
+                    // in our caller
+                }
+
+                // Must carefully compute fileName from "generation"
+                // since lastGeneration isn't incremented:
+                try
+                {
+                    String segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
+                    dir.DeleteFile(segmentFileName);
+                }
+                catch (System.Exception)
+                {
+                    // Suppress so we keep throwing the original exception
+                    // in our caller
+                }
+                pendingOutput = null;
+            }
+        }
+
+        /// <summary> Call this to start a commit.  This writes the new
+        /// segments file, but writes an invalid checksum at the
+        /// end, so that it is not visible to readers.  Once this
+        /// is called you must call <see cref="FinishCommit(Directory)"/> to complete
+        /// the commit or <see cref="RollbackCommit(Directory)"/> to abort it. </summary>
+        public void PrepareCommit(Directory dir)
+        {
+            if (pendingOutput != null)
+                throw new System.Exception("prepareCommit was already called");
+            Write(dir);
+        }
+
+        public void FinishCommit(Directory dir)
+        {
+            if (pendingOutput == null)
+                throw new System.Exception("prepareCommit was not called");
+            bool success = false;
+            try
+            {
+                pendingOutput.FinishCommit();
+                pendingOutput.Close();
+                pendingOutput = null;
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                    RollbackCommit(dir);
+            }
+
+            // NOTE: if we crash here, we have left a segments_N
+            // file in the directory in a possibly corrupt state (if
+            // some bytes made it to stable storage and others
+            // didn't).  But, the segments_N file includes checksum
+            // at the end, which should catch this case.  So when a
+            // reader tries to read it, it will throw a
+            // CorruptIndexException, which should cause the retry
+            // logic in SegmentInfos to kick in and load the last
+            // good (previous) segments_N-1 file.
+
+            String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
+            success = false;
+            try
+            {
+                dir.Sync(fileName);
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    try
+                    {
+                        dir.DeleteFile(fileName);
+                    }
+                    catch (System.Exception)
+                    {
+                        // Suppress so we keep throwing the original exception
+                    }
+                }
+            }
+
+            lastGeneration = generation;
+
+            try
+            {
+                IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN);
+                try
+                {
+                    genOutput.WriteInt(FORMAT_LOCKLESS);
+                    genOutput.WriteLong(generation);
+                    genOutput.WriteLong(generation);
+                }
+                finally
+                {
+                    genOutput.Close();
+                }
+            }
+            catch (System.Exception)
+            {
+                // It's OK if we fail to write this file since it's
+                // used only as one of the retry fallbacks.
+            }
+        }
+
+        /// <summary> Writes and syncs to the Directory dir, taking care to
+        /// remove the segments file on exception </summary>
+        public void Commit(Directory dir)
+        {
+            PrepareCommit(dir);
+            FinishCommit(dir);
+        }
+
+        internal string SegString(Directory directory)
+        {
+            lock (this)
+            {
+                System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+                int count = Count;
+                for (int i = 0; i < count; i++)
+                {
+                    if (i > 0)
+                    {
+                        buffer.Append(' ');
+                    }
+                    SegmentInfo info = Info(i);
+                    buffer.Append(info.SegString(directory));
+                    if (info.dir != directory)
+                        buffer.Append("**");
+                }
+                return buffer.ToString();
+            }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMergeQueue.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMergeQueue.cs Wed Jul 29 18:04:12 2009
@@ -29,7 +29,7 @@
 			Initialize(size);
 		}
 		
-		public override bool LessThan(System.Object a, System.Object b)
+		public override bool LessThan(object a, object b)
 		{
 			SegmentMergeInfo stiA = (SegmentMergeInfo) a;
 			SegmentMergeInfo stiB = (SegmentMergeInfo) b;

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/SegmentMerger.cs?rev=798995&r1=798994&r2=798995&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/SegmentMerger.cs Wed Jul 29 18:04:12 2009
@@ -15,11 +15,12 @@
  * limitations under the License.
  */
 
-using System;
+using System.Collections.Generic;
 
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
 using FieldSelector = Lucene.Net.Documents.FieldSelector;
 using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
-using Directory = Lucene.Net.Store.Directory;
 using IndexInput = Lucene.Net.Store.IndexInput;
 using IndexOutput = Lucene.Net.Store.IndexOutput;
 
@@ -40,31 +41,6 @@
 	/// </seealso>
 	public sealed class SegmentMerger
 	{
-		[Serializable]
-		private class AnonymousClassFieldSelector : FieldSelector
-		{
-			public AnonymousClassFieldSelector(SegmentMerger enclosingInstance)
-			{
-				InitBlock(enclosingInstance);
-			}
-			private void  InitBlock(SegmentMerger enclosingInstance)
-			{
-				this.enclosingInstance = enclosingInstance;
-			}
-			private SegmentMerger enclosingInstance;
-			public SegmentMerger Enclosing_Instance
-			{
-				get
-				{
-					return enclosingInstance;
-				}
-				
-			}
-			public FieldSelectorResult Accept(System.String fieldName)
-			{
-				return FieldSelectorResult.LOAD_FOR_MERGE;
-			}
-		}
 		private void  InitBlock()
 		{
 			termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
@@ -118,6 +94,11 @@
 				checkAbort = new CheckAbort(merge, directory);
 			termIndexInterval = writer.GetTermIndexInterval();
 		}
+
+        internal bool HasProx()
+        {
+            return fieldInfos.HasProx();
+        }
 		
 		/// <summary> Add an IndexReader to the collection of readers that are to be merged</summary>
 		/// <param name="reader">
@@ -203,6 +184,10 @@
 			for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++)
 			{
 				System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
+
+                if (ext.Equals(IndexFileNames.PROX_EXTENSION) && !HasProx())
+                    continue;
+
 				if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
 					files.Add(segment + "." + ext);
 			}
@@ -240,17 +225,52 @@
 			return files;
 		}
 		
-		private void  AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads)
+		private void  AddIndexed(IndexReader reader, FieldInfos fieldInfos, ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTf)
 		{
-			System.Collections.IEnumerator i = names.GetEnumerator();
+			IEnumerator<string> i = names.GetEnumerator();
 			while (i.MoveNext())
 			{
-				System.String field = (System.String) i.Current;
-				fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads);
+				string field = i.Current;
+				fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTf);
 			}
 		}
-		
-		/// <summary> </summary>
+
+        private SegmentReader[] matchingSegmentReaders;
+        private int[] rawDocLengths;
+        private int[] rawDocLengths2;
+
+        private void SetMatchingSegmentReaders()
+        {
+            // if the i'th reader is a SegmentReader and has
+            // identical fieldName->number mapping, then this
+            // array will be non-null at position i:
+            matchingSegmentReaders = new SegmentReader[readers.Count];
+
+            // if this reader is a SegmentReader, and all of its
+            // fieldName->number mappings match the "merged"
+            // FieldInfos, then we can do a bulk copy of the
+            // stored fields
+            for (int i = 0; i < readers.Count; i++)
+            {
+                IndexReader reader = (IndexReader)readers[i];
+                if (reader is SegmentReader)
+                {
+                    SegmentReader segmentReader = (SegmentReader)reader;
+                    bool same = true;
+                    FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos();
+                    for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
+                        same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
+                    if (same)
+                        matchingSegmentReaders[i] = segmentReader;
+                }
+            }
+
+            // used for bulk-reading raw bytes for stored fields
+            rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
+            rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
+        }
+
+        /// <summary> </summary>
 		/// <returns> The number of documents in all of the readers
 		/// </returns>
 		/// <throws>  CorruptIndexException if the index is corrupt </throws>
@@ -283,167 +303,272 @@
 					for (int j = 0; j < segmentReader.GetFieldInfos().Size(); j++)
 					{
 						FieldInfo fi = segmentReader.GetFieldInfos().FieldInfo(j);
-						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads);
+						fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTf);
 					}
 				}
 				else
 				{
-					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
-					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
-					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
-					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
-					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
-					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
+					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false);
+					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false);
+					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false);
+                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false);
+                    AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true);
+					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false);
+					AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false);
 					fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
 				}
 			}
 			fieldInfos.Write(directory, segment + ".fnm");
 			
 			int docCount = 0;
-			
+
+            SetMatchingSegmentReaders();
+
 			if (mergeDocStores)
 			{
-				
-				// If the i'th reader is a SegmentReader and has
-				// identical fieldName -> number mapping, then this
-				// array will be non-null at position i:
-				SegmentReader[] matchingSegmentReaders = new SegmentReader[readers.Count];
-				
-				// If this reader is a SegmentReader, and all of its
-				// field name -> number mappings match the "merged"
-				// FieldInfos, then we can do a bulk copy of the
-				// stored fields:
-				for (int i = 0; i < readers.Count; i++)
-				{
-					IndexReader reader = (IndexReader) readers[i];
-					if (reader is SegmentReader)
-					{
-						SegmentReader segmentReader = (SegmentReader) reader;
-						bool same = true;
-						FieldInfos segmentFieldInfos = segmentReader.GetFieldInfos();
-						for (int j = 0; same && j < segmentFieldInfos.Size(); j++)
-							same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j));
-						if (same)
-						{
-							matchingSegmentReaders[i] = segmentReader;
-						}
-					}
-				}
-				
-				// Used for bulk-reading raw bytes for stored fields
-				int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
-				
 				// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
 				// in  merge mode, we use this FieldSelector
 				FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
 				
 				// merge field values
 				FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
-				
-				try
-				{
-					for (int i = 0; i < readers.Count; i++)
-					{
-						IndexReader reader = (IndexReader) readers[i];
-						SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
-						FieldsReader matchingFieldsReader;
-						if (matchingSegmentReader != null)
-							matchingFieldsReader = matchingSegmentReader.GetFieldsReader();
-						else
-							matchingFieldsReader = null;
-						int maxDoc = reader.MaxDoc();
-						for (int j = 0; j < maxDoc; )
-						{
-							if (!reader.IsDeleted(j))
-							{
-								// skip deleted docs
-								if (matchingSegmentReader != null)
-								{
-									// We can optimize this case (doing a bulk
-									// byte copy) since the field numbers are
-									// identical
-									int start = j;
-									int numDocs = 0;
-									do 
-									{
-										j++;
-										numDocs++;
-									}
-									while (j < maxDoc && !matchingSegmentReader.IsDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);
-									
-									IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
-									fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
-									docCount += numDocs;
-									if (checkAbort != null)
-										checkAbort.Work(300 * numDocs);
-								}
-								else
-								{
-									fieldsWriter.AddDocument(reader.Document(j, fieldSelectorMerge));
-									j++;
-									docCount++;
-									if (checkAbort != null)
-										checkAbort.Work(300);
-								}
-							}
-							else
-								j++;
-						}
-					}
-				}
-				finally
-				{
-					fieldsWriter.Close();
-				}
 
-                System.Diagnostics.Debug.Assert(docCount*8 == directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION),
-                    "after MergeFields: fdx size mismatch: " + docCount + " docs vs " + 
-                    directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION) +
-                    " length in bytes of " + segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); 
-			}
-			// If we are skipping the doc stores, that means there
-			// are no deletions in any of these segments, so we
-			// just sum numDocs() of each segment to get total docCount
-			else
-				for (int i = 0; i < readers.Count; i++)
-					docCount += ((IndexReader) readers[i]).NumDocs();
-			
-			return docCount;
-		}
-		
-		/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
-		/// <throws>  IOException </throws>
-		private void  MergeVectors()
-		{
-			TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
-			
-			try
-			{
-				for (int r = 0; r < readers.Count; r++)
-				{
-					IndexReader reader = (IndexReader) readers[r];
-					int maxDoc = reader.MaxDoc();
-					for (int docNum = 0; docNum < maxDoc; docNum++)
-					{
-						// skip deleted docs
-						if (reader.IsDeleted(docNum))
-							continue;
-						termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
-						if (checkAbort != null)
-							checkAbort.Work(300);
-					}
-				}
-			}
-			finally
-			{
-				termVectorsWriter.Close();
-			}
+                try
+                {
+                    for (int i = 0; i < readers.Count; i++)
+                    {
+                        IndexReader reader = (IndexReader)readers[i];
+                        SegmentReader matchingSegmentReader = matchingSegmentReaders[i];
+                        FieldsReader matchingFieldsReader;
+                        bool hasMatchingReader;
+                        if (matchingSegmentReader != null)
+                        {
+                            FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader();
+                            if (fieldsReader != null && !fieldsReader.CanReadRawDocs())
+                            {
+                                matchingFieldsReader = null;
+                                hasMatchingReader = false;
+                            }
+                            else
+                            {
+                                matchingFieldsReader = fieldsReader;
+                                hasMatchingReader = true;
+                            }
+                        }
+                        else
+                        {
+                            hasMatchingReader = false;
+                            matchingFieldsReader = null;
+                        }
+                        int maxDoc = reader.MaxDoc();
+                        bool hasDeletions = reader.HasDeletions();
+                        for (int j = 0; j < maxDoc; )
+                        {
+                            if (!hasDeletions || !reader.IsDeleted(j))
+                            { // skip deleted docs
+                                if (hasMatchingReader)
+                                {
+                                    // We can optimize this case (doing a bulk
+                                    // byte copy) since the field numbers are
+                                    // identical
+                                    int start = j;
+                                    int numDocs = 0;
+                                    do
+                                    {
+                                        j++;
+                                        numDocs++;
+                                        if (j >= maxDoc)
+                                            break;
+                                        if (hasDeletions && matchingSegmentReader.IsDeleted(j))
+                                        {
+                                            j++;
+                                            break;
+                                        }
+                                    } while (numDocs < MAX_RAW_MERGE_DOCS);
 
-            System.Diagnostics.Debug.Assert(4 + mergedDocs * 8 == directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION),
-                "after MergeVectors: tvx size mismatch: " + mergedDocs + " docs vs " +
-                directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION) +
-                " length in bytes of " + segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
-		}
+                                    IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs);
+                                    fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs);
+                                    docCount += numDocs;
+                                    if (checkAbort != null)
+                                        checkAbort.Work(300 * numDocs);
+                                }
+                                else
+                                {
+                                    // NOTE: it's very important to first assign
+                                    // to doc then pass it to
+                                    // fieldsWriter.AddDocument; see
+                                    // LUCENE-1282
+                                    Document doc = reader.Document(j, fieldSelectorMerge);
+                                    fieldsWriter.AddDocument(doc);
+                                    j++;
+                                    docCount++;
+                                    if (checkAbort != null)
+                                        checkAbort.Work(300);
+                                }
+                            }
+                            else
+                                j++;
+                        }
+                    }
+                }
+                finally
+                {
+                    fieldsWriter.Close();
+                }
+
+                long fdxFileLength = directory.FileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION);
+
+                // {{dougsale-2.4.0}
+                // this shouldn't be a problem for us - if it is,
+                // then it's not a JRE bug...
+                //if (4+docCount*8 != fdxFileLength)
+                //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+                //  // we detect that the bug has struck, here, and
+                //  // throw an exception to prevent the corruption from
+                //  // entering the index.  See LUCENE-1282 for
+                //  // details.
+                //  throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption");
+
+            }
+            else
+                // If we are skipping the doc stores, that means there
+                // are no deletions in any of these segments, so we
+                // just sum numDocs() of each segment to get total docCount
+                for (int i = 0; i < readers.Count; i++)
+                    docCount += ((IndexReader)readers[i]).NumDocs();
+
+            return docCount;
+        }
+
+        [System.Serializable]
+        private class AnonymousClassFieldSelector : FieldSelector
+        {
+            public AnonymousClassFieldSelector(SegmentMerger enclosingInstance)
+            {
+                InitBlock(enclosingInstance);
+            }
+            private void InitBlock(SegmentMerger enclosingInstance)
+            {
+                this.enclosingInstance = enclosingInstance;
+            }
+            private SegmentMerger enclosingInstance;
+            public SegmentMerger Enclosing_Instance
+            {
+                get
+                {
+                    return enclosingInstance;
+                }
+
+            }
+            public FieldSelectorResult Accept(System.String fieldName)
+            {
+                return FieldSelectorResult.LOAD_FOR_MERGE;
+            }
+        }
+
+        /// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
+        /// <throws>  IOException </throws>
+        private void MergeVectors()
+        {
+            TermVectorsWriter termVectorsWriter =
+              new TermVectorsWriter(directory, segment, fieldInfos);
+
+            try
+            {
+                for (int r = 0; r < readers.Count; r++)
+                {
+                    SegmentReader matchingSegmentReader = matchingSegmentReaders[r];
+                    TermVectorsReader matchingVectorsReader;
+                    bool hasMatchingReader;
+                    if (matchingSegmentReader != null)
+                    {
+                        matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig;
+
+                        // If the TV* files are an older format then they
+                        // cannot read raw docs:
+                        if (matchingVectorsReader != null && !matchingVectorsReader.CanReadRawDocs())
+                        {
+                            matchingVectorsReader = null;
+                            hasMatchingReader = false;
+                        }
+                        else
+                            hasMatchingReader = matchingVectorsReader != null;
+
+                    }
+                    else
+                    {
+                        hasMatchingReader = false;
+                        matchingVectorsReader = null;
+                    }
+                    IndexReader reader = (IndexReader)readers[r];
+                    bool hasDeletions = reader.HasDeletions();
+                    int maxDoc = reader.MaxDoc();
+                    for (int docNum = 0; docNum < maxDoc; )
+                    {
+                        // skip deleted docs
+                        if (!hasDeletions || !reader.IsDeleted(docNum))
+                        {
+                            if (hasMatchingReader)
+                            {
+                                // We can optimize this case (doing a bulk
+                                // byte copy) since the field numbers are
+                                // identical
+                                int start = docNum;
+                                int numDocs = 0;
+                                do
+                                {
+                                    docNum++;
+                                    numDocs++;
+                                    if (docNum >= maxDoc)
+                                        break;
+                                    if (hasDeletions && matchingSegmentReader.IsDeleted(docNum))
+                                    {
+                                        docNum++;
+                                        break;
+                                    }
+                                } while (numDocs < MAX_RAW_MERGE_DOCS);
+
+                                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
+                                termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
+                                if (checkAbort != null)
+                                    checkAbort.Work(300 * numDocs);
+                            }
+                            else
+                            {
+                                // NOTE: it's very important to first assign
+                                // to vectors then pass it to
+                                // termVectorsWriter.AddAllDocVectors; see
+                                // LUCENE-1282
+                                TermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
+                                termVectorsWriter.AddAllDocVectors(vectors);
+                                docNum++;
+                                if (checkAbort != null)
+                                    checkAbort.Work(300);
+                            }
+                        }
+                        else
+                            docNum++;
+                    }
+                }
+            }
+            finally
+            {
+                termVectorsWriter.Close();
+            }
+
+            long tvxSize = directory.FileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
+
+            // {{dougsale-2.4.0}
+            // this shouldn't be a problem for us - if it is,
+            // then it's not a JRE bug
+            //if (4 + mergedDocs * 16 != tvxSize)
+            //  // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+            //  // we detect that the bug has struck, here, and
+            //  // throw an exception to prevent the corruption from
+            //  // entering the index.  See LUCENE-1282 for
+            //  // details.
+            //  throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption");
+        }
 		
 		private IndexOutput freqOutput = null;
 		private IndexOutput proxOutput = null;
@@ -458,8 +583,9 @@
 			try
 			{
 				freqOutput = directory.CreateOutput(segment + ".frq");
-				proxOutput = directory.CreateOutput(segment + ".prx");
-				termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
+                if (HasProx())
+                    proxOutput = directory.CreateOutput(segment + ".prx");
+                termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
 				skipInterval = termInfosWriter.skipInterval;
 				maxSkipLevels = termInfosWriter.maxSkipLevels;
 				skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
@@ -483,15 +609,28 @@
 		private void  MergeTermInfos()
 		{
 			int base_Renamed = 0;
-			for (int i = 0; i < readers.Count; i++)
+            int readerCount = readers.Count;
+			for (int i = 0; i < readerCount; i++)
 			{
 				IndexReader reader = (IndexReader) readers[i];
 				TermEnum termEnum = reader.Terms();
 				SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
-				base_Renamed += reader.NumDocs();
+
+                int[] docMap = smi.GetDocMap();
+                if (docMap != null)
+                {
+                    if (docMaps == null)
+                    {
+                        docMaps = new int[readerCount][];
+                        delCounts = new int[readerCount];
+                    }
+                    docMaps[i] = docMap;
+                    delCounts[i] = smi.reader.MaxDoc() - smi.reader.NumDocs();
+                }
+
+                base_Renamed += reader.NumDocs();
 				if (smi.Next())
-					queue.Put(smi);
-				// initialize queue
+					queue.Put(smi);// initialize queue
 				else
 					smi.Close();
 			}
@@ -544,9 +683,22 @@
 		private int MergeTermInfo(SegmentMergeInfo[] smis, int n)
 		{
 			long freqPointer = freqOutput.GetFilePointer();
-			long proxPointer = proxOutput.GetFilePointer();
-			
-			int df = AppendPostings(smis, n); // append posting data
+			long proxPointer;
+            if (proxOutput != null)
+                proxPointer = proxOutput.GetFilePointer();
+            else
+                proxPointer = 0;
+			
+			int df;
+            if (fieldInfos.FieldInfo(smis[0].term.field).omitTf)
+            {
+                // append posting data
+                df = AppendPostingsNoTf(smis, n);
+            }
+            else
+            {
+                df = AppendPostings(smis, n);
+            }
 			
 			long skipPointer = skipListWriter.WriteSkip(freqOutput);
 			
@@ -560,7 +712,17 @@
 			return df;
 		}
 		
-		private byte[] payloadBuffer = null;
+		private byte[] payloadBuffer;
+        private int[][] docMaps; // docMaps[i] = GetDocMap() of reader i; allocated in MergeTermInfos only once a reader yields a non-null map
+        internal int[][] GetDocMaps() // accessor for docMaps; returns the field as-is (null if it was never allocated)
+        {
+            return docMaps;
+        }
+        private int[] delCounts; // delCounts[i] = MaxDoc() - NumDocs() of reader i; allocated together with docMaps in MergeTermInfos
+        internal int[] GetDelCounts() // accessor for delCounts; returns the field as-is (null if it was never allocated)
+        {
+            return delCounts;
+        }
 		
 		/// <summary>Process postings from multiple segments all positioned on the
 		/// same term. Writes out merged entries into freqOutput and
@@ -622,7 +784,7 @@
 						freqOutput.WriteVInt(freq); // write frequency in doc
 					}
 					
-					/** See {@link DocumentWriter#writePostings(Posting[], String) for 
+					/** See {@link DocumentWriter#writePostings(Posting[], String)} for 
 					*  documentation about the encoding of positions and payloads
 					*/
 					int lastPosition = 0; // write position deltas
@@ -663,8 +825,56 @@
 			}
 			return df;
 		}
-		
-		private void  MergeNorms()
+
+        /// <summary>
+        /// Process postings from multiple segments without tf, all positioned on the same term.
+        /// Writes merged entries (doc deltas only) into freqOutput; proxOutput is not written.
+        /// </summary>
+        /// <param name="smis">array of segment merge infos whose postings are merged, in order</param>
+        /// <param name="n">number of cells in the array actually occupied</param>
+        /// <returns>number of documents written for the term (df)</returns>
+        private int AppendPostingsNoTf(SegmentMergeInfo[] smis, int n)
+        {
+            int lastDoc = 0;      // previous doc written (merged numbering); base for the delta
+            int df = 0;           // number of docs w/ term
+            skipListWriter.ResetSkip();
+            int lastPayloadLength = -1;   // never updated in this method; passed unchanged to SetSkipData
+            for (int i = 0; i < n; i++)
+            {
+                SegmentMergeInfo smi = smis[i];
+                TermPositions postings = smi.GetPositions();
+                System.Diagnostics.Debug.Assert(postings != null);
+                int base_Renamed = smi.base_Renamed;
+                int[] docMap = smi.GetDocMap();
+                postings.Seek(smi.termEnum);
+                while (postings.Next())
+                {
+                    int doc = postings.Doc();
+                    if (docMap != null)
+                        doc = docMap[doc];                      // map around deletions
+                    doc += base_Renamed;                              // convert to merged space
+
+                    if (doc < 0 || (df > 0 && doc <= lastDoc)) // after the first doc, ids must strictly increase
+                        throw new CorruptIndexException("docs out of order (" + doc +
+                            " <= " + lastDoc + " )");
+
+                    df++;
+
+                    if ((df % skipInterval) == 0) // buffer a skip entry every skipInterval docs
+                    {
+                        skipListWriter.SetSkipData(lastDoc, false, lastPayloadLength);
+                        skipListWriter.BufferSkip(df);
+                    }
+
+                    int docCode = (doc - lastDoc); // delta from the previously written doc
+                    lastDoc = doc;
+                    freqOutput.WriteVInt(docCode);    // only the doc delta is written; no freq value follows
+                }
+            }
+            return df;
+        }
+
+        private void MergeNorms()
 		{
 			byte[] normBuffer = null;
 			IndexOutput output = null;



Mime
View raw message