lucenenet-commits mailing list archives

From aro...@apache.org
Subject svn commit: r671404 [5/10] - /incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/
Date Wed, 25 Jun 2008 02:52:24 GMT
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Index/IndexWriter.cs?rev=671404&r1=671403&r2=671404&view=diff
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Index/IndexWriter.cs Tue Jun 24 19:52:22 2008
@@ -16,63 +16,154 @@
  */
 
 using System;
-using Analyzer = Lucene.Net.Analysis.Analyzer;
+
 using Document = Lucene.Net.Documents.Document;
-using Similarity = Lucene.Net.Search.Similarity;
+using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
 using Directory = Lucene.Net.Store.Directory;
 using FSDirectory = Lucene.Net.Store.FSDirectory;
-using IndexInput = Lucene.Net.Store.IndexInput;
-using IndexOutput = Lucene.Net.Store.IndexOutput;
 using Lock = Lucene.Net.Store.Lock;
-using RAMDirectory = Lucene.Net.Store.RAMDirectory;
+using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
+using BitVector = Lucene.Net.Util.BitVector;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Similarity = Lucene.Net.Search.Similarity;
 
 namespace Lucene.Net.Index
 {
 	
-	/// <summary> An IndexWriter creates and maintains an index.
-	/// 
-	/// <p>
-	/// The third argument (<code>create</code>) to the <a
-	/// href="#IndexWriter(Lucene.Net.Store.Directory,
-	/// Lucene.Net.Analysis.Analyzer, boolean)"><b>constructor</b></a>
+	/// <summary>An <code>IndexWriter</code> creates and maintains an index.
+	/// <p>The <code>create</code> argument to the 
+	/// <a href="#IndexWriter(Lucene.Net.Store.Directory, Lucene.Net.Analysis.Analyzer, boolean)"><b>constructor</b></a>
 	/// determines whether a new index is created, or whether an existing index is
-	/// opened for the addition of new documents. Note that you can open an index
-	/// with create=true even while readers are using the index. The old readers will
-	/// continue to search the "point in time" snapshot they had opened, and won't
-	/// see the newly created index until they re-open.
-	/// </p>
-	/// 
-	/// <p>
-	/// In either case, documents are added with the <a
+	/// opened.  Note that you
+	/// can open an index with <code>create=true</code> even while readers are
+	/// using the index.  The old readers will continue to search
+	/// the "point in time" snapshot they had opened, and won't
+	/// see the newly created index until they re-open.  There are
+	/// also <a href="#IndexWriter(Lucene.Net.Store.Directory, Lucene.Net.Analysis.Analyzer)"><b>constructors</b></a>
+	/// with no <code>create</code> argument which
+	/// will create a new index if there is not already an index at the
+	/// provided path and otherwise open the existing index.</p>
+	/// <p>In either case, documents are added with <a
 	/// href="#addDocument(Lucene.Net.Documents.Document)"><b>addDocument</b></a>
-	/// method. When finished adding documents, <a href="#close()"><b>close</b></a>
-	/// should be called.
-	/// </p>
-	/// 
-	/// <p>
-	/// If an index will not have more documents added for a while and optimal search
+	/// and removed with <a
+	/// href="#deleteDocuments(Lucene.Net.Index.Term)"><b>deleteDocuments</b></a>.
+	/// A document can be updated with <a href="#updateDocument(Lucene.Net.Index.Term, Lucene.Net.Documents.Document)"><b>updateDocument</b></a> 
+	/// (which just deletes and then adds the entire document).
+	/// When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>
+	/// <p>These changes are buffered in memory and periodically
+	/// flushed to the {@link Directory} (during the above method
+	/// calls).  A flush is triggered when there are enough
+	/// buffered deletes (see {@link #setMaxBufferedDeleteTerms})
+	/// or enough added documents since the last flush, whichever
+	/// is sooner.  For the added documents, flushing is triggered
+	/// either by RAM usage of the documents (see {@link
+	/// #setRAMBufferSizeMB}) or the number of added documents.
+	/// The default is to flush when RAM usage hits 16 MB.  For
+	/// best indexing speed you should flush by RAM usage with a
+	/// large RAM buffer.  You can also force a flush by calling
+	/// {@link #flush}.  When a flush occurs, both pending deletes
+	/// and added documents are flushed to the index.  A flush may
+	/// also trigger one or more segment merges which by default
+	/// run with a background thread so as not to block the
+	/// addDocument calls (see <a href="#mergePolicy">below</a>
+	/// for changing the {@link MergeScheduler}).</p>
+	/// <a name="autoCommit"></a>
+	/// <p>The optional <code>autoCommit</code> argument to the
+	/// <a href="#IndexWriter(Lucene.Net.Store.Directory, boolean, Lucene.Net.Analysis.Analyzer)"><b>constructors</b></a>
+	/// controls visibility of the changes to {@link IndexReader} instances reading the same index.
+	/// When this is <code>false</code>, changes are not
+	/// visible until {@link #Close()} is called.
+	/// Note that changes will still be flushed to the
+	/// {@link Lucene.Net.Store.Directory} as new files,
+	/// but are not committed (no new <code>segments_N</code> file
+	/// is written referencing the new files) until {@link #close} is
+	/// called.  If something goes terribly wrong (for example the
+	/// JVM crashes) before {@link #Close()}, then
+	/// the index will reflect none of the changes made (it will
+	/// remain in its starting state).
+	/// You can also call {@link #Abort()}, which closes the writer without committing any
+	/// changes, and removes any index
+	/// files that had been flushed but are now unreferenced.
+	/// This mode is useful for preventing readers from refreshing
+	/// at a bad time (for example after you've done all your
+	/// deletes but before you've done your adds).
+	/// It can also be used to implement simple single-writer
+	/// transactional semantics ("all or none").</p>
+	/// <p>When <code>autoCommit</code> is <code>true</code> then
+	/// every flush is also a commit ({@link IndexReader}
+	/// instances will see each flush as changes to the index).
+	/// This is the default, to match the behavior before 2.2.
+	/// When running in this mode, be careful not to refresh your
+	/// readers while optimize or segment merges are taking place
+	/// as this can tie up substantial disk space.</p>
+	/// <p>Regardless of <code>autoCommit</code>, an {@link
+	/// IndexReader} or {@link Lucene.Net.Search.IndexSearcher} will only see the
+	/// index as of the "point in time" that it was opened.  Any
+	/// changes committed to the index after the reader was opened
+	/// are not visible until the reader is re-opened.</p>
+	/// <p>If an index will not have more documents added for a while and optimal search
 	/// performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
-	/// method should be called before the index is closed.
-	/// </p>
-	/// 
-	/// <p>
-	/// Opening an IndexWriter creates a lock file for the directory in use. Trying
-	/// to open another IndexWriter on the same directory will lead to an
-	/// IOException. The IOException is also thrown if an IndexReader on the same
-	/// directory is used to delete documents from the index.
-	/// </p>
-	/// 
-	/// <p>
-	/// As of <b>2.1</b>, IndexWriter can now delete documents by {@link Term} (see
-	/// {@link #deleteDocuments} ) and update (delete then add) documents (see
-	/// {@link #updateDocument}). Deletes are buffered until {@link
-	/// #setMaxBufferedDeleteTerms} <code>Terms</code> at which point they are
-	/// flushed to the index. Note that a flush occurs when there are enough buffered
-	/// deletes or enough added documents, whichever is sooner. When a flush occurs,
-	/// both pending deletes and added documents are flushed to the index.
-	/// </p>
+	/// method should be called before the index is closed.</p>
+	/// <p>Opening an <code>IndexWriter</code> creates a lock file for the directory in use. Trying to open
+	/// another <code>IndexWriter</code> on the same directory will lead to a
+	/// {@link LockObtainFailedException}. The {@link LockObtainFailedException}
+	/// is also thrown if an IndexReader on the same directory is used to delete documents
+	/// from the index.</p>
+	/// <a name="deletionPolicy"></a>
+	/// <p>Expert: <code>IndexWriter</code> allows an optional
+	/// {@link IndexDeletionPolicy} implementation to be
+	/// specified.  You can use this to control when prior commits
+	/// are deleted from the index.  The default policy is {@link
+	/// KeepOnlyLastCommitDeletionPolicy} which removes all prior
+	/// commits as soon as a new commit is done (this matches
+	/// behavior before 2.2).  Creating your own policy can allow
+	/// you to explicitly keep previous "point in time" commits
+	/// alive in the index for some time, to allow readers to
+	/// refresh to the new commit without having the old commit
+	/// deleted out from under them.  This is necessary on
+	/// filesystems like NFS that do not support "delete on last
+	/// close" semantics, which Lucene's "point in time" search
+	/// normally relies on. </p>
+	/// <a name="mergePolicy"></a> <p>Expert:
+	/// <code>IndexWriter</code> allows you to separately change
+	/// the {@link MergePolicy} and the {@link MergeScheduler}.
+	/// The {@link MergePolicy} is invoked whenever there are
+	/// changes to the segments in the index.  Its role is to
+	/// select which merges to do, if any, and return a {@link
+	/// MergePolicy.MergeSpecification} describing the merges.  It
+	/// also selects merges to do for optimize().  (The default is
+	/// {@link LogByteSizeMergePolicy}.)  Then, the {@link
+	/// MergeScheduler} is invoked with the requested merges and
+	/// it decides when and how to run the merges.  The default is
+	/// {@link ConcurrentMergeScheduler}. </p>
 	/// </summary>
 	
+	/*
+	* Clarification: Check Points (and commits)
+	* Being able to set autoCommit=false allows IndexWriter to flush and 
+	* write new index files to the directory without writing a new segments_N
+	* file which references these new files. It also means that the state of 
+	* the in memory SegmentInfos object is different than the most recent
+	* segments_N file written to the directory.
+	* 
+	* Each time the SegmentInfos is changed, and matches the (possibly 
+	* modified) directory files, we have a new "check point". 
+	* If the modified/new SegmentInfos is written to disk - as a new 
+	* (generation of) segments_N file - this check point is also an 
+	* IndexCommitPoint.
+	* 
+	* With autoCommit=true, every checkPoint is also a CommitPoint.
+	* With autoCommit=false, some checkPoints may not be commits.
+	* 
+	* A new checkpoint always replaces the previous checkpoint and 
+	* becomes the new "front" of the index. This allows the IndexFileDeleter 
+	* to delete files that are referenced only by stale checkpoints.
+	* (files that were created since the last commit, but are no longer
+	* referenced by the "front" of the index). For this, IndexFileDeleter 
+	* keeps track of the last non commit checkpoint.
+	*/
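
To make the autoCommit semantics above concrete, here is a minimal usage sketch (not part of this commit; the index path and field values are placeholders). With autoCommit=false, nothing becomes visible to readers until Close(), and Abort() rolls the index back to its starting state:

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    Directory dir = FSDirectory.GetDirectory("/tmp/example-index");
    IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer()); // autoCommit=false
    try
    {
        Document doc = new Document();
        doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.AddDocument(doc); // buffered, and possibly flushed, but not committed
        writer.Close();          // the commit: a new segments_N now references the files
    }
    catch (System.Exception)
    {
        writer.Abort();          // all-or-none: drop flushed but uncommitted files
        throw;
    }
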
 	public class IndexWriter
 	{
 		private void  InitBlock()
@@ -80,32 +171,45 @@
 			similarity = Similarity.GetDefault();
 		}
 		
-		/// <summary> Default value for the write lock timeout (1,000).
-		/// 
-		/// </summary>
-		/// <seealso cref="#setDefaultWriteLockTimeout">
+		/// <summary> Default value for the write lock timeout (1,000).</summary>
+		/// <seealso cref="setDefaultWriteLockTimeout">
 		/// </seealso>
 		public static long WRITE_LOCK_TIMEOUT = 1000;
 		
 		private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
 		
+		/// <summary> Name of the write lock in the index.</summary>
 		public const System.String WRITE_LOCK_NAME = "write.lock";
 		
-		/// <summary> Default value is 10. Change using {@link #SetMergeFactor(int)}.</summary>
-		public const int DEFAULT_MERGE_FACTOR = 10;
+		/// <deprecated>
+		/// </deprecated>
+		/// <seealso cref="LogMergePolicy.DEFAULT_MERGE_FACTOR">
+		/// </seealso>
+		public static readonly int DEFAULT_MERGE_FACTOR;
+		
+		/// <summary> Value to denote a flush trigger is disabled</summary>
+		public const int DISABLE_AUTO_FLUSH = -1;
 		
-		/// <summary> Default value is 10. Change using {@link #SetMaxBufferedDocs(int)}.</summary>
-		public const int DEFAULT_MAX_BUFFERED_DOCS = 10;
+		/// <summary> Disabled by default (because IndexWriter flushes by RAM usage
+		/// by default). Change using {@link #SetMaxBufferedDocs(int)}.
+		/// </summary>
+		public static readonly int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
 		
-		/// <summary> Default value is 1000. Change using
-		/// {@link #SetMaxBufferedDeleteTerms(int)}.
+		/// <summary> Default value is 16 MB (which means flush when buffered
+		/// docs consume 16 MB RAM).  Change using {@link #setRAMBufferSizeMB}.
 		/// </summary>
-		public const int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000;
+		public const double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
 		
-		/// <summary> Default value is {@link Integer#MAX_VALUE}. Change using
-		/// {@link #SetMaxMergeDocs(int)}.
+		/// <summary> Disabled by default (because IndexWriter flushes by RAM usage
+		/// by default). Change using {@link #SetMaxBufferedDeleteTerms(int)}.
 		/// </summary>
-		public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
+		public static readonly int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
+		
+		/// <deprecated>
+		/// </deprecated>
+		/// <seealso cref="LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS">
+		/// </seealso>
+		public static readonly int DEFAULT_MAX_MERGE_DOCS;
 		
 		/// <summary> Default value is 10,000. Change using {@link #SetMaxFieldLength(int)}.</summary>
 		public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
@@ -113,319 +217,515 @@
 		/// <summary> Default value is 128. Change using {@link #SetTermIndexInterval(int)}.</summary>
 		public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
 		
-		private Directory directory; // where this index resides
+		/// <summary> Absolute hard maximum length for a term.  If a term
+		/// arrives from the analyzer longer than this length, it
+		/// is skipped and a message is printed to infoStream, if
+		/// set (see {@link #setInfoStream}).
+		/// </summary>
+		public static readonly int MAX_TERM_LENGTH;
+		
+		// The normal read buffer size defaults to 1024, but
+		// increasing this during merging seems to yield
+		// performance gains.  However we don't want to increase
+		// it too much because there are quite a few
+		// BufferedIndexInputs created during merging.  See
+		// LUCENE-888 for details.
+		private const int MERGE_READ_BUFFER_SIZE = 4096;
+		
+		// Used for printing messages
+		private static System.Object MESSAGE_ID_LOCK = new System.Object();
+		private static int MESSAGE_ID = 0;
+		private int messageID = -1;
 		
+		private Directory directory; // where this index resides
 		private Analyzer analyzer; // how to analyze text
 		
-		private Similarity similarity; // how to
-		// normalize
+		private Similarity similarity; // how to normalize
 		
-		private bool inTransaction = false; // true iff we are in a transaction
+		private bool commitPending; // true if segmentInfos has changes not yet committed
+		private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
 		
-		private bool commitPending; // true if segmentInfos has changes not yet
-		// committed
+		private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
+		private bool localAutoCommit; // saved autoCommit during local transaction
+		private bool autoCommit = true; // false if we should commit only on close
 		
-		private System.Collections.Hashtable protectedSegments; // segment names that should not be
-		// deleted until commit
+		private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
+		private DocumentsWriter docWriter;
+		private IndexFileDeleter deleter;
 		
-		private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback
-		// to if the commit fails
+		private System.Collections.Hashtable segmentsToOptimize = new System.Collections.Hashtable(); // used by optimize to note those needing optimization
 		
-		internal SegmentInfos segmentInfos = new SegmentInfos(); // the segments
+		private Lock writeLock;
 		
-		internal SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in
-		// ramDirectory
+		private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
 		
-		private RAMDirectory ramDirectory = new RAMDirectory(); // for temp
-		// segs
+		private bool closeDir;
+		private bool closed;
+		private bool closing;
 		
-		private IndexFileDeleter deleter;
+		// Holds all SegmentInfo instances currently involved in
+		// merges
+		private System.Collections.Hashtable mergingSegments = new System.Collections.Hashtable();
 		
-		private Lock writeLock;
+		private MergePolicy mergePolicy = new LogByteSizeMergePolicy();
+		private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
+		private System.Collections.ArrayList pendingMerges = new System.Collections.ArrayList();
+		private System.Collections.Hashtable runningMerges = new System.Collections.Hashtable();
+		private System.Collections.IList mergeExceptions = new System.Collections.ArrayList();
+		private long mergeGen;
+		private bool stopMerges;
 		
-		private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+		/// <summary> Used internally to throw an {@link
+		/// AlreadyClosedException} if this IndexWriter has been
+		/// closed.
+		/// </summary>
+		/// <throws>  AlreadyClosedException if this IndexWriter is closed </throws>
+		protected internal void  EnsureOpen()
+		{
+			if (closed)
+			{
+				throw new AlreadyClosedException("this IndexWriter is closed");
+			}
+		}
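
Most of the public setters and getters below call this guard first, so a closed writer fails fast. A hedged caller-side sketch (dir as in the earlier example):

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer());
    writer.Close();
    try
    {
        writer.SetMaxFieldLength(20000); // passes through EnsureOpen()
    }
    catch (AlreadyClosedException e)
    {
        System.Console.Error.WriteLine("writer is closed: " + e.Message);
    }
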
 		
-		// The max number of delete terms that can be buffered before
-		// they must be flushed to disk.
-		private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
-		
-		// This Hashmap buffers delete terms in ram before they are applied.
-		// The key is delete term; the value is number of ram
-		// segments the term applies to.
-		private System.Collections.Hashtable bufferedDeleteTerms = new System.Collections.Hashtable();
-		
-		private int numBufferedDeleteTerms = 0;
-		
-		/// <summary> Use compound file setting. Defaults to true, minimizing the number of
-		/// files used. Setting this to false may improve indexing performance, but
-		/// may also cause file handle problems.
+		/// <summary> Prints a message to the infoStream (if non-null),
+		/// prefixed with the identifying information for this
+		/// writer and the thread that's calling it.
 		/// </summary>
-		private bool useCompoundFile = true;
+		public virtual void  Message(System.String message)
+		{
+			if (infoStream != null)
+				infoStream.WriteLine("IW " + messageID + " [" + SupportClass.ThreadClass.Current().Name + "]: " + message);
+		}
 		
-		private bool closeDir;
+		private void  SetMessageID()
+		{
+			lock (this)
+			{
+				if (infoStream != null && messageID == -1)
+				{
+					lock (MESSAGE_ID_LOCK)
+					{
+						messageID = MESSAGE_ID++;
+					}
+				}
+			}
+		}
 		
-		/// <summary> Get the current setting of whether to use the compound file format. Note
-		/// that this just returns the value you set with setUseCompoundFile(boolean)
-		/// or the default. You cannot use this to query the status of an existing
-		/// index.
+		/// <summary> Casts current mergePolicy to LogMergePolicy, and throws
+		/// an exception if the mergePolicy is not a LogMergePolicy.
+		/// </summary>
+		private LogMergePolicy GetLogMergePolicy()
+		{
+			if (mergePolicy is LogMergePolicy)
+				return (LogMergePolicy) mergePolicy;
+			else
+				throw new System.ArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
+		}
+		
+		/// <summary><p>Get the current setting of whether newly flushed
+		/// segments will use the compound file format.  Note that
+		/// this just returns the value previously set with
+		/// setUseCompoundFile(boolean), or the default value
+		/// (true).  You cannot use this to query the status of
+		/// previously flushed segments.</p>
+		/// 
+		/// <p>Note that this method is a convenience method: it
+		/// just calls mergePolicy.getUseCompoundFile as long as
+		/// mergePolicy is an instance of {@link LogMergePolicy}.
+		/// Otherwise an IllegalArgumentException is thrown.</p>
 		/// 
 		/// </summary>
-		/// <seealso cref="#SetUseCompoundFile(boolean)">
+		/// <seealso cref="SetUseCompoundFile(boolean)">
 		/// </seealso>
 		public virtual bool GetUseCompoundFile()
 		{
-			return useCompoundFile;
+			return GetLogMergePolicy().GetUseCompoundFile();
 		}
 		
-		/// <summary> Setting to turn on usage of a compound file. When on, multiple files for
-		/// each segment are merged into a single file once the segment creation is
-		/// finished. This is done regardless of what directory is in use.
+		/// <summary><p>Setting to turn on usage of a compound file. When on,
+		/// multiple files for each segment are merged into a
+		/// single file when a new segment is flushed.</p>
+		/// 
+		/// <p>Note that this method is a convenience method: it
+		/// just calls mergePolicy.setUseCompoundFile as long as
+		/// mergePolicy is an instance of {@link LogMergePolicy}.
+		/// Otherwise an IllegalArgumentException is thrown.</p>
 		/// </summary>
 		public virtual void  SetUseCompoundFile(bool value_Renamed)
 		{
-			useCompoundFile = value_Renamed;
+			GetLogMergePolicy().SetUseCompoundFile(value_Renamed);
+			GetLogMergePolicy().SetUseCompoundDocStore(value_Renamed);
 		}
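
Since these two methods only delegate to the current LogMergePolicy, they stop working once a different policy is installed. A short sketch of the expected behavior (dir and analyzer as before):

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer());
    writer.SetUseCompoundFile(false);           // newly flushed segments use multiple files
    bool usesCfs = writer.GetUseCompoundFile(); // false
    // If the merge policy were not a LogMergePolicy, both calls would
    // throw System.ArgumentException (see GetLogMergePolicy above).
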
 		
-		/// <summary> Expert: Set the Similarity implementation used by this IndexWriter.
+		/// <summary>Expert: Set the Similarity implementation used by this IndexWriter.
 		/// 
 		/// </summary>
-		/// <seealso cref="Similarity#SetDefault(Similarity)">
+		/// <seealso cref="Similarity.SetDefault(Similarity)">
 		/// </seealso>
 		public virtual void  SetSimilarity(Similarity similarity)
 		{
+			EnsureOpen();
 			this.similarity = similarity;
 		}
 		
-		/// <summary> Expert: Return the Similarity implementation used by this IndexWriter.
+		/// <summary>Expert: Return the Similarity implementation used by this IndexWriter.
 		/// 
-		/// <p>
-		/// This defaults to the current value of {@link Similarity#GetDefault()}.
+		/// <p>This defaults to the current value of {@link Similarity#GetDefault()}.
 		/// </summary>
 		public virtual Similarity GetSimilarity()
 		{
+			EnsureOpen();
 			return this.similarity;
 		}
 		
-		/// <summary> Expert: Set the interval between indexed terms. Large values cause less
-		/// memory to be used by IndexReader, but slow random-access to terms. Small
+		/// <summary>Expert: Set the interval between indexed terms.  Large values cause less
+		/// memory to be used by IndexReader, but slow random-access to terms.  Small
 		/// values cause more memory to be used by an IndexReader, and speed
 		/// random-access to terms.
 		/// 
 		/// This parameter determines the amount of computation required per query
-		/// term, regardless of the number of documents that contain that term. In
-		/// particular, it is the maximum number of other terms that must be scanned
-		/// before a term is located and its frequency and position information may
-		/// be processed. In a large index with user-entered query terms, query
+		/// term, regardless of the number of documents that contain that term.  In
+		/// particular, it is the maximum number of other terms that must be
+		/// scanned before a term is located and its frequency and position information
+		/// may be processed.  In a large index with user-entered query terms, query
 		/// processing time is likely to be dominated not by term lookup but rather
-		/// by the processing of frequency and positional data. In a small index or
-		/// when many uncommon query terms are generated (e.g., by wildcard queries)
-		/// term lookup may become a dominant cost.
+		/// by the processing of frequency and positional data.  In a small index
+		/// or when many uncommon query terms are generated (e.g., by wildcard
+		/// queries) term lookup may become a dominant cost.
 		/// 
 		/// In particular, <code>numUniqueTerms/interval</code> terms are read into
-		/// memory by an IndexReader, and, on average, <code>interval/2</code>
-		/// terms must be scanned for each random term access.
+		/// memory by an IndexReader, and, on average, <code>interval/2</code> terms
+		/// must be scanned for each random term access.
 		/// 
 		/// </summary>
-		/// <seealso cref="#DEFAULT_TERM_INDEX_INTERVAL">
+		/// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL">
 		/// </seealso>
 		public virtual void  SetTermIndexInterval(int interval)
 		{
+			EnsureOpen();
 			this.termIndexInterval = interval;
 		}
 		
-		/// <summary> Expert: Return the interval between indexed terms.
+		/// <summary>Expert: Return the interval between indexed terms.
 		/// 
 		/// </summary>
-		/// <seealso cref="#SetTermIndexInterval(int)">
+		/// <seealso cref="SetTermIndexInterval(int)">
 		/// </seealso>
 		public virtual int GetTermIndexInterval()
 		{
+			EnsureOpen();
 			return termIndexInterval;
 		}
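
A back-of-the-envelope example of the numUniqueTerms/interval trade-off described above (the term count is invented for illustration):

    long numUniqueTerms = 10000000;                          // hypothetical 10M unique terms
    int interval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;  // 128
    long termsHeldInMemory = numUniqueTerms / interval;      // ~78,125 terms per IndexReader
    double avgTermsScanned = interval / 2.0;                 // ~64 terms scanned per random access
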
 		
-		/// <summary> Constructs an IndexWriter for the index in <code>path</code>. Text
-		/// will be analyzed with <code>a</code>. If <code>create</code> is
-		/// true, then a new, empty index will be created in <code>path</code>,
-		/// replacing the index already there, if any.
+		/// <summary> Constructs an IndexWriter for the index in <code>path</code>.
+		/// Text will be analyzed with <code>a</code>.  If <code>create</code>
+		/// is true, then a new, empty index will be created in
+		/// <code>path</code>, replacing the index already there, if any.
 		/// 
 		/// </summary>
-		/// <param name="">path
-		/// the path to the index directory
+		/// <param name="path">the path to the index directory
 		/// </param>
-		/// <param name="">a
-		/// the analyzer to use
+		/// <param name="a">the analyzer to use
 		/// </param>
-		/// <param name="">create
-		/// <code>true</code> to create the index or overwrite the
-		/// existing one; <code>false</code> to append to the existing
+		/// <param name="create"><code>true</code> to create the index or overwrite
+		/// the existing one; <code>false</code> to append to the existing
 		/// index
 		/// </param>
-		/// <throws>  IOException </throws>
-		/// <summary>             if the directory cannot be read/written to, or if it does not
-		/// exist, and <code>create</code> is <code>false</code>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be read/written to, or </throws>
+		/// <summary>  if it does not exist and <code>create</code> is
+		/// <code>false</code> or if there is any other low-level
+		/// IO error
 		/// </summary>
 		public IndexWriter(System.String path, Analyzer a, bool create)
 		{
 			InitBlock();
-			Init(path, a, create);
+			Init(FSDirectory.GetDirectory(path), a, create, true, null, true);
 		}
 		
-		/// <summary> Constructs an IndexWriter for the index in <code>path</code>. Text
-		/// will be analyzed with <code>a</code>. If <code>create</code> is
-		/// true, then a new, empty index will be created in <code>path</code>,
-		/// replacing the index already there, if any.
+		/// <summary> Constructs an IndexWriter for the index in <code>path</code>.
+		/// Text will be analyzed with <code>a</code>.  If <code>create</code>
+		/// is true, then a new, empty index will be created in
+		/// <code>path</code>, replacing the index already there, if any.
 		/// 
 		/// </summary>
-		/// <param name="">path
-		/// the path to the index directory
+		/// <param name="path">the path to the index directory
 		/// </param>
-		/// <param name="">a
-		/// the analyzer to use
+		/// <param name="a">the analyzer to use
 		/// </param>
-		/// <param name="">create
-		/// <code>true</code> to create the index or overwrite the
-		/// existing one; <code>false</code> to append to the existing
+		/// <param name="create"><code>true</code> to create the index or overwrite
+		/// the existing one; <code>false</code> to append to the existing
 		/// index
 		/// </param>
-		/// <throws>  IOException </throws>
-		/// <summary>             if the directory cannot be read/written to, or if it does not
-		/// exist, and <code>create</code> is <code>false</code>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be read/written to, or </throws>
+		/// <summary>  if it does not exist and <code>create</code> is
+		/// <code>false</code> or if there is any other low-level
+		/// IO error
 		/// </summary>
 		public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create)
 		{
 			InitBlock();
-			Init(path, a, create);
+			Init(FSDirectory.GetDirectory(path), a, create, true, null, true);
 		}
 		
-		/// <summary> Constructs an IndexWriter for the index in <code>d</code>. Text will
-		/// be analyzed with <code>a</code>. If <code>create</code> is true,
-		/// then a new, empty index will be created in <code>d</code>, replacing
-		/// the index already there, if any.
+		/// <summary> Constructs an IndexWriter for the index in <code>d</code>.
+		/// Text will be analyzed with <code>a</code>.  If <code>create</code>
+		/// is true, then a new, empty index will be created in
+		/// <code>d</code>, replacing the index already there, if any.
 		/// 
 		/// </summary>
-		/// <param name="">d
-		/// the index directory
+		/// <param name="d">the index directory
 		/// </param>
-		/// <param name="">a
-		/// the analyzer to use
+		/// <param name="a">the analyzer to use
 		/// </param>
-		/// <param name="">create
-		/// <code>true</code> to create the index or overwrite the
-		/// existing one; <code>false</code> to append to the existing
+		/// <param name="create"><code>true</code> to create the index or overwrite
+		/// the existing one; <code>false</code> to append to the existing
 		/// index
 		/// </param>
-		/// <throws>  IOException </throws>
-		/// <summary>             if the directory cannot be read/written to, or if it does not
-		/// exist, and <code>create</code> is <code>false</code>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be read/written to, or </throws>
+		/// <summary>  if it does not exist and <code>create</code> is
+		/// <code>false</code> or if there is any other low-level
+		/// IO error
 		/// </summary>
 		public IndexWriter(Directory d, Analyzer a, bool create)
 		{
 			InitBlock();
-			Init(d, a, create, false);
+			Init(d, a, create, false, null, true);
 		}
 		
-		/// <summary> Constructs an IndexWriter for the index in <code>path</code>, creating
-		/// it first if it does not already exist, otherwise appending to the
-		/// existing index. Text will be analyzed with <code>a</code>.
+		/// <summary> Constructs an IndexWriter for the index in
+		/// <code>path</code>, first creating it if it does not
+		/// already exist.  Text will be analyzed with
+		/// <code>a</code>.
 		/// 
 		/// </summary>
-		/// <param name="">path
-		/// the path to the index directory
+		/// <param name="path">the path to the index directory
 		/// </param>
-		/// <param name="">a
-		/// the analyzer to use
+		/// <param name="a">the analyzer to use
 		/// </param>
-		/// <throws>  IOException </throws>
-		/// <summary>             if the directory cannot be created or read/written to
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be </throws>
+		/// <summary>  read/written to or if there is any other low-level
+		/// IO error
 		/// </summary>
 		public IndexWriter(System.String path, Analyzer a)
 		{
 			InitBlock();
-			if (IndexReader.IndexExists(path))
-			{
-				Init(path, a, false);
-			}
-			else
-			{
-				Init(path, a, true);
-			}
+			Init(FSDirectory.GetDirectory(path), a, true, null, true);
 		}
 		
-		/// <summary> Constructs an IndexWriter for the index in <code>path</code>, creating
-		/// it first if it does not already exist, otherwise appending to the
-		/// existing index. Text will be analyzed with <code>a</code>.
+		/// <summary> Constructs an IndexWriter for the index in
+		/// <code>path</code>, first creating it if it does not
+		/// already exist.  Text will be analyzed with
+		/// <code>a</code>.
 		/// 
 		/// </summary>
-		/// <param name="">path
-		/// the path to the index directory
+		/// <param name="path">the path to the index directory
 		/// </param>
-		/// <param name="">a
-		/// the analyzer to use
+		/// <param name="a">the analyzer to use
 		/// </param>
-		/// <throws>  IOException </throws>
-		/// <summary>             if the directory cannot be created or read/written to
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be </throws>
+		/// <summary>  read/written to or if there is any other low-level
+		/// IO error
 		/// </summary>
 		public IndexWriter(System.IO.FileInfo path, Analyzer a)
 		{
 			InitBlock();
-			if (IndexReader.IndexExists(path))
-			{
-				Init(path, a, false);
-			}
-			else
-			{
-				Init(path, a, true);
-			}
+			Init(FSDirectory.GetDirectory(path), a, true, null, true);
 		}
 		
-		/// <summary> Constructs an IndexWriter for the index in <code>d</code>, creating it
-		/// first if it does not already exist, otherwise appending to the existing
-		/// index. Text will be analyzed with <code>a</code>.
+		/// <summary> Constructs an IndexWriter for the index in
+		/// <code>d</code>, first creating it if it does not
+		/// already exist.  Text will be analyzed with
+		/// <code>a</code>.
 		/// 
 		/// </summary>
-		/// <param name="">d
-		/// the index directory
+		/// <param name="d">the index directory
 		/// </param>
-		/// <param name="">a
-		/// the analyzer to use
+		/// <param name="a">the analyzer to use
 		/// </param>
-		/// <throws>  IOException </throws>
-		/// <summary>             if the directory cannot be created or read/written to
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be </throws>
+		/// <summary>  read/written to or if there is any other low-level
+		/// IO error
 		/// </summary>
 		public IndexWriter(Directory d, Analyzer a)
 		{
 			InitBlock();
-			if (IndexReader.IndexExists(d))
-			{
-				Init(d, a, false, false);
-			}
-			else
-			{
-				Init(d, a, true, false);
-			}
+			Init(d, a, false, null, true);
+		}
+		
+		/// <summary> Constructs an IndexWriter for the index in
+		/// <code>d</code>, first creating it if it does not
+		/// already exist.  Text will be analyzed with
+		/// <code>a</code>.
+		/// 
+		/// </summary>
+		/// <param name="d">the index directory
+		/// </param>
+		/// <param name="autoCommit">see <a href="#autoCommit">above</a>
+		/// </param>
+		/// <param name="a">the analyzer to use
+		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be </throws>
+		/// <summary>  read/written to or if there is any other low-level
+		/// IO error
+		/// </summary>
+		public IndexWriter(Directory d, bool autoCommit, Analyzer a)
+		{
+			InitBlock();
+			Init(d, a, false, null, autoCommit);
+		}
+		
+		/// <summary> Constructs an IndexWriter for the index in <code>d</code>.
+		/// Text will be analyzed with <code>a</code>.  If <code>create</code>
+		/// is true, then a new, empty index will be created in
+		/// <code>d</code>, replacing the index already there, if any.
+		/// 
+		/// </summary>
+		/// <param name="d">the index directory
+		/// </param>
+		/// <param name="autoCommit">see <a href="#autoCommit">above</a>
+		/// </param>
+		/// <param name="a">the analyzer to use
+		/// </param>
+		/// <param name="create"><code>true</code> to create the index or overwrite
+		/// the existing one; <code>false</code> to append to the existing
+		/// index
+		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be read/written to, or </throws>
+		/// <summary>  if it does not exist and <code>create</code> is
+		/// <code>false</code> or if there is any other low-level
+		/// IO error
+		/// </summary>
+		public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create)
+		{
+			InitBlock();
+			Init(d, a, create, false, null, autoCommit);
 		}
 		
-		private IndexWriter(Directory d, Analyzer a, bool create, bool closeDir)
+		/// <summary> Expert: constructs an IndexWriter with a custom {@link
+		/// IndexDeletionPolicy}, for the index in <code>d</code>,
+		/// first creating it if it does not already exist.  Text
+		/// will be analyzed with <code>a</code>.
+		/// 
+		/// </summary>
+		/// <param name="d">the index directory
+		/// </param>
+		/// <param name="autoCommit">see <a href="#autoCommit">above</a>
+		/// </param>
+		/// <param name="a">the analyzer to use
+		/// </param>
+		/// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
+		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be </throws>
+		/// <summary>  read/written to or if there is any other low-level
+		/// IO error
+		/// </summary>
+		public IndexWriter(Directory d, bool autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy)
 		{
 			InitBlock();
-			Init(d, a, create, closeDir);
+			Init(d, a, false, deletionPolicy, autoCommit);
 		}
 		
-		private void  Init(System.String path, Analyzer a, bool create)
+		/// <summary> Expert: constructs an IndexWriter with a custom {@link
+		/// IndexDeletionPolicy}, for the index in <code>d</code>.
+		/// Text will be analyzed with <code>a</code>.  If
+		/// <code>create</code> is true, then a new, empty index
+		/// will be created in <code>d</code>, replacing the index
+		/// already there, if any.
+		/// 
+		/// </summary>
+		/// <param name="d">the index directory
+		/// </param>
+		/// <param name="autoCommit">see <a href="#autoCommit">above</a>
+		/// </param>
+		/// <param name="a">the analyzer to use
+		/// </param>
+		/// <param name="create"><code>true</code> to create the index or overwrite
+		/// the existing one; <code>false</code> to append to the existing
+		/// index
+		/// </param>
+		/// <param name="deletionPolicy">see <a href="#deletionPolicy">above</a>
+		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  LockObtainFailedException if another writer </throws>
+		/// <summary>  has this index open (<code>write.lock</code> could not
+		/// be obtained)
+		/// </summary>
+		/// <throws>  IOException if the directory cannot be read/written to, or </throws>
+		/// <summary>  if it does not exist and <code>create</code> is
+		/// <code>false</code> or if there is any other low-level
+		/// IO error
+		/// </summary>
+		public IndexWriter(Directory d, bool autoCommit, Analyzer a, bool create, IndexDeletionPolicy deletionPolicy)
 		{
-			Init(FSDirectory.GetDirectory(path), a, create, true);
+			InitBlock();
+			Init(d, a, create, false, deletionPolicy, autoCommit);
 		}
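
The IndexDeletionPolicy contract itself is outside this hunk; assuming it mirrors the Java interface (OnInit/OnCommit over an untyped list of commit points), here is a sketch of a policy that keeps every commit alive, one way to support the NFS scenario described above:

    // Hedged sketch: signatures assume a straight port of the Java
    // IndexDeletionPolicy interface (IList of IndexCommitPoint).
    public class KeepAllDeletionPolicy : IndexDeletionPolicy
    {
        public void OnInit(System.Collections.IList commits)
        {
            // keep all commits found when the writer is opened
        }
        public void OnCommit(System.Collections.IList commits)
        {
            // never call Delete() on a commit point, so prior segments_N
            // files (and the files they reference) remain readable
        }
    }

    IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer(),
                                         new KeepAllDeletionPolicy());
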
 		
-		private void  Init(System.IO.FileInfo path, Analyzer a, bool create)
+		private void  Init(Directory d, Analyzer a, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit)
 		{
-			Init(FSDirectory.GetDirectory(path), a, create, true);
+			if (IndexReader.IndexExists(d))
+			{
+				Init(d, a, false, closeDir, deletionPolicy, autoCommit);
+			}
+			else
+			{
+				Init(d, a, true, closeDir, deletionPolicy, autoCommit);
+			}
 		}
 		
-		private void  Init(Directory d, Analyzer a, bool create, bool closeDir)
+		private void  Init(Directory d, Analyzer a, bool create, bool closeDir, IndexDeletionPolicy deletionPolicy, bool autoCommit)
 		{
 			this.closeDir = closeDir;
 			directory = d;
 			analyzer = a;
+			this.infoStream = defaultInfoStream;
+			SetMessageID();
 			
 			if (create)
 			{
@@ -437,7 +737,7 @@
 			if (!writeLock.Obtain(writeLockTimeout))
 			// obtain write lock
 			{
-				throw new System.IO.IOException("Index locked for write: " + writeLock);
+				throw new LockObtainFailedException("Index locked for write: " + writeLock);
 			}
 			this.writeLock = writeLock; // save it
 			
@@ -445,9 +745,9 @@
 			{
 				if (create)
 				{
-					// Try to read first. This is to allow create
+					// Try to read first.  This is to allow create
 					// against an index that's currently open for
-					// searching. In this case we write the next
+					// searching.  In this case we write the next
 					// segments_N file with no segments:
 					try
 					{
@@ -465,12 +765,26 @@
 					segmentInfos.Read(directory);
 				}
 				
-				// Create a deleter to keep track of which files can
-				// be deleted:
-				deleter = new IndexFileDeleter(segmentInfos, directory);
-				deleter.SetInfoStream(infoStream);
-				deleter.FindDeletableFiles();
-				deleter.DeleteFiles();
+				this.autoCommit = autoCommit;
+				if (!autoCommit)
+				{
+					rollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
+				}
+				
+				docWriter = new DocumentsWriter(directory, this);
+				docWriter.SetInfoStream(infoStream);
+				
+				// Default deleter (for backwards compatibility) is
+				// KeepOnlyLastCommitDeleter:
+				deleter = new IndexFileDeleter(directory, deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, segmentInfos, infoStream, docWriter);
+				
+				PushMaxBufferedDocs();
+				
+				if (infoStream != null)
+				{
+					Message("init: create=" + create);
+					MessageState();
+				}
 			}
 			catch (System.IO.IOException e)
 			{
@@ -480,197 +794,412 @@
 			}
 		}
 		
-		/// <summary> Determines the largest number of documents ever merged by addDocument().
-		/// Small values (e.g., less than 10,000) are best for interactive indexing,
-		/// as this limits the length of pauses while indexing to a few seconds.
-		/// Larger values are best for batched indexing and speedier searches.
-		/// 
-		/// <p>
-		/// The default value is {@link Integer#MAX_VALUE}.
+		/// <summary> Expert: set the merge policy used by this writer.</summary>
+		public virtual void  SetMergePolicy(MergePolicy mp)
+		{
+			EnsureOpen();
+			if (mp == null)
+				throw new System.ArgumentNullException("mp", "MergePolicy must be non-null");
+			
+			if (mergePolicy != mp)
+				mergePolicy.Close();
+			mergePolicy = mp;
+			PushMaxBufferedDocs();
+			if (infoStream != null)
+			{
+				Message("setMergePolicy " + mp);
+			}
+		}
+		
+		/// <summary> Expert: returns the current MergePolicy in use by this writer.</summary>
+		/// <seealso cref="setMergePolicy">
+		/// </seealso>
+		public virtual MergePolicy GetMergePolicy()
+		{
+			EnsureOpen();
+			return mergePolicy;
+		}
+		
+		/// <summary> Expert: set the merge scheduler used by this writer.</summary>
+		public virtual void  SetMergeScheduler(MergeScheduler mergeScheduler)
+		{
+			EnsureOpen();
+			if (mergeScheduler == null)
+				throw new System.ArgumentNullException("mergeScheduler", "MergeScheduler must be non-null");
+			
+			if (this.mergeScheduler != mergeScheduler)
+			{
+				FinishMerges(true);
+				this.mergeScheduler.Close();
+			}
+			this.mergeScheduler = mergeScheduler;
+			if (infoStream != null)
+			{
+				Message("setMergeScheduler " + mergeScheduler);
+			}
+		}
+		
+		/// <summary> Expert: returns the current MergeScheduler in use by this
+		/// writer.
+		/// </summary>
+		/// <seealso cref="setMergeScheduler">
+		/// </seealso>
+		public virtual MergeScheduler GetMergeScheduler()
+		{
+			EnsureOpen();
+			return mergeScheduler;
+		}
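
Together, the two hooks above let you trade merge concurrency for predictability. A hedged configuration sketch (SerialMergeScheduler is assumed to be the single-threaded scheduler shipped alongside ConcurrentMergeScheduler):

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer());
    writer.SetMergePolicy(new LogDocMergePolicy());        // select merges by document count
    writer.SetMergeScheduler(new SerialMergeScheduler());  // run merges on the calling thread
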
+		
+		/// <summary><p>Determines the largest segment (measured by
+		/// document count) that may be merged with other segments.
+		/// Small values (e.g., less than 10,000) are best for
+		/// interactive indexing, as this limits the length of
+		/// pauses while indexing to a few seconds.  Larger values
+		/// are best for batched indexing and speedier
+		/// searches.</p>
+		/// 
+		/// <p>The default value is {@link Integer#MAX_VALUE}.</p>
+		/// 
+		/// <p>Note that this method is a convenience method: it
+		/// just calls mergePolicy.setMaxMergeDocs as long as
+		/// mergePolicy is an instance of {@link LogMergePolicy}.
+		/// Otherwise an IllegalArgumentException is thrown.</p>
+		/// 
+		/// <p>The default merge policy ({@link
+		/// LogByteSizeMergePolicy}) also allows you to set this
+		/// limit by net size (in MB) of the segment, using {@link
+		/// LogByteSizeMergePolicy#setMaxMergeMB}.</p>
 		/// </summary>
 		public virtual void  SetMaxMergeDocs(int maxMergeDocs)
 		{
-			this.maxMergeDocs = maxMergeDocs;
+			GetLogMergePolicy().SetMaxMergeDocs(maxMergeDocs);
 		}
 		
-		/// <seealso cref="#setMaxMergeDocs">
+		/// <summary> <p>Returns the largest segment (measured by document
+		/// count) that may be merged with other segments.</p>
+		/// 
+		/// <p>Note that this method is a convenience method: it
+		/// just calls mergePolicy.getMaxMergeDocs as long as
+		/// mergePolicy is an instance of {@link LogMergePolicy}.
+		/// Otherwise an IllegalArgumentException is thrown.</p>
+		/// 
+		/// </summary>
+		/// <seealso cref="setMaxMergeDocs">
 		/// </seealso>
 		public virtual int GetMaxMergeDocs()
 		{
-			return maxMergeDocs;
+			return GetLogMergePolicy().GetMaxMergeDocs();
 		}
 		
 		/// <summary> The maximum number of terms that will be indexed for a single field in a
-		/// document. This limits the amount of memory required for indexing, so that
+		/// document.  This limits the amount of memory required for indexing, so that
 		/// collections with very large files will not crash the indexing process by
-		/// running out of memory.<p/> Note that this effectively truncates large
-		/// documents, excluding from the index terms that occur further in the
-		/// document. If you know your source documents are large, be sure to set
-		/// this value high enough to accomodate the expected size. If you set it to
-		/// Integer.MAX_VALUE, then the only limit is your memory, but you should
-		/// anticipate an OutOfMemoryError.<p/> By default, no more than 10,000
-		/// terms will be indexed for a field.
+		/// running out of memory.  This setting refers to the number of running terms,
+		/// not to the number of different terms.<p/>
+		/// <strong>Note:</strong> this silently truncates large documents, excluding from the
+		/// index all terms that occur further in the document.  If you know your source
+		/// documents are large, be sure to set this value high enough to accommodate
+		/// the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
+		/// is your memory, but you should anticipate an OutOfMemoryError.<p/>
+		/// By default, no more than 10,000 terms will be indexed for a field.
 		/// </summary>
 		public virtual void  SetMaxFieldLength(int maxFieldLength)
 		{
+			EnsureOpen();
 			this.maxFieldLength = maxFieldLength;
+			if (infoStream != null)
+				Message("setMaxFieldLength " + maxFieldLength);
 		}
 		
-		/// <seealso cref="#setMaxFieldLength">
+		/// <summary> Returns the maximum number of terms that will be
+		/// indexed for a single field in a document.
+		/// </summary>
+		/// <seealso cref="setMaxFieldLength">
 		/// </seealso>
 		public virtual int GetMaxFieldLength()
 		{
+			EnsureOpen();
 			return maxFieldLength;
 		}
 		
-		/// <summary> Determines the minimal number of documents required before the buffered
-		/// in-memory documents are merged and a new Segment is created. Since
-		/// Documents are merged in a {@link Lucene.Net.Store.RAMDirectory},
-		/// large value gives faster indexing. At the same time, mergeFactor limits
-		/// the number of files open in a FSDirectory.
-		/// 
-		/// <p>
-		/// The default value is 10.
-		/// 
-		/// </summary>
-		/// <throws>  IllegalArgumentException </throws>
-		/// <summary>             if maxBufferedDocs is smaller than 2
+		/// <summary>Determines the minimal number of documents required
+		/// before the buffered in-memory documents are flushed as
+		/// a new Segment.  Large values generally give faster
+		/// indexing.
+		/// 
+		/// <p>When this is set, the writer will flush every
+		/// maxBufferedDocs added documents.  Pass in {@link
+		/// #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
+		/// to number of buffered documents.  Note that if flushing
+		/// by RAM usage is also enabled, then the flush will be
+		/// triggered by whichever comes first.</p>
+		/// 
+		/// <p>Disabled by default (writer flushes by RAM usage).</p>
+		/// 
+		/// </summary>
+		/// <throws>  IllegalArgumentException if maxBufferedDocs is </throws>
+		/// <summary> enabled but smaller than 2, or if it disables maxBufferedDocs
+		/// when ramBufferSize is already disabled
 		/// </summary>
+		/// <seealso cref="setRAMBufferSizeMB">
+		/// </seealso>
 		public virtual void  SetMaxBufferedDocs(int maxBufferedDocs)
 		{
-			if (maxBufferedDocs < 2)
-				throw new System.ArgumentException("maxBufferedDocs must at least be 2");
-			this.minMergeDocs = maxBufferedDocs;
+			EnsureOpen();
+			if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
+				throw new System.ArgumentException("maxBufferedDocs must at least be 2 when enabled");
+			if (maxBufferedDocs == DISABLE_AUTO_FLUSH && GetRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
+				throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
+			docWriter.SetMaxBufferedDocs(maxBufferedDocs);
+			PushMaxBufferedDocs();
+			if (infoStream != null)
+				Message("setMaxBufferedDocs " + maxBufferedDocs);
+		}
+		
+		/// <summary> If we are flushing by doc count (not by RAM usage) and
+		/// using LogDocMergePolicy, then push maxBufferedDocs down
+		/// as its minMergeDocs to keep backwards compatibility.
+		/// </summary>
+		private void  PushMaxBufferedDocs()
+		{
+			if (docWriter.GetMaxBufferedDocs() != DISABLE_AUTO_FLUSH)
+			{
+				MergePolicy mp = mergePolicy;
+				if (mp is LogDocMergePolicy)
+				{
+					LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
+					int maxBufferedDocs = docWriter.GetMaxBufferedDocs();
+					if (lmp.GetMinMergeDocs() != maxBufferedDocs)
+					{
+						if (infoStream != null)
+							Message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
+						lmp.SetMinMergeDocs(maxBufferedDocs);
+					}
+				}
+			}
 		}
 		
-		/// <seealso cref="#setMaxBufferedDocs">
+		/// <summary> Returns the number of buffered added documents that will
+		/// trigger a flush if enabled.
+		/// </summary>
+		/// <seealso cref="setMaxBufferedDocs">
 		/// </seealso>
 		public virtual int GetMaxBufferedDocs()
 		{
-			return minMergeDocs;
+			EnsureOpen();
+			return docWriter.GetMaxBufferedDocs();
 		}
 		
-		/// <summary> <p>
-		/// Determines the minimal number of delete terms required before the
-		/// buffered in-memory delete terms are applied and flushed. If there are
-		/// documents buffered in memory at the time, they are merged and a new
-		/// segment is created.
-		/// </p>
+		/// <summary>Determines the amount of RAM that may be used for
+		/// buffering added documents before they are flushed as a
+		/// new Segment.  Generally for faster indexing performance
+		/// it's best to flush by RAM usage instead of document
+		/// count and use as large a RAM buffer as you can.
 		/// 
-		/// <p>
-		/// The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.
+		/// <p>When this is set, the writer will flush whenever
+		/// buffered documents use this much RAM.  Pass in {@link
+		/// #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
+		/// to RAM usage.  Note that if flushing by document count
+		/// is also enabled, then the flush will be triggered by
+		/// whichever comes first.</p>
+		/// 
+		/// <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
 		/// 
 		/// </summary>
-		/// <throws>  IllegalArgumentException </throws>
-		/// <summary>             if maxBufferedDeleteTerms is smaller than 1
-		/// </p>
+		/// <throws>  IllegalArgumentException if ramBufferSize is </throws>
+		/// <summary> enabled but non-positive, or if it disables ramBufferSize
+		/// when maxBufferedDocs is already disabled
+		/// </summary>
+		public virtual void  SetRAMBufferSizeMB(double mb)
+		{
+			if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
+				throw new System.ArgumentException("ramBufferSize should be > 0.0 MB when enabled");
+			if (mb == DISABLE_AUTO_FLUSH && GetMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
+				throw new System.ArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
+			docWriter.SetRAMBufferSizeMB(mb);
+			if (infoStream != null)
+				Message("setRAMBufferSizeMB " + mb);
+		}
+		
+		/// <summary> Returns the value set by {@link #setRAMBufferSizeMB} if enabled.</summary>
+		public virtual double GetRAMBufferSizeMB()
+		{
+			return docWriter.GetRAMBufferSizeMB();
+		}
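
The two flush triggers interact: whichever fires first wins, and at least one must stay enabled. A small sketch using only APIs from this hunk (dir and analyzer as before):

    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer());
    writer.SetRAMBufferSizeMB(48.0);                           // flush at ~48 MB of buffered docs
    writer.SetMaxBufferedDocs(IndexWriter.DISABLE_AUTO_FLUSH); // fine: RAM trigger still enabled
    // Disabling the RAM trigger as well would throw System.ArgumentException:
    // writer.SetRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
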
+		
+		/// <summary> <p>Determines the minimal number of delete terms required before the buffered
+		/// in-memory delete terms are applied and flushed. If there are documents
+		/// buffered in memory at the time, they are merged and a new segment is
+		/// created.</p>
+		/// <p>Disabled by default (writer flushes by RAM usage).</p>
+		/// 
+		/// </summary>
+		/// <throws>  IllegalArgumentException if maxBufferedDeleteTerms </throws>
+		/// <summary> is enabled but smaller than 1
 		/// </summary>
+		/// <seealso cref="setRAMBufferSizeMB">
+		/// </seealso>
 		public virtual void  SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
 		{
-			if (maxBufferedDeleteTerms < 1)
-				throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1");
-			this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
+			EnsureOpen();
+			if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1)
+				throw new System.ArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
+			docWriter.SetMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
+			if (infoStream != null)
+				Message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
 		}
 		
-		/// <seealso cref="#setMaxBufferedDeleteTerms">
+		/// <summary> Returns the number of buffered deleted terms that will
+		/// trigger a flush if enabled.
+		/// </summary>
+		/// <seealso cref="setMaxBufferedDeleteTerms">
 		/// </seealso>
 		public virtual int GetMaxBufferedDeleteTerms()
 		{
-			return maxBufferedDeleteTerms;
+			EnsureOpen();
+			return docWriter.GetMaxBufferedDeleteTerms();
 		}
 		
-		/// <summary> Determines how often segment indices are merged by addDocument(). With
+		/// <summary>Determines how often segment indices are merged by addDocument().  With
 		/// smaller values, less RAM is used while indexing, and searches on
-		/// unoptimized indices are faster, but indexing speed is slower. With larger
-		/// values, more RAM is used during indexing, and while searches on
-		/// unoptimized indices are slower, indexing is faster. Thus larger values (>
-		/// 10) are best for batch index creation, and smaller values (< 10) for
-		/// indices that are interactively maintained.
+		/// unoptimized indices are faster, but indexing speed is slower.  With larger
+		/// values, more RAM is used during indexing, and while searches on unoptimized
+		/// indices are slower, indexing is faster.  Thus larger values (> 10) are best
+		/// for batch index creation, and smaller values (< 10) for indices that are
+		/// interactively maintained.
+		/// 
+		/// <p>Note that this method is a convenience method: it
+		/// just calls mergePolicy.setMergeFactor as long as
+		/// mergePolicy is an instance of {@link LogMergePolicy}.
+		/// Otherwise an IllegalArgumentException is thrown.</p>
 		/// 
-		/// <p>
-		/// This must never be less than 2. The default value is 10.
+		/// <p>This must never be less than 2.  The default value is 10.</p>
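+		/// 
+		/// <p>For example (a hypothetical usage sketch; the values are illustrative):</p>
+		/// <pre>
+		/// writer.SetMergeFactor(30); // batch indexing: faster indexing, slower searches on the unoptimized index
+		/// writer.SetMergeFactor(5);  // interactive indexing: slower indexing, faster searches
+		/// </pre>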
 		/// </summary>
 		public virtual void  SetMergeFactor(int mergeFactor)
 		{
-			if (mergeFactor < 2)
-				throw new System.ArgumentException("mergeFactor cannot be less than 2");
-			this.mergeFactor = mergeFactor;
+			GetLogMergePolicy().SetMergeFactor(mergeFactor);
 		}
 		
-		/// <seealso cref="#setMergeFactor">
+		/// <summary> <p>Returns the number of segments that are merged at
+		/// once and also controls the total number of segments
+		/// allowed to accumulate in the index.</p>
+		/// 
+		/// <p>Note that this method is a convenience method: it
+		/// just calls mergePolicy.getMergeFactor as long as
+		/// mergePolicy is an instance of {@link LogMergePolicy}.
+		/// Otherwise an IllegalArgumentException is thrown.</p>
+		/// 
+		/// </summary>
+		/// <seealso cref="setMergeFactor">
 		/// </seealso>
 		public virtual int GetMergeFactor()
 		{
-			return mergeFactor;
+			return GetLogMergePolicy().GetMergeFactor();
+		}
+		
+		/// <summary>If non-null, this will be the default infoStream used
+		/// by a newly instantiated IndexWriter.
+		/// </summary>
+		/// <seealso cref="setInfoStream">
+		/// </seealso>
+		public static void  SetDefaultInfoStream(System.IO.TextWriter infoStream)
+		{
+			IndexWriter.defaultInfoStream = infoStream;
+		}
+		
+		/// <summary> Returns the current default infoStream for newly
+		/// instantiated IndexWriters.
+		/// </summary>
+		/// <seealso cref="setDefaultInfoStream">
+		/// </seealso>
+		public static System.IO.TextWriter GetDefaultInfoStream()
+		{
+			return IndexWriter.defaultInfoStream;
 		}
 		
-		/// <summary> If non-null, information about merges and a message when maxFieldLength
-		/// is reached will be printed to this.
+		/// <summary>If non-null, information about merges, deletes and a
+		/// message when maxFieldLength is reached will be printed
+		/// to this.
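+		/// <p>For example (a hypothetical usage sketch), to echo diagnostics to the console:</p>
+		/// <pre>
+		/// writer.SetInfoStream(System.Console.Out);
+		/// </pre>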
 		/// </summary>
 		public virtual void  SetInfoStream(System.IO.TextWriter infoStream)
 		{
+			EnsureOpen();
 			this.infoStream = infoStream;
+			SetMessageID();
+			docWriter.SetInfoStream(infoStream);
+			deleter.SetInfoStream(infoStream);
+			if (infoStream != null)
+				MessageState();
 		}
 		
-		/// <seealso cref="#setInfoStream">
+		private void  MessageState()
+		{
+			Message("setInfoStream: dir=" + directory + " autoCommit=" + autoCommit + " mergePolicy=" + mergePolicy + " mergeScheduler=" + mergeScheduler + " ramBufferSizeMB=" + docWriter.GetRAMBufferSizeMB() + " maxBuffereDocs=" + docWriter.GetMaxBufferedDocs() + " maxBuffereDeleteTerms=" + docWriter.GetMaxBufferedDeleteTerms() + " maxFieldLength=" + maxFieldLength + " index=" + SegString());
+		}
+		
+		/// <summary> Returns the current infoStream in use by this writer.</summary>
+		/// <seealso cref="setInfoStream">
 		/// </seealso>
 		public virtual System.IO.TextWriter GetInfoStream()
 		{
+			EnsureOpen();
 			return infoStream;
 		}
 		
-		/// <summary> Sets the maximum time to wait for a write lock (in milliseconds) for this
-		/// instance of IndexWriter.
-		/// 
-		/// </summary>
 		/// <seealso cref="">
 		/// </seealso>
-		/// <seealso cref="to change the default value for all">
-		/// instances of IndexWriter.
+		/// <seealso cref="setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.">
 		/// </seealso>
 		public virtual void  SetWriteLockTimeout(long writeLockTimeout)
 		{
+			EnsureOpen();
 			this.writeLockTimeout = writeLockTimeout;
 		}
 		
-		/// <seealso cref="#setWriteLockTimeout">
+		/// <summary> Returns the timeout allowed when acquiring the write lock, in milliseconds.</summary>
+		/// <seealso cref="setWriteLockTimeout">
 		/// </seealso>
 		public virtual long GetWriteLockTimeout()
 		{
+			EnsureOpen();
 			return writeLockTimeout;
 		}
 		
-		/// <summary> Sets the default (for any instance of IndexWriter) maximum time to wait
-		/// for a write lock (in milliseconds).
+		/// <summary> Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
+		/// milliseconds).
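+		/// <p>For example (a hypothetical usage sketch; 2000 ms is arbitrary):</p>
+		/// <pre>
+		/// IndexWriter.SetDefaultWriteLockTimeout(2000); // newly created writers wait up to 2 seconds
+		/// </pre>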
 		/// </summary>
 		public static void  SetDefaultWriteLockTimeout(long writeLockTimeout)
 		{
 			IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
 		}
 		
-		/// <seealso cref="#setDefaultWriteLockTimeout">
+		/// <summary> Returns default write lock timeout for newly
+		/// instantiated IndexWriters.
+		/// </summary>
+		/// <seealso cref="setDefaultWriteLockTimeout">
 		/// </seealso>
 		public static long GetDefaultWriteLockTimeout()
 		{
 			return IndexWriter.WRITE_LOCK_TIMEOUT;
 		}
 		
-		/// <summary> Flushes all changes to an index and closes all associated files.
-		/// 
-		/// <p>
-		/// If an Exception is hit during close, eg due to disk full or some other
-		/// reason, then both the on-disk index and the internal state of the
-		/// IndexWriter instance will be consistent. However, the close will not be
-		/// complete even though part of it (flushing buffered documents) may have
-		/// succeeded, so the write lock will still be held.
-		/// </p>
+		/// <summary> Flushes all changes to an index and closes all
+		/// associated files.
 		/// 
-		/// <p>
-		/// If you can correct the underlying cause (eg free up some disk space) then
-		/// you can call close() again. Failing that, if you want to force the write
-		/// lock to be released (dangerous, because you may then lose buffered docs
-		/// in the IndexWriter instance) then you can do something like this:
-		/// </p>
+		/// <p> If an Exception is hit during close, eg due to disk
+		/// full or some other reason, then both the on-disk index
+		/// and the internal state of the IndexWriter instance will
+		/// be consistent.  However, the close will not be complete
+		/// even though part of it (flushing buffered documents)
+		/// may have succeeded, so the write lock will still be
+		/// held.</p>
+		/// 
+		/// <p> If you can correct the underlying cause (eg free up
+		/// some disk space) then you can call close() again.
+		/// Failing that, if you want to force the write lock to be
+		/// released (dangerous, because you may then lose buffered
+		/// docs in the IndexWriter instance) then you can do
+		/// something like this:</p>
 		/// 
 		/// <pre>
 		/// try {
@@ -682,241 +1211,478 @@
 		/// }
 		/// </pre>
 		/// 
-		/// after which, you must be certain not to use the writer instance anymore.
-		/// </p>
+		/// after which, you must be certain not to use the writer
+		/// instance anymore.</p>
 		/// </summary>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public virtual void  Close()
 		{
-			lock (this)
-			{
-				FlushRamSegments();
-				ramDirectory.Close();
-				if (writeLock != null)
-				{
-					writeLock.Release(); // release write lock
-					writeLock = null;
-				}
-				if (closeDir)
-					directory.Close();
-			}
+			Close(true);
 		}
 		
-		/// <summary>Release the write lock, if needed. </summary>
-		~IndexWriter()
+		/// <summary> Closes the index with or without waiting for currently
+		/// running merges to finish.  This is only meaningful when
+		/// using a MergeScheduler that runs merges in background
+		/// threads.
+		/// </summary>
+		/// <param name="waitForMerges">if true, this call will block
+		/// until all merges complete; else, it will ask all
+		/// running merges to abort, wait until those merges have
+		/// finished (which should be at most a few seconds), and
+		/// then return.
+		/// </param>
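+		/// <p>For example (a hypothetical usage sketch), to shut down promptly without
+		/// waiting for background merges to complete:</p>
+		/// <pre>
+		/// writer.Close(false); // ask running merges to abort, then close
+		/// </pre>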
+		public virtual void  Close(bool waitForMerges)
 		{
-			try
+			bool doClose;
+			lock (this)
 			{
-				if (writeLock != null)
+				// Ensure that only one thread actually gets to do the closing:
+				if (!closing)
 				{
-					writeLock.Release(); // release write lock
-					writeLock = null;
+					doClose = true;
+					closing = true;
 				}
+				else
+					doClose = false;
 			}
-			finally
-			{
-			}
-		}
-		
-		/// <summary>Returns the Directory used by this index. </summary>
-		public virtual Directory GetDirectory()
-		{
-			return directory;
-		}
-		
-		/// <summary>Returns the analyzer used by this index. </summary>
-		public virtual Analyzer GetAnalyzer()
-		{
-			return analyzer;
+			if (doClose)
+				CloseInternal(waitForMerges);
+			// Another thread beat us to it (is actually doing the
+			// close), so we will block until that other thread
+			// has finished closing
+			else
+				WaitForClose();
 		}
 		
-		/// <summary>Returns the number of documents currently in this index. </summary>
-		public virtual int DocCount()
+		private void  WaitForClose()
 		{
 			lock (this)
 			{
-				int count = ramSegmentInfos.Count;
-				for (int i = 0; i < segmentInfos.Count; i++)
+				while (!closed && closing)
 				{
-					SegmentInfo si = segmentInfos.Info(i);
-					count += si.docCount;
+					try
+					{
+						System.Threading.Monitor.Wait(this);
+					}
+					catch (System.Threading.ThreadInterruptedException ie)
+					{
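+						// Ignore the interrupt and re-check the loop
+						// condition: we must keep waiting until the
+						// closing thread finishes (or abandons) the close.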
+					}
 				}
-				return count;
 			}
 		}
 		
-		/// <summary> The maximum number of terms that will be indexed for a single field in a
-		/// document. This limits the amount of memory required for indexing, so that
-		/// collections with very large files will not crash the indexing process by
-		/// running out of memory.<p/> Note that this effectively truncates large
-		/// documents, excluding from the index terms that occur further in the
-		/// document. If you know your source documents are large, be sure to set
-		/// this value high enough to accomodate the expected size. If you set it to
-		/// Integer.MAX_VALUE, then the only limit is your memory, but you should
-		/// anticipate an OutOfMemoryError.<p/> By default, no more than 10,000
-		/// terms will be indexed for a field.
-		/// 
-		/// </summary>
-		private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
+		private void  CloseInternal(bool waitForMerges)
+		{
+			try
+			{
+				if (infoStream != null)
+					Message("now flush at close");
+				
+				docWriter.Close();
+				
+				// Only allow a new merge to be triggered if we are
+				// going to wait for merges:
+				Flush(waitForMerges, true);
+				
+				mergePolicy.Close();
+				
+				FinishMerges(waitForMerges);
+				
+				mergeScheduler.Close();
+				
+				lock (this)
+				{
+					if (commitPending)
+					{
+						bool success = false;
+						try
+						{
+							segmentInfos.Write(directory); // now commit changes
+							success = true;
+						}
+						finally
+						{
+							if (!success)
+							{
+								if (infoStream != null)
+									Message("hit exception committing segments file during close");
+								DeletePartialSegmentsFile();
+							}
+						}
+						if (infoStream != null)
+							Message("close: wrote segments file \"" + segmentInfos.GetCurrentSegmentFileName() + "\"");
+						
+						deleter.Checkpoint(segmentInfos, true);
+						
+						commitPending = false;
+						rollbackSegmentInfos = null;
+					}
+					
+					if (infoStream != null)
+						Message("at close: " + SegString());
+					
+					docWriter = null;
+					
+					deleter.Close();
+				}
+				
+				if (closeDir)
+					directory.Close();
+				
+				if (writeLock != null)
+				{
+					writeLock.Release(); // release write lock
+					writeLock = null;
+				}
+				closed = true;
+			}
+			finally
+			{
+				lock (this)
+				{
+					if (!closed)
+						closing = false;
+					System.Threading.Monitor.PulseAll(this);
+				}
+			}
+		}
 		
-		/// <summary> Adds a document to this index. If the document contains more than
-		/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder
-		/// are discarded.
-		/// 
-		/// <p>
-		/// Note that if an Exception is hit (for example disk full) then the index
-		/// will be consistent, but this document may not have been added.
-		/// Furthermore, it's possible the index will have one segment in
-		/// non-compound format even when using compound files (when a merge has
-		/// partially succeeded).
-		/// </p>
+		/// <summary>Tells the docWriter to close its currently open shared
+		/// doc stores (stored fields and term vectors files).
+		/// The return value specifies whether the new doc store files are compound or not.
+		/// </summary>
+		private bool FlushDocStores()
+		{
+			lock (this)
+			{
+				
+				System.Collections.IList files = docWriter.Files();
+				
+				bool useCompoundDocStore = false;
+				
+				if (files.Count > 0)
+				{
+					System.String docStoreSegment;
+					
+					bool success = false;
+					try
+					{
+						docStoreSegment = docWriter.CloseDocStore();
+						success = true;
+					}
+					finally
+					{
+						if (!success)
+						{
+							if (infoStream != null)
+								Message("hit exception closing doc store segment");
+							docWriter.Abort(null);
+						}
+					}
+					
+					useCompoundDocStore = mergePolicy.UseCompoundDocStore(segmentInfos);
+					
+					if (useCompoundDocStore && docStoreSegment != null)
+					{
+						// Now build compound doc store file
+						
+						success = false;
+						
+						int numSegments = segmentInfos.Count;
+						System.String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION;
+						
+						try
+						{
+							CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
+							int size = files.Count;
+							for (int i = 0; i < size; i++)
+								cfsWriter.AddFile((System.String) files[i]);
+							
+							// Perform the merge
+							cfsWriter.Close();
+							
+							for (int i = 0; i < numSegments; i++)
+							{
+								SegmentInfo si = segmentInfos.Info(i);
+								if (si.GetDocStoreOffset() != - 1 && si.GetDocStoreSegment().Equals(docStoreSegment))
+									si.SetDocStoreIsCompoundFile(true);
+							}
+							Checkpoint();
+							success = true;
+						}
+						finally
+						{
+							if (!success)
+							{
+								
+								if (infoStream != null)
+									Message("hit exception building compound file doc store for segment " + docStoreSegment);
+								
+								// Rollback to no compound file
+								for (int i = 0; i < numSegments; i++)
+								{
+									SegmentInfo si = segmentInfos.Info(i);
+									if (si.GetDocStoreOffset() != - 1 && si.GetDocStoreSegment().Equals(docStoreSegment))
+										si.SetDocStoreIsCompoundFile(false);
+								}
+								deleter.DeleteFile(compoundFileName);
+								DeletePartialSegmentsFile();
+							}
+						}
+						
+						deleter.Checkpoint(segmentInfos, false);
+					}
+				}
+				
+				return useCompoundDocStore;
+			}
+		}
+		
+		/// <summary>Release the write lock, if needed. </summary>
+		~IndexWriter()
+		{
+			try
+			{
+				if (writeLock != null)
+				{
+					writeLock.Release(); // release write lock
+					writeLock = null;
+				}
+			}
+			finally
+			{
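+				// The empty finally block is likely a port artifact: the
+				// Java finalizer called super.finalize() here.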
+			}
+		}
+		
+		/// <summary>Returns the Directory used by this index. </summary>
+		public virtual Directory GetDirectory()
+		{
+			EnsureOpen();
+			return directory;
+		}
+		
+		/// <summary>Returns the analyzer used by this index. </summary>
+		public virtual Analyzer GetAnalyzer()
+		{
+			EnsureOpen();
+			return analyzer;
+		}
+		
+		/// <summary>Returns the number of documents currently in this index. </summary>
+		public virtual int DocCount()
+		{
+			lock (this)
+			{
+				EnsureOpen();
+				int count = docWriter.GetNumDocsInRAM();
+				for (int i = 0; i < segmentInfos.Count; i++)
+				{
+					SegmentInfo si = segmentInfos.Info(i);
+					count += si.docCount;
+				}
+				return count;
+			}
+		}
+		
+		/// <summary> The maximum number of terms that will be indexed for a single field in a
+		/// document.  This limits the amount of memory required for indexing, so that
+		/// collections with very large files will not crash the indexing process by
+		/// running out of memory.<p/>
+		/// Note that this effectively truncates large documents, excluding from the
+		/// index terms that occur further in the document.  If you know your source
+		/// documents are large, be sure to set this value high enough to accommodate
+		/// the expected size.  If you set it to Int32.MaxValue, then the only limit
+		/// is your memory, but you should anticipate an OutOfMemoryException.<p/>
+		/// By default, no more than 10,000 terms will be indexed for a field.
 		/// 
-		/// <p>
-		/// This method periodically flushes pending documents to the Directory
-		/// (every {@link #setMaxBufferedDocs}), and also periodically merges
-		/// segments in the index (every {@link #setMergeFactor} flushes). When this
-		/// occurs, the method will take more time to run (possibly a long time if
-		/// the index is large), and will require free temporary space in the
-		/// Directory to do the merging.
-		/// </p>
+		/// </summary>
+		private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
+		
+		/// <summary> Adds a document to this index.  If the document contains more than
+		/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
+		/// discarded.
+		/// 
+		/// <p> Note that if an Exception is hit (for example disk full)
+		/// then the index will be consistent, but this document
+		/// may not have been added.  Furthermore, it's possible
+		/// the index will have one segment in non-compound format
+		/// even when using compound files (when a merge has
+		/// partially succeeded).</p>
+		/// 
+		/// <p> This method periodically flushes pending documents
+		/// to the Directory (every {@link #setMaxBufferedDocs}),
+		/// and also periodically merges segments in the index
+		/// (every {@link #setMergeFactor} flushes).  When this
+		/// occurs, the method will take more time to run (possibly
+		/// a long time if the index is large), and will require
+		/// free temporary space in the Directory to do the
+		/// merging.</p>
+		/// 
+		/// <p>The amount of free space required when a merge is triggered is
+		/// up to 1X the size of all segments being merged, when no
+		/// readers/searchers are open against the index, and up to 2X the
+		/// size of all segments being merged when readers/searchers are open
+		/// against the index (see {@link #Optimize()} for details). The
+		/// sequence of primitive merge operations performed is governed by
+		/// the merge policy.</p>
+		/// 
+		/// <p>Note that each term in the document can be no longer
+		/// than 16383 characters, otherwise an
+		/// IllegalArgumentException will be thrown.</p>
 		/// 
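+		/// <p>For example (a hypothetical usage sketch; the field names and flags are
+		/// illustrative, assuming <code>writer</code> is an open IndexWriter):</p>
+		/// <pre>
+		/// Document doc = new Document();
+		/// doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
+		/// doc.Add(new Field("body", "full text here", Field.Store.NO, Field.Index.TOKENIZED));
+		/// writer.AddDocument(doc);
+		/// </pre>
+		/// 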
-		/// <p>
-		/// The amount of free space required when a merge is triggered is up to 1X
-		/// the size of all segments being merged, when no readers/searchers are open
-		/// against the index, and up to 2X the size of all segments being merged
-		/// when readers/searchers are open against the index (see
-		/// {@link #Optimize()} for details). Most merges are small (merging the
-		/// smallest segments together), but whenever a full merge occurs (all
-		/// segments in the index, which is the worst case for temporary space usage)
-		/// then the maximum free disk space required is the same as
-		/// {@link #optimize}.
-		/// </p>
 		/// </summary>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public virtual void  AddDocument(Document doc)
 		{
 			AddDocument(doc, analyzer);
 		}
 		
 		/// <summary> Adds a document to this index, using the provided analyzer instead of the
-		/// value of {@link #GetAnalyzer()}. If the document contains more than
-		/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder
-		/// are discarded.
+		/// value of {@link #GetAnalyzer()}.  If the document contains more than
+		/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder are
+		/// discarded.
+		/// 
+		/// <p>See {@link #AddDocument(Document)} for details on
+		/// index and IndexWriter state after an Exception, and
+		/// flushing/merging temporary free space requirements.</p>
 		/// 
-		/// <p>
-		/// See {@link #AddDocument(Document)} for details on index and IndexWriter
-		/// state after an Exception, and flushing/merging temporary free space
-		/// requirements.
-		/// </p>
 		/// </summary>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public virtual void  AddDocument(Document doc, Analyzer analyzer)
 		{
-			SegmentInfo newSegmentInfo = BuildSingleDocSegment(doc, analyzer);
-			lock (this)
+			EnsureOpen();
+			bool doFlush = false;
+			bool success = false;
+			try
 			{
-				ramSegmentInfos.Add(newSegmentInfo);
-				MaybeFlushRamSegments();
+				doFlush = docWriter.AddDocument(doc, analyzer);
+				success = true;
 			}
+			finally
+			{
+				if (!success)
+				{
+					
+					if (infoStream != null)
+						Message("hit exception adding document");
+					
+					lock (this)
+					{
+						// If docWriter has some aborted files that were
+						// never incref'd, then we clean them up here
+						if (docWriter != null)
+						{
+							System.Collections.IList files = docWriter.AbortedFiles();
+							if (files != null)
+								deleter.DeleteNewFiles(files);
+						}
+					}
+				}
+			}
+			if (doFlush)
+				Flush(true, false);
 		}
 		
-		internal virtual SegmentInfo BuildSingleDocSegment(Document doc, Analyzer analyzer)
-		{
-			DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
-			dw.SetInfoStream(infoStream);
-			System.String segmentName = NewRamSegmentName();
-			dw.AddDocument(segmentName, doc);
-			return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
-		}
-		
-		/// <summary> Deletes the document(s) containing <code>term</code>.
-		/// 
-		/// </summary>
-		/// <param name="">term
-		/// the term to identify the documents to be deleted
+		/// <summary> Deletes the document(s) containing <code>term</code>.</summary>
+		/// <param name="term">the term to identify the documents to be deleted
 		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
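+		/// <p>For example (a hypothetical usage sketch):</p>
+		/// <pre>
+		/// writer.DeleteDocuments(new Term("id", "42")); // deletes every document whose "id" field is "42"
+		/// </pre>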
 		public virtual void  DeleteDocuments(Term term)
 		{
-			lock (this)
-			{
-				BufferDeleteTerm(term);
-				MaybeFlushRamSegments();
-			}
+			EnsureOpen();
+			bool doFlush = docWriter.BufferDeleteTerm(term);
+			if (doFlush)
+				Flush(true, false);
 		}
 		
-		/// <summary> Deletes the document(s) containing any of the terms. All deletes are
-		/// flushed at the same time.
-		/// 
+		/// <summary> Deletes the document(s) containing any of the
+		/// terms. All deletes are flushed at the same time.
 		/// </summary>
-		/// <param name="">terms
-		/// array of terms to identify the documents to be deleted
+		/// <param name="terms">array of terms to identify the documents
+		/// to be deleted
 		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public virtual void  DeleteDocuments(Term[] terms)
 		{
-			lock (this)
-			{
-				for (int i = 0; i < terms.Length; i++)
-				{
-					BufferDeleteTerm(terms[i]);
-				}
-				MaybeFlushRamSegments();
-			}
+			EnsureOpen();
+			bool doFlush = docWriter.BufferDeleteTerms(terms);
+			if (doFlush)
+				Flush(true, false);
 		}
 		
-		/// <summary> Updates a document by first deleting the document(s) containing
-		/// <code>term</code> and then adding the new document. The delete and then
-		/// add are atomic as seen by a reader on the same index (flush may happen
-		/// only after the add).
-		/// 
+		/// <summary> Updates a document by first deleting the document(s)
+		/// containing <code>term</code> and then adding the new
+		/// document.  The delete and then add are atomic as seen
+		/// by a reader on the same index (flush may happen only after
+		/// the add).
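+		/// <p>For example (a hypothetical usage sketch, where <code>newDoc</code> is a
+		/// Document built by the caller):</p>
+		/// <pre>
+		/// writer.UpdateDocument(new Term("id", "42"), newDoc); // atomic delete-then-add
+		/// </pre>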
 		/// </summary>
-		/// <param name="">term
-		/// the term to identify the document(s) to be deleted
+		/// <param name="term">the term to identify the document(s) to be
+		/// deleted
 		/// </param>
-		/// <param name="">doc
-		/// the document to be added
+		/// <param name="doc">the document to be added
 		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public virtual void  UpdateDocument(Term term, Document doc)
 		{
+			EnsureOpen();
 			UpdateDocument(term, doc, GetAnalyzer());
 		}
 		
-		/// <summary> Updates a document by first deleting the document(s) containing
-		/// <code>term</code> and then adding the new document. The delete and then
-		/// add are atomic as seen by a reader on the same index (flush may happen
-		/// only after the add).
-		/// 
+		/// <summary> Updates a document by first deleting the document(s)
+		/// containing <code>term</code> and then adding the new
+		/// document.  The delete and then add are atomic as seen
+		/// by a reader on the same index (flush may happen only after
+		/// the add).
 		/// </summary>
-		/// <param name="">term
-		/// the term to identify the document(s) to be deleted
+		/// <param name="term">the term to identify the document(s) to be
+		/// deleted
 		/// </param>
-		/// <param name="">doc
-		/// the document to be added
+		/// <param name="doc">the document to be added
 		/// </param>
-		/// <param name="">analyzer
-		/// the analyzer to use when analyzing the document
+		/// <param name="analyzer">the analyzer to use when analyzing the document
 		/// </param>
+		/// <throws>  CorruptIndexException if the index is corrupt </throws>
+		/// <throws>  IOException if there is a low-level IO error </throws>
 		public virtual void  UpdateDocument(Term term, Document doc, Analyzer analyzer)
 		{
-			SegmentInfo newSegmentInfo = BuildSingleDocSegment(doc, analyzer);
-			lock (this)
+			EnsureOpen();
+			bool doFlush = false;
+			bool success = false;
+			try
 			{
-				BufferDeleteTerm(term);
-				ramSegmentInfos.Add(newSegmentInfo);
-				MaybeFlushRamSegments();
+				doFlush = docWriter.UpdateDocument(term, doc, analyzer);
+				success = true;
 			}
-		}
-		
-		internal System.String NewRamSegmentName()
-		{
-			lock (this)
+			finally
 			{
-#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
-                return "_ram_" + Lucene.Net.Documents.NumberTools.ToString(ramSegmentInfos.counter++);
-#else
-				return "_ram_" + System.Convert.ToString(ramSegmentInfos.counter++, 16);
-#endif
+				if (!success)
+				{
+					
+					if (infoStream != null)
+						Message("hit exception updating document");
+					
+					lock (this)
+					{
+						// If docWriter has some aborted files that were
+						// never incref'd, then we clean them up here
+						System.Collections.IList files = docWriter.AbortedFiles();
+						if (files != null)
+							deleter.DeleteNewFiles(files);
+					}
+				}
 			}
+			if (doFlush)
+				Flush(true, false);
 		}
 		
 		// for test purposes
-		public int GetSegmentCount()
+		internal int GetSegmentCount()
 		{
 			lock (this)
 			{
@@ -925,16 +1691,16 @@
 		}
 		
 		// for test purposes
-		public int GetRamSegmentCount()
+		internal int GetNumBufferedDocuments()
 		{
 			lock (this)
 			{
-				return ramSegmentInfos.Count;
+				return docWriter.GetNumDocsInRAM();
 			}
 		}
 		
 		// for test purposes
-		public int GetDocCount(int i)
+		internal int GetDocCount(int i)
 		{
 			lock (this)
 			{
@@ -951,280 +1717,741 @@
 		
 		internal System.String NewSegmentName()
 		{
-			lock (this)
+			// Cannot synchronize on IndexWriter because that causes
+			// deadlock
+			lock (segmentInfos)
 			{
-#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
-                return "_" + Lucene.Net.Documents.NumberTools.ToString(segmentInfos.counter++);
-#else
-				return "_" + System.Convert.ToString(segmentInfos.counter++, 16);
-#endif
+				// Important to set commitPending so that the
+				// segmentInfos is written on close.  Otherwise we
+				// could close, re-open and re-return the same segment
+				// name that was previously returned which can cause
+				// problems at least with ConcurrentMergeScheduler.
+				commitPending = true;
+				return "_" + SupportClass.Number.ToString(segmentInfos.counter++);
 			}
 		}
 		
-		/// <summary> Determines how often segment indices are merged by addDocument(). With
-		/// smaller values, less RAM is used while indexing, and searches on
-		/// unoptimized indices are faster, but indexing speed is slower. With larger
-		/// values, more RAM is used during indexing, and while searches on
-		/// unoptimized indices are slower, indexing is faster. Thus larger values (>
-		/// 10) are best for batch index creation, and smaller values (< 10) for
-		/// indices that are interactively maintained.
-		/// 
-		/// <p>
-		/// This must never be less than 2. The default value is
-		/// {@link #DEFAULT_MERGE_FACTOR}.
-		/// 
-		/// </summary>
-		private int mergeFactor = DEFAULT_MERGE_FACTOR;
+		/// <summary>If non-null, information about merges will be printed to this.</summary>
+		private System.IO.TextWriter infoStream = null;
+		private static System.IO.TextWriter defaultInfoStream = null;
 		
-		/// <summary> Determines the minimal number of documents required before the buffered
-		/// in-memory documents are merging and a new Segment is created. Since
-		/// Documents are merged in a {@link Lucene.Net.Store.RAMDirectory},
-		/// large value gives faster indexing. At the same time, mergeFactor limits
-		/// the number of files open in a FSDirectory.
-		/// 
-		/// <p>
-		/// The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
+		/// <summary> Requests an "optimize" operation on an index, priming the index
+		/// for the fastest available search. Traditionally this has meant
+		/// merging all segments into a single segment as is done in the

[... 2934 lines stripped ...]

