lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [10/11] git commit: Skeleton porting of Lucene.Net.Misc
Date Mon, 15 Sep 2014 22:47:08 GMT
Skeleton porting of Lucene.Net.Misc


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/674f0cb9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/674f0cb9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/674f0cb9

Branch: refs/heads/master
Commit: 674f0cb97dfae0961d5f9622f49f17d891be08bc
Parents: 882f487
Author: Itamar Syn-Hershko <itamar@code972.com>
Authored: Tue Sep 16 01:39:08 2014 +0300
Committer: Itamar Syn-Hershko <itamar@code972.com>
Committed: Tue Sep 16 01:39:08 2014 +0300

----------------------------------------------------------------------
 src/Lucene.Net.Misc/ByteBuffer.cs               |  325 ++++++
 src/Lucene.Net.Misc/Document/LazyDocument.cs    |  226 ++++
 .../Index/CompoundFileExtractor.cs              |  165 +++
 src/Lucene.Net.Misc/Index/IndexSplitter.cs      |  200 ++++
 .../Index/MultiPassIndexSplitter.cs             |  329 ++++++
 src/Lucene.Net.Misc/Index/PKIndexSplitter.cs    |  220 ++++
 .../Index/Sorter/BlockJoinComparatorSource.cs   |  321 ++++++
 .../Sorter/EarlyTerminatingSortingCollector.cs  |  147 +++
 src/Lucene.Net.Misc/Index/Sorter/Sorter.cs      |  404 +++++++
 .../Index/Sorter/SortingAtomicReader.cs         | 1081 ++++++++++++++++++
 .../Index/Sorter/SortingMergePolicy.cs          |  309 +++++
 src/Lucene.Net.Misc/Lucene.Net.Misc.csproj      |   73 ++
 src/Lucene.Net.Misc/Misc/GetTermInfo.cs         |   74 ++
 src/Lucene.Net.Misc/Misc/HighFreqTerms.cs       |  230 ++++
 src/Lucene.Net.Misc/Misc/IndexMergeTool.cs      |   66 ++
 src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs |  238 ++++
 src/Lucene.Net.Misc/Misc/TermStats.cs           |   55 +
 src/Lucene.Net.Misc/Properties/AssemblyInfo.cs  |   35 +
 src/Lucene.Net.Misc/Store/NativePosixUtil.cs    |   64 ++
 .../Store/NativeUnixDirectory.cs                |  527 +++++++++
 src/Lucene.Net.Misc/Store/WindowsDirectory.cs   |  181 +++
 src/Lucene.Net.Misc/Util/Fst/ListOfOutputs.cs   |  246 ++++
 .../Util/Fst/UpToTwoPositiveIntOutputs.cs       |  328 ++++++
 23 files changed, 5844 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/ByteBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/ByteBuffer.cs b/src/Lucene.Net.Misc/ByteBuffer.cs
new file mode 100644
index 0000000..204f7de
--- /dev/null
+++ b/src/Lucene.Net.Misc/ByteBuffer.cs
@@ -0,0 +1,325 @@
+//-------------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2014 Tangible Software Solutions Inc.
+//	This class can be used by anyone provided that the copyright notice remains intact.
+//
+//	This class is used to simulate the java.nio.ByteBuffer class in C#.
+//
+//	Instances are only obtainable via the static 'allocate' method.
+//
+//	Some methods are not available:
+//		Methods which create shared views of the buffer, such as: array,
+//		asCharBuffer, asDoubleBuffer, asFloatBuffer, asIntBuffer, asLongBuffer,
+//		asReadOnlyBuffer, asShortBuffer, duplicate, slice, & wrap.
+//
+//		Methods mark, reset, isReadOnly, order, compareTo, arrayOffset, & limit (setter).
+//-------------------------------------------------------------------------------------------
+/// <summary>
+/// Simulates a subset of java.nio.ByteBuffer on top of a <see cref="System.IO.MemoryStream"/>.
+/// <para>
+/// Behaviour that differs from the Java class: multi-byte values go through
+/// <see cref="System.IO.BinaryReader"/> / <see cref="System.IO.BinaryWriter"/>
+/// (little-endian, chars encoded with the default UTF-8 encoding), the backing
+/// stream is expandable so writing past <c>capacity()</c> grows it, and the
+/// single-byte / byte-array reads do not throw when the data runs out.
+/// </para>
+/// </summary>
+public class ByteBuffer
+{
+	//'Mode' is only used to determine whether to return data length or capacity from the 'limit' method:
+	private enum Mode
+	{
+		Read,
+		Write
+	}
+	private Mode mode;
+
+	// 'reader' and 'writer' must always wrap the current 'stream'; see compact().
+	private System.IO.MemoryStream stream;
+	private System.IO.BinaryReader reader;
+	private System.IO.BinaryWriter writer;
+
+	// Instances are created through allocate()/allocateDirect() only.
+	private ByteBuffer()
+	{
+		stream = new System.IO.MemoryStream();
+		reader = new System.IO.BinaryReader(stream);
+		writer = new System.IO.BinaryWriter(stream);
+	}
+
+	// No finalizer: the stream, reader and writer hold managed memory only, so
+	// there is nothing to release and a finalizer would merely delay collection.
+
+	/// <summary>Creates an empty buffer in write mode with the given initial capacity.</summary>
+	/// <param name="capacity">Initial capacity, in bytes, of the backing stream.</param>
+	public static ByteBuffer allocate(int capacity)
+	{
+		ByteBuffer buffer = new ByteBuffer();
+		buffer.stream.Capacity = capacity;
+		buffer.mode = Mode.Write;
+		return buffer;
+	}
+
+	/// <summary>Same as <see cref="allocate"/>.</summary>
+	public static ByteBuffer allocateDirect(int capacity)
+	{
+		//this wrapper class makes no distinction between 'allocate' & 'allocateDirect'
+		return allocate(capacity);
+	}
+
+	/// <summary>Returns the current capacity of the backing stream.</summary>
+	public int capacity()
+	{
+		return stream.Capacity;
+	}
+
+	/// <summary>
+	/// Switches to read mode: the data length becomes the current position and
+	/// the position is reset to zero.
+	/// </summary>
+	public ByteBuffer flip()
+	{
+		mode = Mode.Read;
+		stream.SetLength(stream.Position);
+		stream.Position = 0;
+		return this;
+	}
+
+	/// <summary>Switches to write mode and resets the position to zero; data is not erased.</summary>
+	public ByteBuffer clear()
+	{
+		mode = Mode.Write;
+		stream.Position = 0;
+		return this;
+	}
+
+	/// <summary>
+	/// Moves the bytes between the current position and the end of the data to
+	/// the start of a fresh stream and switches to write mode, positioned just
+	/// after the retained bytes.
+	/// </summary>
+	public ByteBuffer compact()
+	{
+		mode = Mode.Write;
+		System.IO.MemoryStream compacted = new System.IO.MemoryStream(stream.Capacity);
+		stream.CopyTo(compacted);
+		stream = compacted;
+		// Rebind the helpers; otherwise typed reads/writes would keep
+		// operating on the stream that was just replaced.
+		reader = new System.IO.BinaryReader(stream);
+		writer = new System.IO.BinaryWriter(stream);
+		return this;
+	}
+
+	/// <summary>Resets the position to zero without changing the mode.</summary>
+	public ByteBuffer rewind()
+	{
+		stream.Position = 0;
+		return this;
+	}
+
+	/// <summary>Returns the capacity in write mode, or the data length in read mode.</summary>
+	public long limit()
+	{
+		if (mode == Mode.Write)
+		{
+			return stream.Capacity;
+		}
+		return stream.Length;
+	}
+
+	/// <summary>Returns the current position.</summary>
+	public long position()
+	{
+		return stream.Position;
+	}
+
+	/// <summary>Sets the current position.</summary>
+	public ByteBuffer position(long newPosition)
+	{
+		stream.Position = newPosition;
+		return this;
+	}
+
+	/// <summary>Returns <c>limit() - position()</c>.</summary>
+	public long remaining()
+	{
+		return this.limit() - this.position();
+	}
+
+	/// <summary>Returns true when <see cref="remaining"/> is positive.</summary>
+	public bool hasRemaining()
+	{
+		return this.remaining() > 0;
+	}
+
+	/// <summary>Reads one byte and advances; returns -1 at the end of the data.</summary>
+	public int get()
+	{
+		return stream.ReadByte();
+	}
+
+	/// <summary>
+	/// Reads up to <paramref name="length"/> bytes into <paramref name="dst"/>.
+	/// Fewer bytes are copied, without any error, when less data is available.
+	/// </summary>
+	public ByteBuffer get(byte[] dst, int offset, int length)
+	{
+		stream.Read(dst, offset, length);
+		return this;
+	}
+
+	/// <summary>Writes one byte at the current position and advances.</summary>
+	public ByteBuffer put(byte b)
+	{
+		stream.WriteByte(b);
+		return this;
+	}
+
+	/// <summary>Writes <paramref name="length"/> bytes of <paramref name="src"/> starting at <paramref name="offset"/>.</summary>
+	public ByteBuffer put(byte[] src, int offset, int length)
+	{
+		stream.Write(src, offset, length);
+		return this;
+	}
+
+	/// <summary>
+	/// Compares the remaining bytes of both buffers. The positions of both
+	/// buffers are restored before returning.
+	/// </summary>
+	public bool Equals(ByteBuffer other)
+	{
+		if (other == null)
+		{
+			return false;
+		}
+		// Comparing a buffer with itself would read alternating bytes from the
+		// one shared stream, so answer directly.
+		if (object.ReferenceEquals(this, other))
+		{
+			return true;
+		}
+		if (this.remaining() != other.remaining())
+		{
+			return false;
+		}
+
+		long thisOriginalPosition = this.position();
+		long otherOriginalPosition = other.position();
+
+		bool differenceFound = false;
+		while (stream.Position < stream.Length)
+		{
+			if (this.get() != other.get())
+			{
+				differenceFound = true;
+				break;
+			}
+		}
+
+		this.position(thisOriginalPosition);
+		other.position(otherOriginalPosition);
+
+		return !differenceFound;
+	}
+
+	//methods using the internal BinaryReader:
+	//(the relative forms advance the position; the indexed forms read at
+	// 'index' and then restore the previous position)
+	public char getChar()
+	{
+		return reader.ReadChar();
+	}
+	public char getChar(int index)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		char value = reader.ReadChar();
+		stream.Position = originalPosition;
+		return value;
+	}
+	public double getDouble()
+	{
+		return reader.ReadDouble();
+	}
+	public double getDouble(int index)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		double value = reader.ReadDouble();
+		stream.Position = originalPosition;
+		return value;
+	}
+	public float getFloat()
+	{
+		return reader.ReadSingle();
+	}
+	public float getFloat(int index)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		float value = reader.ReadSingle();
+		stream.Position = originalPosition;
+		return value;
+	}
+	public int getInt()
+	{
+		return reader.ReadInt32();
+	}
+	public int getInt(int index)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		int value = reader.ReadInt32();
+		stream.Position = originalPosition;
+		return value;
+	}
+	public long getLong()
+	{
+		return reader.ReadInt64();
+	}
+	public long getLong(int index)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		long value = reader.ReadInt64();
+		stream.Position = originalPosition;
+		return value;
+	}
+	public short getShort()
+	{
+		return reader.ReadInt16();
+	}
+	public short getShort(int index)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		short value = reader.ReadInt16();
+		stream.Position = originalPosition;
+		return value;
+	}
+
+	//methods using the internal BinaryWriter:
+	//(the relative forms advance the position; the indexed forms write at
+	// 'index' and then restore the previous position)
+	public ByteBuffer putChar(char value)
+	{
+		writer.Write(value);
+		return this;
+	}
+	public ByteBuffer putChar(int index, char value)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		writer.Write(value);
+		stream.Position = originalPosition;
+		return this;
+	}
+	public ByteBuffer putDouble(double value)
+	{
+		writer.Write(value);
+		return this;
+	}
+	public ByteBuffer putDouble(int index, double value)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		writer.Write(value);
+		stream.Position = originalPosition;
+		return this;
+	}
+	public ByteBuffer putFloat(float value)
+	{
+		writer.Write(value);
+		return this;
+	}
+	public ByteBuffer putFloat(int index, float value)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		writer.Write(value);
+		stream.Position = originalPosition;
+		return this;
+	}
+	public ByteBuffer putInt(int value)
+	{
+		writer.Write(value);
+		return this;
+	}
+	public ByteBuffer putInt(int index, int value)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		writer.Write(value);
+		stream.Position = originalPosition;
+		return this;
+	}
+	public ByteBuffer putLong(long value)
+	{
+		writer.Write(value);
+		return this;
+	}
+	public ByteBuffer putLong(int index, long value)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		writer.Write(value);
+		stream.Position = originalPosition;
+		return this;
+	}
+	public ByteBuffer putShort(short value)
+	{
+		writer.Write(value);
+		return this;
+	}
+	public ByteBuffer putShort(int index, short value)
+	{
+		long originalPosition = stream.Position;
+		stream.Position = index;
+		writer.Write(value);
+		stream.Position = originalPosition;
+		return this;
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Document/LazyDocument.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Document/LazyDocument.cs b/src/Lucene.Net.Misc/Document/LazyDocument.cs
new file mode 100644
index 0000000..6faed94
--- /dev/null
+++ b/src/Lucene.Net.Misc/Document/LazyDocument.cs
@@ -0,0 +1,226 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.document
+{
+
+	/// <summary>
+	/// Copyright 2004 The Apache Software Foundation
+	/// 
+	/// Licensed under the Apache License, Version 2.0 (the "License");
+	/// you may not use this file except in compliance with the License.
+	/// You may obtain a copy of the License at
+	/// 
+	///     http://www.apache.org/licenses/LICENSE-2.0
+	/// 
+	/// Unless required by applicable law or agreed to in writing, software
+	/// distributed under the License is distributed on an "AS IS" BASIS,
+	/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	/// See the License for the specific language governing permissions and
+	/// limitations under the License.
+	/// </summary>
+
+	using Analyzer = org.apache.lucene.analysis.Analyzer;
+	using TokenStream = org.apache.lucene.analysis.TokenStream;
+	using FieldInfo = org.apache.lucene.index.FieldInfo;
+	using IndexReader = org.apache.lucene.index.IndexReader;
+	using IndexableField = org.apache.lucene.index.IndexableField;
+	using IndexableFieldType = org.apache.lucene.index.IndexableFieldType;
+	using BytesRef = org.apache.lucene.util.BytesRef;
+
+	/// <summary>
+	/// Defers actually loading a field's value until you ask
+	///  for it.  You must not use the returned Field instances
+	///  after the provided reader has been closed. </summary>
+	/// <seealso cref="getField"/>
+	public class LazyDocument
+	{
+	  private readonly IndexReader reader;
+	  private readonly int docID;
+
+	  // guards 'doc'; private so no outside code can contend on it
+	  private readonly object docLock = new object();
+
+	  // null until first field is loaded
+	  private Document doc;
+
+	  // lazy fields handed out so far, keyed by field number, in request order
+	  private IDictionary<int?, IList<LazyField>> fields = new Dictionary<int?, IList<LazyField>>();
+	  // names of every field requested so far; passed to the reader when loading
+	  private HashSet<string> fieldNames = new HashSet<string>();
+
+	  public LazyDocument(IndexReader reader, int docID)
+	  {
+		this.reader = reader;
+		this.docID = docID;
+	  }
+
+	  /// <summary>
+	  /// Creates an IndexableField whose value will be lazy loaded if and 
+	  /// when it is used. 
+	  /// <para>
+	  /// <b>NOTE:</b> This method must be called once for each value of the field 
+	  /// name specified in sequence that the values exist.  This method may not be 
+	  /// used to generate multiple, lazy, IndexableField instances referring to 
+	  /// the same underlying IndexableField instance.
+	  /// </para>
+	  /// <para>
+	  /// The lazy loading of field values from all instances of IndexableField 
+	  /// objects returned by this method are all backed by a single Document 
+	  /// per LazyDocument instance.
+	  /// </para>
+	  /// </summary>
+	  public virtual IndexableField getField(FieldInfo fieldInfo)
+	  {
+
+		fieldNames.Add(fieldInfo.name);
+		// The dictionary indexer throws for a missing key (unlike Java's
+		// Map.get, which returns null), so probe with TryGetValue.
+		IList<LazyField> values;
+		if (!fields.TryGetValue(fieldInfo.number, out values) || null == values)
+		{
+		  values = new List<LazyField>();
+		  fields[fieldInfo.number] = values;
+		}
+
+		LazyField value = new LazyField(this, fieldInfo.name, fieldInfo.number);
+		values.Add(value);
+
+		lock (docLock)
+		{
+		  // edge case: if someone asks this LazyDoc for more LazyFields
+		  // after other LazyFields from the same LazyDoc have been
+		  // actualized, we need to force the doc to be re-fetched
+		  // so the new LazyFields are also populated.
+		  doc = null;
+		}
+		return value;
+	  }
+
+	  /// <summary>
+	  /// Loads the backing document on first access (restricted to the field
+	  /// names requested so far) and caches it until the next getField call.
+	  /// non-private for test only access
+	  /// @lucene.internal 
+	  /// </summary>
+	  internal virtual Document Document
+	  {
+		  get
+		  {
+			  lock (docLock)
+			  {
+				if (doc == null)
+				{
+				  try
+				  {
+					doc = reader.document(docID, fieldNames);
+				  }
+				  catch (IOException ioe)
+				  {
+					throw new IllegalStateException("unable to load document", ioe);
+				  }
+				}
+				return doc;
+			  }
+		  }
+	  }
+
+	  // Copies the loaded values of field 'name' into the lazy fields that
+	  // were handed out for 'fieldNum', matching them up by position.
+	  // :TODO: synchronize to prevent redundant copying? (sync per field name?)
+	  private void fetchRealValues(string name, int fieldNum)
+	  {
+		Document d = Document;
+
+		IList<LazyField> lazyValues = fields[fieldNum];
+		IndexableField[] realValues = d.getFields(name);
+
+		Debug.Assert(realValues.Length <= lazyValues.Count, "More lazy values then real values for field: " + name);
+
+		for (int i = 0; i < lazyValues.Count; i++)
+		{
+		  LazyField f = lazyValues[i];
+		  if (null != f)
+		  {
+			f.realValue = realValues[i];
+		  }
+		}
+	  }
+
+
+	  /// <summary>
+	  /// Field placeholder that resolves its value through the owning
+	  /// LazyDocument the first time a value accessor is called.
+	  /// @lucene.internal 
+	  /// </summary>
+	  public class LazyField : IndexableField
+	  {
+		  private readonly LazyDocument outerInstance;
+
+		internal string name_Renamed;
+		internal int fieldNum;
+		internal volatile IndexableField realValue = null;
+
+		internal LazyField(LazyDocument outerInstance, string name, int fieldNum)
+		{
+			this.outerInstance = outerInstance;
+		  this.name_Renamed = name;
+		  this.fieldNum = fieldNum;
+		}
+
+		/// <summary>
+		/// non-private for test only access
+		/// @lucene.internal 
+		/// </summary>
+		public virtual bool hasBeenLoaded()
+		{
+		  return null != realValue;
+		}
+
+		// Returns the real field, asking the owning document to load it if needed.
+		internal virtual IndexableField RealValue
+		{
+			get
+			{
+			  if (null == realValue)
+			  {
+				outerInstance.fetchRealValues(name_Renamed, fieldNum);
+			  }
+			  Debug.Assert(hasBeenLoaded(), "field value was not lazy loaded");
+			  Debug.Assert(realValue.name().Equals(name()), "realvalue name != name: " + realValue.name() + " != " + name());
+    
+			  return realValue;
+			}
+		}
+
+		public override string name()
+		{
+		  return name_Renamed;
+		}
+
+		// Always 1.0f; the real field is not consulted.
+		public override float boost()
+		{
+		  return 1.0f;
+		}
+
+		public override BytesRef binaryValue()
+		{
+		  return RealValue.binaryValue();
+		}
+
+		public override string stringValue()
+		{
+		  return RealValue.stringValue();
+		}
+
+		public override Reader readerValue()
+		{
+		  return RealValue.readerValue();
+		}
+
+		public override Number numericValue()
+		{
+		  return RealValue.numericValue();
+		}
+
+		public override IndexableFieldType fieldType()
+		{
+		  return RealValue.fieldType();
+		}
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.analysis.TokenStream tokenStream(org.apache.lucene.analysis.Analyzer analyzer) throws java.io.IOException
+		public override TokenStream tokenStream(Analyzer analyzer)
+		{
+		  return RealValue.tokenStream(analyzer);
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs
new file mode 100644
index 0000000..855b6f3
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs
@@ -0,0 +1,165 @@
+using System;
+
+namespace org.apache.lucene.index
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	/// <summary>
+	/// Prints the filename and size of each file within a given compound file.
+	/// Add the -extract flag to extract files to the current working directory.
+	/// In order to make the extracted version of the index work, you have to copy
+	/// the segments file from the compound index into the directory where the extracted files are stored. </summary>
+	/// <param name="args"> Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] &lt;cfsfile&gt; </param>
+
+
+	using CompoundFileDirectory = org.apache.lucene.store.CompoundFileDirectory;
+	using Directory = org.apache.lucene.store.Directory;
+	using FSDirectory = org.apache.lucene.store.FSDirectory;
+	using IOContext = org.apache.lucene.store.IOContext;
+	using IndexInput = org.apache.lucene.store.IndexInput;
+	using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+	using CommandLineUtil = org.apache.lucene.util.CommandLineUtil;
+
+	/// <summary>
+	/// Command-line tool for extracting sub-files out of a compound file.
+	/// Without -extract it only lists each sub-file with its length.
+	/// </summary>
+	public class CompoundFileExtractor
+	{
+
+	  /// <summary>
+	  /// Entry point. Recognised arguments: <c>-extract</c>, <c>-dir-impl X</c>
+	  /// and the compound file name (the first argument that is not an option).
+	  /// </summary>
+	  public static void Main(string[] args)
+	  {
+		string filename = null;
+		bool extract = false;
+		string dirImpl = null;
+
+		int j = 0;
+		while (j < args.Length)
+		{
+		  string arg = args[j];
+		  if ("-extract".Equals(arg))
+		  {
+			extract = true;
+		  }
+		  else if ("-dir-impl".Equals(arg))
+		  {
+			if (j == args.Length - 1)
+			{
+			  Console.WriteLine("ERROR: missing value for -dir-impl option");
+			  Environment.Exit(1);
+			}
+			j++;
+			dirImpl = args[j];
+		  }
+		  else if (filename == null)
+		  {
+			filename = arg;
+		  }
+		  j++;
+		}
+
+		if (filename == null)
+		{
+		  Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
+		  return;
+		}
+
+		Directory dir = null;
+		CompoundFileDirectory cfr = null;
+		IOContext context = IOContext.READ;
+
+		try
+		{
+		  File file = new File(filename);
+		  string dirname = file.AbsoluteFile.Parent;
+		  filename = file.Name;
+		  if (dirImpl == null)
+		  {
+			dir = FSDirectory.open(new File(dirname));
+		  }
+		  else
+		  {
+			dir = CommandLineUtil.newFSDirectory(dirImpl, new File(dirname));
+		  }
+
+		  cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);
+
+		  string[] files = cfr.listAll();
+		  ArrayUtil.timSort(files); // sort the array of filename so that the output is more readable
+
+		  for (int i = 0; i < files.Length; ++i)
+		  {
+			long len = cfr.fileLength(files[i]);
+
+			if (extract)
+			{
+			  Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
+			  IndexInput ii = cfr.openInput(files[i], context);
+			  try
+			  {
+				FileOutputStream f = new FileOutputStream(files[i]);
+				try
+				{
+				  // read and write with a small buffer, which is more efficient than reading byte by byte
+				  sbyte[] buffer = new sbyte[1024];
+				  int chunk = buffer.Length;
+				  while (len > 0)
+				  {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int bufLen = (int) Math.min(chunk, len);
+					int bufLen = (int) Math.Min(chunk, len);
+					ii.readBytes(buffer, 0, bufLen);
+					f.write(buffer, 0, bufLen);
+					len -= bufLen;
+				  }
+				}
+				finally
+				{
+				  // release the output file even when the copy fails part-way
+				  f.close();
+				}
+			  }
+			  finally
+			  {
+				ii.close();
+			  }
+			}
+			else
+			{
+			  Console.WriteLine(files[i] + ": " + len + " bytes");
+			}
+		  }
+		}
+		catch (IOException ioe)
+		{
+		  Console.WriteLine(ioe.ToString());
+		  Console.Write(ioe.StackTrace);
+		}
+		finally
+		{
+		  try
+		  {
+			// Close the compound reader before the directory it was opened
+			// from, and make sure the directory is closed even if that fails.
+			try
+			{
+			  if (cfr != null)
+			  {
+				cfr.close();
+			  }
+			}
+			finally
+			{
+			  if (dir != null)
+			  {
+				dir.close();
+			  }
+			}
+		  }
+		  catch (IOException ioe)
+		  {
+			Console.WriteLine(ioe.ToString());
+			Console.Write(ioe.StackTrace);
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/IndexSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/IndexSplitter.cs b/src/Lucene.Net.Misc/Index/IndexSplitter.cs
new file mode 100644
index 0000000..a0e9946
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/IndexSplitter.cs
@@ -0,0 +1,200 @@
+using System;
+using System.Collections.Generic;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+namespace org.apache.lucene.index
+{
+
+
+	using FSDirectory = org.apache.lucene.store.FSDirectory;
+
+	/// <summary>
+	/// Command-line tool that enables listing segments in an
+	/// index, copying specific segments to another index, and
+	/// deleting segments from an index.
+	/// 
+	/// <para>This tool does file-level copying of segments files.
+	/// This means it's unable to split apart a single segment
+	/// into multiple segments.  For example if your index is a
+	/// single segment, this tool won't help.  Also, it does basic
+	/// file-level copying (using simple
+	/// File{In,Out}putStream) so it will not work with non
+	/// FSDirectory Directory impls.</para>
+	/// 
+	/// @lucene.experimental You can easily
+	/// accidentally remove segments from your index so be
+	/// careful!
+	/// </summary>
+	public class IndexSplitter
+	{
+	  // segment metadata read from the source directory by the constructor
+	  public SegmentInfos infos;
+
+	  internal FSDirectory fsDir;
+
+	  internal File dir;
+
+	  /// <summary>
+	  /// Entry point: <c>&lt;srcDir&gt; -l</c> lists segments,
+	  /// <c>&lt;srcDir&gt; -d &lt;segments&gt;</c> removes them, and
+	  /// <c>&lt;srcDir&gt; &lt;destDir&gt; &lt;segments&gt;</c> copies them to a new index.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void main(String[] args) throws Exception
+	  public static void Main(string[] args)
+	  {
+		if (args.Length < 2)
+		{
+		  Console.Error.WriteLine("Usage: IndexSplitter <srcDir> -l (list the segments and their sizes)");
+		  Console.Error.WriteLine("IndexSplitter <srcDir> <destDir> <segments>+");
+		  Console.Error.WriteLine("IndexSplitter <srcDir> -d (delete the following segments)");
+		  return;
+		}
+		File srcDir = new File(args[0]);
+		// Check before constructing the splitter: the constructor already
+		// opens and reads the directory, so checking afterwards is too late.
+		if (!srcDir.exists())
+		{
+		  throw new Exception("srcdir:" + srcDir.AbsolutePath + " doesn't exist");
+		}
+		IndexSplitter @is = new IndexSplitter(srcDir);
+		if (args[1].Equals("-l"))
+		{
+		  @is.listSegments();
+		}
+		else if (args[1].Equals("-d"))
+		{
+		  @is.remove(segmentArgs(args));
+		}
+		else
+		{
+		  File targetDir = new File(args[1]);
+		  @is.Split(targetDir, segmentArgs(args));
+		}
+	  }
+
+	  // Returns the segment names, i.e. every argument after the first two.
+	  private static string[] segmentArgs(string[] args)
+	  {
+		string[] segs = new string[args.Length - 2];
+		Array.Copy(args, 2, segs, 0, segs.Length);
+		return segs;
+	  }
+
+	  /// <summary>Opens <paramref name="dir"/> and reads its segment infos.</summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public IndexSplitter(java.io.File dir) throws java.io.IOException
+	  public IndexSplitter(File dir)
+	  {
+		this.dir = dir;
+		fsDir = FSDirectory.open(dir);
+		infos = new SegmentInfos();
+		infos.read(fsDir);
+	  }
+
+	  /// <summary>Prints the name and formatted size in bytes of every segment.</summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void listSegments() throws java.io.IOException
+	  public virtual void listSegments()
+	  {
+		DecimalFormat formatter = new DecimalFormat("###,###.###", DecimalFormatSymbols.getInstance(Locale.ROOT));
+		for (int x = 0; x < infos.size(); x++)
+		{
+		  SegmentCommitInfo info = infos.info(x);
+		  string sizeStr = formatter.format(info.sizeInBytes());
+		  Console.WriteLine(info.info.name + " " + sizeStr);
+		}
+	  }
+
+	  // Index of the segment called 'name' within 'infos', or -1 when absent.
+	  private int getIdx(string name)
+	  {
+		for (int x = 0; x < infos.size(); x++)
+		{
+		  if (name.Equals(infos.info(x).info.name))
+		  {
+			return x;
+		  }
+		}
+		return -1;
+	  }
+
+	  // Commit info of the segment called 'name', or null when absent.
+	  private SegmentCommitInfo getInfo(string name)
+	  {
+		for (int x = 0; x < infos.size(); x++)
+		{
+		  if (name.Equals(infos.info(x).info.name))
+		  {
+			return infos.info(x);
+		  }
+		}
+		return null;
+	  }
+
+	  /// <summary>
+	  /// Removes the named segments from the source index and commits.
+	  /// NOTE(review): an unknown name reaches infos.remove as index -1 — confirm
+	  /// how SegmentInfos reacts to that.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void remove(String[] segs) throws java.io.IOException
+	  public virtual void remove(string[] segs)
+	  {
+		foreach (string n in segs)
+		{
+		  int idx = getIdx(n);
+		  infos.remove(idx);
+		}
+		infos.changed();
+		infos.commit(fsDir);
+	  }
+
+	  /// <summary>
+	  /// Copies the files of the named segments into <paramref name="destDir"/>
+	  /// and commits a segments file there that references them.
+	  /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void split(java.io.File destDir, String[] segs) throws java.io.IOException
+	  public virtual void Split(File destDir, string[] segs)
+	  {
+		destDir.mkdirs();
+		FSDirectory destFSDir = FSDirectory.open(destDir);
+		SegmentInfos destInfos = new SegmentInfos();
+		destInfos.counter = infos.counter;
+		foreach (string n in segs)
+		{
+		  SegmentCommitInfo infoPerCommit = getInfo(n);
+		  SegmentInfo info = infoPerCommit.info;
+		  // Same info just changing the dir:
+		  SegmentInfo newInfo = new SegmentInfo(destFSDir, info.Version, info.name, info.DocCount, info.UseCompoundFile, info.Codec, info.Diagnostics);
+		  destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.DelCount, infoPerCommit.DelGen, infoPerCommit.FieldInfosGen));
+		  // now copy files over
+		  ICollection<string> files = infoPerCommit.files();
+		  foreach (String srcName in files)
+		  {
+			File srcFile = new File(dir, srcName);
+			File destFile = new File(destDir, srcName);
+			copyFile(srcFile, destFile);
+		  }
+		}
+		destInfos.changed();
+		destInfos.commit(destFSDir);
+		// System.out.println("destDir:"+destDir.getAbsolutePath());
+	  }
+
+	  // Scratch buffer shared by every copyFile call (static, so not safe for
+	  // concurrent copies).
+	  private static readonly sbyte[] copyBuffer = new sbyte[32 * 1024];
+
+	  // Copies 'src' to 'dst' through copyBuffer; both streams are closed even
+	  // when reading or writing fails.
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static void copyFile(java.io.File src, java.io.File dst) throws java.io.IOException
+	  private static void copyFile(File src, File dst)
+	  {
+		InputStream @in = new FileInputStream(src);
+		try
+		{
+		  OutputStream @out = new FileOutputStream(dst);
+		  try
+		  {
+			int len;
+			while ((len = @in.read(copyBuffer)) > 0)
+			{
+			  @out.write(copyBuffer, 0, len);
+			}
+		  }
+		  finally
+		  {
+			@out.close();
+		  }
+		}
+		finally
+		{
+		  @in.close();
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs b/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs
new file mode 100644
index 0000000..1e03fed
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs
@@ -0,0 +1,329 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.index
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode;
+	using Directory = org.apache.lucene.store.Directory;
+	using FSDirectory = org.apache.lucene.store.FSDirectory;
+	using FixedBitSet = org.apache.lucene.util.FixedBitSet;
+	using Bits = org.apache.lucene.util.Bits;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// This tool splits input index into multiple equal parts. The method employed
+	/// here uses <seealso cref="IndexWriter#addIndexes(IndexReader[])"/> where the input data
+	/// comes from the input index with artificially applied deletes to the document
+	/// id-s that fall outside the selected partition.
+	/// <para>Note 1: Deletes are only applied to a buffered list of deleted docs and
+	/// don't affect the source index - this tool works also with read-only indexes.
+	/// </para>
+	/// <para>Note 2: the disadvantage of this tool is that source index needs to be
+	/// read as many times as there are parts to be created, hence the name of this
+	/// tool.
+	/// 
+	/// </para>
+	/// <para><b>NOTE</b>: this tool is unaware of documents added
+	/// atomically via <seealso cref="IndexWriter#addDocuments"/> or
+	/// <seealso cref="IndexWriter#updateDocuments"/>, which means it can easily
+	/// break up such document groups.
+	/// </para>
+	/// </summary>
+	public class MultiPassIndexSplitter
+	{
+
+	  /// <summary>
+	  /// Split source index into multiple parts. One pass over the source is made per output:
+	  /// before each pass all documents outside that part are marked deleted on an in-memory
+	  /// wrapper, and the wrapper's sub-readers are added to a freshly created index. </summary>
+	  /// <param name="version"> version handed to the <c>IndexWriterConfig</c> of every output writer. </param>
+	  /// <param name="in"> source index, can have deletions, can have
+	  /// multiple segments (or multiple readers). </param>
+	  /// <param name="outputs"> list of directories where the output parts will be stored. </param>
+	  /// <param name="seq"> if true, then the source index will be split into equal
+	  /// increasing ranges of document id-s. If false, source document id-s will be
+	  /// assigned in a deterministic round-robin fashion to one of the output splits. </param>
+	  /// <exception cref="IOException"> If there is a low-level I/O error, if fewer than two
+	  /// outputs are given, or if the source reports fewer than two documents </exception>
+	  public virtual void Split(Version version, IndexReader @in, Directory[] outputs, bool seq)
+	  {
+		if (outputs == null || outputs.Length < 2)
+		{
+		  throw new IOException("Invalid number of outputs.");
+		}
+		if (@in == null || @in.numDocs() < 2)
+		{
+		  throw new IOException("Not enough documents for splitting");
+		}
+		int numParts = outputs.Length;
+		// wrap a potentially read-only input
+		// this way we don't have to preserve original deletions because neither
+		// deleteDocument(int) or undeleteAll() is applied to the wrapped input index.
+		FakeDeleteIndexReader input = new FakeDeleteIndexReader(@in);
+		int maxDoc = input.maxDoc();
+		int partLen = maxDoc / numParts;
+		for (int i = 0; i < numParts; i++)
+		{
+		  // start every pass from the source's own live docs
+		  input.undeleteAll();
+		  if (seq) // sequential range
+		  {
+			int lo = partLen * i;
+			int hi = lo + partLen;
+			// below range
+			for (int j = 0; j < lo; j++)
+			{
+			  input.deleteDocument(j);
+			}
+			// above range - last part collects all id-s that remained due to
+			// integer rounding errors
+			if (i < numParts - 1)
+			{
+			  for (int j = hi; j < maxDoc; j++)
+			  {
+				input.deleteDocument(j);
+			  }
+			}
+		  }
+		  else
+		  {
+			// round-robin: keep only the id-s congruent to i modulo numParts
+			for (int j = 0; j < maxDoc; j++)
+			{
+			  if ((j + numParts - i) % numParts != 0)
+			  {
+				input.deleteDocument(j);
+			  }
+			}
+		  }
+		  IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(version, null)
+			 .setOpenMode(OpenMode.CREATE));
+		  bool success = false;
+		  try
+		  {
+			Console.Error.WriteLine("Writing part " + (i + 1) + " ...");
+			// pass the subreaders directly, as our wrapper's numDocs/hasDeletions are not up-to-date
+			var sr = input.SequentialSubReaders;
+			w.addIndexes(sr.ToArray()); // TODO: maybe take List<IR> here?
+			success = true;
+		  }
+		  finally
+		  {
+			if (success)
+			{
+			  w.close();
+			}
+			else
+			{
+			  // the writer must not stay open after a failed pass, but the
+			  // failure that brought us here is the one the caller should see
+			  try
+			  {
+				w.close();
+			  }
+			  catch (Exception)
+			  {
+				// secondary close failure intentionally suppressed
+			  }
+			}
+		  }
+		}
+		Console.Error.WriteLine("Done.");
+	  }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("deprecation") public static void main(String[] args) throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+	  /// <summary>
+	  /// Command-line entry point. Parses <c>-out</c>, <c>-num</c> and <c>-seq</c>, treats every
+	  /// other argument as the path of an input index, and splits the combined input into
+	  /// <c>numParts</c> directories named <c>part-N</c> below the output directory.
+	  /// </summary>
+	  /// <param name="args"> command-line arguments; the usage text is printed and the process
+	  /// exits when fewer than five are given </param>
+	  public static void Main(string[] args)
+	  {
+		if (args.Length < 5)
+		{
+		  Console.Error.WriteLine("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
+		  Console.Error.WriteLine("\tinputIndex\tpath to input index, multiple values are ok");
+		  Console.Error.WriteLine("\t-out ouputDir\tpath to output directory to contain partial indexes");
+		  Console.Error.WriteLine("\t-num numParts\tnumber of parts to produce");
+		  Console.Error.WriteLine("\t-seq\tsequential docid-range split (default is round-robin)");
+		  Environment.Exit(-1);
+		}
+		List<IndexReader> indexes = new List<IndexReader>();
+		string outDir = null;
+		int numParts = -1;
+		bool seq = false;
+		for (int i = 0; i < args.Length; i++)
+		{
+		  if (args[i].Equals("-out"))
+		  {
+			// the option needs a value; without this check args[++i] runs off the array
+			if (i + 1 >= args.Length)
+			{
+			  throw new Exception("Required argument missing: -out outputDir");
+			}
+			outDir = args[++i];
+		  }
+		  else if (args[i].Equals("-num"))
+		  {
+			if (i + 1 >= args.Length)
+			{
+			  throw new Exception("Invalid value of required argument: -num numParts");
+			}
+			numParts = Convert.ToInt32(args[++i]);
+		  }
+		  else if (args[i].Equals("-seq"))
+		  {
+			seq = true;
+		  }
+		  else
+		  {
+			File file = new File(args[i]);
+			if (!file.exists() || !file.Directory)
+			{
+			  Console.Error.WriteLine("Invalid input path - skipping: " + file);
+			  continue;
+			}
+			Directory dir = FSDirectory.open(file);
+			bool isIndex;
+			try
+			{
+			  isIndex = DirectoryReader.indexExists(dir);
+			}
+			catch (Exception)
+			{
+			  // an unreadable directory is reported the same way as a non-index
+			  isIndex = false;
+			}
+			if (!isIndex)
+			{
+			  Console.Error.WriteLine("Invalid input index - skipping: " + file);
+			  // the directory was opened only for the probe; release it (best effort)
+			  try
+			  {
+				dir.close();
+			  }
+			  catch (Exception)
+			  {
+				// skipping this input must not abort the whole run
+			  }
+			  continue;
+			}
+			indexes.Add(DirectoryReader.open(dir));
+		  }
+		}
+		if (outDir == null)
+		{
+		  throw new Exception("Required argument missing: -out outputDir");
+		}
+		if (numParts < 2)
+		{
+		  throw new Exception("Invalid value of required argument: -num numParts");
+		}
+		if (indexes.Count == 0)
+		{
+		  throw new Exception("No input indexes to process");
+		}
+		File @out = new File(outDir);
+		if (!@out.mkdirs())
+		{
+		  throw new Exception("Can't create output directory: " + @out);
+		}
+		Directory[] dirs = new Directory[numParts];
+		for (int i = 0; i < numParts; i++)
+		{
+		  dirs[i] = FSDirectory.open(new File(@out, "part-" + i));
+		}
+		MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();
+		// a single input is used as-is; several inputs are presented as one MultiReader
+		IndexReader input;
+		if (indexes.Count == 1)
+		{
+		  input = indexes[0];
+		}
+		else
+		{
+		  input = new MultiReader(indexes.ToArray());
+		}
+		splitter.Split(Version.LUCENE_CURRENT, input, dirs, seq);
+	  }
+
+	  /// <summary>
+	  /// This class emulates deletions on the underlying index.
+	  /// </summary>
+	  private sealed class FakeDeleteIndexReader : BaseCompositeReader<FakeDeleteAtomicIndexReader>
+	  {
+
+		/// <summary>
+		/// Wraps every leaf of <paramref name="reader"/> in a <c>FakeDeleteAtomicIndexReader</c>.
+		/// </summary>
+		public FakeDeleteIndexReader(IndexReader reader) : base(initSubReaders(reader))
+		{
+		}
+
+		/// <summary>
+		/// Builds one wrapper per leaf context of <paramref name="reader"/>, in leaf order.
+		/// </summary>
+		internal static FakeDeleteAtomicIndexReader[] initSubReaders(IndexReader reader)
+		{
+		  IList<AtomicReaderContext> contexts = reader.leaves();
+		  FakeDeleteAtomicIndexReader[] wrapped = new FakeDeleteAtomicIndexReader[contexts.Count];
+		  for (int idx = 0; idx < wrapped.Length; idx++)
+		  {
+			wrapped[idx] = new FakeDeleteAtomicIndexReader(contexts[idx].reader());
+		  }
+		  return wrapped;
+		}
+
+		/// <summary>
+		/// Marks the document with composite id <paramref name="docID"/> as deleted on the
+		/// sub-reader that holds it.
+		/// </summary>
+		public void deleteDocument(int docID)
+		{
+		  int sub = readerIndex(docID);
+		  // translate the composite id into the sub-reader's own id space
+		  int localDoc = docID - readerBase(sub);
+		  SequentialSubReaders.get(sub).deleteDocument(localDoc);
+		}
+
+		/// <summary>
+		/// Resets the emulated deletions of every sub-reader.
+		/// </summary>
+		public void undeleteAll()
+		{
+		  foreach (FakeDeleteAtomicIndexReader sub in SequentialSubReaders)
+		  {
+			sub.undeleteAll();
+		  }
+		}
+
+		// nothing to release here
+		protected internal override void doClose()
+		{
+		}
+
+		// no need to override numDocs/hasDeletions,
+		// as we pass the subreaders directly to IW.addIndexes().
+	  }
+
+	  private sealed class FakeDeleteAtomicIndexReader : FilterAtomicReader
+	  {
+		// live-docs bitset served by this reader; replaced wholesale by undeleteAll()
+		internal FixedBitSet liveDocs;
+
+		public FakeDeleteAtomicIndexReader(AtomicReader reader) : base(reader)
+		{
+		  undeleteAll(); // initialize main bitset
+		}
+
+		/// <summary>
+		/// Number of documents currently marked live in the emulated bitset.
+		/// </summary>
+		public override int numDocs()
+		{
+		  return liveDocs.cardinality();
+		}
+
+		/// <summary>
+		/// Rebuilds <c>liveDocs</c> from the wrapped reader: its own live docs when it has
+		/// deletions, otherwise every document up to <c>maxDoc</c>.
+		/// </summary>
+		public void undeleteAll()
+		{
+		  int docCount = @in.maxDoc();
+		  liveDocs = new FixedBitSet(docCount);
+		  if (!@in.hasDeletions())
+		  {
+			// mark all docs as valid
+			liveDocs.set(0, docCount);
+			return;
+		  }
+
+		  Bits wrappedLive = @in.LiveDocs;
+		  Debug.Assert(wrappedLive != null);
+		  // this loop is a little bit ineffective, as Bits has no nextSetBit():
+		  for (int doc = 0; doc < docCount; doc++)
+		  {
+			if (wrappedLive.get(doc))
+			{
+			  liveDocs.set(doc);
+			}
+		  }
+		}
+
+		/// <summary>
+		/// Clears the live bit of document <paramref name="n"/>.
+		/// </summary>
+		public void deleteDocument(int n)
+		{
+		  liveDocs.clear(n);
+		}
+
+		/// <summary>
+		/// The emulated live-docs bitset (not the wrapped reader's).
+		/// </summary>
+		public override Bits LiveDocs
+		{
+			get
+			{
+			  return liveDocs;
+			}
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs b/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs
new file mode 100644
index 0000000..f3e7ed4
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs
@@ -0,0 +1,220 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.index
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode;
+	using DocIdSet = org.apache.lucene.search.DocIdSet;
+	using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+	using Filter = org.apache.lucene.search.Filter;
+	using TermRangeFilter = org.apache.lucene.search.TermRangeFilter;
+	using Directory = org.apache.lucene.store.Directory;
+	using Bits = org.apache.lucene.util.Bits;
+	using FixedBitSet = org.apache.lucene.util.FixedBitSet;
+	using IOUtils = org.apache.lucene.util.IOUtils;
+	using Version = org.apache.lucene.util.Version;
+
+	/// <summary>
+	/// Split an index based on a <seealso cref="Filter"/>.
+	/// </summary>
+
+	public class PKIndexSplitter
+	{
+	  private readonly Filter docsInFirstIndex;
+	  private readonly Directory input;
+	  private readonly Directory dir1;
+	  private readonly Directory dir2;
+	  private readonly IndexWriterConfig config1;
+	  private readonly IndexWriterConfig config2;
+
+	  /// <summary>
+	  /// Creates a splitter driven by a <seealso cref="Filter"/>: documents matching
+	  /// <paramref name="docsInFirstIndex"/> go to <paramref name="dir1"/>, the rest to
+	  /// <paramref name="dir2"/>. Each target gets its own default writer configuration
+	  /// built from <paramref name="version"/>.
+	  /// </summary>
+	  public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex)
+		: this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version))
+	  {
+	  }
+
+	  // Builds a new writer configuration for the given version (null second constructor
+	  // argument) and switches it to OpenMode.CREATE.
+	  private static IndexWriterConfig newDefaultConfig(Version version)
+	  {
+		IndexWriterConfig config = new IndexWriterConfig(version, null);
+		return config.setOpenMode(OpenMode.CREATE);
+	  }
+
+	  /// <summary>
+	  /// Creates a splitter driven by a <seealso cref="Filter"/> with caller-supplied writer
+	  /// configurations: <paramref name="config1"/> is used for <paramref name="dir1"/> and
+	  /// <paramref name="config2"/> for <paramref name="dir2"/>.
+	  /// </summary>
+	  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2)
+	  {
+		// source and routing rule
+		this.input = input;
+		this.docsInFirstIndex = docsInFirstIndex;
+
+		// first target
+		this.dir1 = dir1;
+		this.config1 = config1;
+
+		// second target
+		this.dir2 = dir2;
+		this.config2 = config2;
+	  }
+
+	  /// <summary>
+	  /// Split an index based on a given primary key term
+	  /// and a 'middle' term.  If the middle term is present, it's
+	  /// sent to dir2. The routing filter is a <c>TermRangeFilter</c> on the term's field
+	  /// with no lower bound and <paramref name="midTerm"/> as the excluded upper bound.
+	  /// </summary>
+	  public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm)
+		: this(version, input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Same 'middle' term split as the version-based overload, but with caller-supplied
+	  /// writer configurations for the two targets.
+	  /// </summary>
+	  public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2)
+		: this(input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2)
+	  {
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void split() throws java.io.IOException
+	  /// <summary>
+	  /// Opens the input directory and writes the two output indexes: first the documents
+	  /// matching the filter into dir1, then the non-matching ones into dir2. The reader is
+	  /// closed afterwards; on failure it is closed via <c>closeWhileHandlingException</c>.
+	  /// </summary>
+	  public virtual void Split()
+	  {
+		DirectoryReader reader = DirectoryReader.open(input);
+		bool finished = false;
+		try
+		{
+		  // pass an individual config in here since one config can not be reused!
+		  createIndex(config1, dir1, reader, docsInFirstIndex, false);
+		  createIndex(config2, dir2, reader, docsInFirstIndex, true);
+		  finished = true;
+		}
+		finally
+		{
+		  if (!finished)
+		  {
+			IOUtils.closeWhileHandlingException(reader);
+		  }
+		  else
+		  {
+			IOUtils.close(reader);
+		  }
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void createIndex(IndexWriterConfig config, org.apache.lucene.store.Directory target, IndexReader reader, org.apache.lucene.search.Filter preserveFilter, boolean negateFilter) throws java.io.IOException
+	  // Writes one output index: wraps every leaf of `reader` in a
+	  // DocumentFilteredAtomicIndexReader (filter optionally negated) and adds the wrapped
+	  // leaves to a new IndexWriter on `target`. The writer is always closed.
+	  private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter)
+	  {
+		IndexWriter writer = new IndexWriter(target, config);
+		bool finished = false;
+		try
+		{
+		  IList<AtomicReaderContext> contexts = reader.leaves();
+		  IndexReader[] filtered = new IndexReader[contexts.Count];
+		  int next = 0;
+		  foreach (AtomicReaderContext leaf in contexts)
+		  {
+			filtered[next] = new DocumentFilteredAtomicIndexReader(leaf, preserveFilter, negateFilter);
+			next++;
+		  }
+		  writer.addIndexes(filtered);
+		  finished = true;
+		}
+		finally
+		{
+		  if (!finished)
+		  {
+			IOUtils.closeWhileHandlingException(writer);
+		  }
+		  else
+		  {
+			IOUtils.close(writer);
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// Reader view whose live docs are the documents selected by a filter (or by its
+	  /// negation), minus the documents already deleted in the wrapped reader.
+	  /// </summary>
+	  private class DocumentFilteredAtomicIndexReader : FilterAtomicReader
+	  {
+		// computed once in the constructor and never changed afterwards
+		internal readonly Bits liveDocs;
+		internal readonly int numDocs_Renamed;
+
+		/// <param name="context"> leaf whose reader is wrapped </param>
+		/// <param name="preserveFilter"> filter selecting the documents to keep </param>
+		/// <param name="negateFilter"> if true, keep the documents the filter does NOT select </param>
+		public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter) : base(context.reader())
+		{
+		  int docLimit = @in.maxDoc();
+		  FixedBitSet kept = new FixedBitSet(docLimit);
+
+		  // ignore livedocs here, as we filter them later:
+		  DocIdSet matches = preserveFilter.getDocIdSet(context, null);
+		  DocIdSetIterator matchIt = (matches == null) ? null : matches.GetEnumerator();
+		  if (matchIt != null)
+		  {
+			kept.or(matchIt);
+		  }
+
+		  if (negateFilter)
+		  {
+			kept.flip(0, docLimit);
+		  }
+
+		  if (@in.hasDeletions())
+		  {
+			Bits wrappedLive = @in.LiveDocs;
+			Debug.Assert(wrappedLive != null);
+			DocIdSetIterator keptIt = kept.GetEnumerator();
+			int doc = keptIt.nextDoc();
+			while (doc < docLimit)
+			{
+			  if (!wrappedLive.get(doc))
+			  {
+				// we can safely modify the current bit, as the iterator already stepped over it:
+				kept.clear(doc);
+			  }
+			  doc = keptIt.nextDoc();
+			}
+		  }
+
+		  this.liveDocs = kept;
+		  this.numDocs_Renamed = kept.cardinality();
+		}
+
+		/// <summary>
+		/// Number of bits set in the bitset built by the constructor.
+		/// </summary>
+		public override int numDocs()
+		{
+		  return numDocs_Renamed;
+		}
+
+		/// <summary>
+		/// The filtered live-docs bitset built by the constructor.
+		/// </summary>
+		public override Bits LiveDocs
+		{
+			get
+			{
+			  return liveDocs;
+			}
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs b/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs
new file mode 100644
index 0000000..70ad20a
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs
@@ -0,0 +1,321 @@
+using System;
+
+namespace org.apache.lucene.index.sorter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using DocIdSet = org.apache.lucene.search.DocIdSet;
+	using FieldComparator = org.apache.lucene.search.FieldComparator;
+	using FieldComparatorSource = org.apache.lucene.search.FieldComparatorSource;
+	using Filter = org.apache.lucene.search.Filter;
+	using IndexSearcher = org.apache.lucene.search.IndexSearcher; // javadocs
+	using Query = org.apache.lucene.search.Query; // javadocs
+	using ScoreDoc = org.apache.lucene.search.ScoreDoc; // javadocs
+	using Scorer = org.apache.lucene.search.Scorer;
+	using Sort = org.apache.lucene.search.Sort;
+	using SortField = org.apache.lucene.search.SortField;
+	using FixedBitSet = org.apache.lucene.util.FixedBitSet;
+
+	/// <summary>
+	/// Helper class to sort readers that contain blocks of documents.
+	/// <para>
+	/// Note that this class is intended to be used with <seealso cref="SortingMergePolicy"/>,
+	/// and for other purposes has some limitations:
+	/// <ul>
+	///    <li>Cannot yet be used with <seealso cref="IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter"/></li>
+	///    <li>Filling sort field values is not yet supported.</li>
+	/// </ul>
+	/// @lucene.experimental
+	/// </para>
+	/// </summary>
+	// TODO: can/should we clean this thing up (e.g. return a proper sort value)
+	// and move to the join/ module?
+	public class BlockJoinComparatorSource : FieldComparatorSource
+	{
+	  internal readonly Filter parentsFilter;
+	  internal readonly Sort parentSort;
+	  internal readonly Sort childSort;
+
+	  /// <summary>
+	  /// Create a new BlockJoinComparatorSource that sorts only whole blocks of documents,
+	  /// using <paramref name="parentSort"/>, and does not reorder children within a block
+	  /// (the child sort passed on is <c>SortField.FIELD_DOC</c>).
+	  /// </summary>
+	  /// <param name="parentsFilter"> Filter identifying parent documents </param>
+	  /// <param name="parentSort"> Sort for parent documents </param>
+	  public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort)
+		: this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC))
+	  {
+	  }
+
+	  /// <summary>
+	  /// Create a new BlockJoinComparatorSource with explicit sort orders for both the
+	  /// blocks themselves and the children inside each block.
+	  /// </summary>
+	  /// <param name="parentsFilter"> Filter identifying parent documents </param>
+	  /// <param name="parentSort"> Sort for parent documents </param>
+	  /// <param name="childSort"> Sort for child documents in the same block </param>
+	  public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort)
+	  {
+		this.childSort = childSort;
+		this.parentSort = parentSort;
+		this.parentsFilter = parentsFilter;
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.search.FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws java.io.IOException
+	  /// <summary>
+	  /// Builds the block-aware comparator: allocates <paramref name="numHits"/> parent and
+	  /// child slots, creates one single-slot comparator per sort field of the parent and
+	  /// child sorts, and records a -1/+1 multiplier for each field's reverse flag.
+	  /// </summary>
+	  public override FieldComparator<int?> newComparator(string fieldname, int numHits, int sortPos, bool reversed)
+	  {
+		// we keep parallel slots: the parent ids and the child ids
+		int[] parentIds = new int[numHits];
+		int[] childIds = new int[numHits];
+
+		// per-field comparators and direction multipliers for the parent sort
+		SortField[] parentFields = parentSort.Sort;
+		int[] parentSigns = new int[parentFields.Length];
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+		FieldComparator<?>[] parentCmps = new FieldComparator[parentFields.Length];
+		for (int p = 0; p < parentFields.Length; p++)
+		{
+		  if (parentFields[p].Reverse)
+		  {
+			parentSigns[p] = -1;
+		  }
+		  else
+		  {
+			parentSigns[p] = 1;
+		  }
+		  parentCmps[p] = parentFields[p].getComparator(1, p);
+		}
+
+		// same again for the child sort
+		SortField[] childFields = childSort.Sort;
+		int[] childSigns = new int[childFields.Length];
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+		FieldComparator<?>[] childCmps = new FieldComparator[childFields.Length];
+		for (int c = 0; c < childFields.Length; c++)
+		{
+		  if (childFields[c].Reverse)
+		  {
+			childSigns[c] = -1;
+		  }
+		  else
+		  {
+			childSigns[c] = 1;
+		  }
+		  childCmps[c] = childFields[c].getComparator(1, c);
+		}
+
+		// NOTE: we could return parent ID as value but really our sort "value" is more complex...
+		// So we throw UOE for now. At the moment you really should only use this at indexing time.
+		return new FieldComparatorAnonymousInnerClassHelper(this, parentIds, childIds, parentSigns, parentCmps, childSigns, childCmps);
+	  }
+
+	  /// <summary>
+	  /// Comparator created by <c>newComparator</c>. Each slot stores a child doc id together
+	  /// with the id returned by <c>parent(doc)</c>; two documents in the same block are
+	  /// ordered by the child comparators (a parent is kept after its children), documents in
+	  /// different blocks by the parent comparators with the parent id as tie-break.
+	  /// </summary>
+	  private class FieldComparatorAnonymousInnerClassHelper : FieldComparator<int?>
+	  {
+		  private readonly BlockJoinComparatorSource outerInstance;
+
+		  // parallel per-slot arrays, always written together by copy()
+		  private int[] parentSlots;
+		  private int[] childSlots;
+		  private int[] parentReverseMul;
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+		  private FieldComparator<?>[] parentComparators;
+		  private int[] childReverseMul;
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+		  private FieldComparator<?>[] childComparators;
+
+		  // C# constructors cannot declare type parameters, so the comparator arrays use the
+		  // same (still to be ported) wildcard placeholder as the fields they are stored in.
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+		  public FieldComparatorAnonymousInnerClassHelper(BlockJoinComparatorSource outerInstance, int[] parentSlots, int[] childSlots, int[] parentReverseMul, FieldComparator<?>[] parentComparators, int[] childReverseMul, FieldComparator<?>[] childComparators)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.parentSlots = parentSlots;
+			  this.childSlots = childSlots;
+			  this.parentReverseMul = parentReverseMul;
+			  this.parentComparators = parentComparators;
+			  this.childReverseMul = childReverseMul;
+			  this.childComparators = childComparators;
+		  }
+
+		  // bottom entry (set through Bottom) and the parent bitset of the current segment
+		  internal int bottomParent;
+		  internal int bottomChild;
+		  internal FixedBitSet parentBits;
+
+		  /// <summary>
+		  /// Compares the documents stored in two slots.
+		  /// </summary>
+		  public override int compare(int slot1, int slot2)
+		  {
+			try
+			{
+			  return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
+			}
+			catch (IOException e)
+			{
+			  // wrap while keeping the original as InnerException; the message mirrors
+			  // what Java's RuntimeException(Throwable) would have produced
+			  throw new Exception(e.ToString(), e);
+			}
+		  }
+
+		  /// <summary>
+		  /// Remembers the parent/child ids held in the given slot as the current bottom.
+		  /// </summary>
+		  public override int Bottom
+		  {
+			  set
+			  {
+				bottomParent = parentSlots[value];
+				bottomChild = childSlots[value];
+			  }
+		  }
+
+		  /// <summary>
+		  /// Not supported: always throws <see cref="System.NotSupportedException"/>.
+		  /// </summary>
+		  public override int? TopValue
+		  {
+			  set
+			  {
+				// we don't have enough information (the docid is needed)
+				throw new System.NotSupportedException("this comparator cannot be used with deep paging");
+			  }
+		  }
+
+		  /// <summary>
+		  /// Compares the current bottom entry with <paramref name="doc"/>.
+		  /// </summary>
+		  public override int compareBottom(int doc)
+		  {
+			return compare(bottomChild, bottomParent, doc, parent(doc));
+		  }
+
+		  /// <summary>
+		  /// Not supported: always throws <see cref="System.NotSupportedException"/>.
+		  /// </summary>
+		  public override int compareTop(int doc)
+		  {
+			// we don't have enough information (the docid is needed)
+			throw new System.NotSupportedException("this comparator cannot be used with deep paging");
+		  }
+
+		  /// <summary>
+		  /// Stores <paramref name="doc"/> and its parent id in <paramref name="slot"/>.
+		  /// </summary>
+		  public override void copy(int slot, int doc)
+		  {
+			childSlots[slot] = doc;
+			parentSlots[slot] = parent(doc);
+		  }
+
+		  /// <summary>
+		  /// Switches to a new segment: loads the parent bitset from the parents filter and
+		  /// forwards the context to every parent and child comparator.
+		  /// </summary>
+		  /// <exception cref="InvalidOperationException"> if the filter returns no doc id set
+		  /// for the segment, or one that is not a <c>FixedBitSet</c> </exception>
+		  public override FieldComparator<int?> setNextReader(AtomicReaderContext context)
+		  {
+			DocIdSet parents = outerInstance.parentsFilter.getDocIdSet(context, null);
+			if (parents == null)
+			{
+			  throw new InvalidOperationException("AtomicReader " + context.reader() + " contains no parents!");
+			}
+			if (!(parents is FixedBitSet))
+			{
+			  throw new InvalidOperationException("parentFilter must return FixedBitSet; got " + parents);
+			}
+			parentBits = (FixedBitSet) parents;
+			for (int i = 0; i < parentComparators.Length; i++)
+			{
+			  parentComparators[i] = parentComparators[i].setNextReader(context);
+			}
+			for (int i = 0; i < childComparators.Length; i++)
+			{
+			  childComparators[i] = childComparators[i].setNextReader(context);
+			}
+			return this;
+		  }
+
+		  /// <summary>
+		  /// Not supported: always throws <see cref="System.NotSupportedException"/>.
+		  /// </summary>
+		  public override int? value(int slot)
+		  {
+			// really our sort "value" is more complex...
+			throw new System.NotSupportedException("filling sort field values is not yet supported");
+		  }
+
+		  /// <summary>
+		  /// Passes the scorer to the base class and to every parent and child comparator.
+		  /// </summary>
+		  public override Scorer Scorer
+		  {
+			  set
+			  {
+				base.Scorer = value;
+				foreach (var comp in parentComparators)
+				{
+				  comp.Scorer = value;
+				}
+				foreach (var comp in childComparators)
+				{
+				  comp.Scorer = value;
+				}
+			  }
+		  }
+
+		  // Parent id of a document: the result of parentBits.nextSetBit(doc)
+		  // (presumably the first parent bit at or after doc — confirm against FixedBitSet).
+		  internal virtual int parent(int doc)
+		  {
+			return parentBits.nextSetBit(doc);
+		  }
+
+		  // Block-aware comparison of two (doc, parent) pairs.
+		  internal virtual int compare(int docID1, int parent1, int docID2, int parent2)
+		  {
+			if (parent1 == parent2) // both are in the same block
+			{
+			  if (docID1 == parent1 || docID2 == parent2)
+			  {
+				// keep parents at the end of blocks
+				return docID1 - docID2;
+			  }
+			  else
+			  {
+				return compare(docID1, docID2, childComparators, childReverseMul);
+			  }
+			}
+			else
+			{
+			  int cmp = compare(parent1, parent2, parentComparators, parentReverseMul);
+			  if (cmp == 0)
+			  {
+				// equal sort values: fall back to parent id order
+				return parent1 - parent2;
+			  }
+			  else
+			  {
+				return cmp;
+			  }
+			}
+		  }
+
+		  // Compares two documents field by field, using slot 0 of each comparator as scratch
+		  // space; returns the first non-zero result multiplied by that field's direction.
+		  internal virtual int compare<T1>(int docID1, int docID2, FieldComparator<T1>[] comparators, int[] reverseMul)
+		  {
+			for (int i = 0; i < comparators.Length; i++)
+			{
+			  // TODO: would be better if copy() didn't cause a term lookup in TermOrdVal & co,
+			  // the segments are always the same here...
+			  comparators[i].copy(0, docID1);
+			  comparators[i].Bottom = 0;
+			  int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
+			  if (comp != 0)
+			  {
+				return comp;
+			  }
+			}
+			return 0; // no need to docid tiebreak
+		  }
+	  }
+
+	  /// <summary>
+	  /// Text form naming the parent and child sorts.
+	  /// </summary>
+	  public override string ToString()
+	  {
+		string description = "blockJoin(parentSort=" + parentSort;
+		description = description + ",childSort=" + childSort + ")";
+		return description;
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs b/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs
new file mode 100644
index 0000000..654ba85
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs
@@ -0,0 +1,147 @@
+namespace org.apache.lucene.index.sorter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using CollectionTerminatedException = org.apache.lucene.search.CollectionTerminatedException;
+	using Collector = org.apache.lucene.search.Collector;
+	using Scorer = org.apache.lucene.search.Scorer;
+	using Sort = org.apache.lucene.search.Sort;
+	using TopDocsCollector = org.apache.lucene.search.TopDocsCollector;
+	using TotalHitCountCollector = org.apache.lucene.search.TotalHitCountCollector;
+
+	/// <summary>
+	/// A <seealso cref="Collector"/> that early terminates collection of documents on a
+	/// per-segment basis, if the segment was sorted according to the given
+	/// <seealso cref="Sort"/>.
+	/// 
+	/// <para>
+	/// <b>NOTE:</b> the {@code Collector} detects sorted segments according to
+	/// <seealso cref="SortingMergePolicy"/>, so it's best used in conjunction with it. Also,
+	/// it collects up to a specified {@code numDocsToCollect} from each segment, 
+	/// and therefore is mostly suitable for use in conjunction with collectors such as
+	/// <seealso cref="TopDocsCollector"/>, and not e.g. <seealso cref="TotalHitCountCollector"/>.
+	/// </para>
+	/// <para>
+	/// <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
+	/// order as the index order, the returned <seealso cref="TopDocsCollector#topDocs() TopDocs"/>
+	/// will be correct. However the total of {@link TopDocsCollector#getTotalHits()
+	/// hit count} will be underestimated since not all matching documents will have
+	/// been collected.
+	/// </para>
+	/// <para>
+	/// <b>NOTE</b>: This {@code Collector} uses <seealso cref="Sort#toString()"/> to detect
+	/// whether a segment was sorted with the same {@code Sort}. This has
+	/// two implications:
+	/// <ul>
+	/// <li>if a custom comparator is not implemented correctly and returns
+	/// different identifiers for equivalent instances, this collector will not
+	/// detect sorted segments,</li>
+	/// <li>if you suddenly change the <seealso cref="IndexWriter"/>'s
+	/// {@code SortingMergePolicy} to sort according to another criterion and if both
+	/// the old and the new {@code Sort}s have the same identifier, this
+	/// {@code Collector} will incorrectly detect sorted segments.</li>
+	/// </ul>
+	/// 
+	/// @lucene.experimental
+	/// </para>
+	/// </summary>
+	public class EarlyTerminatingSortingCollector : Collector
+	{
+	  /// <summary>
+	  /// The wrapped Collector </summary>
+	  protected internal readonly Collector @in;
+	  /// <summary>
+	  /// Sort used to sort the search results </summary>
+	  protected internal readonly Sort sort;
+	  /// <summary>
+	  /// Number of documents to collect in each segment </summary>
+	  protected internal readonly int numDocsToCollect;
+	  /// <summary>
+	  /// Number of documents to collect in the current segment being processed </summary>
+	  protected internal int segmentTotalCollect;
+	  /// <summary>
+	  /// True if the current segment being processed is sorted by <seealso cref="#sort"/> </summary>
+	  protected internal bool segmentSorted;
+
+	  /// <summary>
+	  /// Number of documents forwarded to the wrapped collector in the current segment </summary>
+	  private int numCollected;
+
+	  /// <summary>
+	  /// Create a new <seealso cref="EarlyTerminatingSortingCollector"/> instance.
+	  /// </summary>
+	  /// <param name="in">
+	  ///          the collector to wrap </param>
+	  /// <param name="sort">
+	  ///          the sort you are sorting the search results on </param>
+	  /// <param name="numDocsToCollect">
+	  ///          the number of documents to collect on each segment. When wrapping
+	  ///          a <seealso cref="TopDocsCollector"/>, this number should be the number of
+	  ///          hits. Must be greater than 0. </param>
+	  public EarlyTerminatingSortingCollector(Collector @in, Sort sort, int numDocsToCollect)
+	  {
+		if (numDocsToCollect <= 0)
+		{
+		  // Report the rejected argument itself: segmentTotalCollect has not been assigned yet
+		  // and would always print 0 here.
+		  // NOTE(review): IllegalStateException is the Java name carried over by the converter;
+		  // confirm a .NET equivalent is in scope.
+		  throw new IllegalStateException("numDocsToCollect must always be > 0, got " + numDocsToCollect);
+		}
+		this.@in = @in;
+		this.sort = sort;
+		this.numDocsToCollect = numDocsToCollect;
+	  }
+
+	  /// <summary>
+	  /// Forwards the scorer to the wrapped collector (Java original: <c>setScorer(Scorer)</c>).
+	  /// </summary>
+	  public override Scorer Scorer
+	  {
+		  set
+		  {
+			@in.Scorer = value;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Passes <paramref name="doc"/> to the wrapped collector, then stops collection of the
+	  /// current segment once <seealso cref="#segmentTotalCollect"/> documents have been forwarded.
+	  /// </summary>
+	  /// <exception cref="CollectionTerminatedException">
+	  ///          when the per-segment quota has been reached </exception>
+	  public override void collect(int doc)
+	  {
+		@in.collect(doc);
+		if (++numCollected >= segmentTotalCollect)
+		{
+		  throw new CollectionTerminatedException();
+		}
+	  }
+
+	  /// <summary>
+	  /// Forwards the new segment to the wrapped collector and resets the per-segment state
+	  /// (Java original: <c>setNextReader(AtomicReaderContext)</c>). A segment reported as sorted
+	  /// by <seealso cref="SortingMergePolicy"/> is capped at <seealso cref="#numDocsToCollect"/>
+	  /// documents; any other segment is collected without a cap.
+	  /// </summary>
+	  public override AtomicReaderContext NextReader
+	  {
+		  set
+		  {
+			@in.NextReader = value;
+			segmentSorted = SortingMergePolicy.isSorted(value.reader(), sort);
+			segmentTotalCollect = segmentSorted ? numDocsToCollect : int.MaxValue;
+			numCollected = 0;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Returns false while the current segment is sorted; otherwise defers to the wrapped
+	  /// collector.
+	  /// </summary>
+	  public override bool acceptsDocsOutOfOrder()
+	  {
+		return !segmentSorted && @in.acceptsDocsOutOfOrder();
+	  }
+
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs b/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs
new file mode 100644
index 0000000..f315b9c
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs
@@ -0,0 +1,404 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.index.sorter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using FieldComparator = org.apache.lucene.search.FieldComparator;
+	using Scorer = org.apache.lucene.search.Scorer;
+	using Sort = org.apache.lucene.search.Sort;
+	using SortField = org.apache.lucene.search.SortField;
+	using TimSorter = org.apache.lucene.util.TimSorter;
+	using MonotonicAppendingLongBuffer = org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+
+	/// <summary>
+	/// Sorts documents of a given index by returning a permutation on the document
+	/// IDs.
+	/// @lucene.experimental
+	/// </summary>
+	internal sealed class Sorter
+	{
+	  internal readonly Sort sort_Renamed;
+
+	  /// <summary>
+	  /// Builds a Sorter that orders an index according to <paramref name="sort"/>.
+	  /// </summary>
+	  /// <exception cref="System.ArgumentException">
+	  ///          if <paramref name="sort"/> reports that it needs scores </exception>
+	  internal Sorter(Sort sort)
+	  {
+		bool usesScores = sort.needsScores();
+		if (usesScores)
+		{
+		  throw new System.ArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
+		}
+		sort_Renamed = sort;
+	  }
+
+	  /// <summary>
+	  /// A permutation of doc IDs. For every document ID between <c>0</c> and
+	  /// <seealso cref="IndexReader#maxDoc()"/>, <c>oldToNew(newToOld(docID))</c> must
+	  /// return <c>docID</c>.
+	  /// </summary>
+	  internal abstract class DocMap
+	  {
+
+		/// <summary>
+		/// Given a doc ID from the original index, return its ordinal in the
+		/// sorted index.
+		/// </summary>
+		/// <param name="docID"> doc ID in the original index </param>
+		/// <returns> position of that document in the sorted index </returns>
+		internal abstract int oldToNew(int docID);
+
+		/// <summary>
+		/// Given the ordinal of a doc ID, return its doc ID in the original index. </summary>
+		/// <param name="docID"> position of a document in the sorted index </param>
+		/// <returns> doc ID of that document in the original index </returns>
+		internal abstract int newToOld(int docID);
+
+		/// <summary>
+		/// Return the number of documents in this map. This must be equal to the
+		/// <seealso cref="AtomicReader#maxDoc() number of documents"/> of the
+		/// <seealso cref="AtomicReader"/> which is sorted.
+		/// </summary>
+		internal abstract int size();
+	  }
+
+	  /// <summary>
+	  /// Verifies that <paramref name="docMap"/> round-trips every old doc ID through
+	  /// <c>oldToNew</c> and <c>newToOld</c> and that each mapped ID lies in <c>[0, size())</c>.
+	  /// Useful for assertions.
+	  /// </summary>
+	  /// <returns> true when every entry is consistent, false at the first violation </returns>
+	  internal static bool isConsistent(DocMap docMap)
+	  {
+		int docCount = docMap.size();
+		int oldDoc = 0;
+		while (oldDoc < docCount)
+		{
+		  int mapped = docMap.oldToNew(oldDoc);
+		  int roundTrip = docMap.newToOld(mapped);
+		  Debug.Assert(mapped >= 0 && mapped < docCount, "doc IDs must be in [0-" + docCount + "[, got " + mapped);
+		  Debug.Assert(oldDoc == roundTrip, "mapping is inconsistent: " + oldDoc + " --oldToNew--> " + mapped + " --newToOld--> " + roundTrip);
+		  bool inRange = mapped >= 0 && mapped < docCount;
+		  if (!inRange || oldDoc != roundTrip)
+		  {
+			return false;
+		  }
+		  ++oldDoc;
+		}
+		return true;
+	  }
+
+	  /// <summary>
+	  /// A comparator of doc IDs. </summary>
+	  internal abstract class DocComparator
+	  {
+
+		/// <summary>
+		/// Compare docID1 against docID2. The contract for the return value is the
+		/// same as Java's <c>Comparator.compare(Object, Object)</c>: negative, zero or
+		/// positive when docID1 sorts before, equal to, or after docID2.
+		/// </summary>
+		/// <param name="docID1"> first doc ID </param>
+		/// <param name="docID2"> second doc ID </param>
+		public abstract int compare(int docID1, int docID2);
+
+	  }
+
+	  /// <summary>
+	  /// <seealso cref="TimSorter"/> specialisation that reorders an array of doc IDs in place,
+	  /// ordering entries with a <seealso cref="Sorter.DocComparator"/>.
+	  /// </summary>
+	  private sealed class DocValueSorter : TimSorter
+	  {
+
+		// Doc IDs being sorted; mutated in place.
+		internal readonly int[] docs;
+		// Ordering applied to the doc IDs stored in docs.
+		internal readonly Sorter.DocComparator comparator;
+		// Scratch buffer used by save/restore/compareSaved.
+		internal readonly int[] tmp;
+
+		/// <summary>
+		/// Creates a sorter over <paramref name="docs"/>. The value handed to the base
+		/// constructor and the length of the scratch buffer are both <c>docs.Length / 64</c>.
+		/// NOTE(review): presumably the base argument is the maximum number of temporary
+		/// slots TimSorter will ask save() to hold; confirm against TimSorter.
+		/// </summary>
+		internal DocValueSorter(int[] docs, Sorter.DocComparator comparator) : base(docs.Length / 64)
+		{
+		  this.docs = docs;
+		  this.comparator = comparator;
+		  tmp = new int[docs.Length / 64];
+		}
+
+		/// <summary>
+		/// Compares the doc IDs stored at positions i and j. </summary>
+		protected internal override int compare(int i, int j)
+		{
+		  return comparator.compare(docs[i], docs[j]);
+		}
+
+		/// <summary>
+		/// Exchanges the doc IDs stored at positions i and j. </summary>
+		protected internal override void swap(int i, int j)
+		{
+		  int tmpDoc = docs[i];
+		  docs[i] = docs[j];
+		  docs[j] = tmpDoc;
+		}
+
+		/// <summary>
+		/// Overwrites position dest with the doc ID stored at position src. </summary>
+		protected internal override void copy(int src, int dest)
+		{
+		  docs[dest] = docs[src];
+		}
+
+		/// <summary>
+		/// Copies len doc IDs starting at position i into the start of the scratch buffer. </summary>
+		protected internal override void save(int i, int len)
+		{
+		  Array.Copy(docs, i, tmp, 0, len);
+		}
+
+		/// <summary>
+		/// Writes scratch-buffer entry i back to position j of the doc ID array. </summary>
+		protected internal override void restore(int i, int j)
+		{
+		  docs[j] = tmp[i];
+		}
+
+		/// <summary>
+		/// Compares scratch-buffer entry i against the doc ID stored at position j. </summary>
+		protected internal override int compareSaved(int i, int j)
+		{
+		  return comparator.compare(tmp[i], docs[j]);
+		}
+	  }
+
+	  /// <summary>
+	  /// Computes the old-to-new permutation over the given comparator. </summary>
+	  /// <param name="maxDoc"> number of doc IDs to permute; IDs run from 0 to maxDoc - 1 </param>
+	  /// <param name="comparator"> ordering to apply to the doc IDs </param>
+	  /// <returns> the permutation, or null when no adjacent pair of doc IDs is out of
+	  ///          order (i.e. the IDs are already sorted) </returns>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: private static Sorter.DocMap sort(final int maxDoc, DocComparator comparator)
+	  private static Sorter.DocMap sort(int maxDoc, DocComparator comparator)
+	  {
+		// check if the index is sorted: one linear pass over adjacent pairs is enough
+		bool sorted = true;
+		for (int i = 1; i < maxDoc; ++i)
+		{
+		  if (comparator.compare(i - 1, i) > 0)
+		  {
+			sorted = false;
+			break;
+		  }
+		}
+		if (sorted)
+		{
+		  // null signals "already sorted" to the caller
+		  return null;
+		}
+
+		// sort doc IDs: start from the identity permutation
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int[] docs = new int[maxDoc];
+		int[] docs = new int[maxDoc];
+		for (int i = 0; i < maxDoc; i++)
+		{
+		  docs[i] = i;
+		}
+
+		DocValueSorter sorter = new DocValueSorter(docs, comparator);
+		// It can be common to sort a reader, add docs, sort it again, ... and in
+		// that case timSort can save a lot of time
+		sorter.sort(0, docs.Length); // docs is now the newToOld mapping
+
+		// The reason why we use MonotonicAppendingLongBuffer here is that it
+		// wastes very little memory if the index is in random order but can save
+		// a lot of memory if the index is already "almost" sorted
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.packed.MonotonicAppendingLongBuffer newToOld = new org.apache.lucene.util.packed.MonotonicAppendingLongBuffer();
+		MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();
+		for (int i = 0; i < maxDoc; ++i)
+		{
+		  newToOld.add(docs[i]);
+		}
+		newToOld.freeze();
+
+		// Invert the permutation in place: docs is reused, which is safe because
+		// newToOld already holds a copy of every entry.
+		for (int i = 0; i < maxDoc; ++i)
+		{
+		  docs[(int) newToOld.get(i)] = i;
+		} // docs is now the oldToNew mapping
+
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.packed.MonotonicAppendingLongBuffer oldToNew = new org.apache.lucene.util.packed.MonotonicAppendingLongBuffer();
+		MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();
+		for (int i = 0; i < maxDoc; ++i)
+		{
+		  oldToNew.add(docs[i]);
+		}
+		oldToNew.freeze();
+
+		return new DocMapAnonymousInnerClassHelper(maxDoc, newToOld, oldToNew);
+	  }
+
+	  /// <summary>
+	  /// <seealso cref="Sorter.DocMap"/> that answers both lookup directions from a pair of
+	  /// <seealso cref="MonotonicAppendingLongBuffer"/>s supplied at construction.
+	  /// </summary>
+	  private class DocMapAnonymousInnerClassHelper : Sorter.DocMap
+	  {
+		  private readonly int maxDoc;
+		  // The buffers carry a "Buffer" suffix because a C# type cannot declare a field and a
+		  // method with the same name; the methods must keep the names fixed by DocMap.
+		  private readonly MonotonicAppendingLongBuffer newToOldBuffer;
+		  private readonly MonotonicAppendingLongBuffer oldToNewBuffer;
+
+		  /// <param name="maxDoc"> value reported by <c>size()</c> </param>
+		  /// <param name="newToOld"> buffer indexed by new doc ID, holding old doc IDs </param>
+		  /// <param name="oldToNew"> buffer indexed by old doc ID, holding new doc IDs </param>
+		  public DocMapAnonymousInnerClassHelper(int maxDoc, MonotonicAppendingLongBuffer newToOld, MonotonicAppendingLongBuffer oldToNew)
+		  {
+			  this.maxDoc = maxDoc;
+			  this.newToOldBuffer = newToOld;
+			  this.oldToNewBuffer = oldToNew;
+		  }
+
+		  // The overrides are internal, not public: an override must keep the accessibility of
+		  // the member it overrides, and DocMap declares these as internal abstract.
+
+		  /// <summary>
+		  /// Returns the new position of the document with original ID <paramref name="docID"/>. </summary>
+		  internal override int oldToNew(int docID)
+		  {
+			return (int) oldToNewBuffer.get(docID);
+		  }
+
+		  /// <summary>
+		  /// Returns the original ID of the document at new position <paramref name="docID"/>. </summary>
+		  internal override int newToOld(int docID)
+		  {
+			return (int) newToOldBuffer.get(docID);
+		  }
+
+		  /// <summary>
+		  /// Returns the <c>maxDoc</c> value given to the constructor. </summary>
+		  internal override int size()
+		  {
+			return maxDoc;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Returns a mapping from the old document ID to its new location in the
+	  /// sorted index. Implementations can use the auxiliary
+	  /// <seealso cref="#sort(int, DocComparator)"/> to compute the old-to-new permutation
+	  /// given a list of documents and their corresponding values.
+	  /// <para>
+	  /// A return value of <c>null</c> is allowed and means that
+	  /// <c>reader</c> is already sorted.
+	  /// </para>
+	  /// <para>
+	  /// <b>NOTE:</b> deleted documents are expected to appear in the mapping as
+	  /// well, they will however be marked as deleted in the sorted view.
+	  /// </para>
+	  /// </summary>
+	  /// <param name="reader"> the reader whose documents are to be ordered </param>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: DocMap sort(org.apache.lucene.index.AtomicReader reader) throws java.io.IOException
+	  internal DocMap sort(AtomicReader reader)
+	  {
+		SortField[] fields = sort_Renamed.Sort;
+		int[] reverseMul = new int[fields.Length];
+		// Java's FieldComparator<?>[] has no C# spelling. The non-generic FieldComparator,
+		// already used for the array allocation, serves as the element type.
+		FieldComparator[] comparators = new FieldComparator[fields.Length];
+
+		for (int i = 0; i < fields.Length; i++)
+		{
+		  // -1 flips the comparator result for sort fields marked as reversed
+		  reverseMul[i] = fields[i].Reverse ? -1 : 1;
+		  comparators[i] = fields[i].getComparator(1, i);
+		  comparators[i].NextReader = reader.Context;
+		  // Sorter's constructor rejects sorts that need scores, so a scorer that throws
+		  // on every call is sufficient here.
+		  comparators[i].Scorer = FAKESCORER;
+		}
+		DocComparator comparator = new DocComparatorAnonymousInnerClassHelper(this, reverseMul, comparators);
+		return sort(reader.maxDoc(), comparator);
+	  }
+
+	  /// <summary>
+	  /// <seealso cref="DocComparator"/> that orders two doc IDs by consulting each field
+	  /// comparator in turn and falls back to doc ID order when all of them report equality.
+	  /// </summary>
+	  private class DocComparatorAnonymousInnerClassHelper : DocComparator
+	  {
+		  private readonly Sorter outerInstance;
+
+		  private readonly int[] reverseMul;
+		  // Non-generic element type: C# has no equivalent of Java's FieldComparator<?>.
+		  private readonly FieldComparator[] comparators;
+
+		  /// <param name="outerInstance"> the enclosing Sorter (kept from the Java anonymous class) </param>
+		  /// <param name="reverseMul"> per-comparator multiplier applied to each comparison result </param>
+		  /// <param name="comparators"> comparators consulted in order </param>
+		  public DocComparatorAnonymousInnerClassHelper(Sorter outerInstance, int[] reverseMul, FieldComparator[] comparators)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.reverseMul = reverseMul;
+			  this.comparators = comparators;
+		  }
+
+		  /// <summary>
+		  /// Returns the first non-zero comparator result (multiplied by the matching
+		  /// <c>reverseMul</c> entry), or the doc ID ordering when every comparator ties.
+		  /// </summary>
+		  public override int compare(int docID1, int docID2)
+		  {
+			try
+			{
+			  for (int i = 0; i < comparators.Length; i++)
+			  {
+				// TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
+				// the segments are always the same here...
+				comparators[i].copy(0, docID1);
+				comparators[i].Bottom = 0;
+				int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
+				if (comp != 0)
+				{
+				  return comp;
+				}
+			  }
+			  // int has no static compare in C#; CompareTo yields the same sign.
+			  return docID1.CompareTo(docID2); // docid order tiebreak
+			}
+			catch (System.IO.IOException e)
+			{
+			  // Mirrors Java's "throw new RuntimeException(e)". Exception has no constructor
+			  // taking only an inner exception, so the message is passed along with it.
+			  throw new Exception(e.Message, e);
+			}
+		  }
+	  }
+
+	  /// <summary>
+	  /// Identifier of this <seealso cref="Sorter"/>: the string form of the wrapped sort.
+	  /// <para>Like a hash code, it should be equal for two instances that sort documents
+	  /// the same way and differ for different <seealso cref="Sort sorts"/>.
+	  /// </para>
+	  /// </summary>
+	  public string ID
+	  {
+		  get
+		  {
+			string identifier = sort_Renamed.ToString();
+			return identifier;
+		  }
+	  }
+
+	  /// <summary>
+	  /// Returns the same string as <c>ID</c>. </summary>
+	  public override string ToString()
+	  {
+		string text = this.ID;
+		return text;
+	  }
+
+	  internal static readonly Scorer FAKESCORER = new ScorerAnonymousInnerClassHelper();
+
+	  /// <summary>
+	  /// Placeholder <seealso cref="Scorer"/>: every overridden member throws
+	  /// <seealso cref="System.NotSupportedException"/>.
+	  /// </summary>
+	  private class ScorerAnonymousInnerClassHelper : Scorer
+	  {
+		  public ScorerAnonymousInnerClassHelper() : base(null)
+		  {
+		  }
+
+		  // Single construction point for the exception every member raises.
+		  private static System.NotSupportedException unsupported()
+		  {
+			  return new System.NotSupportedException();
+		  }
+
+		  /// <summary> Always throws. </summary>
+		  public override float score()
+		  {
+			  throw unsupported();
+		  }
+
+		  /// <summary> Always throws. </summary>
+		  public override int freq()
+		  {
+			  throw unsupported();
+		  }
+
+		  /// <summary> Always throws. </summary>
+		  public override int docID()
+		  {
+			  throw unsupported();
+		  }
+
+		  /// <summary> Always throws. </summary>
+		  public override int nextDoc()
+		  {
+			  throw unsupported();
+		  }
+
+		  /// <summary> Always throws. </summary>
+		  public override int advance(int target)
+		  {
+			  throw unsupported();
+		  }
+
+		  /// <summary> Always throws. </summary>
+		  public override long cost()
+		  {
+			  throw unsupported();
+		  }
+	  }
+
+	}
+
+}
\ No newline at end of file


Mime
View raw message