lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [8/8] git commit: Porting Lucene.Net.Suggest (still not compiling)
Date Mon, 15 Sep 2014 22:24:55 GMT
Porting Lucene.Net.Suggest (still not compiling)


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/0ebac726
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/0ebac726
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/0ebac726

Branch: refs/heads/master
Commit: 0ebac7269dbc076fbd9c33855d59f75716444fab
Parents: 6e90056
Author: Itamar Syn-Hershko <itamar@code972.com>
Authored: Tue Sep 16 01:22:45 2014 +0300
Committer: Itamar Syn-Hershko <itamar@code972.com>
Committed: Tue Sep 16 01:23:19 2014 +0300

----------------------------------------------------------------------
 src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs |    4 +-
 src/Lucene.Net.Core/Index/IndexableField.cs     |    2 +
 src/Lucene.Net.Core/Search/SearcherManager.cs   |    1 +
 src/Lucene.Net.Core/Util/OfflineSorter.cs       | 1358 +++++++++---------
 .../Lucene.Net.Suggest.csproj                   |  108 ++
 .../Properties/AssemblyInfo.cs                  |   36 +
 src/Lucene.Net.Suggest/RectangularArrays.cs     |   29 +
 .../Spell/CombineSuggestion.cs                  |   47 +
 src/Lucene.Net.Suggest/Spell/Dictionary.cs      |   35 +
 .../Spell/DirectSpellChecker.cs                 |  575 ++++++++
 .../Spell/HighFrequencyDictionary.cs            |  151 ++
 .../Spell/JaroWinklerDistance.cs                |  173 +++
 .../Spell/LevensteinDistance.cs                 |  144 ++
 .../Spell/LuceneDictionary.cs                   |   58 +
 .../Spell/LuceneLevenshteinDistance.cs          |  136 ++
 src/Lucene.Net.Suggest/Spell/NGramDistance.cs   |  195 +++
 .../Spell/PlainTextDictionary.cs                |  134 ++
 src/Lucene.Net.Suggest/Spell/SpellChecker.cs    |  748 ++++++++++
 src/Lucene.Net.Suggest/Spell/StringDistance.cs  |   36 +
 src/Lucene.Net.Suggest/Spell/SuggestMode.cs     |   46 +
 src/Lucene.Net.Suggest/Spell/SuggestWord.cs     |   53 +
 .../Spell/SuggestWordFrequencyComparator.cs     |   64 +
 .../Spell/SuggestWordQueue.cs                   |   65 +
 .../Spell/SuggestWordScoreComparator.cs         |   64 +
 .../Spell/TermFreqIterator.cs                   |   68 +
 .../Spell/WordBreakSpellChecker.cs              |  542 +++++++
 src/Lucene.Net.Suggest/StringHelperClass.cs     |   90 ++
 .../Analyzing/AnalyzingInfixSuggester.cs        |  792 ++++++++++
 .../Suggest/Analyzing/AnalyzingSuggester.cs     | 1093 ++++++++++++++
 .../Suggest/Analyzing/BlendedInfixSuggester.cs  |  316 ++++
 .../Suggest/Analyzing/FSTUtil.cs                |  146 ++
 .../Suggest/Analyzing/FreeTextSuggester.cs      |  929 ++++++++++++
 .../Suggest/Analyzing/FuzzySuggester.cs         |  271 ++++
 .../Suggest/Analyzing/SuggestStopFilter.cs      |  138 ++
 .../Suggest/BufferedInputIterator.cs            |  139 ++
 .../Suggest/BufferingTermFreqIteratorWrapper.cs |   89 ++
 .../Suggest/DocumentDictionary.cs               |  278 ++++
 .../Suggest/DocumentValueSourceDictionary.cs    |  169 +++
 .../Suggest/FileDictionary.cs                   |  284 ++++
 .../Suggest/Fst/BytesRefSorter.cs               |   34 +
 .../Suggest/Fst/ExternalRefSorter.cs            |  150 ++
 .../Suggest/Fst/FSTCompletion.cs                |  467 ++++++
 .../Suggest/Fst/FSTCompletionBuilder.cs         |  274 ++++
 .../Suggest/Fst/FSTCompletionLookup.cs          |  337 +++++
 .../Suggest/Fst/WFSTCompletionLookup.cs         |  348 +++++
 .../Suggest/InMemorySorter.cs                   |   70 +
 src/Lucene.Net.Suggest/Suggest/InputIterator.cs |  124 ++
 .../Suggest/Jaspell/JaspellLookup.cs            |  258 ++++
 .../Suggest/Jaspell/JaspellTernarySearchTrie.cs |  986 +++++++++++++
 src/Lucene.Net.Suggest/Suggest/Lookup.cs        |  299 ++++
 .../Suggest/SortedInputIterator.cs              |  353 +++++
 .../Suggest/SortedTermFreqIteratorWrapper.cs    |  230 +++
 .../Suggest/Tst/TSTAutocomplete.cs              |  207 +++
 src/Lucene.Net.Suggest/Suggest/Tst/TSTLookup.cs |  295 ++++
 .../Suggest/Tst/TernaryTreeNode.cs              |   78 +
 .../Suggest/UnsortedInputIterator.cs            |  108 ++
 56 files changed, 13543 insertions(+), 681 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs b/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
index 2a7b543..b18e17a 100644
--- a/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
+++ b/src/Lucene.Net.Core/Analysis/AnalyzerWrapper.cs
@@ -62,7 +62,7 @@ namespace Lucene.Net.Analysis
         /// </summary>
         /// <param name="fieldName"> Name of the field which is to be analyzed </param>
         /// <returns> Analyzer for the field with the given name.  Assumed to be non-null </returns>
-        protected internal abstract Analyzer GetWrappedAnalyzer(string fieldName);
+        protected abstract Analyzer GetWrappedAnalyzer(string fieldName);
 
         /// <summary>
         /// Wraps / alters the given TokenStreamComponents, taken from the wrapped
@@ -75,7 +75,7 @@ namespace Lucene.Net.Analysis
         /// <param name="components">
         ///          TokenStreamComponents taken from the wrapped Analyzer </param>
         /// <returns> Wrapped / altered TokenStreamComponents. </returns>
-        protected internal virtual TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
+        protected virtual TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
         {
             return components;
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Index/IndexableField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Index/IndexableField.cs b/src/Lucene.Net.Core/Index/IndexableField.cs
index 25846bc..d26c639 100644
--- a/src/Lucene.Net.Core/Index/IndexableField.cs
+++ b/src/Lucene.Net.Core/Index/IndexableField.cs
@@ -1,3 +1,5 @@
+using Lucene.Net.Search.Similarities;
+
 namespace Lucene.Net.Index
 {
     using System.IO;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Search/SearcherManager.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Search/SearcherManager.cs b/src/Lucene.Net.Core/Search/SearcherManager.cs
index 1c2dee5..8c80a43 100644
--- a/src/Lucene.Net.Core/Search/SearcherManager.cs
+++ b/src/Lucene.Net.Core/Search/SearcherManager.cs
@@ -1,4 +1,5 @@
 using System.Diagnostics;
+using System.IO;
 
 namespace Lucene.Net.Search
 {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Core/Util/OfflineSorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/OfflineSorter.cs b/src/Lucene.Net.Core/Util/OfflineSorter.cs
index aefb07c..f966e77 100644
--- a/src/Lucene.Net.Core/Util/OfflineSorter.cs
+++ b/src/Lucene.Net.Core/Util/OfflineSorter.cs
@@ -1,679 +1,679 @@
-//using System;
-//using System.Collections.Generic;
-//using System.Diagnostics;
-//using System.IO;
-//
-//namespace Lucene.Net.Util
-//{
-//    /*
-//     * Licensed to the Apache Software Foundation (ASF) under one or more
-//     * contributor license agreements.  See the NOTICE file distributed with
-//     * this work for additional information regarding copyright ownership.
-//     * The ASF licenses this file to You under the Apache License, Version 2.0
-//     * (the "License"); you may not use this file except in compliance with
-//     * the License.  You may obtain a copy of the License at
-//     *
-//     *     http://www.apache.org/licenses/LICENSE-2.0
-//     *
-//     * Unless required by applicable law or agreed to in writing, software
-//     * distributed under the License is distributed on an "AS IS" BASIS,
-//     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-//     * See the License for the specific language governing permissions and
-//     * limitations under the License.
-//     */
-//
-//    /// <summary>
-//    /// On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
-//    /// fields:
-//    /// <ul>
-//    ///   <li>(two bytes) length of the following byte array,
-//    ///   <li>exactly the above count of bytes for the sequence to be sorted.
-//    /// </ul>
-//    /// </summary>
-//    public sealed class OfflineSorter
-//    {
-//        private bool InstanceFieldsInitialized = false;
-//
-//        private void InitializeInstanceFields()
-//        {
-//            Buffer = new BytesRefArray(BufferBytesUsed);
-//        }
-//
-//        /// <summary>
-//        /// Convenience constant for megabytes </summary>
-//        public const long MB = 1024 * 1024;
-//        /// <summary>
-//        /// Convenience constant for gigabytes </summary>
-//        public static readonly long GB = MB * 1024;
-//
-//        /// <summary>
-//        /// Minimum recommended buffer size for sorting.
-//        /// </summary>
-//        public const long MIN_BUFFER_SIZE_MB = 32;
-//
-//        /// <summary>
-//        /// Absolute minimum required buffer size for sorting.
-//        /// </summary>
-//        public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
-//        private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
-//
-//        /// <summary>
-//        /// Maximum number of temporary files before doing an intermediate merge.
-//        /// </summary>
-//        public const int MAX_TEMPFILES = 128;
-//
-//        /// <summary>
-//        /// A bit more descriptive unit for constructors.
-//        /// </summary>
-//        /// <seealso cref= #automatic() </seealso>
-//        /// <seealso cref= #megabytes(long) </seealso>
-//        public sealed class BufferSize
-//        {
-//            internal readonly int Bytes;
-//
-//            internal BufferSize(long bytes)
-//            {
-//                if (bytes > int.MaxValue)
-//                {
-//                    throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
-//                }
-//
-//                if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
-//                {
-//                    throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
-//                }
-//
-//                this.Bytes = (int)bytes;
-//            }
-//
-//            /// <summary>
-//            /// Creates a <seealso cref="BufferSize"/> in MB. The given
-//            /// values must be &gt; 0 and &lt; 2048.
-//            /// </summary>
-//            public static BufferSize Megabytes(long mb)
-//            {
-//                return new BufferSize(mb * MB);
-//            }
-//
-//            /// <summary>
-//            /// Approximately half of the currently available free heap, but no less
-//            /// than <seealso cref="#ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However if current heap allocation
-//            /// is insufficient or if there is a large portion of unallocated heap-space available
-//            /// for sorting consult with max allowed heap size.
-//            /// </summary>
-//            public static BufferSize Automatic()
-//            {
-//                var proc = Process.GetCurrentProcess();
-//
-//                // take sizes in "conservative" order
-//                long max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
-//                long total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
-//                long free = rt.freeMemory(); // unused portion of currently allocated
-//                long totalAvailableBytes = max - total + free;
-//
-//                // by free mem (attempting to not grow the heap for this)
-//                long sortBufferByteSize = free / 2;
-//                const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
-//                if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
-//                {
-//                    if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
-//                    {
-//                        sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
-//                    }
-//                    else
-//                    {
-//                        //heap seems smallish lets be conservative fall back to the free/2
-//                        sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
-//                    }
-//                }
-//                return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize));
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Sort info (debugging mostly).
-//        /// </summary>
-//        public class SortInfo
-//        {
-//            internal bool InstanceFieldsInitialized = false;
-//
-//            internal virtual void InitializeInstanceFields()
-//            {
-//                BufferSize = OuterInstance.RamBufferSize.Bytes;
-//            }
-//
-//            private readonly OfflineSorter OuterInstance;
-//
-//            /// <summary>
-//            /// number of temporary files created when merging partitions </summary>
-//            public int TempMergeFiles;
-//            /// <summary>
-//            /// number of partition merges </summary>
-//            public int MergeRounds;
-//            /// <summary>
-//            /// number of lines of data read </summary>
-//            public int Lines;
-//            /// <summary>
-//            /// time spent merging sorted partitions (in milliseconds) </summary>
-//            public long MergeTime;
-//            /// <summary>
-//            /// time spent sorting data (in milliseconds) </summary>
-//            public long SortTime;
-//            /// <summary>
-//            /// total time spent (in milliseconds) </summary>
-//            public long TotalTime;
-//            /// <summary>
-//            /// time spent in i/o read (in milliseconds) </summary>
-//            public long ReadTime;
-//            /// <summary>
-//            /// read buffer size (in bytes) </summary>
-//            public long BufferSize;
-//
-//            /// <summary>
-//            /// create a new SortInfo (with empty statistics) for debugging </summary>
-//            public SortInfo(OfflineSorter outerInstance)
-//            {
-//                this.OuterInstance = outerInstance;
-//
-//                if (!InstanceFieldsInitialized)
-//                {
-//                    InitializeInstanceFields();
-//                    InstanceFieldsInitialized = true;
-//                }
-//            }
-//
-//            public override string ToString()
-//            {
-//                return string.Format("time=%.2f sec. total (%.2f reading, %.2f sorting, %.2f merging), lines=%d, temp files=%d, merges=%d, soft ram limit=%.2f MB", TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB);
-//            }
-//        }
-//
-//        private readonly BufferSize RamBufferSize;
-//
-//        private readonly Counter BufferBytesUsed = Counter.NewCounter();
-//        private BytesRefArray Buffer;
-//        private SortInfo sortInfo;
-//        private readonly int MaxTempFiles;
-//        private readonly IComparer<BytesRef> comparator;
-//
-//        /// <summary>
-//        /// Default comparator: sorts in binary (codepoint) order </summary>
-//        public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
-//
-//        /// <summary>
-//        /// Defaults constructor.
-//        /// </summary>
-//        /// <seealso cref= #defaultTempDir() </seealso>
-//        /// <seealso cref= BufferSize#automatic() </seealso>
-//        public OfflineSorter()
-//            : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
-//        {
-//            if (!InstanceFieldsInitialized)
-//            {
-//                InitializeInstanceFields();
-//                InstanceFieldsInitialized = true;
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Defaults constructor with a custom comparator.
-//        /// </summary>
-//        /// <seealso cref= #defaultTempDir() </seealso>
-//        /// <seealso cref= BufferSize#automatic() </seealso>
-//        public OfflineSorter(IComparer<BytesRef> comparator)
-//            : this(comparator, BufferSize.Automatic(), DefaultTempDir(), MAX_TEMPFILES)
-//        {
-//            if (!InstanceFieldsInitialized)
-//            {
-//                InitializeInstanceFields();
-//                InstanceFieldsInitialized = true;
-//            }
-//        }
-//
-//        /// <summary>
-//        /// All-details constructor.
-//        /// </summary>
-//        public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, /*DirectoryInfo tempDirectory,*/ int maxTempfiles)
-//        {
-//            if (!InstanceFieldsInitialized)
-//            {
-//                InitializeInstanceFields();
-//                InstanceFieldsInitialized = true;
-//            }
-//            if (ramBufferSize.Bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
-//            {
-//                throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.Bytes);
-//            }
-//
-//            if (maxTempfiles < 2)
-//            {
-//                throw new System.ArgumentException("maxTempFiles must be >= 2");
-//            }
-//
-//            this.RamBufferSize = ramBufferSize;
-//            this.MaxTempFiles = maxTempfiles;
-//            this.comparator = comparator;
-//        }
-//
-//        /// <summary>
-//        /// Sort input to output, explicit hint for the buffer size. The amount of allocated
-//        /// memory may deviate from the hint (may be smaller or larger).
-//        /// </summary>
-//        public SortInfo Sort(FileInfo input, FileInfo output)
-//        {
-//            sortInfo = new SortInfo(this) {TotalTime = DateTime.Now.Millisecond};
-//
-//            output.Delete();
-//
-//            var merges = new List<FileInfo>();
-//            bool success2 = false;
-//            try
-//            {
-//                var inputStream = new ByteSequencesReader(input);
-//                bool success = false;
-//                try
-//                {
-//                    int lines = 0;
-//                    while ((lines = ReadPartition(inputStream)) > 0)
-//                    {
-//                        merges.Add(SortPartition(lines));
-//                        sortInfo.TempMergeFiles++;
-//                        sortInfo.Lines += lines;
-//
-//                        // Handle intermediate merges.
-//                        if (merges.Count == MaxTempFiles)
-//                        {
-//                            var intermediate = new FileInfo(Path.GetTempFileName());
-//                            try
-//                            {
-//                                MergePartitions(merges, intermediate);
-//                            }
-//                            finally
-//                            {
-//                                foreach (var file in merges)
-//                                {
-//                                    file.Delete();
-//                                }
-//                                merges.Clear();
-//                                merges.Add(intermediate);
-//                            }
-//                            sortInfo.TempMergeFiles++;
-//                        }
-//                    }
-//                    success = true;
-//                }
-//                finally
-//                {
-//                    if (success)
-//                    {
-//                        IOUtils.Close(inputStream);
-//                    }
-//                    else
-//                    {
-//                        IOUtils.CloseWhileHandlingException(inputStream);
-//                    }
-//                }
-//
-//                // One partition, try to rename or copy if unsuccessful.
-//                if (merges.Count == 1)
-//                {
-//                    FileInfo single = merges[0];
-//                    Copy(single, output);
-//                    try
-//                    {
-//                        File.Delete(single.FullName);
-//                    }
-//                    catch (Exception)
-//                    {
-//                        // ignored
-//                    }
-//                }
-//                else
-//                {
-//                    // otherwise merge the partitions with a priority queue.
-//                    MergePartitions(merges, output);
-//                }
-//                success2 = true;
-//            }
-//            finally
-//            {
-//                foreach (FileInfo file in merges)
-//                {
-//                    file.Delete();
-//                }
-//                if (!success2)
-//                {
-//                    output.Delete();
-//                }
-//            }
-//
-//            sortInfo.TotalTime = (DateTime.Now.Millisecond - sortInfo.TotalTime);
-//            return sortInfo;
-//        }
-//
-//        /// <summary>
-//        /// Returns the default temporary directory. By default, the System's temp folder. If not accessible
-//        /// or not available, an IOException is thrown
-//        /// </summary>
-//        public static DirectoryInfo DefaultTempDir()
-//        {
-//            return new DirectoryInfo(Path.GetTempPath());
-//        }
-//
-//        /// <summary>
-//        /// Copies one file to another.
-//        /// </summary>
-//        private static void Copy(FileInfo file, FileInfo output)
-//        {
-//            File.Copy(file.FullName, output.FullName);
-//        }
-//
-//        /// <summary>
-//        /// Sort a single partition in-memory. </summary>
-//        internal FileInfo SortPartition(int len)
-//        {
-//            var data = this.Buffer;
-//            var tempFile = new FileInfo(Path.GetTempFileName());
-//            //var tempFile1 = File.Create(new ());
-//            //FileInfo tempFile = FileInfo.createTempFile("sort", "partition", TempDirectory);
-//
-//            long start = DateTime.Now.Millisecond;
-//            sortInfo.SortTime += (DateTime.Now.Millisecond - start);
-//
-//            var @out = new ByteSequencesWriter(tempFile);
-//            BytesRef spare;
-//            try
-//            {
-//                BytesRefIterator iter = Buffer.Iterator(comparator);
-//                while ((spare = iter.Next()) != null)
-//                {
-//                    Debug.Assert(spare.Length <= short.MaxValue);
-//                    @out.Write(spare);
-//                }
-//
-//                @out.Dispose();
-//
-//                // Clean up the buffer for the next partition.
-//                data.Clear();
-//                return tempFile;
-//            }
-//            finally
-//            {
-//                IOUtils.Close(@out);
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Merge a list of sorted temporary files (partitions) into an output file </summary>
-//        internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
-//        {
-//            long start = DateTime.Now.Millisecond;
-//
-//            var @out = new ByteSequencesWriter(outputFile);
-//
-//            PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
-//
-//            var streams = new ByteSequencesReader[merges.Count];
-//            try
-//            {
-//                // Open streams and read the top for each file
-//                for (int i = 0; i < merges.Count; i++)
-//                {
-//                    streams[i] = new ByteSequencesReader(merges[i]);
-//                    sbyte[] line = streams[i].Read();
-//                    if (line != null)
-//                    {
-//                        queue.InsertWithOverflow(new FileAndTop(i, line));
-//                    }
-//                }
-//
-//                // Unix utility sort() uses ordered array of files to pick the next line from, updating
-//                // it as it reads new lines. The PQ used here is a more elegant solution and has
-//                // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
-//                // so it shouldn't make much of a difference (didn't check).
-//                FileAndTop top;
-//                while ((top = queue.Top()) != null)
-//                {
-//                    @out.Write(top.Current);
-//                    if (!streams[top.Fd].Read(top.Current))
-//                    {
-//                        queue.Pop();
-//                    }
-//                    else
-//                    {
-//                        queue.UpdateTop();
-//                    }
-//                }
-//
-//                SortInfo.MergeTime += DateTime.UtcNow.Ticks - start;
-//                SortInfo.MergeRounds++;
-//            }
-//            finally
-//            {
-//                // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
-//                // happening in closing streams.
-//                try
-//                {
-//                    IOUtils.Close(streams);
-//                }
-//                finally
-//                {
-//                    IOUtils.Close(@out);
-//                }
-//            }
-//        }
-//
-//        private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
-//        {
-//            private readonly OfflineSorter OuterInstance;
-//
-//            public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
-//                : base(size)
-//            {
-//                this.OuterInstance = outerInstance;
-//            }
-//
-//            public override bool LessThan(FileAndTop a, FileAndTop b)
-//            {
-//                return OuterInstance.comparator.Compare(a.Current, b.Current) < 0;
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Read in a single partition of data </summary>
-//        internal int ReadPartition(ByteSequencesReader reader)
-//        {
-//            long start = DateTime.Now.Millisecond;
-//            var scratch = new BytesRef();
-//            while ((scratch.Bytes = reader.Read()) != null)
-//            {
-//                scratch.Length = scratch.Bytes.Length;
-//                Buffer.Append(scratch);
-//                // Account for the created objects.
-//                // (buffer slots do not account to buffer size.)
-//                if (RamBufferSize.Bytes < BufferBytesUsed.Get())
-//                {
-//                    break;
-//                }
-//            }
-//            sortInfo.ReadTime += (DateTime.Now.Millisecond - start);
-//            return Buffer.Size();
-//        }
-//
-//        internal class FileAndTop
-//        {
-//            internal readonly int Fd;
-//            internal readonly BytesRef Current;
-//
-//            internal FileAndTop(int fd, sbyte[] firstLine)
-//            {
-//                this.Fd = fd;
-//                this.Current = new BytesRef(firstLine);
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Utility class to emit length-prefixed byte[] entries to an output stream for sorting.
-//        /// Complementary to <seealso cref="ByteSequencesReader"/>.
-//        /// </summary>
-//        public class ByteSequencesWriter : IDisposable
-//        {
-//            internal readonly DataOutput Os;
-//
-//            /// <summary>
-//            /// Constructs a ByteSequencesWriter to the provided File </summary>
-//            public ByteSequencesWriter(FileInfo file)
-//                : this(new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file))))
-//            {
-//            }
-//
-//            /// <summary>
-//            /// Constructs a ByteSequencesWriter to the provided DataOutput </summary>
-//            public ByteSequencesWriter(DataOutput os)
-//            {
-//                this.Os = os;
-//            }
-//
-//            /// <summary>
-//            /// Writes a BytesRef. </summary>
-//            /// <seealso cref= #write(byte[], int, int) </seealso>
-//            public virtual void Write(BytesRef @ref)
-//            {
-//                Debug.Assert(@ref != null);
-//                Write(@ref.Bytes, @ref.Offset, @ref.Length);
-//            }
-//
-//            /// <summary>
-//            /// Writes a byte array. </summary>
-//            /// <seealso cref= #write(byte[], int, int) </seealso>
-//            public virtual void Write(sbyte[] bytes)
-//            {
-//                Write(bytes, 0, bytes.Length);
-//            }
-//
-//            /// <summary>
-//            /// Writes a byte array.
-//            /// <p>
-//            /// The length is written as a <code>short</code>, followed
-//            /// by the bytes.
-//            /// </summary>
-//            public virtual void Write(sbyte[] bytes, int off, int len)
-//            {
-//                Debug.Assert(bytes != null);
-//                Debug.Assert(off >= 0 && off + len <= bytes.Length);
-//                Debug.Assert(len >= 0);
-//                Os.WriteShort(len);
-//                Os.Write(bytes, off, len);
-//            }
-//
-//            /// <summary>
-//            /// Closes the provided <seealso cref="DataOutput"/> if it is <seealso cref="IDisposable"/>.
-//            /// </summary>
-//            public void Dispose()
-//            {
-//                var os = Os as IDisposable;
-//                if (os != null)
-//                {
-//                    os.Dispose();
-//                }
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Utility class to read length-prefixed byte[] entries from an input.
-//        /// Complementary to <seealso cref="ByteSequencesWriter"/>.
-//        /// </summary>
-//        public class ByteSequencesReader : IDisposable
-//        {
-//            internal readonly DataInput inputStream;
-//
-//            /// <summary>
-//            /// Constructs a ByteSequencesReader from the provided File </summary>
-//            public ByteSequencesReader(FileInfo file)
-//                : this(new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
-//            {
-//            }
-//
-//            /// <summary>
-//            /// Constructs a ByteSequencesReader from the provided DataInput </summary>
-//            public ByteSequencesReader(DataInput inputStream)
-//            {
-//                this.inputStream = inputStream;
-//            }
-//
-//            /// <summary>
-//            /// Reads the next entry into the provided <seealso cref="BytesRef"/>. The internal
-//            /// storage is resized if needed.
-//            /// </summary>
-//            /// <returns> Returns <code>false</code> if EOF occurred when trying to read
-//            /// the header of the next sequence. Returns <code>true</code> otherwise. </returns>
-//            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
-//            public virtual bool Read(BytesRef @ref)
-//            {
-//                short length;
-//                try
-//                {
-//                    length = inputStream.ReadShort();
-//                }
-//                catch (EOFException)
-//                {
-//                    return false;
-//                }
-//
-//                @ref.Grow(length);
-//                @ref.Offset = 0;
-//                @ref.Length = length;
-//                inputStream.ReadFully(@ref.Bytes, 0, length);
-//                return true;
-//            }
-//
-//            /// <summary>
-//            /// Reads the next entry and returns it if successful.
-//            /// </summary>
-//            /// <seealso cref= #read(BytesRef)
-//            /// </seealso>
-//            /// <returns> Returns <code>null</code> if EOF occurred before the next entry
-//            /// could be read. </returns>
-//            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
-//            public virtual sbyte[] Read()
-//            {
-//                short length;
-//                try
-//                {
-//                    length = inputStream.ReadShort();
-//                }
-//                catch (EOFException e)
-//                {
-//                    return null;
-//                }
-//
-//                Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
-//                sbyte[] result = new sbyte[length];
-//                inputStream.ReadFully(result);
-//                return result;
-//            }
-//
-//            /// <summary>
-//            /// Closes the provided <seealso cref="DataInput"/> if it is <seealso cref="IDisposable"/>.
-//            /// </summary>
-//            public void Dispose()
-//            {
-//                var @is = inputStream as IDisposable;
-//                if (@is != null)
-//                {
-//                    @is.Dispose();
-//                }
-//            }
-//        }
-//
-//        /// <summary>
-//        /// Returns the comparator in use to sort entries </summary>
-//        public IComparer<BytesRef> Comparator
-//        {
-//            get
-//            {
-//                return comparator;
-//            }
-//        }
-//    }
-//}
\ No newline at end of file
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Util
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
+    /// fields:
+    /// <ul>
+    ///   <li>(two bytes) length of the following byte array,
+    ///   <li>exactly the above count of bytes for the sequence to be sorted.
+    /// </ul>
+    /// </summary>
+    public sealed class OfflineSorter
+    {
+        private bool InstanceFieldsInitialized = false;
+
+        private void InitializeInstanceFields()
+        {
+            Buffer = new BytesRefArray(BufferBytesUsed);
+        }
+
+        /// <summary>
+        /// Convenience constant for megabytes </summary>
+        public const long MB = 1024 * 1024;
+        /// <summary>
+        /// Convenience constant for gigabytes </summary>
+        public static readonly long GB = MB * 1024;
+
+        /// <summary>
+        /// Minimum recommended buffer size for sorting.
+        /// </summary>
+        public const long MIN_BUFFER_SIZE_MB = 32;
+
+        /// <summary>
+        /// Absolute minimum required buffer size for sorting.
+        /// </summary>
+        public static readonly long ABSOLUTE_MIN_SORT_BUFFER_SIZE = MB / 2;
+        private const string MIN_BUFFER_SIZE_MSG = "At least 0.5MB RAM buffer is needed";
+
+        /// <summary>
+        /// Maximum number of temporary files before doing an intermediate merge.
+        /// </summary>
+        public const int MAX_TEMPFILES = 128;
+
+        /// <summary>
+        /// A bit more descriptive unit for constructors.
+        /// </summary>
+        /// <seealso cref= #automatic() </seealso>
+        /// <seealso cref= #megabytes(long) </seealso>
+        public sealed class BufferSize
+        {
+            internal readonly int Bytes;
+
+            internal BufferSize(long bytes)
+            {
+                if (bytes > int.MaxValue)
+                {
+                    throw new System.ArgumentException("Buffer too large for Java (" + (int.MaxValue / MB) + "mb max): " + bytes);
+                }
+
+                if (bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+                {
+                    throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + bytes);
+                }
+
+                this.Bytes = (int)bytes;
+            }
+
+            /// <summary>
+            /// Creates a <seealso cref="BufferSize"/> in MB. The given
+            /// values must be &gt; 0 and &lt; 2048.
+            /// </summary>
+            public static BufferSize Megabytes(long mb)
+            {
+                return new BufferSize(mb * MB);
+            }
+
+            /// <summary>
+            /// Approximately half of the currently available free heap, but no less
+            /// than <see cref="ABSOLUTE_MIN_SORT_BUFFER_SIZE"/>. However if current heap allocation
+            /// is insufficient or if there is a large portion of unallocated heap-space available
+            /// for sorting consult with max allowed heap size.
+            /// </summary>
+            /// <returns> a <see cref="BufferSize"/> of at most <c>int.MaxValue</c> bytes. </returns>
+            public static BufferSize Automatic()
+            {
+                var proc = Process.GetCurrentProcess();
+
+                // take sizes in "conservative" order
+                // NOTE(review): PeakVirtualMemorySize64 / VirtualMemorySize64 stand in for Java's
+                // Runtime.maxMemory() / Runtime.totalMemory(); confirm they are suitable substitutes.
+                long max = proc.PeakVirtualMemorySize64; // max allocated; java has it as Runtime.maxMemory();
+                long total = proc.VirtualMemorySize64; // currently allocated; java has it as Runtime.totalMemory();
+                // NOTE(review): no 'rt' is declared in this method or in the enclosing classes (it looks like a
+                // leftover of the Java Runtime instance); this line needs a .NET replacement before it can compile.
+                long free = rt.freeMemory(); // unused portion of currently allocated
+                long totalAvailableBytes = max - total + free;
+
+                // by free mem (attempting to not grow the heap for this)
+                long sortBufferByteSize = free / 2;
+                const long minBufferSizeBytes = MIN_BUFFER_SIZE_MB * MB;
+                if (sortBufferByteSize < minBufferSizeBytes || totalAvailableBytes > 10 * minBufferSizeBytes) // lets see if we need/should to grow the heap
+                {
+                    if (totalAvailableBytes / 2 > minBufferSizeBytes) // there is enough mem for a reasonable buffer
+                    {
+                        sortBufferByteSize = totalAvailableBytes / 2; // grow the heap
+                    }
+                    else
+                    {
+                        //heap seems smallish lets be conservative fall back to the free/2
+                        sortBufferByteSize = Math.Max(ABSOLUTE_MIN_SORT_BUFFER_SIZE, sortBufferByteSize);
+                    }
+                }
+                // Clamp to int.MaxValue because the BufferSize constructor rejects anything larger.
+                return new BufferSize(Math.Min((long)int.MaxValue, sortBufferByteSize));
+            }
+        }
+
+        /// <summary>
+        /// Sort info (debugging mostly).
+        /// </summary>
+        public class SortInfo
+        {
+            internal bool InstanceFieldsInitialized = false;
+
+            internal virtual void InitializeInstanceFields()
+            {
+                BufferSize = OuterInstance.RamBufferSize.Bytes;
+            }
+
+            private readonly OfflineSorter OuterInstance;
+
+            /// <summary>
+            /// number of temporary files created when merging partitions </summary>
+            public int TempMergeFiles;
+            /// <summary>
+            /// number of partition merges </summary>
+            public int MergeRounds;
+            /// <summary>
+            /// number of lines of data read </summary>
+            public int Lines;
+            /// <summary>
+            /// time spent merging sorted partitions (in milliseconds) </summary>
+            public long MergeTime;
+            /// <summary>
+            /// time spent sorting data (in milliseconds) </summary>
+            public long SortTime;
+            /// <summary>
+            /// total time spent (in milliseconds) </summary>
+            public long TotalTime;
+            /// <summary>
+            /// time spent in i/o read (in milliseconds) </summary>
+            public long ReadTime;
+            /// <summary>
+            /// read buffer size (in bytes) </summary>
+            public long BufferSize;
+
+            /// <summary>
+            /// create a new SortInfo (with empty statistics) for debugging </summary>
+            public SortInfo(OfflineSorter outerInstance)
+            {
+                this.OuterInstance = outerInstance;
+
+                if (!InstanceFieldsInitialized)
+                {
+                    InitializeInstanceFields();
+                    InstanceFieldsInitialized = true;
+                }
+            }
+
+            /// <summary>
+            /// Formats the collected statistics as a single line. Times are converted from
+            /// milliseconds to seconds and the buffer size from bytes to megabytes.
+            /// </summary>
+            public override string ToString()
+            {
+                // string.Format expects composite format items ({index:format}); the Java-style
+                // "%.2f" / "%d" specifiers were emitted verbatim and the arguments were ignored.
+                return string.Format("time={0:F2} sec. total ({1:F2} reading, {2:F2} sorting, {3:F2} merging), lines={4}, temp files={5}, merges={6}, soft ram limit={7:F2} MB", TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime / 1000.0d, MergeTime / 1000.0d, Lines, TempMergeFiles, MergeRounds, (double)BufferSize / MB);
+            }
+        }
+
+        private readonly BufferSize RamBufferSize;
+
+        private readonly Counter BufferBytesUsed = Counter.NewCounter();
+        private BytesRefArray Buffer;
+        private SortInfo sortInfo;
+        private readonly int MaxTempFiles;
+        private readonly IComparer<BytesRef> comparator;
+
+        /// <summary>
+        /// Default comparator: sorts in binary (codepoint) order </summary>
+        public static readonly IComparer<BytesRef> DEFAULT_COMPARATOR = BytesRef.UTF8SortedAsUnicodeComparator.Instance;
+
+        /// <summary>
+        /// Default constructor: uses <see cref="DEFAULT_COMPARATOR"/>, a buffer sized by
+        /// <see cref="BufferSize.Automatic()"/> and <see cref="MAX_TEMPFILES"/>.
+        /// </summary>
+        public OfflineSorter()
+            // The all-details constructor takes no temp-directory argument (that parameter is
+            // commented out there), so DefaultTempDir() must not be passed along.
+            : this(DEFAULT_COMPARATOR, BufferSize.Automatic(), MAX_TEMPFILES)
+        {
+            // Instance fields are already initialized by the chained constructor.
+        }
+
+        /// <summary>
+        /// Default constructor with a custom comparator: uses a buffer sized by
+        /// <see cref="BufferSize.Automatic()"/> and <see cref="MAX_TEMPFILES"/>.
+        /// </summary>
+        /// <param name="comparator"> comparator used to order the entries. </param>
+        public OfflineSorter(IComparer<BytesRef> comparator)
+            // The all-details constructor takes no temp-directory argument (that parameter is
+            // commented out there), so DefaultTempDir() must not be passed along.
+            : this(comparator, BufferSize.Automatic(), MAX_TEMPFILES)
+        {
+            // Instance fields are already initialized by the chained constructor.
+        }
+
+        /// <summary>
+        /// All-details constructor.
+        /// </summary>
+        /// <param name="comparator"> comparator used to order the entries. </param>
+        /// <param name="ramBufferSize"> RAM buffer size; must be at least <see cref="ABSOLUTE_MIN_SORT_BUFFER_SIZE"/> bytes. </param>
+        /// <param name="maxTempfiles"> number of partition files that triggers an intermediate merge; must be at least 2. </param>
+        /// <exception cref="System.ArgumentException"> if the buffer is too small or <paramref name="maxTempfiles"/> is below 2. </exception>
+        public OfflineSorter(IComparer<BytesRef> comparator, BufferSize ramBufferSize, /*DirectoryInfo tempDirectory,*/ int maxTempfiles)
+        {
+            // Create the in-memory buffer once, before the arguments are validated.
+            if (!InstanceFieldsInitialized)
+            {
+                InitializeInstanceFields();
+                InstanceFieldsInitialized = true;
+            }
+            if (ramBufferSize.Bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE)
+            {
+                throw new System.ArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.Bytes);
+            }
+
+            if (maxTempfiles < 2)
+            {
+                throw new System.ArgumentException("maxTempFiles must be >= 2");
+            }
+
+            this.RamBufferSize = ramBufferSize;
+            this.MaxTempFiles = maxTempfiles;
+            this.comparator = comparator;
+        }
+
+        /// <summary>
+        /// Sort input to output. The input is read partition by partition; every partition is
+        /// sorted into a temporary file and the temporary files are merged into the output.
+        /// The amount of allocated memory may deviate from the buffer size hint
+        /// (may be smaller or larger).
+        /// </summary>
+        /// <param name="input"> file holding the length-prefixed entries to sort. </param>
+        /// <param name="output"> destination file; deleted up front and deleted again if sorting fails. </param>
+        /// <returns> the statistics collected for this run. </returns>
+        public SortInfo Sort(FileInfo input, FileInfo output)
+        {
+            // DateTime.Now.Millisecond is only the 0-999 millisecond component of the wall clock,
+            // not a timestamp, so elapsed time is measured with a Stopwatch instead.
+            var totalTimer = Stopwatch.StartNew();
+            sortInfo = new SortInfo(this);
+
+            output.Delete();
+
+            var merges = new List<FileInfo>();
+            bool success2 = false;
+            try
+            {
+                var inputStream = new ByteSequencesReader(input);
+                bool success = false;
+                try
+                {
+                    int lines = 0;
+                    while ((lines = ReadPartition(inputStream)) > 0)
+                    {
+                        merges.Add(SortPartition(lines));
+                        sortInfo.TempMergeFiles++;
+                        sortInfo.Lines += lines;
+
+                        // Handle intermediate merges.
+                        if (merges.Count == MaxTempFiles)
+                        {
+                            var intermediate = new FileInfo(Path.GetTempFileName());
+                            try
+                            {
+                                MergePartitions(merges, intermediate);
+                            }
+                            finally
+                            {
+                                // The merged partitions are replaced by the single intermediate file.
+                                foreach (var file in merges)
+                                {
+                                    file.Delete();
+                                }
+                                merges.Clear();
+                                merges.Add(intermediate);
+                            }
+                            sortInfo.TempMergeFiles++;
+                        }
+                    }
+                    success = true;
+                }
+                finally
+                {
+                    if (success)
+                    {
+                        IOUtils.Close(inputStream);
+                    }
+                    else
+                    {
+                        IOUtils.CloseWhileHandlingException(inputStream);
+                    }
+                }
+
+                // One partition: copy it to the output, then remove the temporary file (best effort).
+                if (merges.Count == 1)
+                {
+                    FileInfo single = merges[0];
+                    Copy(single, output);
+                    try
+                    {
+                        File.Delete(single.FullName);
+                    }
+                    catch (Exception)
+                    {
+                        // ignored: the finally block below attempts the deletion again
+                    }
+                }
+                else
+                {
+                    // otherwise merge the partitions with a priority queue.
+                    MergePartitions(merges, output);
+                }
+                success2 = true;
+            }
+            finally
+            {
+                foreach (FileInfo file in merges)
+                {
+                    file.Delete();
+                }
+                if (!success2)
+                {
+                    output.Delete();
+                }
+            }
+
+            sortInfo.TotalTime = totalTimer.ElapsedMilliseconds;
+            return sortInfo;
+        }
+
+        /// <summary>
+        /// Returns the default temporary directory: the path reported by
+        /// <see cref="Path.GetTempPath()"/> wrapped in a <see cref="DirectoryInfo"/>.
+        /// </summary>
+        public static DirectoryInfo DefaultTempDir()
+        {
+            // NOTE(review): the earlier summary promised an IOException when the folder is not
+            // accessible; nothing here checks accessibility or throws one - confirm the intended contract.
+            return new DirectoryInfo(Path.GetTempPath());
+        }
+
+        /// <summary>
+        /// Copies one file to another.
+        /// </summary>
+        private static void Copy(FileInfo file, FileInfo output)
+        {
+            File.Copy(file.FullName, output.FullName);
+        }
+
+        /// <summary>
+        /// Sort a single partition in-memory and write it to a newly created temporary file. </summary>
+        /// <param name="len"> number of entries in the partition; not used by this method. </param>
+        /// <returns> the temporary file, written in the order produced by <c>Buffer.Iterator(comparator)</c>. </returns>
+        internal FileInfo SortPartition(int len)
+        {
+            var data = this.Buffer;
+            var tempFile = new FileInfo(Path.GetTempFileName());
+
+            var @out = new ByteSequencesWriter(tempFile);
+            BytesRef spare;
+            try
+            {
+                // The previous code took two back-to-back readings of DateTime.Now.Millisecond (the
+                // 0-999 millisecond component of the clock) with no work in between, so SortTime
+                // never reflected anything. Time the creation of the ordered iterator instead.
+                // NOTE(review): presumably Iterator(comparator) performs the sort eagerly - confirm.
+                var sortTimer = Stopwatch.StartNew();
+                BytesRefIterator iter = data.Iterator(comparator);
+                sortInfo.SortTime += sortTimer.ElapsedMilliseconds;
+
+                while ((spare = iter.Next()) != null)
+                {
+                    Debug.Assert(spare.Length <= short.MaxValue);
+                    @out.Write(spare);
+                }
+
+                @out.Dispose();
+
+                // Clean up the buffer for the next partition.
+                data.Clear();
+                return tempFile;
+            }
+            finally
+            {
+                // Runs after the explicit Dispose above as well, so the writer is also closed on failure.
+                IOUtils.Close(@out);
+            }
+        }
+
+        /// <summary>
+        /// Merge a list of sorted temporary files (partitions) into an output file. </summary>
+        /// <param name="merges"> the sorted partition files to merge. </param>
+        /// <param name="outputFile"> file that receives the merged entries. </param>
+        internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
+        {
+            // A Stopwatch keeps MergeTime in milliseconds; the previous code subtracted a
+            // DateTime.Now.Millisecond reading from a tick count, mixing unrelated units.
+            var mergeTimer = Stopwatch.StartNew();
+
+            var @out = new ByteSequencesWriter(outputFile);
+
+            PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
+
+            var streams = new ByteSequencesReader[merges.Count];
+            try
+            {
+                // Open streams and read the top for each file
+                for (int i = 0; i < merges.Count; i++)
+                {
+                    streams[i] = new ByteSequencesReader(merges[i]);
+                    sbyte[] line = streams[i].Read();
+                    if (line != null)
+                    {
+                        queue.InsertWithOverflow(new FileAndTop(i, line));
+                    }
+                }
+
+                // Unix utility sort() uses ordered array of files to pick the next line from, updating
+                // it as it reads new lines. The PQ used here is a more elegant solution and has
+                // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
+                // so it shouldn't make much of a difference (didn't check).
+                FileAndTop top;
+                while ((top = queue.Top()) != null)
+                {
+                    @out.Write(top.Current);
+                    // Refill the top entry from its own stream; drop it once that stream is exhausted.
+                    if (!streams[top.Fd].Read(top.Current))
+                    {
+                        queue.Pop();
+                    }
+                    else
+                    {
+                        queue.UpdateTop();
+                    }
+                }
+
+                // The statistics live on the sortInfo instance field; "SortInfo" names the nested type
+                // and cannot be used to reach instance members.
+                sortInfo.MergeTime += mergeTimer.ElapsedMilliseconds;
+                sortInfo.MergeRounds++;
+            }
+            finally
+            {
+                // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
+                // happening in closing streams.
+                try
+                {
+                    IOUtils.Close(streams);
+                }
+                finally
+                {
+                    IOUtils.Close(@out);
+                }
+            }
+        }
+
+        private class PriorityQueueAnonymousInnerClassHelper : PriorityQueue<FileAndTop>
+        {
+            private readonly OfflineSorter OuterInstance;
+
+            public PriorityQueueAnonymousInnerClassHelper(OfflineSorter outerInstance, int size)
+                : base(size)
+            {
+                this.OuterInstance = outerInstance;
+            }
+
+            public override bool LessThan(FileAndTop a, FileAndTop b)
+            {
+                return OuterInstance.comparator.Compare(a.Current, b.Current) < 0;
+            }
+        }
+
+        /// <summary>
+        /// Read in a single partition of data: entries from <paramref name="reader"/> are appended to the
+        /// buffer until the reader has no more entries or the byte counter exceeds the RAM buffer size. </summary>
+        /// <param name="reader"> source of the length-prefixed entries. </param>
+        /// <returns> the number of entries held by the buffer after reading. </returns>
+        internal int ReadPartition(ByteSequencesReader reader)
+        {
+            // DateTime.Now.Millisecond is only the 0-999 millisecond component of the clock, so the
+            // elapsed read time is measured with a Stopwatch instead.
+            var readTimer = Stopwatch.StartNew();
+            var scratch = new BytesRef();
+            while ((scratch.Bytes = reader.Read()) != null)
+            {
+                scratch.Length = scratch.Bytes.Length;
+                Buffer.Append(scratch);
+                // Account for the created objects.
+                // (buffer slots do not account to buffer size.)
+                if (RamBufferSize.Bytes < BufferBytesUsed.Get())
+                {
+                    break;
+                }
+            }
+            sortInfo.ReadTime += readTimer.ElapsedMilliseconds;
+            return Buffer.Size();
+        }
+
+        internal class FileAndTop
+        {
+            internal readonly int Fd;
+            internal readonly BytesRef Current;
+
+            internal FileAndTop(int fd, sbyte[] firstLine)
+            {
+                this.Fd = fd;
+                this.Current = new BytesRef(firstLine);
+            }
+        }
+
+        /// <summary>
+        /// Utility class to emit length-prefixed byte[] entries to an output stream for sorting.
+        /// Complementary to <seealso cref="ByteSequencesReader"/>.
+        /// </summary>
+        public class ByteSequencesWriter : IDisposable
+        {
+            internal readonly DataOutput Os;
+
+            /// <summary>
+            /// Constructs a ByteSequencesWriter to the provided File </summary>
+            public ByteSequencesWriter(FileInfo file)
+                : this(new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file))))
+            {
+            }
+
+            /// <summary>
+            /// Constructs a ByteSequencesWriter to the provided DataOutput </summary>
+            public ByteSequencesWriter(DataOutput os)
+            {
+                this.Os = os;
+            }
+
+            /// <summary>
+            /// Writes a BytesRef. </summary>
+            /// <seealso cref= #write(byte[], int, int) </seealso>
+            public virtual void Write(BytesRef @ref)
+            {
+                Debug.Assert(@ref != null);
+                Write(@ref.Bytes, @ref.Offset, @ref.Length);
+            }
+
+            /// <summary>
+            /// Writes a byte array. </summary>
+            /// <seealso cref= #write(byte[], int, int) </seealso>
+            public virtual void Write(sbyte[] bytes)
+            {
+                Write(bytes, 0, bytes.Length);
+            }
+
+            /// <summary>
+            /// Writes a byte array.
+            /// <p>
+            /// The length is written as a <code>short</code>, followed
+            /// by the bytes.
+            /// </summary>
+            public virtual void Write(sbyte[] bytes, int off, int len)
+            {
+                Debug.Assert(bytes != null);
+                Debug.Assert(off >= 0 && off + len <= bytes.Length);
+                Debug.Assert(len >= 0);
+                Os.WriteShort(len);
+                Os.Write(bytes, off, len);
+            }
+
+            /// <summary>
+            /// Closes the provided <seealso cref="DataOutput"/> if it is <seealso cref="IDisposable"/>.
+            /// </summary>
+            public void Dispose()
+            {
+                var os = Os as IDisposable;
+                if (os != null)
+                {
+                    os.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Utility class to read length-prefixed byte[] entries from an input.
+        /// Complementary to <seealso cref="ByteSequencesWriter"/>.
+        /// </summary>
+        public class ByteSequencesReader : IDisposable
+        {
+            internal readonly DataInput inputStream;
+
+            /// <summary>
+            /// Constructs a ByteSequencesReader from the provided File </summary>
+            public ByteSequencesReader(FileInfo file)
+                : this(new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
+            {
+            }
+
+            /// <summary>
+            /// Constructs a ByteSequencesReader from the provided DataInput </summary>
+            public ByteSequencesReader(DataInput inputStream)
+            {
+                this.inputStream = inputStream;
+            }
+
+            /// <summary>
+            /// Reads the next entry into the provided <seealso cref="BytesRef"/>. The internal
+            /// storage is resized if needed.
+            /// </summary>
+            /// <returns> Returns <code>false</code> if EOF occurred when trying to read
+            /// the header of the next sequence. Returns <code>true</code> otherwise. </returns>
+            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
+            public virtual bool Read(BytesRef @ref)
+            {
+                short length;
+                try
+                {
+                    length = inputStream.ReadShort();
+                }
+                catch (EOFException)
+                {
+                    return false;
+                }
+
+                @ref.Grow(length);
+                @ref.Offset = 0;
+                @ref.Length = length;
+                inputStream.ReadFully(@ref.Bytes, 0, length);
+                return true;
+            }
+
+            /// <summary>
+            /// Reads the next entry and returns it if successful.
+            /// </summary>
+            /// <seealso cref="Read(BytesRef)"/>
+            /// <returns> Returns <code>null</code> if EOF occurred before the next entry
+            /// could be read. </returns>
+            /// <exception cref="EOFException"> if the file ends before the full sequence is read. </exception>
+            public virtual sbyte[] Read()
+            {
+                short length;
+                try
+                {
+                    length = inputStream.ReadShort();
+                }
+                catch (EOFException)
+                {
+                    // EOF while reading the length header: there is no further entry.
+                    // (No exception variable is declared, matching Read(BytesRef) and avoiding an unused local.)
+                    return null;
+                }
+
+                Debug.Assert(length >= 0, "Sanity: sequence length < 0: " + length);
+                sbyte[] result = new sbyte[length];
+                inputStream.ReadFully(result);
+                return result;
+            }
+
+            /// <summary>
+            /// Closes the provided <seealso cref="DataInput"/> if it is <seealso cref="IDisposable"/>.
+            /// </summary>
+            public void Dispose()
+            {
+                var @is = inputStream as IDisposable;
+                if (@is != null)
+                {
+                    @is.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the comparator in use to sort entries </summary>
+        public IComparer<BytesRef> Comparator
+        {
+            get
+            {
+                return comparator;
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj b/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj
new file mode 100644
index 0000000..9b86c9c
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Lucene.Net.Suggest.csproj
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{FBD2EB4D-EAC9-409C-A23D-64D27DF23576}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Search</RootNamespace>
+    <AssemblyName>Lucene.Net.Suggest</AssemblyName>
+    <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <TargetFrameworkProfile />
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Spell\CombineSuggestion.cs" />
+    <Compile Include="Spell\Dictionary.cs" />
+    <Compile Include="Spell\DirectSpellChecker.cs" />
+    <Compile Include="Spell\HighFrequencyDictionary.cs" />
+    <Compile Include="Spell\JaroWinklerDistance.cs" />
+    <Compile Include="Spell\LevensteinDistance.cs" />
+    <Compile Include="Spell\LuceneDictionary.cs" />
+    <Compile Include="Spell\LuceneLevenshteinDistance.cs" />
+    <Compile Include="Spell\NGramDistance.cs" />
+    <Compile Include="Spell\PlainTextDictionary.cs" />
+    <Compile Include="Spell\SpellChecker.cs" />
+    <Compile Include="Spell\StringDistance.cs" />
+    <Compile Include="Spell\SuggestMode.cs" />
+    <Compile Include="Spell\SuggestWord.cs" />
+    <Compile Include="Spell\SuggestWordFrequencyComparator.cs" />
+    <Compile Include="Spell\SuggestWordQueue.cs" />
+    <Compile Include="Spell\SuggestWordScoreComparator.cs" />
+    <Compile Include="Spell\TermFreqIterator.cs" />
+    <Compile Include="Spell\WordBreakSpellChecker.cs" />
+    <Compile Include="Suggest\Analyzing\AnalyzingInfixSuggester.cs" />
+    <Compile Include="Suggest\Analyzing\AnalyzingSuggester.cs" />
+    <Compile Include="Suggest\Analyzing\BlendedInfixSuggester.cs" />
+    <Compile Include="Suggest\Analyzing\FreeTextSuggester.cs" />
+    <Compile Include="Suggest\Analyzing\FSTUtil.cs" />
+    <Compile Include="Suggest\Analyzing\FuzzySuggester.cs" />
+    <Compile Include="Suggest\Analyzing\SuggestStopFilter.cs" />
+    <Compile Include="Suggest\BufferedInputIterator.cs" />
+    <Compile Include="Suggest\BufferingTermFreqIteratorWrapper.cs" />
+    <Compile Include="Suggest\DocumentDictionary.cs" />
+    <Compile Include="Suggest\DocumentValueSourceDictionary.cs" />
+    <Compile Include="Suggest\FileDictionary.cs" />
+    <Compile Include="Suggest\Fst\BytesRefSorter.cs" />
+    <Compile Include="Suggest\Fst\ExternalRefSorter.cs" />
+    <Compile Include="Suggest\Fst\FSTCompletion.cs" />
+    <Compile Include="Suggest\Fst\FSTCompletionBuilder.cs" />
+    <Compile Include="Suggest\Fst\FSTCompletionLookup.cs" />
+    <Compile Include="Suggest\Fst\WFSTCompletionLookup.cs" />
+    <Compile Include="Suggest\InMemorySorter.cs" />
+    <Compile Include="Suggest\InputIterator.cs" />
+    <Compile Include="Suggest\Jaspell\JaspellLookup.cs" />
+    <Compile Include="Suggest\Jaspell\JaspellTernarySearchTrie.cs" />
+    <Compile Include="Suggest\Lookup.cs" />
+    <Compile Include="Suggest\SortedInputIterator.cs" />
+    <Compile Include="Suggest\SortedTermFreqIteratorWrapper.cs" />
+    <Compile Include="Suggest\Tst\TernaryTreeNode.cs" />
+    <Compile Include="Suggest\Tst\TSTAutocomplete.cs" />
+    <Compile Include="Suggest\Tst\TSTLookup.cs" />
+    <Compile Include="Suggest\UnsortedInputIterator.cs" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup />
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs b/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..55818e7
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General information about this assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with the assembly.
+[assembly: AssemblyTitle("Lucene.Net.Suggest")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Suggest")]
+[assembly: AssemblyCopyright("Copyright ©  2014")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is the ID of the typelib if this project is exposed to COM.
+[assembly: Guid("0e1499f7-850e-4583-8994-623eb2480200")]
+
+// Version information for an assembly consists of the following four values:
+//
+//      Major Version
+//      Minor Version
+//      Build Number
+//      Revision
+//
+// You can specify all the values, or you can default the Build and Revision numbers
+// by using '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/RectangularArrays.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/RectangularArrays.cs b/src/Lucene.Net.Suggest/RectangularArrays.cs
new file mode 100644
index 0000000..f0cb588
--- /dev/null
+++ b/src/Lucene.Net.Suggest/RectangularArrays.cs
@@ -0,0 +1,29 @@
+//----------------------------------------------------------------------------------------
+//	Copyright © 2007 - 2014 Tangible Software Solutions Inc.
+//	This class can be used by anyone provided that the copyright notice remains intact.
+//
+//	This class provides the logic to simulate Java rectangular arrays, which are jagged
+//	arrays with inner arrays of the same length. A size of -1 indicates unknown length.
+//----------------------------------------------------------------------------------------
+internal static partial class RectangularArrays
+{
+    // Allocates an int[Size1][] whose rows are int[Size2]; any negative size is treated as "unknown".
+    internal static int[][] ReturnRectangularIntArray(int Size1, int Size2)
+    {
+        // Unknown outer length: there is nothing to allocate.
+        if (Size1 <= -1)
+        {
+            return null;
+        }
+
+        int[][] rows = new int[Size1][];
+        // Unknown inner length: the fill bound collapses to zero, so every row stays null.
+        int rowsToFill = Size2 > -1 ? Size1 : 0;
+        for (int row = 0; row < rowsToFill; row++)
+        {
+            rows[row] = new int[Size2];
+        }
+
+        return rows;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs b/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs
new file mode 100644
index 0000000..de534d5
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/CombineSuggestion.cs
@@ -0,0 +1,47 @@
+namespace Lucene.Net.Search.Spell
+{
+
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// A suggestion produced by joining one or more of the original query terms.
+    /// </summary>
+    public class CombineSuggestion
+    {
+        /// <summary>
+        /// The combined word that is being suggested.
+        /// </summary>
+        public readonly SuggestWord suggestion;
+
+        /// <summary>
+        /// Positions, within the passed-in array of terms, of the terms merged into this suggestion.
+        /// </summary>
+        public readonly int[] originalTermIndexes;
+
+        /// <summary>
+        /// Builds a CombineSuggestion from <paramref name="suggestion"/> and the indexes
+        /// (<paramref name="originalTermIndexes"/>) of the original terms it was formed from.
+        /// The array is stored as passed in; no copy is made.
+        /// </summary>
+        public CombineSuggestion(SuggestWord suggestion, int[] originalTermIndexes)
+        {
+            this.originalTermIndexes = originalTermIndexes;
+            this.suggestion = suggestion;
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/0ebac726/src/Lucene.Net.Suggest/Spell/Dictionary.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Suggest/Spell/Dictionary.cs b/src/Lucene.Net.Suggest/Spell/Dictionary.cs
new file mode 100644
index 0000000..e5d91ce
--- /dev/null
+++ b/src/Lucene.Net.Suggest/Spell/Dictionary.cs
@@ -0,0 +1,35 @@
+using Lucene.Net.Search.Suggest;
+
+namespace Lucene.Net.Search.Spell
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    /// <summary>
+    /// A simple interface representing a dictionary. A dictionary
+    /// here is a list of entries, where every entry consists of a
+    /// term, a weight and a payload.
+    /// NOTE(review): the name has no "I" prefix, presumably to mirror the type being ported; confirm before renaming.
+    /// </summary>
+    public interface Dictionary
+    {
+
+        /// <summary>
+        /// Gets an iterator over all the entries. </summary>
+        /// <value> The entry iterator. </value>
+        InputIterator EntryIterator { get; }
+    }
+}
\ No newline at end of file


Mime
View raw message