lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [20/72] [abbrv] [partial] lucenenet git commit: Lucene.Net.Tests: Removed \core directory and put its contents in root directory
Date Sun, 26 Feb 2017 23:37:08 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs b/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs
new file mode 100644
index 0000000..706987e
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestPostingsOffsets.cs
@@ -0,0 +1,580 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Lucene.Net.Documents;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Index
+{
+    using Lucene.Net.Randomized.Generators;
+    using NUnit.Framework;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using Analyzer = Lucene.Net.Analysis.Analyzer;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using CannedTokenStream = Lucene.Net.Analysis.CannedTokenStream;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using English = Lucene.Net.Util.English;
+    using Field = Field;
+    using FieldType = FieldType;
+    using Int32Field = Int32Field;
+    using IOUtils = Lucene.Net.Util.IOUtils;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using MockPayloadAnalyzer = Lucene.Net.Analysis.MockPayloadAnalyzer;
+    using StringField = StringField;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+    using TextField = TextField;
+    using Token = Lucene.Net.Analysis.Token;
+    using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+    // TODO: we really need to test indexing offsets, but then getting only docs / docs + freqs.
+    // not all codecs store prx separate...
+    // TODO: fix sep codec to index offsets so we can greatly reduce this list!
+    [SuppressCodecs("Lucene3x", "MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom")]
+    [TestFixture]
+    public class TestPostingsOffsets : LuceneTestCase
+    {
+        internal IndexWriterConfig Iwc;
+
+        /// <summary>
+        /// Runs the base fixture setup and then prepares a fresh writer configuration
+        /// built around a <see cref="MockAnalyzer"/> for the upcoming test.
+        /// </summary>
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            MockAnalyzer analyzer = new MockAnalyzer(Random());
+            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+        }
+
+        /// <summary>
+        /// Indexes a single document whose "content" field is a canned stream of four
+        /// tokens (a, b, a, c) and verifies the frequency, positions and offsets
+        /// reported for each of the three distinct terms.
+        /// </summary>
+        [Test]
+        public virtual void TestBasic()
+        {
+            Directory directory = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Iwc);
+
+            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED)
+            {
+                IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
+            };
+            bool withTermVectors = Random().NextBoolean();
+            if (withTermVectors)
+            {
+                fieldType.StoreTermVectors = true;
+                fieldType.StoreTermVectorPositions = Random().NextBoolean();
+                fieldType.StoreTermVectorOffsets = Random().NextBoolean();
+            }
+
+            Token[] cannedTokens =
+            {
+                MakeToken("a", 1, 0, 6),
+                MakeToken("b", 1, 8, 9),
+                MakeToken("a", 1, 9, 17),
+                MakeToken("c", 1, 19, 50)
+            };
+            Document document = new Document();
+            document.Add(new Field("content", new CannedTokenStream(cannedTokens), fieldType));
+            writer.AddDocument(document);
+
+            IndexReader reader = writer.Reader;
+            writer.Dispose();
+
+            // term "a": occurs twice, at positions 0 and 2
+            DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(reader, null, "content", new BytesRef("a"));
+            Assert.IsNotNull(postings);
+            Assert.AreEqual(0, postings.NextDoc());
+            Assert.AreEqual(2, postings.Freq);
+            Assert.AreEqual(0, postings.NextPosition());
+            Assert.AreEqual(0, postings.StartOffset);
+            Assert.AreEqual(6, postings.EndOffset);
+            Assert.AreEqual(2, postings.NextPosition());
+            Assert.AreEqual(9, postings.StartOffset);
+            Assert.AreEqual(17, postings.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, postings.NextDoc());
+
+            // term "b": occurs once, at position 1
+            postings = MultiFields.GetTermPositionsEnum(reader, null, "content", new BytesRef("b"));
+            Assert.IsNotNull(postings);
+            Assert.AreEqual(0, postings.NextDoc());
+            Assert.AreEqual(1, postings.Freq);
+            Assert.AreEqual(1, postings.NextPosition());
+            Assert.AreEqual(8, postings.StartOffset);
+            Assert.AreEqual(9, postings.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, postings.NextDoc());
+
+            // term "c": occurs once, at position 3
+            postings = MultiFields.GetTermPositionsEnum(reader, null, "content", new BytesRef("c"));
+            Assert.IsNotNull(postings);
+            Assert.AreEqual(0, postings.NextDoc());
+            Assert.AreEqual(1, postings.Freq);
+            Assert.AreEqual(3, postings.NextPosition());
+            Assert.AreEqual(19, postings.StartOffset);
+            Assert.AreEqual(50, postings.EndOffset);
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, postings.NextDoc());
+
+            reader.Dispose();
+            directory.Dispose();
+        }
+
+        /// <summary>
+        /// Runs the shared numbers scenario without the payload-producing analyzer.
+        /// </summary>
+        [Test]
+        public virtual void TestSkipping()
+        {
+            DoTestNumbers(withPayloads: false);
+        }
+
+        /// <summary>
+        /// Runs the shared numbers scenario with the payload-producing analyzer.
+        /// </summary>
+        [Test]
+        public virtual void TestPayloads()
+        {
+            DoTestNumbers(withPayloads: true);
+        }
+
+        /// <summary>
+        /// Indexes the English spelling of the numbers 0..numDocs-1 with offsets enabled
+        /// and verifies that, for a set of number words, every reported start/end offset
+        /// selects exactly that word out of the stored field text. The same check is
+        /// repeated after advancing directly to random documents for the term "hundred",
+        /// and finally the "id" field (indexed without offsets) is checked to still
+        /// resolve each document.
+        /// </summary>
+        /// <param name="withPayloads">
+        /// When <c>true</c> a <see cref="MockPayloadAnalyzer"/> is used and every position
+        /// is required to carry a payload starting with "pos:"; otherwise a
+        /// <see cref="MockAnalyzer"/> is used and payloads are not inspected.
+        /// </param>
+        public virtual void DoTestNumbers(bool withPayloads)
+        {
+            Directory dir = NewDirectory();
+            Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random());
+            Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+            Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
+            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
+
+            FieldType ft = new FieldType(TextField.TYPE_STORED);
+            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+            if (Random().NextBoolean())
+            {
+                ft.StoreTermVectors = true;
+                ft.StoreTermVectorOffsets = Random().NextBoolean();
+                ft.StoreTermVectorPositions = Random().NextBoolean();
+            }
+
+            int numDocs = AtLeast(500);
+            for (int i = 0; i < numDocs; i++)
+            {
+                Document doc = new Document();
+                doc.Add(new Field("numbers", English.IntToEnglish(i), ft));
+                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
+                doc.Add(new StringField("id", "" + i, Field.Store.NO));
+                w.AddDocument(doc);
+            }
+
+            IndexReader reader = w.Reader;
+            w.Dispose();
+
+            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };
+
+            foreach (string term in terms)
+            {
+                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
+                Assert.IsNotNull(dp, "no postings found for term " + term);
+                int doc;
+                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    string storedNumbers = reader.Document(doc).Get("numbers");
+                    AssertOffsetsAndPayloads(dp, storedNumbers, term, withPayloads);
+                }
+            }
+
+            // check we can skip correctly
+            int numSkippingTests = AtLeast(50);
+
+            for (int j = 0; j < numSkippingTests; j++)
+            {
+                int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999));
+                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
+                Assert.IsNotNull(dp, "no postings found for term hundred");
+                int doc = dp.Advance(num);
+                Assert.AreEqual(num, doc);
+                // the stored text is the same for every position of this document, so load it once
+                string storedNumbers = reader.Document(doc).Get("numbers");
+                AssertOffsetsAndPayloads(dp, storedNumbers, "hundred", withPayloads);
+            }
+
+            // check that other fields (without offsets) work correctly
+
+            for (int i = 0; i < numDocs; i++)
+            {
+                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
+                Assert.AreEqual(i, dp.NextDoc());
+                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
+            }
+
+            reader.Dispose();
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Walks every position of the document <paramref name="dp"/> is currently on and
+        /// asserts that the reported offsets are sane and select <paramref name="term"/>
+        /// from <paramref name="storedNumbers"/>.
+        /// </summary>
+        /// <param name="dp">Postings enum already positioned on a document.</param>
+        /// <param name="storedNumbers">Stored text of the "numbers" field for that document.</param>
+        /// <param name="term">The term whose occurrences are being verified.</param>
+        /// <param name="withPayloads">Whether each position must carry a "pos:" payload.</param>
+        private void AssertOffsetsAndPayloads(DocsAndPositionsEnum dp, string storedNumbers, string term, bool withPayloads)
+        {
+            int freq = dp.Freq;
+            for (int i = 0; i < freq; i++)
+            {
+                dp.NextPosition();
+                // Real assertions rather than Debug.Assert, so the checks also run in Release builds.
+                int start = dp.StartOffset;
+                Assert.IsTrue(start >= 0, "negative start offset: " + start);
+                int end = dp.EndOffset;
+                Assert.IsTrue(end >= start, "end offset " + end + " precedes start offset " + start);
+                // check that the offsets correspond to the term in the src text
+                Assert.AreEqual(term, storedNumbers.Substring(start, end - start));
+                if (withPayloads)
+                {
+                    // check that we have a payload and it starts with "pos:"
+                    BytesRef payload = dp.GetPayload();
+                    Assert.IsNotNull(payload);
+                    Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
+                } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
+            }
+        }
+
+        /// <summary>
+        /// Indexes documents made of randomly chosen tokens "a".."d" with random position
+        /// increments and offsets, recording every token per term and per document id.
+        /// Each leaf reader is then walked three times per term (docs only, docs and
+        /// positions, docs and positions and offsets) and compared with what was recorded.
+        /// </summary>
+        [Test]
+        public virtual void TestRandom()
+        {
+            // term text -> document id -> tokens indexed for that term in that document
+            IDictionary<string, IDictionary<int?, IList<Token>>> actualTokens = new Dictionary<string, IDictionary<int?, IList<Token>>>();
+
+            Directory dir = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Iwc);
+
+            int numDocs = AtLeast(20);
+
+            // TODO: randomize what IndexOptions we use; also test
+            // changing this up in one IW buffered segment...:
+            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED)
+            {
+                IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
+            };
+            if (Random().NextBoolean())
+            {
+                ft.StoreTermVectors = true;
+                ft.StoreTermVectorOffsets = Random().NextBoolean();
+                ft.StoreTermVectorPositions = Random().NextBoolean();
+            }
+
+            for (int docId = 0; docId < numDocs; docId++)
+            {
+                Document doc = new Document();
+                doc.Add(new Int32Field("id", docId, Field.Store.NO));
+                List<Token> docTokens = new List<Token>();
+                int numTokens = AtLeast(100);
+                int absolutePos = -1;
+                int offset = 0;
+                for (int n = 0; n < numTokens; n++)
+                {
+                    // each alternative is only drawn when the previous coin flip came up false
+                    string text = Random().NextBoolean() ? "a"
+                        : Random().NextBoolean() ? "b"
+                        : Random().NextBoolean() ? "c"
+                        : "d";
+
+                    int posIncr = Random().NextBoolean() ? 1 : Random().Next(5);
+                    if (n == 0 && posIncr == 0)
+                    {
+                        // the first token of a document gets an increment of 1 instead of 0
+                        posIncr = 1;
+                    }
+                    int offIncr = Random().NextBoolean() ? 0 : Random().Next(5);
+                    int tokenLength = Random().Next(5);
+
+                    int startOffset = offset + offIncr;
+                    int endOffset = startOffset + tokenLength;
+                    Token token = MakeToken(text, posIncr, startOffset, endOffset);
+
+                    // stuff abs position into type:
+                    absolutePos += posIncr;
+                    token.Type = "" + absolutePos;
+
+                    IDictionary<int?, IList<Token>> postingsByDoc;
+                    if (!actualTokens.TryGetValue(text, out postingsByDoc))
+                    {
+                        postingsByDoc = new Dictionary<int?, IList<Token>>();
+                        actualTokens[text] = postingsByDoc;
+                    }
+                    IList<Token> postings;
+                    if (!postingsByDoc.TryGetValue(docId, out postings))
+                    {
+                        postings = new List<Token>();
+                        postingsByDoc[docId] = postings;
+                    }
+                    postings.Add(token);
+                    docTokens.Add(token);
+
+                    offset = endOffset;
+                }
+                doc.Add(new Field("content", new CannedTokenStream(docTokens.ToArray()), ft));
+                writer.AddDocument(doc);
+            }
+            DirectoryReader reader = writer.Reader;
+            writer.Dispose();
+
+            string[] terms = { "a", "b", "c", "d" };
+            foreach (AtomicReaderContext leaf in reader.Leaves)
+            {
+                // TODO: improve this
+                AtomicReader sub = (AtomicReader)leaf.Reader;
+                TermsEnum termsEnum = sub.Fields.GetTerms("content").GetIterator(null);
+                DocsEnum docsOnly = null;
+                DocsAndPositionsEnum withPositions = null;
+                DocsAndPositionsEnum withOffsets = null;
+                FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(sub, "id", false);
+                foreach (string term in terms)
+                {
+                    if (!termsEnum.SeekExact(new BytesRef(term)))
+                    {
+                        // this segment has no postings for the term
+                        continue;
+                    }
+
+                    int docID;
+
+                    // pass 1: docs + freqs
+                    docsOnly = termsEnum.Docs(null, docsOnly);
+                    Assert.IsNotNull(docsOnly);
+                    while ((docID = docsOnly.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                    {
+                        IList<Token> expected = actualTokens[term][docIDToID.Get(docID)];
+                        Assert.IsNotNull(expected);
+                        Assert.AreEqual(expected.Count, docsOnly.Freq);
+                    }
+
+                    // pass 2: docs + freqs + positions; explicitly exclude offsets here
+                    withPositions = termsEnum.DocsAndPositions(null, withPositions, DocsAndPositionsEnum.FLAG_PAYLOADS);
+                    Assert.IsNotNull(withPositions);
+                    while ((docID = withPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                    {
+                        IList<Token> expected = actualTokens[term][docIDToID.Get(docID)];
+                        Assert.IsNotNull(expected);
+                        Assert.AreEqual(expected.Count, withPositions.Freq);
+                        foreach (Token token in expected)
+                        {
+                            // the absolute position was stashed in the token type at index time
+                            int expectedPos = Convert.ToInt32(token.Type);
+                            Assert.AreEqual(expectedPos, withPositions.NextPosition());
+                        }
+                    }
+
+                    // pass 3: docs + freqs + positions + offsets (the previous enum is offered for reuse)
+                    withOffsets = termsEnum.DocsAndPositions(null, withPositions);
+                    Assert.IsNotNull(withOffsets);
+                    while ((docID = withOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                    {
+                        IList<Token> expected = actualTokens[term][docIDToID.Get(docID)];
+                        Assert.IsNotNull(expected);
+                        Assert.AreEqual(expected.Count, withOffsets.Freq);
+                        foreach (Token token in expected)
+                        {
+                            int expectedPos = Convert.ToInt32(token.Type);
+                            Assert.AreEqual(expectedPos, withOffsets.NextPosition());
+                            Assert.AreEqual(token.StartOffset, withOffsets.StartOffset);
+                            Assert.AreEqual(token.EndOffset, withOffsets.EndOffset);
+                        }
+                    }
+                }
+                // TODO: test advance:
+            }
+            reader.Dispose();
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Adds 100 documents whose "foo" field is randomly either stored-only or indexed
+        /// with offsets (the last document is always indexed with offsets), then checks
+        /// that the field info for "foo" reports offsets as its index options.
+        /// </summary>
+        [Test]
+        public virtual void TestWithUnindexedFields()
+        {
+            Directory dir = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Iwc);
+            const int totalDocs = 100;
+            for (int i = 0; i < totalDocs; i++)
+            {
+                // ensure at least one doc is indexed with offsets
+                bool storedOnly = i < totalDocs - 1 && Random().Next(2) == 0;
+
+                FieldType ft;
+                string value;
+                if (storedOnly)
+                {
+                    ft = new FieldType { IsIndexed = false, IsStored = true };
+                    value = "boo!";
+                }
+                else
+                {
+                    ft = new FieldType(TextField.TYPE_STORED)
+                    {
+                        IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
+                    };
+                    if (Random().NextBoolean())
+                    {
+                        // store some term vectors for the checkindex cross-check
+                        ft.StoreTermVectors = true;
+                        ft.StoreTermVectorPositions = true;
+                        ft.StoreTermVectorOffsets = true;
+                    }
+                    value = "bar";
+                }
+
+                Document doc = new Document();
+                doc.Add(new Field("foo", value, ft));
+                writer.AddDocument(doc);
+            }
+
+            CompositeReader composite = writer.Reader;
+            AtomicReader slow = SlowCompositeReaderWrapper.Wrap(composite);
+            FieldInfos fieldInfos = slow.FieldInfos;
+            Assert.AreEqual(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, fieldInfos.FieldInfo("foo").IndexOptions);
+
+            slow.Dispose();
+            composite.Dispose();
+            writer.Dispose();
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Adds the same offsets-and-term-vectors field twice to one document and relies
+        /// on closing the writer and directory to validate the resulting index.
+        /// </summary>
+        [Test]
+        public virtual void TestAddFieldTwice()
+        {
+            Directory dir = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
+
+            FieldType offsetsWithVectors = new FieldType(TextField.TYPE_STORED)
+            {
+                StoreTermVectors = true,
+                StoreTermVectorPositions = true,
+                StoreTermVectorOffsets = true,
+                IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
+            };
+
+            Document doc = new Document();
+            for (int copy = 0; copy < 2; copy++)
+            {
+                doc.Add(new Field("content3", "here is more content with aaa aaa aaa", offsetsWithVectors));
+            }
+            writer.AddDocument(doc);
+
+            writer.Dispose();
+            dir.Dispose(); // checkindex
+        }
+
+        // NOTE: the next two tests aren't that good as we need an EvilToken...
+        /// <summary>
+        /// A token with negative start and end offsets must be rejected with an
+        /// <see cref="ArgumentException"/>.
+        /// </summary>
+        [Test]
+        public virtual void TestNegativeOffsets()
+        {
+            try
+            {
+                // token construction stays inside the try, as in the original form of this test
+                Token[] tokens = { MakeToken("foo", 1, -1, -1) };
+                CheckTokens(tokens);
+            }
+            catch (ArgumentException)
+            {
+                // expected
+                return;
+            }
+            Assert.Fail();
+        }
+
+        /// <summary>
+        /// A token whose end offset precedes its start offset must be rejected with an
+        /// <see cref="ArgumentException"/>.
+        /// </summary>
+        [Test]
+        public virtual void TestIllegalOffsets()
+        {
+            try
+            {
+                // token construction stays inside the try, as in the original form of this test
+                Token[] tokens = { MakeToken("foo", 1, 1, 0) };
+                CheckTokens(tokens);
+            }
+            catch (ArgumentException)
+            {
+                // expected
+                return;
+            }
+            Assert.Fail();
+        }
+
+        /// <summary>
+        /// A token stream whose third token starts before the second one (offsets going
+        /// backwards) must be rejected with an <see cref="ArgumentException"/>.
+        /// </summary>
+        [Test]
+        public virtual void TestBackwardsOffsets()
+        {
+            try
+            {
+                Token[] tokens =
+                {
+                    MakeToken("foo", 1, 0, 3),
+                    MakeToken("foo", 1, 4, 7),
+                    MakeToken("foo", 0, 3, 6)
+                };
+                CheckTokens(tokens);
+            }
+            catch (ArgumentException)
+            {
+                // expected
+                return;
+            }
+            Assert.Fail();
+        }
+
+        /// <summary>
+        /// Three identical tokens stacked on the same position with identical offsets
+        /// must index without error.
+        /// </summary>
+        [Test]
+        public virtual void TestStackedTokens()
+        {
+            Token[] stacked =
+            {
+                MakeToken("foo", 1, 0, 3),
+                MakeToken("foo", 0, 0, 3),
+                MakeToken("foo", 0, 0, 3)
+            };
+            CheckTokens(stacked);
+        }
+
+        /// <summary>
+        /// Indexes two tokens whose offsets reach up to <see cref="int.MaxValue"/>, the
+        /// first one optionally carrying a payload, with term vectors enabled.
+        /// </summary>
+        [Test]
+        public virtual void TestLegalbutVeryLargeOffsets()
+        {
+            const int boundary = int.MaxValue - 500;
+
+            Directory dir = NewDirectory();
+            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
+            IndexWriter writer = new IndexWriter(dir, config);
+
+            Token first = new Token("foo", 0, boundary);
+            if (Random().NextBoolean())
+            {
+                first.Payload = new BytesRef("test");
+            }
+            Token second = new Token("foo", boundary, int.MaxValue);
+            TokenStream tokenStream = new CannedTokenStream(new Token[] { first, second });
+
+            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED)
+            {
+                IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
+                // store some term vectors for the checkindex cross-check
+                StoreTermVectors = true,
+                StoreTermVectorPositions = true,
+                StoreTermVectorOffsets = true
+            };
+
+            Document doc = new Document();
+            doc.Add(new Field("foo", tokenStream, ft));
+            writer.AddDocument(doc);
+
+            writer.Dispose();
+            dir.Dispose();
+        }
+
+        // TODO: more tests with other possibilities
+
+        /// <summary>
+        /// Indexes <paramref name="tokens"/> as the "body" field of a single document with
+        /// offsets and term vectors enabled. On success the writer and directory are
+        /// closed normally; if indexing throws, they are closed with close-time failures
+        /// suppressed and the original exception is rethrown.
+        /// </summary>
+        /// <param name="tokens">The canned tokens to feed to the indexer.</param>
+        private void CheckTokens(Token[] tokens)
+        {
+            Directory dir = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Iwc);
+            try
+            {
+                FieldType ft = new FieldType(TextField.TYPE_NOT_STORED)
+                {
+                    IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
+                    // store some term vectors for the checkindex cross-check
+                    StoreTermVectors = true,
+                    StoreTermVectorPositions = true,
+                    StoreTermVectorOffsets = true
+                };
+
+                Document doc = new Document();
+                doc.Add(new Field("body", new CannedTokenStream(tokens), ft));
+                writer.AddDocument(doc);
+            }
+            catch
+            {
+                // indexing failed: release resources without masking the original exception
+                IOUtils.CloseWhileHandlingException(writer, dir);
+                throw;
+            }
+            IOUtils.Close(writer, dir);
+        }
+
+        /// <summary>
+        /// Builds a <see cref="Token"/> with the given term text, position increment and offsets.
+        /// </summary>
+        /// <param name="text">Term text appended to the token.</param>
+        /// <param name="posIncr">Position increment relative to the previous token.</param>
+        /// <param name="startOffset">Start offset passed to <c>SetOffset</c>.</param>
+        /// <param name="endOffset">End offset passed to <c>SetOffset</c>.</param>
+        /// <returns>The newly created token.</returns>
+        private Token MakeToken(string text, int posIncr, int startOffset, int endOffset)
+        {
+            var token = new Token();
+            token.Append(text);
+            token.PositionIncrement = posIncr;
+            token.SetOffset(startOffset, endOffset);
+            return token;
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs
new file mode 100644
index 0000000..031d5c0
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs
@@ -0,0 +1,142 @@
+using Lucene.Net.Support;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+    using Lucene.Net.Util;
+    using NUnit.Framework;
+
+    /*
+             * Licensed to the Apache Software Foundation (ASF) under one or more
+             * contributor license agreements.  See the NOTICE file distributed with
+             * this work for additional information regarding copyright ownership.
+             * The ASF licenses this file to You under the Apache License, Version 2.0
+             * (the "License"); you may not use this file except in compliance with
+             * the License.  You may obtain a copy of the License at
+             *
+             *     http://www.apache.org/licenses/LICENSE-2.0
+             *
+             * Unless required by applicable law or agreed to in writing, software
+             * distributed under the License is distributed on an "AS IS" BASIS,
+             * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+             * See the License for the specific language governing permissions and
+             * limitations under the License.
+             */
+
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+    //using MergedIterator = Lucene.Net.Util.MergedIterator;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+
+    [TestFixture]
+    public class TestPrefixCodedTerms : LuceneTestCase
+    {
+        /// <summary>
+        /// A <see cref="PrefixCodedTerms"/> built without adding any term enumerates nothing.
+        /// </summary>
+        [Test]
+        public virtual void TestEmpty()
+        {
+            PrefixCodedTerms empty = new PrefixCodedTerms.Builder().Finish();
+            IEnumerator<Term> iterator = empty.GetEnumerator();
+            bool hasAny = iterator.MoveNext();
+            Assert.IsFalse(hasAny);
+        }
+
+        /// <summary>
+        /// A <see cref="PrefixCodedTerms"/> built from a single term enumerates exactly
+        /// that term and then ends.
+        /// </summary>
+        [Test]
+        public virtual void TestOne()
+        {
+            Term term = new Term("foo", "bogus");
+            PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+            b.Add(term);
+            PrefixCodedTerms pb = b.Finish();
+            // dispose the enumerator deterministically, as a foreach over the terms would
+            using (IEnumerator<Term> iterator = pb.GetEnumerator())
+            {
+                Assert.IsTrue(iterator.MoveNext());
+                Assert.AreEqual(term, iterator.Current);
+                // nothing may follow the single term that was added
+                Assert.IsFalse(iterator.MoveNext());
+            }
+        }
+
+        /// <summary>
+        /// Builds a <see cref="PrefixCodedTerms"/> from a sorted set of random terms and
+        /// checks that enumerating it yields the same terms in the same order.
+        /// </summary>
+        [Test]
+        public virtual void TestRandom()
+        {
+            SortedSet<Term> sortedTerms = new SortedSet<Term>();
+            int remaining = AtLeast(10000);
+            while (remaining-- > 0)
+            {
+                string field = TestUtil.RandomUnicodeString(Random(), 2);
+                string text = TestUtil.RandomUnicodeString(Random());
+                sortedTerms.Add(new Term(field, text));
+            }
+
+            PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+            foreach (Term term in sortedTerms)
+            {
+                builder.Add(term);
+            }
+            PrefixCodedTerms coded = builder.Finish();
+
+            IEnumerator<Term> expected = sortedTerms.GetEnumerator();
+            using (IEnumerator<Term> actual = coded.GetEnumerator())
+            {
+                while (actual.MoveNext())
+                {
+                    Assert.IsTrue(expected.MoveNext());
+                    Assert.AreEqual(expected.Current, actual.Current);
+                }
+            }
+            // every expected term must have been consumed
+            Assert.IsFalse(expected.MoveNext());
+        }
+
+        /// <summary>
+        /// Merging two single-term <see cref="PrefixCodedTerms"/> instances yields both
+        /// terms in sorted order and nothing else.
+        /// </summary>
+        [Test]
+        public virtual void TestMergeOne()
+        {
+            Term t1 = new Term("foo", "a");
+            PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder();
+            b1.Add(t1);
+            PrefixCodedTerms pb1 = b1.Finish();
+
+            Term t2 = new Term("foo", "b");
+            PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder();
+            b2.Add(t2);
+            PrefixCodedTerms pb2 = b2.Finish();
+
+            IEnumerator<Term> merged = new MergedIterator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator());
+            Assert.IsTrue(merged.MoveNext());
+            Assert.AreEqual(t1, merged.Current);
+            Assert.IsTrue(merged.MoveNext());
+            Assert.AreEqual(t2, merged.Current);
+            // the merged view must be exhausted after the two input terms
+            Assert.IsFalse(merged.MoveNext());
+        }
+
+        /// <summary>
+        /// Builds between 2 and 10 <see cref="PrefixCodedTerms"/> instances from random
+        /// term sets and checks that merging their enumerators yields the sorted union
+        /// of all the terms.
+        /// </summary>
+        [Test]
+        public virtual void TestMergeRandom()
+        {
+            PrefixCodedTerms[] coded = new PrefixCodedTerms[TestUtil.NextInt(Random(), 2, 10)];
+            SortedSet<Term> union = new SortedSet<Term>();
+
+            for (int i = 0; i < coded.Length; i++)
+            {
+                SortedSet<Term> terms = new SortedSet<Term>();
+                int remaining = TestUtil.NextInt(Random(), 0, 10000);
+                while (remaining-- > 0)
+                {
+                    string field = TestUtil.RandomUnicodeString(Random(), 2);
+                    string text = TestUtil.RandomUnicodeString(Random(), 4);
+                    terms.Add(new Term(field, text));
+                }
+                union.AddAll(terms);
+
+                PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+                foreach (Term term in terms)
+                {
+                    builder.Add(term);
+                }
+                coded[i] = builder.Finish();
+            }
+
+            // one enumerator per coded term set, in the same order as the sets
+            IEnumerator<Term>[] subs = new IEnumerator<Term>[coded.Length];
+            for (int i = 0; i < subs.Length; i++)
+            {
+                subs[i] = coded[i].GetEnumerator();
+            }
+
+            IEnumerator<Term> expected = union.GetEnumerator();
+            IEnumerator<Term> actual = new MergedIterator<Term>(subs);
+            while (actual.MoveNext())
+            {
+                Assert.IsTrue(expected.MoveNext());
+                Assert.AreEqual(expected.Current, actual.Current);
+            }
+            // every term of the union must have been produced by the merge
+            Assert.IsFalse(expected.MoveNext());
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestReaderClosed.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestReaderClosed.cs b/src/Lucene.Net.Tests/Index/TestReaderClosed.cs
new file mode 100644
index 0000000..99df942
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestReaderClosed.cs
@@ -0,0 +1,118 @@
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+    using Directory = Lucene.Net.Store.Directory;
+    using Document = Documents.Document;
+    using Field = Field;
+    using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
+    using TermRangeQuery = Lucene.Net.Search.TermRangeQuery;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+
+    /// <summary>
+    /// Exercises searching through an <see cref="IndexSearcher"/> after the reader it was
+    /// created from has been disposed.
+    /// </summary>
+    [TestFixture]
+    public class TestReaderClosed : LuceneTestCase
+    {
+        // Reader over the index built in SetUp; every test disposes it itself.
+        private IndexReader Reader;
+
+        // Directory backing the index; disposed in TearDown.
+        private Directory Dir;
+
+        /// <summary>
+        /// Builds a fresh index whose documents each carry one string field named "field"
+        /// holding a random unicode value, then opens <see cref="Reader"/> from the writer.
+        /// </summary>
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            Dir = NewDirectory();
+            RandomIndexWriter indexWriter = new RandomIndexWriter(
+                Random(),
+                Dir,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.KEYWORD, false))
+                    .SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000)));
+
+            // A single Document/Field pair is reused; only the field value changes per document.
+            Document document = new Document();
+            Field valueField = NewStringField("field", "", Field.Store.NO);
+            document.Add(valueField);
+
+            // We generate awful prefixes: good for testing.
+            // But for the preflex codec the test can be very slow, so use fewer iterations.
+            int docCount = AtLeast(10);
+            int added = 0;
+            while (added < docCount)
+            {
+                valueField.SetStringValue(TestUtil.RandomUnicodeString(Random(), 10));
+                indexWriter.AddDocument(document);
+                added++;
+            }
+
+            Reader = indexWriter.Reader;
+            indexWriter.Dispose();
+        }
+
+        /// <summary>
+        /// Searches once with an open reader, disposes the reader, then searches again.
+        /// An <see cref="AlreadyClosedException"/> from the second search is tolerated.
+        /// </summary>
+        [Test]
+        public virtual void Test()
+        {
+            Assert.IsTrue(Reader.RefCount > 0);
+            IndexSearcher indexSearcher = NewSearcher(Reader);
+            TermRangeQuery rangeQuery = TermRangeQuery.NewStringRange("field", "a", "z", true, true);
+            indexSearcher.Search(rangeQuery, 5);
+
+            Reader.Dispose();
+            try
+            {
+                indexSearcher.Search(rangeQuery, 5);
+            }
+            catch (AlreadyClosedException)
+            {
+                // expected
+            }
+        }
+
+        /// <summary>
+        /// LUCENE-3800: same scenario as <see cref="Test"/>, but the searcher runs over a
+        /// chain of wrapping readers and only the innermost (original) reader is disposed.
+        /// When an <see cref="AlreadyClosedException"/> is thrown, its message is verified.
+        /// </summary>
+        [Test]
+        public virtual void TestReaderChaining()
+        {
+            Assert.IsTrue(Reader.RefCount > 0);
+            IndexReader chained = SlowCompositeReaderWrapper.Wrap(Reader);
+            chained = new ParallelAtomicReader((AtomicReader)chained);
+
+            IndexSearcher indexSearcher = NewSearcher(chained);
+            TermRangeQuery rangeQuery = TermRangeQuery.NewStringRange("field", "a", "z", true, true);
+            indexSearcher.Search(rangeQuery, 5);
+
+            // close original child reader
+            Reader.Dispose();
+            try
+            {
+                indexSearcher.Search(rangeQuery, 5);
+            }
+            catch (AlreadyClosedException closed)
+            {
+                Assert.AreEqual("this IndexReader cannot be used anymore as one of its child readers was closed", closed.Message);
+            }
+            finally
+            {
+                // shutdown executor: in case of wrap-wrap-wrapping
+                searcher_DisposeReader(indexSearcher);
+            }
+        }
+
+        // Disposes the reader the given searcher is currently bound to.
+        private static void searcher_DisposeReader(IndexSearcher indexSearcher)
+        {
+            indexSearcher.IndexReader.Dispose();
+        }
+
+        /// <summary>
+        /// Disposes the directory created in <see cref="SetUp"/>, then runs the base tear-down.
+        /// </summary>
+        [TearDown]
+        public override void TearDown()
+        {
+            Dir.Dispose();
+            base.TearDown();
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestRollback.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestRollback.cs b/src/Lucene.Net.Tests/Index/TestRollback.cs
new file mode 100644
index 0000000..f613e47
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestRollback.cs
@@ -0,0 +1,67 @@
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using Directory = Lucene.Net.Store.Directory;
+    using Document = Documents.Document;
+    using Field = Field;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+
+    /// <summary>
+    /// Verifies that rolling back an <see cref="IndexWriter"/> leaves the previously written
+    /// documents in place.
+    /// </summary>
+    [TestFixture]
+    public class TestRollback : LuceneTestCase
+    {
+        /// <summary>
+        /// LUCENE-2536: writes five documents, reopens the index in append mode with a
+        /// two-document buffer, updates three of the documents, rolls back, and checks
+        /// that a freshly opened reader still reports five documents.
+        /// </summary>
+        [Test]
+        public virtual void TestRollbackIntegrityWithBufferFlush()
+        {
+            Directory directory = NewDirectory();
+
+            // Seed the index with five documents whose "pk" values are "0".."4".
+            RandomIndexWriter seedWriter = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+            int seeded = 0;
+            while (seeded < 5)
+            {
+                Document seedDoc = new Document();
+                seedDoc.Add(NewStringField("pk", Convert.ToString(seeded), Field.Store.YES));
+                seedWriter.AddDocument(seedDoc);
+                seeded++;
+            }
+            seedWriter.Dispose();
+
+            // If buffer size is small enough to cause a flush, errors ensue...
+            IndexWriter appendWriter = new IndexWriter(
+                directory,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMaxBufferedDocs(2)
+                    .SetOpenMode(OpenMode.APPEND));
+
+            // Replace the first three documents; these changes are discarded by the rollback below.
+            int updated = 0;
+            while (updated < 3)
+            {
+                string key = Convert.ToString(updated);
+                Document replacement = new Document();
+                replacement.Add(NewStringField("pk", key, Field.Store.YES));
+                replacement.Add(NewStringField("text", "foo", Field.Store.YES));
+                appendWriter.UpdateDocument(new Term("pk", key), replacement);
+                updated++;
+            }
+            appendWriter.Rollback();
+
+            IndexReader verifier = DirectoryReader.Open(directory);
+            Assert.AreEqual(5, verifier.NumDocs, "index should contain same number of docs post rollback");
+            verifier.Dispose();
+            directory.Dispose();
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs b/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs
new file mode 100644
index 0000000..8989662
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestRollingUpdates.cs
@@ -0,0 +1,285 @@
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    using Codecs.Memory;
+    //using MemoryPostingsFormat = Lucene.Net.Codecs.memory.MemoryPostingsFormat;
+
+    using Lucene.Net.Randomized.Generators;
+    using Lucene.Net.Store;
+    using Lucene.Net.Support;
+    using Lucene.Net.Util;
+    using NUnit.Framework;
+    using Codec = Lucene.Net.Codecs.Codec;
+    using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using TermQuery = Lucene.Net.Search.TermQuery;
+    using TopDocs = Lucene.Net.Search.TopDocs;
+
+    /// <summary>
+    /// Stress tests for repeatedly updating the same documents in an index.
+    /// </summary>
+    [TestFixture]
+    public class TestRollingUpdates : LuceneTestCase
+    {
+        /// <summary>
+        /// Just updates the same set of N docs over and over, to stress out deletions.
+        /// Periodically reopens a near-real-time reader and, when deletions were applied,
+        /// checks that exactly N documents are visible. Afterwards verifies that no
+        /// unreferenced files remain and (LUCENE-4455) that the per-segment byte sizes add
+        /// up to the size of the non-segments files in the directory.
+        /// </summary>
+        [Test]
+        public virtual void TestRollingUpdates_Mem()
+        {
+            Random random = new Random(Random().Next());
+            BaseDirectoryWrapper dir = NewDirectory();
+            LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
+
+            //provider.register(new MemoryCodec());
+            if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
+            {
+                // Use the same NextBoolean() spelling as every other call in this test.
+                Codec.Default =
+                    TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(Random().NextBoolean(), random.NextFloat()));
+            }
+
+            MockAnalyzer analyzer = new MockAnalyzer(Random());
+            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
+
+            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+            int SIZE = AtLeast(20);
+            int id = 0;
+            IndexReader r = null;
+            IndexSearcher s = null;
+            int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));
+            if (VERBOSE)
+            {
+                Console.WriteLine("TEST: numUpdates=" + numUpdates);
+            }
+            // Number of updates issued since the current reader/searcher was opened.
+            int updateCount = 0;
+            // TODO: sometimes update ids not in order...
+            for (int docIter = 0; docIter < numUpdates; docIter++)
+            {
+                Documents.Document doc = docs.NextDoc();
+                string myID = "" + id;
+                // Cycle id through 0..SIZE-1 so the same SIZE documents are rewritten over and over.
+                if (id == SIZE - 1)
+                {
+                    id = 0;
+                }
+                else
+                {
+                    id++;
+                }
+                if (VERBOSE)
+                {
+                    // NOTE: id was already advanced above, so this prints the id of the next
+                    // iteration; myID is the one used for the document handled here.
+                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
+                }
+                ((Field)doc.GetField("docid")).SetStringValue(myID);
+
+                Term idTerm = new Term("docid", myID);
+
+                bool doUpdate;
+                if (s != null && updateCount < SIZE)
+                {
+                    // A searcher with applied deletions is available: locate the current copy
+                    // and try to delete it directly by doc id; fall back to UpdateDocument
+                    // when TryDeleteDocument reports failure.
+                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
+                    Assert.AreEqual(1, hits.TotalHits);
+                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
+                    if (VERBOSE)
+                    {
+                        if (doUpdate)
+                        {
+                            Console.WriteLine("  tryDeleteDocument failed");
+                        }
+                        else
+                        {
+                            Console.WriteLine("  tryDeleteDocument succeeded");
+                        }
+                    }
+                }
+                else
+                {
+                    doUpdate = true;
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("  no searcher: doUpdate=true");
+                    }
+                }
+
+                updateCount++;
+
+                if (doUpdate)
+                {
+                    w.UpdateDocument(idTerm, doc);
+                }
+                else
+                {
+                    // The old copy was already removed via TryDeleteDocument.
+                    w.AddDocument(doc);
+                }
+
+                // Once every id has been written at least once, occasionally reopen the reader.
+                if (docIter >= SIZE && Random().Next(50) == 17)
+                {
+                    if (r != null)
+                    {
+                        r.Dispose();
+                    }
+
+                    bool applyDeletions = Random().NextBoolean();
+
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
+                    }
+
+                    r = w.GetReader(applyDeletions);
+                    if (applyDeletions)
+                    {
+                        s = NewSearcher(r);
+                    }
+                    else
+                    {
+                        s = null;
+                    }
+                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
+                    updateCount = 0;
+                }
+            }
+
+            if (r != null)
+            {
+                r.Dispose();
+            }
+
+            w.Commit();
+            Assert.AreEqual(SIZE, w.NumDocs);
+
+            w.Dispose();
+
+            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");
+
+            docs.Dispose();
+
+            // LUCENE-4455:
+            SegmentInfos infos = new SegmentInfos();
+            infos.Read(dir);
+            long totalBytes = 0;
+            foreach (SegmentCommitInfo sipc in infos.Segments)
+            {
+                totalBytes += sipc.SizeInBytes();
+            }
+            long totalBytes2 = 0;
+            foreach (string fileName in dir.ListAll())
+            {
+                // File names are identifiers, not linguistic text: compare ordinally so the
+                // result does not depend on the current culture.
+                if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
+                {
+                    totalBytes2 += dir.FileLength(fileName);
+                }
+            }
+            Assert.AreEqual(totalBytes2, totalBytes);
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Runs three rounds in which several <see cref="IndexingThread"/>s concurrently
+        /// update one and the same document through a shared writer, then checks that the
+        /// resulting index contains exactly one document.
+        /// </summary>
+        [Test]
+        public virtual void TestUpdateSameDoc()
+        {
+            Directory dir = NewDirectory();
+
+            LineFileDocs docs = new LineFileDocs(Random());
+            for (int r = 0; r < 3; r++)
+            {
+                IndexWriter w = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
+                int numUpdates = AtLeast(20);
+                int numThreads = TestUtil.NextInt(Random(), 2, 6);
+                IndexingThread[] threads = new IndexingThread[numThreads];
+                for (int i = 0; i < numThreads; i++)
+                {
+                    threads[i] = new IndexingThread(docs, w, numUpdates, NewStringField);
+                    threads[i].Start();
+                }
+
+                // Wait for every worker before closing the shared writer.
+                for (int i = 0; i < numThreads; i++)
+                {
+                    threads[i].Join();
+                }
+
+                w.Dispose();
+            }
+
+            IndexReader open = DirectoryReader.Open(dir);
+            Assert.AreEqual(1, open.NumDocs);
+            open.Dispose();
+            docs.Dispose();
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Worker that updates the document with id "test" <see cref="Num"/> times and,
+        /// at random, checks through a near-real-time reader that exactly one document
+        /// is visible.
+        /// </summary>
+        internal class IndexingThread : ThreadClass
+        {
+            internal readonly LineFileDocs Docs;
+            internal readonly IndexWriter Writer;
+            internal readonly int Num;
+
+            // Factory used to create the "id" field (see the constructor remarks).
+            private readonly Func<string, string, Field.Store, Field> NewStringField;
+
+            /// <summary>
+            /// Creates a worker bound to the given writer.
+            /// </summary>
+            /// <param name="docs">Line file docs source; stored but not read by <see cref="Run"/>.</param>
+            /// <param name="writer">Writer that receives the updates.</param>
+            /// <param name="num">Number of updates to perform.</param>
+            /// <param name="newStringField">
+            /// LUCENENET specific
+            /// Passed in because <see cref="LuceneTestCase.NewStringField(string, string, Field.Store)"/>
+            /// is no longer static.
+            /// </param>
+            public IndexingThread(LineFileDocs docs, IndexWriter writer, int num, Func<string, string, Field.Store, Field> newStringField)
+                : base()
+            {
+                this.Docs = docs;
+                this.Writer = writer;
+                this.Num = num;
+                NewStringField = newStringField;
+            }
+
+            /// <summary>
+            /// Performs the updates. Any failure is rethrown wrapped in an
+            /// <see cref="Exception"/> carrying the original as its inner exception.
+            /// </summary>
+            public override void Run()
+            {
+                try
+                {
+                    DirectoryReader open = null;
+                    try
+                    {
+                        for (int i = 0; i < Num; i++)
+                        {
+                            Documents.Document doc = new Documents.Document(); // docs.NextDoc();
+                            doc.Add(NewStringField("id", "test", Field.Store.NO));
+                            Writer.UpdateDocument(new Term("id", "test"), doc);
+                            if (Random().Next(3) == 0)
+                            {
+                                if (open == null)
+                                {
+                                    open = DirectoryReader.Open(Writer, true);
+                                }
+                                // OpenIfChanged returns null when there is nothing newer to open.
+                                DirectoryReader reader = DirectoryReader.OpenIfChanged(open);
+                                if (reader != null)
+                                {
+                                    open.Dispose();
+                                    open = reader;
+                                }
+                                Assert.AreEqual(1, open.NumDocs, "iter: " + i + " numDocs: " + open.NumDocs + " del: " + open.NumDeletedDocs + " max: " + open.MaxDoc);
+                            }
+                        }
+                    }
+                    finally
+                    {
+                        // Release the reader even when an update or assertion above throws,
+                        // so a failing iteration does not leak it.
+                        if (open != null)
+                        {
+                            open.Dispose();
+                        }
+                    }
+                }
+                catch (Exception e)
+                {
+                    throw new Exception(e.Message, e);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs
new file mode 100644
index 0000000..ca9637a
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSameTokenSamePosition.cs
@@ -0,0 +1,110 @@
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Documents;
+using NUnit.Framework;
+
+namespace Lucene.Net.Index
+{
+    using Directory = Lucene.Net.Store.Directory;
+    using Document = Documents.Document;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using TextField = TextField;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+    /// <summary>
+    /// Indexes documents whose token stream emits the same token twice at the same position.
+    /// </summary>
+    [TestFixture]
+    public class TestSameTokenSamePosition : LuceneTestCase
+    {
+        /// <summary>
+        /// Attempt to reproduce an assertion error that happens
+        /// only with the trunk version around April 2011.
+        /// </summary>
+        [Test]
+        public virtual void Test()
+        {
+            IndexBugReproDocuments(1);
+        }
+
+        /// <summary>
+        /// Same as the above, but with more docs
+        /// </summary>
+        [Test]
+        public virtual void TestMoreDocs()
+        {
+            IndexBugReproDocuments(100);
+        }
+
+        /// <summary>
+        /// Opens a new directory and writer, adds <paramref name="documentCount"/> documents
+        /// that each hold one "eng" text field fed by a fresh <see cref="BugReproTokenStream"/>,
+        /// then disposes the writer and the directory.
+        /// </summary>
+        /// <param name="documentCount">Number of documents to add.</param>
+        private void IndexBugReproDocuments(int documentCount)
+        {
+            Directory directory = NewDirectory();
+            RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
+
+            int added = 0;
+            while (added < documentCount)
+            {
+                Document document = new Document();
+                document.Add(new TextField("eng", new BugReproTokenStream()));
+                writer.AddDocument(document);
+                added++;
+            }
+
+            writer.Dispose();
+            directory.Dispose();
+        }
+    }
+
+    /// <summary>
+    /// Token stream that emits four fixed tokens: "six" twice and "drunken" twice, where
+    /// the second token of each pair has a position increment of 0 and the same offsets
+    /// as the first.
+    /// </summary>
+    internal sealed class BugReproTokenStream : TokenStream
+    {
+        private readonly ICharTermAttribute TermAtt;
+        private readonly IOffsetAttribute OffsetAtt;
+        private readonly IPositionIncrementAttribute PosIncAtt;
+
+        // Number of tokens emitted per pass; matches the length of the tables below.
+        private readonly int TokenCount = 4;
+
+        // Index of the next token to emit; rewound by Reset().
+        private int NextTokenIndex = 0;
+
+        // Parallel tables: term text, start offset, end offset and position increment per token.
+        private readonly string[] Terms = { "six", "six", "drunken", "drunken" };
+        private readonly int[] Starts = { 0, 0, 4, 4 };
+        private readonly int[] Ends = { 3, 3, 11, 11 };
+        private readonly int[] Incs = { 1, 0, 1, 0 };
+
+        /// <summary>
+        /// Registers the term, offset and position-increment attributes.
+        /// </summary>
+        public BugReproTokenStream()
+        {
+            TermAtt = AddAttribute<ICharTermAttribute>();
+            OffsetAtt = AddAttribute<IOffsetAttribute>();
+            PosIncAtt = AddAttribute<IPositionIncrementAttribute>();
+        }
+
+        /// <summary>
+        /// Publishes the next fixed token through the attributes.
+        /// </summary>
+        /// <returns><c>true</c> while tokens remain; <c>false</c> once all four were emitted.</returns>
+        public override bool IncrementToken()
+        {
+            if (NextTokenIndex >= TokenCount)
+            {
+                return false;
+            }
+
+            int current = NextTokenIndex;
+            TermAtt.SetEmpty().Append(Terms[current]);
+            OffsetAtt.SetOffset(Starts[current], Ends[current]);
+            PosIncAtt.PositionIncrement = Incs[current];
+            NextTokenIndex = current + 1;
+            return true;
+        }
+
+        /// <summary>
+        /// Resets the base stream and rewinds to the first token.
+        /// </summary>
+        public override void Reset()
+        {
+            base.Reset();
+            NextTokenIndex = 0;
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs b/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs
new file mode 100644
index 0000000..30786b5
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentMerger.cs
@@ -0,0 +1,207 @@
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using Codec = Lucene.Net.Codecs.Codec;
+    using Constants = Lucene.Net.Util.Constants;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using FixedBitSet = Lucene.Net.Util.FixedBitSet;
+    using InfoStream = Lucene.Net.Util.InfoStream;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+
+    /// <summary>
+    /// Tests merging two single-document segments with <see cref="SegmentMerger"/> and
+    /// building a <see cref="MergeState.DocMap"/> from a live-docs bit set.
+    /// </summary>
+    [TestFixture]
+    public class TestSegmentMerger : LuceneTestCase
+    {
+        //The variables for the new merged segment
+        private Directory MergedDir;
+
+        private string MergedSegment = "test";
+
+        //First segment to be merged
+        private Directory Merge1Dir;
+
+        private Document Doc1;
+        private SegmentReader Reader1;
+
+        //Second Segment to be merged
+        private Directory Merge2Dir;
+
+        private Document Doc2;
+        private SegmentReader Reader2;
+
+        /// <summary>
+        /// Creates the three directories, writes one <see cref="DocHelper"/> document into
+        /// each source directory and opens a <see cref="SegmentReader"/> on each of them.
+        /// </summary>
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            this.Doc1 = new Document();
+            this.Doc2 = new Document();
+            MergedDir = NewDirectory();
+            Merge1Dir = NewDirectory();
+            Merge2Dir = NewDirectory();
+            DocHelper.SetupDoc(Doc1);
+            SegmentCommitInfo info1 = DocHelper.WriteDoc(Random(), Merge1Dir, Doc1);
+            DocHelper.SetupDoc(Doc2);
+            SegmentCommitInfo info2 = DocHelper.WriteDoc(Random(), Merge2Dir, Doc2);
+            Reader1 = new SegmentReader(info1, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
+            Reader2 = new SegmentReader(info2, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
+        }
+
+        /// <summary>
+        /// Disposes both source readers and all three directories, then runs the base tear-down.
+        /// </summary>
+        [TearDown]
+        public override void TearDown()
+        {
+            Reader1.Dispose();
+            Reader2.Dispose();
+            MergedDir.Dispose();
+            Merge1Dir.Dispose();
+            Merge2Dir.Dispose();
+            base.TearDown();
+        }
+
+        /// <summary>
+        /// Sanity check that <see cref="SetUp"/> populated every directory and reader.
+        /// </summary>
+        [Test]
+        public virtual void Test()
+        {
+            Assert.IsNotNull(MergedDir);
+            Assert.IsNotNull(Merge1Dir);
+            Assert.IsNotNull(Merge2Dir);
+            Assert.IsNotNull(Reader1);
+            Assert.IsNotNull(Reader2);
+        }
+
+        /// <summary>
+        /// Merges the two source segments into <see cref="MergedDir"/>, reopens the result
+        /// and checks document count, stored fields, postings, term vectors and norms.
+        /// </summary>
+        [Test]
+        public virtual void TestMerge()
+        {
+            Codec codec = Codec.Default;
+            SegmentInfo si = new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, -1, false, codec, null);
+
+            SegmentMerger merger = new SegmentMerger(Arrays.AsList<AtomicReader>(Reader1, Reader2), si, InfoStream.Default, MergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, CheckAbort.NONE, new FieldInfos.FieldNumbers(), NewIOContext(Random()), true);
+            MergeState mergeState = merger.Merge();
+            int docsMerged = mergeState.SegmentInfo.DocCount;
+            Assert.AreEqual(2, docsMerged);
+            //Should be able to open a new SegmentReader against the new directory
+            SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo(new SegmentInfo(MergedDir, Constants.LUCENE_MAIN_VERSION, MergedSegment, docsMerged, false, codec, null), 0, -1L, -1L), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));
+            try
+            {
+                Assert.IsNotNull(mergedReader);
+                Assert.AreEqual(2, mergedReader.NumDocs);
+                Document newDoc1 = mergedReader.Document(0);
+                Assert.IsNotNull(newDoc1);
+                //There are 2 unstored fields on the document
+                Assert.AreEqual(DocHelper.NumFields(Doc1) - DocHelper.Unstored.Count, DocHelper.NumFields(newDoc1));
+                Document newDoc2 = mergedReader.Document(1);
+                Assert.IsNotNull(newDoc2);
+                Assert.AreEqual(DocHelper.NumFields(Doc2) - DocHelper.Unstored.Count, DocHelper.NumFields(newDoc2));
+
+                DocsEnum termDocs = TestUtil.Docs(Random(), mergedReader, DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(mergedReader), null, 0);
+                Assert.IsNotNull(termDocs);
+                Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+
+                // Count the merged fields that carry term vectors.
+                int tvCount = 0;
+                foreach (FieldInfo fieldInfo in mergedReader.FieldInfos)
+                {
+                    if (fieldInfo.HasVectors)
+                    {
+                        tvCount++;
+                    }
+                }
+
+                //System.out.println("stored size: " + stored.Size());
+                Assert.AreEqual(3, tvCount, "We do not have 3 fields that were indexed with term vector");
+
+                Terms vector = mergedReader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);
+                Assert.IsNotNull(vector);
+                Assert.AreEqual(3, vector.Count);
+                TermsEnum termsEnum = vector.GetIterator(null);
+
+                int i = 0;
+                while (termsEnum.Next() != null)
+                {
+                    string term = termsEnum.Term.Utf8ToString();
+                    int freq = (int)termsEnum.TotalTermFreq;
+                    //System.out.println("Term: " + term + " Freq: " + freq);
+                    // Ordinal search: the term must occur verbatim in the field text,
+                    // independent of the current culture's collation rules.
+                    Assert.IsTrue(DocHelper.FIELD_2_TEXT.IndexOf(term, System.StringComparison.Ordinal) != -1);
+                    Assert.AreEqual(DocHelper.FIELD_2_FREQS[i], freq);
+                    i++;
+                }
+
+                TestSegmentReader.CheckNorms(mergedReader);
+            }
+            finally
+            {
+                // Dispose even when an assertion above fails, so the reader is not leaked.
+                mergedReader.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Compares two doc maps entry by entry.
+        /// </summary>
+        /// <returns>
+        /// <c>true</c> when both maps report the same <c>MaxDoc</c> and return the same
+        /// value from <c>Get(i)</c> for every <c>i</c> below it; otherwise <c>false</c>.
+        /// </returns>
+        private static bool Equals(MergeState.DocMap map1, MergeState.DocMap map2)
+        {
+            if (map1.MaxDoc != map2.MaxDoc)
+            {
+                return false;
+            }
+            for (int i = 0; i < map1.MaxDoc; ++i)
+            {
+                if (map1.Get(i) != map2.Get(i))
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Builds a doc map from a random live-docs bit set and checks its counts and that
+        /// live documents are renumbered compactly while deleted ones map to -1.
+        /// </summary>
+        [Test]
+        public virtual void TestBuildDocMap()
+        {
+            int maxDoc = TestUtil.NextInt(Random(), 1, 128);
+            int numDocs = TestUtil.NextInt(Random(), 0, maxDoc);
+            int numDeletedDocs = maxDoc - numDocs;
+            FixedBitSet liveDocs = new FixedBitSet(maxDoc);
+            for (int i = 0; i < numDocs; ++i)
+            {
+                // Keep drawing until a not-yet-live doc id is hit, so exactly numDocs bits end up set.
+                while (true)
+                {
+                    int docID = Random().Next(maxDoc);
+                    if (!liveDocs.Get(docID))
+                    {
+                        liveDocs.Set(docID);
+                        break;
+                    }
+                }
+            }
+
+            MergeState.DocMap docMap = MergeState.DocMap.Build(maxDoc, liveDocs);
+
+            Assert.AreEqual(maxDoc, docMap.MaxDoc);
+            Assert.AreEqual(numDocs, docMap.NumDocs);
+            Assert.AreEqual(numDeletedDocs, docMap.NumDeletedDocs);
+            // assert the mapping is compact
+            for (int i = 0, del = 0; i < maxDoc; ++i)
+            {
+                if (!liveDocs.Get(i))
+                {
+                    Assert.AreEqual(-1, docMap.Get(i));
+                    ++del;
+                }
+                else
+                {
+                    // A live doc shifts down by the number of deleted docs before it.
+                    Assert.AreEqual(i - del, docMap.Get(i));
+                }
+            }
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentReader.cs b/src/Lucene.Net.Tests/Index/TestSegmentReader.cs
new file mode 100644
index 0000000..b98287d
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentReader.cs
@@ -0,0 +1,277 @@
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using Document = Documents.Document;
+    using IOContext = Lucene.Net.Store.IOContext;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+
+    /// <summary>
+    /// Tests <see cref="SegmentReader"/> against the single document that
+    /// <see cref="DocHelper"/> writes into a fresh directory before every test.
+    /// </summary>
+    [TestFixture]
+    public class TestSegmentReader : LuceneTestCase
+    {
+        private Directory Dir;
+        private Document TestDoc;
+        private SegmentReader Reader;
+
+        //TODO: Setup the reader w/ multiple documents
+        /// <summary>
+        /// Creates a new directory, fills <c>TestDoc</c> through <see cref="DocHelper"/>,
+        /// writes it, and opens a <see cref="SegmentReader"/> over the resulting segment.
+        /// </summary>
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            Dir = NewDirectory();
+            TestDoc = new Document();
+            DocHelper.SetupDoc(TestDoc);
+            SegmentCommitInfo info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
+            Reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.READ);
+        }
+
+        /// <summary>
+        /// Disposes the reader before the directory it reads from, then runs the base teardown.
+        /// </summary>
+        [TearDown]
+        public override void TearDown()
+        {
+            Reader.Dispose();
+            Dir.Dispose();
+            base.TearDown();
+        }
+
+        /// <summary>
+        /// Sanity check of the fixture state created by <see cref="SetUp"/>.
+        /// </summary>
+        [Test]
+        public virtual void Test()
+        {
+            Assert.IsNotNull(Dir);
+            Assert.IsNotNull(Reader);
+            Assert.IsTrue(DocHelper.NameValues.Count > 0);
+            Assert.AreEqual(DocHelper.All.Count, DocHelper.NumFields(TestDoc));
+        }
+
+        /// <summary>
+        /// Loads document 0 back from the reader and checks its field count and field names.
+        /// </summary>
+        [Test]
+        public virtual void TestDocument()
+        {
+            Assert.AreEqual(1, Reader.NumDocs);
+            Assert.IsTrue(Reader.MaxDoc >= 1);
+            Document result = Reader.Document(0);
+            Assert.IsNotNull(result);
+            //There are 2 unstored fields on the document that are not preserved across writing
+            Assert.AreEqual(DocHelper.NumFields(TestDoc) - DocHelper.Unstored.Count, DocHelper.NumFields(result));
+
+            IList<IIndexableField> fields = result.Fields;
+            foreach (IIndexableField field in fields)
+            {
+                Assert.IsNotNull(field);
+                Assert.IsTrue(DocHelper.NameValues.ContainsKey(field.Name));
+            }
+        }
+
+        /// <summary>
+        /// Buckets the reader's field infos by indexed / not indexed and by
+        /// term vectors / no term vectors, then compares each bucket with the
+        /// corresponding <see cref="DocHelper"/> collection.
+        /// </summary>
+        [Test]
+        public virtual void TestGetFieldNameVariations()
+        {
+            ICollection<string> allFieldNames = new HashSet<string>();
+            ICollection<string> indexedFieldNames = new HashSet<string>();
+            ICollection<string> notIndexedFieldNames = new HashSet<string>();
+            ICollection<string> tvFieldNames = new HashSet<string>();
+            ICollection<string> noTVFieldNames = new HashSet<string>();
+
+            foreach (FieldInfo fieldInfo in Reader.FieldInfos)
+            {
+                string name = fieldInfo.Name;
+                allFieldNames.Add(name);
+                if (fieldInfo.IsIndexed)
+                {
+                    indexedFieldNames.Add(name);
+                }
+                else
+                {
+                    notIndexedFieldNames.Add(name);
+                }
+                if (fieldInfo.HasVectors)
+                {
+                    tvFieldNames.Add(name);
+                }
+                else if (fieldInfo.IsIndexed)
+                {
+                    // only indexed fields count as "no term vector" fields
+                    noTVFieldNames.Add(name);
+                }
+            }
+
+            Assert.AreEqual(DocHelper.All.Count, allFieldNames.Count);
+            foreach (string s in allFieldNames)
+            {
+                Assert.IsTrue(DocHelper.NameValues.ContainsKey(s) || s.Length == 0);
+            }
+
+            Assert.AreEqual(DocHelper.Indexed.Count, indexedFieldNames.Count);
+            foreach (string s in indexedFieldNames)
+            {
+                Assert.IsTrue(DocHelper.Indexed.ContainsKey(s) || s.Length == 0);
+            }
+
+            Assert.AreEqual(DocHelper.Unindexed.Count, notIndexedFieldNames.Count);
+            //Get all indexed fields that are storing term vectors
+            Assert.AreEqual(DocHelper.Termvector.Count, tvFieldNames.Count);
+
+            Assert.AreEqual(DocHelper.Notermvector.Count, noTVFieldNames.Count);
+        }
+
+        /// <summary>
+        /// Enumerates every term of every field and checks that its text occurs in the
+        /// value <see cref="DocHelper"/> recorded for that field, then checks that
+        /// postings can be pulled for two known terms.
+        /// </summary>
+        [Test]
+        public virtual void TestTerms()
+        {
+            Fields fields = MultiFields.GetFields(Reader);
+            foreach (string field in fields)
+            {
+                Terms terms = fields.GetTerms(field);
+                Assert.IsNotNull(terms);
+                TermsEnum termsEnum = terms.GetIterator(null);
+                while (termsEnum.Next() != null)
+                {
+                    BytesRef term = termsEnum.Term;
+                    Assert.IsNotNull(term);
+                    string fieldValue = (string)DocHelper.NameValues[field];
+                    // string.Contains is an ordinal search; the single-argument
+                    // IndexOf(string) used previously is culture-sensitive.
+                    Assert.IsTrue(fieldValue.Contains(term.Utf8ToString()));
+                }
+            }
+
+            DocsEnum termDocs = TestUtil.Docs(Random(), Reader, DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"), MultiFields.GetLiveDocs(Reader), null, 0);
+            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs.NextDoc());
+
+            termDocs = TestUtil.Docs(Random(), Reader, DocHelper.NO_NORMS_KEY, new BytesRef(DocHelper.NO_NORMS_TEXT), MultiFields.GetLiveDocs(Reader), null, 0);
+
+            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs.NextDoc());
+
+            DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(Reader, MultiFields.GetLiveDocs(Reader), DocHelper.TEXT_FIELD_1_KEY, new BytesRef("field"));
+            // NOTE: prior rev of this test was failing to first
+            // call next here:
+            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, positions.NextDoc());
+            Assert.AreEqual(0, positions.DocID);
+            Assert.IsTrue(positions.NextPosition() >= 0);
+        }
+
+        /// <summary>
+        /// Runs <see cref="CheckNorms(AtomicReader)"/> against the fixture's reader.
+        /// </summary>
+        [Test]
+        public virtual void TestNorms()
+        {
+            //TODO: Not sure how these work/should be tested
+            CheckNorms(Reader);
+        }
+
+        /// <summary>
+        /// For every indexed field in <c>DocHelper.Fields</c>, asserts that the reader
+        /// exposes norm values exactly when the field type does not omit norms and the
+        /// field is not listed in <c>DocHelper.NoNorms</c>.
+        /// </summary>
+        /// <param name="reader">Reader whose norm values are inspected.</param>
+        public static void CheckNorms(AtomicReader reader)
+        {
+            // test omit norms
+            for (int i = 0; i < DocHelper.Fields.Length; i++)
+            {
+                IIndexableField f = DocHelper.Fields[i];
+                if (!f.FieldType.IsIndexed)
+                {
+                    continue;
+                }
+
+                // Look the norms up once instead of once per assertion.
+                bool hasNorms = reader.GetNormValues(f.Name) != null;
+                Assert.AreEqual(!f.FieldType.OmitNorms, hasNorms);
+                Assert.AreEqual(!DocHelper.NoNorms.ContainsKey(f.Name), hasNorms);
+                if (!hasNorms)
+                {
+                    // test for norms of null
+                    NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name);
+                    Assert.IsNull(norms);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Checks the term vector stored for <c>DocHelper.TEXT_FIELD_2_KEY</c> on document 0
+        /// and the number of term vector fields on that document.
+        /// </summary>
+        [Test]
+        public virtual void TestTermVectors()
+        {
+            Terms result = Reader.GetTermVectors(0).GetTerms(DocHelper.TEXT_FIELD_2_KEY);
+            Assert.IsNotNull(result);
+            Assert.AreEqual(3, result.Count);
+            TermsEnum termsEnum = result.GetIterator(null);
+            while (termsEnum.Next() != null)
+            {
+                string term = termsEnum.Term.Utf8ToString();
+                int freq = (int)termsEnum.TotalTermFreq;
+                // Ordinal containment check (see TestTerms for the rationale).
+                Assert.IsTrue(DocHelper.FIELD_2_TEXT.Contains(term));
+                Assert.IsTrue(freq > 0);
+            }
+
+            Fields results = Reader.GetTermVectors(0);
+            Assert.IsNotNull(results);
+            Assert.AreEqual(3, results.Count, "We do not have 3 term freq vectors");
+        }
+
+        /// <summary>
+        /// Asserts that requesting a document or its term vectors with an id of -1 or
+        /// <c>MaxDoc</c> throws <see cref="System.IndexOutOfRangeException"/>.
+        /// </summary>
+        [Test]
+        public virtual void TestOutOfBoundsAccess()
+        {
+            int numDocs = Reader.MaxDoc;
+
+            Assert.Throws<System.IndexOutOfRangeException>(() => Reader.Document(-1));
+            Assert.Throws<System.IndexOutOfRangeException>(() => Reader.GetTermVectors(-1));
+            Assert.Throws<System.IndexOutOfRangeException>(() => Reader.Document(numDocs));
+            Assert.Throws<System.IndexOutOfRangeException>(() => Reader.GetTermVectors(numDocs));
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs b/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
new file mode 100644
index 0000000..f876774
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestSegmentTermDocs.cs
@@ -0,0 +1,274 @@
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+    using NUnit.Framework;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using Directory = Lucene.Net.Store.Directory;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Document = Documents.Document;
+    using Field = Field;
+    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
+
+    /*
+         * Licensed to the Apache Software Foundation (ASF) under one or more
+         * contributor license agreements.  See the NOTICE file distributed with
+         * this work for additional information regarding copyright ownership.
+         * The ASF licenses this file to You under the Apache License, Version 2.0
+         * (the "License"); you may not use this file except in compliance with
+         * the License.  You may obtain a copy of the License at
+         *
+         *     http://www.apache.org/licenses/LICENSE-2.0
+         *
+         * Unless required by applicable law or agreed to in writing, software
+         * distributed under the License is distributed on an "AS IS" BASIS,
+         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+         * See the License for the specific language governing permissions and
+         * limitations under the License.
+         */
+
+    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
+    using TestUtil = Lucene.Net.Util.TestUtil;
+
+    /// <summary>
+    /// Tests reading postings (<see cref="DocsEnum"/>) from a segment: simple term
+    /// lookup, lookups of missing terms/fields, and <c>NextDoc</c>/<c>Advance</c>
+    /// navigation, each with terms index divisors 1 and 2.
+    /// </summary>
+    [TestFixture]
+    public class TestSegmentTermDocs : LuceneTestCase
+    {
+        private Document TestDoc;
+        private Directory Dir;
+        private SegmentCommitInfo Info;
+
+        /// <summary>
+        /// Creates the test document and directory and records the commit info of the
+        /// segment that <see cref="DocHelper"/> writes.
+        /// </summary>
+        [SetUp]
+        public override void SetUp()
+        {
+            base.SetUp();
+            TestDoc = new Document();
+            Dir = NewDirectory();
+            DocHelper.SetupDoc(TestDoc);
+            Info = DocHelper.WriteDoc(Random(), Dir, TestDoc);
+        }
+
+        /// <summary>
+        /// Disposes the fixture directory, then runs the base teardown.
+        /// </summary>
+        [TearDown]
+        public override void TearDown()
+        {
+            Dir.Dispose();
+            base.TearDown();
+        }
+
+        /// <summary>
+        /// Sanity check that <see cref="SetUp"/> created the directory.
+        /// </summary>
+        [Test]
+        public virtual void Test()
+        {
+            Assert.IsNotNull(Dir);
+        }
+
+        [Test]
+        public virtual void TestTermDocs()
+        {
+            TestTermDocs(1);
+        }
+
+        /// <summary>
+        /// Opens the fixture segment with the given divisor, seeks to the term "field"
+        /// in <c>DocHelper.TEXT_FIELD_2_KEY</c> and, if a posting is returned, checks
+        /// its document id and frequency.
+        /// </summary>
+        /// <param name="indexDivisor">Terms index divisor passed to the <see cref="SegmentReader"/>.</param>
+        public virtual void TestTermDocs(int indexDivisor)
+        {
+            //After adding the document, we should be able to read it back in
+            // 'using' guarantees the reader is released even when an assertion throws.
+            using (SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random())))
+            {
+                Assert.IsNotNull(reader);
+                Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor);
+
+                TermsEnum terms = reader.Fields.GetTerms(DocHelper.TEXT_FIELD_2_KEY).GetIterator(null);
+                terms.SeekCeil(new BytesRef("field"));
+                DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS);
+                if (termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    Assert.AreEqual(0, termDocs.DocID);
+                    Assert.AreEqual(3, termDocs.Freq);
+                }
+            }
+        }
+
+        [Test]
+        public virtual void TestBadSeek()
+        {
+            TestBadSeek(1);
+        }
+
+        /// <summary>
+        /// Asserts that no <see cref="DocsEnum"/> is returned for a term that is absent
+        /// from an existing field, nor for a field name that is not in the segment.
+        /// </summary>
+        /// <param name="indexDivisor">Terms index divisor passed to the <see cref="SegmentReader"/>.</param>
+        public virtual void TestBadSeek(int indexDivisor)
+        {
+            //After adding the document, we should be able to read it back in
+            using (SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random())))
+            {
+                Assert.IsNotNull(reader);
+                DocsEnum termDocs = TestUtil.Docs(Random(), reader, "textField2", new BytesRef("bad"), reader.LiveDocs, null, 0);
+
+                Assert.IsNull(termDocs);
+            }
+
+            //After adding the document, we should be able to read it back in
+            using (SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random())))
+            {
+                Assert.IsNotNull(reader);
+                DocsEnum termDocs = TestUtil.Docs(Random(), reader, "junk", new BytesRef("bad"), reader.LiveDocs, null, 0);
+                Assert.IsNull(termDocs);
+            }
+        }
+
+        [Test]
+        public virtual void TestSkipTo()
+        {
+            TestSkipTo(1);
+        }
+
+        /// <summary>
+        /// Indexes 10 "aaa" documents, 16 "bbb" documents and 50 "ccc" documents into a
+        /// single segment (doc ids 0-9, 10-25 and 26-75 respectively) and checks
+        /// <c>NextDoc</c>/<c>Advance</c> on the postings of each term.
+        /// </summary>
+        /// <param name="indexDivisor">Terms index divisor passed to <c>DirectoryReader.Open</c>.</param>
+        public virtual void TestSkipTo(int indexDivisor)
+        {
+            using (Directory dir = NewDirectory())
+            {
+                Term ta = new Term("content", "aaa");
+                Term tb = new Term("content", "bbb");
+                Term tc = new Term("content", "ccc");
+
+                using (IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())))
+                {
+                    for (int i = 0; i < 10; i++)
+                    {
+                        AddDoc(writer, "aaa aaa aaa aaa");
+                    }
+
+                    for (int i = 0; i < 16; i++)
+                    {
+                        AddDoc(writer, "bbb bbb bbb bbb");
+                    }
+
+                    for (int i = 0; i < 50; i++)
+                    {
+                        AddDoc(writer, "ccc ccc ccc ccc");
+                    }
+
+                    // ensure that we deal with a single segment
+                    writer.ForceMerge(1);
+                }
+
+                using (IndexReader reader = DirectoryReader.Open(dir, indexDivisor))
+                {
+                    // without optimization (assumption skipInterval == 16)
+
+                    // with next
+                    DocsEnum tdocs = OpenDocs(reader, ta, DocsEnum.FLAG_FREQS);
+                    ExpectNext(tdocs, 0, 4);
+                    ExpectNext(tdocs, 1, 4);
+                    ExpectAdvance(tdocs, 2, 2);
+                    ExpectAdvance(tdocs, 4, 4);
+                    ExpectAdvance(tdocs, 9, 9);
+                    ExpectExhausted(tdocs, 10);
+
+                    // without next
+                    tdocs = OpenDocs(reader, ta, 0);
+                    ExpectAdvance(tdocs, 0, 0);
+                    ExpectAdvance(tdocs, 4, 4);
+                    ExpectAdvance(tdocs, 9, 9);
+                    ExpectExhausted(tdocs, 10);
+
+                    // exactly skipInterval documents and therefore with optimization
+
+                    // with next
+                    tdocs = OpenDocs(reader, tb, DocsEnum.FLAG_FREQS);
+                    ExpectNext(tdocs, 10, 4);
+                    ExpectNext(tdocs, 11, 4);
+                    ExpectAdvance(tdocs, 12, 12);
+                    ExpectAdvance(tdocs, 15, 15);
+                    ExpectAdvance(tdocs, 24, 24);
+                    ExpectAdvance(tdocs, 25, 25);
+                    ExpectExhausted(tdocs, 26);
+
+                    // without next
+                    tdocs = OpenDocs(reader, tb, DocsEnum.FLAG_FREQS);
+                    ExpectAdvance(tdocs, 5, 10);
+                    ExpectAdvance(tdocs, 15, 15);
+                    ExpectAdvance(tdocs, 24, 24);
+                    ExpectAdvance(tdocs, 25, 25);
+                    ExpectExhausted(tdocs, 26);
+
+                    // much more than skipInterval documents and therefore with optimization
+
+                    // with next
+                    tdocs = OpenDocs(reader, tc, DocsEnum.FLAG_FREQS);
+                    ExpectNext(tdocs, 26, 4);
+                    ExpectNext(tdocs, 27, 4);
+                    ExpectAdvance(tdocs, 28, 28);
+                    ExpectAdvance(tdocs, 40, 40);
+                    ExpectAdvance(tdocs, 57, 57);
+                    ExpectAdvance(tdocs, 74, 74);
+                    ExpectAdvance(tdocs, 75, 75);
+                    ExpectExhausted(tdocs, 76);
+
+                    //without next
+                    tdocs = OpenDocs(reader, tc, 0);
+                    ExpectAdvance(tdocs, 5, 26);
+                    ExpectAdvance(tdocs, 40, 40);
+                    ExpectAdvance(tdocs, 57, 57);
+                    ExpectAdvance(tdocs, 74, 74);
+                    ExpectAdvance(tdocs, 75, 75);
+                    ExpectExhausted(tdocs, 76);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Re-runs the term docs, bad seek and skip-to checks with an index divisor of 2.
+        /// </summary>
+        [Test]
+        public virtual void TestIndexDivisor()
+        {
+            TestDoc = new Document();
+            DocHelper.SetupDoc(TestDoc);
+            // NOTE(review): the SegmentCommitInfo returned here is discarded, so the
+            // checks below still open the segment recorded in Info by SetUp - confirm
+            // that this is intended.
+            DocHelper.WriteDoc(Random(), Dir, TestDoc);
+            TestTermDocs(2);
+            TestBadSeek(2);
+            TestSkipTo(2);
+        }
+
+        /// <summary>
+        /// Adds a document with a single unstored text field named "content".
+        /// </summary>
+        private void AddDoc(IndexWriter writer, string value)
+        {
+            Document doc = new Document();
+            doc.Add(NewTextField("content", value, Field.Store.NO));
+            writer.AddDocument(doc);
+        }
+
+        /// <summary>
+        /// Pulls a fresh <see cref="DocsEnum"/> for <paramref name="term"/>, honoring the reader's live docs.
+        /// </summary>
+        private DocsEnum OpenDocs(IndexReader reader, Term term, int flags)
+        {
+            return TestUtil.Docs(Random(), reader, term.Field, new BytesRef(term.Text()), MultiFields.GetLiveDocs(reader), null, flags);
+        }
+
+        /// <summary>
+        /// Asserts that <c>NextDoc</c> lands on <paramref name="expectedDoc"/> with frequency <paramref name="expectedFreq"/>.
+        /// </summary>
+        private static void ExpectNext(DocsEnum docs, int expectedDoc, int expectedFreq)
+        {
+            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
+            Assert.AreEqual(expectedDoc, docs.DocID);
+            Assert.AreEqual(expectedFreq, docs.Freq);
+        }
+
+        /// <summary>
+        /// Asserts that <c>Advance(target)</c> lands on <paramref name="expectedDoc"/>.
+        /// </summary>
+        private static void ExpectAdvance(DocsEnum docs, int target, int expectedDoc)
+        {
+            Assert.AreNotEqual(DocIdSetIterator.NO_MORE_DOCS, docs.Advance(target));
+            Assert.AreEqual(expectedDoc, docs.DocID);
+        }
+
+        /// <summary>
+        /// Asserts that <c>Advance(target)</c> reports that no more documents are available.
+        /// </summary>
+        private static void ExpectExhausted(DocsEnum docs, int target)
+        {
+            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.Advance(target));
+        }
+    }
+}
\ No newline at end of file


Mime
View raw message