lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [30/58] [abbrv] lucenenet git commit: WIP on Grouping
Date Thu, 10 Nov 2016 11:33:41 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs
new file mode 100644
index 0000000..2a0b307
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/AllGroupHeadsCollectorTest.cs
@@ -0,0 +1,718 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using static Lucene.Net.Index.FieldInfo;
+using Lucene.Net.Search.Grouping.Terms;
+using System.Collections;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Search.Grouping
+{
+    public class AllGroupHeadsCollectorTest : LuceneTestCase
+    {
+        // Doc-values flavours the tests pick from at random when indexing the group field.
+        private static readonly DocValuesType_e[] vts = { DocValuesType_e.BINARY, DocValuesType_e.SORTED };
+
+        // Indexes eight small documents (three authors plus two documents without an author, split
+        // over two segments) and verifies the group heads reported for a handful of term queries
+        // under INT and STRING within-group sorts.
+        [Test]
+        public void TestBasic()
+        {
+            string groupField = "author";
+            Directory dir = NewDirectory();
+            RandomIndexWriter w = new RandomIndexWriter(
+                Random(),
+                dir,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMergePolicy(NewLogMergePolicy()));
+            bool isLucene3x = "Lucene3x".equals(w.w.Config.Codec.Name);
+            bool canUseIDV = !isLucene3x;
+            DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];
+
+            // docs 0-2
+            IndexBasicDoc(w, groupField, "author1", "random text", "1", canUseIDV, valueType);
+            IndexBasicDoc(w, groupField, "author1", "some more random text blob", "2", canUseIDV, valueType);
+            IndexBasicDoc(w, groupField, "author1", "some more random textual data", "3", canUseIDV, valueType);
+            w.Commit(); // To ensure a second segment
+
+            // docs 3-5
+            IndexBasicDoc(w, groupField, "author2", "some random text", "4", canUseIDV, valueType);
+            IndexBasicDoc(w, groupField, "author3", "some more random text", "5", canUseIDV, valueType);
+            IndexBasicDoc(w, groupField, "author3", "random blob", "6", canUseIDV, valueType);
+
+            // docs 6-7 -- no author field
+            IndexBasicDoc(w, groupField, null, "random word stuck in alot of other text", "6", canUseIDV, valueType);
+            IndexBasicDoc(w, groupField, null, "random word stuck in alot of other text", "7", canUseIDV, valueType);
+
+            IndexReader reader = w.Reader;
+            IndexSearcher indexSearcher = NewSearcher(reader);
+
+            w.Dispose();
+            int maxDoc = reader.MaxDoc;
+
+            // Descending numeric sort on id_1.
+            Sort sortWithinGroup = new Sort(new SortField("id_1", SortField.Type_e.INT, true));
+            AssertBasicGroupHeads(indexSearcher, "random", groupField, sortWithinGroup, canUseIDV, valueType, maxDoc, new int[] { 2, 3, 5, 7 });
+            AssertBasicGroupHeads(indexSearcher, "some", groupField, sortWithinGroup, canUseIDV, valueType, maxDoc, new int[] { 2, 3, 4 });
+            AssertBasicGroupHeads(indexSearcher, "blob", groupField, sortWithinGroup, canUseIDV, valueType, maxDoc, new int[] { 1, 5 });
+
+            // STRING sort type triggers different implementation
+            Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type_e.STRING, true));
+            AssertBasicGroupHeads(indexSearcher, "random", groupField, sortWithinGroup2, canUseIDV, valueType, maxDoc, new int[] { 2, 3, 5, 7 });
+
+            // Same STRING field, not reversed: the expected heads are docs 0, 3, 4 and 6.
+            Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type_e.STRING, false));
+            AssertBasicGroupHeads(indexSearcher, "random", groupField, sortWithinGroup3, canUseIDV, valueType, maxDoc, new int[] { 0, 3, 4, 6 });
+
+            indexSearcher.IndexReader.Dispose();
+            dir.Dispose();
+        }
+
+        // Adds one TestBasic document: the optional group field (skipped when author is null),
+        // the "content" text field and the "id_1"/"id_2" string fields, which both carry id.
+        private void IndexBasicDoc(RandomIndexWriter w, string groupField, string author, string content, string id, bool canUseIDV, DocValuesType_e valueType)
+        {
+            Document doc = new Document();
+            if (author != null)
+            {
+                AddGroupField(doc, groupField, author, canUseIDV, valueType);
+            }
+            doc.Add(NewTextField("content", content, Field.Store.NO));
+            doc.Add(NewStringField("id_1", id, Field.Store.NO));
+            doc.Add(NewStringField("id_2", id, Field.Store.NO));
+            w.AddDocument(doc);
+        }
+
+        // Runs a "content" term query through a freshly created random collector and asserts that
+        // both the array form and the bit-set form of its group heads equal expectedHeads.
+        private void AssertBasicGroupHeads(IndexSearcher searcher, string term, string groupField, Sort sortWithinGroup, bool canUseIDV, DocValuesType_e valueType, int maxDoc, int[] expectedHeads)
+        {
+            var collector = CreateRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
+            searcher.Search(new TermQuery(new Term("content", term)), collector);
+            assertTrue(ArrayContains(expectedHeads, collector.RetrieveGroupHeads()));
+            assertTrue(OpenBitSetContains(expectedHeads, collector.RetrieveGroupHeads(maxDoc), maxDoc));
+        }
+
+        // Randomized test. Each iteration indexes a random number of documents spread over random
+        // groups (about one document in 24 gets no group value), then runs 100 random term queries
+        // through a collector from CreateRandomCollector and checks that the collected group heads
+        // equal the ones computed independently by CreateExpectedGroupHeads.
+        [Test]
+        public void TestRandom()
+        {
+            int numberOfRuns = TestUtil.NextInt(Random(), 3, 6);
+            for (int iter = 0; iter < numberOfRuns; iter++)
+            {
+                if (VERBOSE)
+                {
+                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "TEST: iter={0} total={1}", iter, numberOfRuns));
+                }
+
+                int numDocs = TestUtil.NextInt(Random(), 100, 1000) * RANDOM_MULTIPLIER;
+                int numGroups = TestUtil.NextInt(Random(), 1, numDocs);
+
+                if (VERBOSE)
+                {
+                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
+                }
+
+                // Pool of random, non-empty values; it feeds the group field and the sort1/sort2 fields.
+                List<BytesRef> groups = new List<BytesRef>();
+                for (int i = 0; i < numGroups; i++)
+                {
+                    string randomValue;
+                    do
+                    {
+                        // B/c of DV based impl we can't see the difference between an empty string and a null value.
+                        // For that reason we don't generate empty string groups.
+                        randomValue = TestUtil.RandomRealisticUnicodeString(Random());
+                    } while ("".equals(randomValue));
+                    groups.Add(new BytesRef(randomValue));
+                }
+                string[] contentStrings = new string[TestUtil.NextInt(Random(), 2, 20)];
+                if (VERBOSE)
+                {
+                    Console.WriteLine("TEST: create fake content");
+                }
+                // Every content string is "realN " (N in 0..2) followed by up to nine "fake " tokens.
+                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
+                {
+                    StringBuilder sb = new StringBuilder();
+                    sb.append("real").append(Random().nextInt(3)).append(' ');
+                    int fakeCount = Random().nextInt(10);
+                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
+                    {
+                        sb.append("fake ");
+                    }
+                    contentStrings[contentIDX] = sb.toString();
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("  content=" + sb.toString());
+                    }
+                }
+
+                Directory dir = NewDirectory();
+                RandomIndexWriter w = new RandomIndexWriter(
+                    Random(),
+                    dir,
+                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
+                        new MockAnalyzer(Random())));
+                bool preFlex = "Lucene3x".equals(w.w.Config.Codec.Name);
+                bool canUseIDV = !preFlex;
+                DocValuesType_e valueType = vts[Random().nextInt(vts.Length)];
+
+                // Two reusable documents share the same Field instances: "doc" carries the group
+                // fields, "docNoGroup" omits them. Field values are overwritten for every document.
+                Document doc = new Document();
+                Document docNoGroup = new Document();
+                Field group = NewStringField("group", "", Field.Store.NO);
+                doc.Add(group);
+                Field valuesField = null;
+                if (canUseIDV)
+                {
+                    switch (valueType)
+                    {
+                        case DocValuesType_e.BINARY:
+                            valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
+                            break;
+                        case DocValuesType_e.SORTED:
+                            valuesField = new SortedDocValuesField("group_dv", new BytesRef());
+                            break;
+                            //default:
+                            //    fail("unhandled type");
+                    }
+                    doc.Add(valuesField);
+                }
+                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
+                doc.Add(sort1);
+                docNoGroup.Add(sort1);
+                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
+                doc.Add(sort2);
+                docNoGroup.Add(sort2);
+                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
+                doc.Add(sort3);
+                docNoGroup.Add(sort3);
+                Field content = NewTextField("content", "", Field.Store.NO);
+                doc.Add(content);
+                docNoGroup.Add(content);
+                IntField id = new IntField("id", 0, Field.Store.NO);
+                doc.Add(id);
+                docNoGroup.Add(id);
+                GroupDoc[] groupDocs = new GroupDoc[numDocs];
+                for (int i = 0; i < numDocs; i++)
+                {
+                    BytesRef groupValue;
+                    if (Random().nextInt(24) == 17)
+                    {
+                        // So we test the "doc doesn't have the group'd
+                        // field" case:
+                        groupValue = null;
+                    }
+                    else
+                    {
+                        groupValue = groups[Random().nextInt(groups.size())];
+                    }
+
+                    // sort3 is the zero-padded id, so it is unique per document.
+                    GroupDoc groupDoc = new GroupDoc(
+                        i,
+                        groupValue,
+                        groups[Random().nextInt(groups.size())],
+                        groups[Random().nextInt(groups.size())],
+                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
+                        contentStrings[Random().nextInt(contentStrings.Length)]
+                    );
+
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
+                    }
+
+                    groupDocs[i] = groupDoc;
+                    if (groupDoc.group != null)
+                    {
+                        group.StringValue = (groupDoc.group.Utf8ToString());
+                        if (canUseIDV)
+                        {
+                            valuesField.BytesValue = (new BytesRef(groupDoc.group.Utf8ToString()));
+                        }
+                    }
+                    sort1.StringValue = (groupDoc.sort1.Utf8ToString());
+                    sort2.StringValue = (groupDoc.sort2.Utf8ToString());
+                    sort3.StringValue = (groupDoc.sort3.Utf8ToString());
+                    content.StringValue = (groupDoc.content);
+                    id.IntValue = (groupDoc.id);
+                    if (groupDoc.group == null)
+                    {
+                        w.AddDocument(docNoGroup);
+                    }
+                    else
+                    {
+                        w.AddDocument(doc);
+                    }
+                }
+
+                DirectoryReader r = w.Reader;
+                w.Dispose();
+
+                // NOTE: intentional but temporary field cache insanity!
+                FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.GetInts(SlowCompositeReaderWrapper.Wrap(r), "id", false);
+                // Inverse mapping: our own "id" field value -> Lucene doc id.
+                int[] fieldIdToDocID = new int[numDocs];
+                for (int i = 0; i < numDocs; i++)
+                {
+                    int fieldId = docIdToFieldId.Get(i);
+                    fieldIdToDocID[fieldId] = i;
+                }
+
+                try
+                {
+                    IndexSearcher s = NewSearcher(r);
+                    if (typeof(SlowCompositeReaderWrapper).IsAssignableFrom(s.IndexReader.GetType()))
+                    {
+                        canUseIDV = false;
+                    }
+                    else
+                    {
+                        canUseIDV = !preFlex;
+                    }
+
+                    // Record every document's score; each document matches exactly one "realN" term.
+                    for (int contentID = 0; contentID < 3; contentID++)
+                    {
+                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
+                        foreach (ScoreDoc hit in hits)
+                        {
+                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
+                            assertTrue(gd.score == 0.0);
+                            gd.score = hit.Score;
+                            int docId = gd.id;
+                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
+                        }
+                    }
+
+                    foreach (GroupDoc gd in groupDocs)
+                    {
+                        assertTrue(gd.score != 0.0);
+                    }
+
+                    for (int searchIter = 0; searchIter < 100; searchIter++)
+                    {
+
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("TEST: searchIter=" + searchIter);
+                        }
+
+                        string searchTerm = "real" + Random().nextInt(3);
+                        bool sortByScoreOnly = Random().nextBoolean();
+                        Sort sortWithinGroup = GetRandomSort(sortByScoreOnly);
+                        var allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
+                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
+                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
+                        int[] actualGroupHeads = allGroupHeadsCollector.RetrieveGroupHeads();
+                        // The actual group heads contains Lucene ids. Need to change them into our id value.
+                        for (int i = 0; i < actualGroupHeads.Length; i++)
+                        {
+                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
+                        }
+                        // Allows us to easily iterate and assert the actual and expected results.
+                        Array.Sort(expectedGroupHeads);
+                        Array.Sort(actualGroupHeads);
+
+                        if (VERBOSE)
+                        {
+                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
+                            Console.WriteLine("Sort within group: " + sortWithinGroup);
+                            Console.WriteLine("Num group: " + numGroups);
+                            Console.WriteLine("Num doc: " + numDocs);
+                            Console.WriteLine("\n=== Expected: \n");
+                            foreach (int expectedDocId in expectedGroupHeads)
+                            {
+                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
+                                string expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
+                                // A format item is {index,alignment:format}; the alignment must precede the
+                                // colon, otherwise ",5" is parsed as part of the numeric format string.
+                                Console.WriteLine(
+                                    string.Format(CultureInfo.InvariantCulture,
+                                    "Group:{0,10} score{1,5:0.0#######} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
+                                    expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
+                                    expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
+                                );
+                            }
+                            Console.WriteLine("\n=== Actual: \n");
+                            foreach (int actualDocId in actualGroupHeads)
+                            {
+                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
+                                string actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
+                                Console.WriteLine(
+                                    string.Format(CultureInfo.InvariantCulture,
+                                    "Group:{0,10} score{1,5:0.0#######} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
+                                    actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
+                                    actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
+                                );
+                            }
+                            Console.WriteLine("\n===================================================================================");
+                        }
+
+                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
+                    }
+                }
+                finally
+                {
+                    QueryUtils.PurgeFieldCache(r);
+                }
+
+                r.Dispose();
+                dir.Dispose();
+            }
+        }
+
+
+        // Returns true when both arrays have the same length and every value of "expected" occurs
+        // in "actual". Note that "actual" is sorted in place as a side effect.
+        private bool ArrayContains(int[] expected, int[] actual)
+        {
+            // The collector does not always return heads in doc id order; sort so we can binary search.
+            Array.Sort(actual);
+            if (expected.Length != actual.Length)
+            {
+                return false;
+            }
+
+            for (int idx = 0; idx < expected.Length; idx++)
+            {
+                bool missing = Array.BinarySearch(actual, expected[idx]) < 0;
+                if (missing)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        // Returns true when "actual" has exactly expectedDocs.Length bits set and every doc id in
+        // expectedDocs is among them. maxDoc sizes the temporary bit set built from expectedDocs.
+        private bool OpenBitSetContains(int[] expectedDocs, FixedBitSet actual, int maxDoc)
+        {
+            bool sameCount = expectedDocs.Length == actual.Cardinality();
+            if (!sameCount)
+            {
+                return false;
+            }
+
+            FixedBitSet expected = new FixedBitSet(maxDoc);
+            for (int idx = 0; idx < expectedDocs.Length; idx++)
+            {
+                expected.Set(expectedDocs[idx]);
+            }
+
+            // Walk the expected bits and make sure each one is also set in the actual bit set.
+            DocIdSetIterator expectedIter = expected.GetIterator();
+            for (int doc = expectedIter.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = expectedIter.NextDoc())
+            {
+                if (!actual.Get(doc))
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        // Computes, without using any collector, the ids of the expected group heads for a search:
+        // documents whose content starts with searchTerm are bucketed by group value, and for each
+        // bucket the document that ranks first under docSort is selected.
+        //
+        // searchTerm      - the "realN" token the content must start with.
+        // groupDocs       - all indexed documents.
+        // docSort         - the within-group sort being tested.
+        // sortByScoreOnly - passed through to the comparator (enables its doc id tie-break on score).
+        // fieldIdToDocID  - maps our own id value to the Lucene doc id; passed through to the comparator.
+        //
+        // Returns one GroupDoc.id per distinct group value, in the map's key iteration order.
+        private int[] CreateExpectedGroupHeads(string searchTerm, GroupDoc[] groupDocs, Sort docSort, bool sortByScoreOnly, int[] fieldIdToDocID)
+        {
+            // groupDoc.group may be null (documents without a group); presumably HashMap is used
+            // here because it tolerates a null key - TODO confirm against Lucene.Net.Support.HashMap.
+            IDictionary<BytesRef, List<GroupDoc>> groupHeads = new HashMap<BytesRef, List<GroupDoc>>();
+            foreach (GroupDoc groupDoc in groupDocs)
+            {
+                // Ordinal comparison: the prefix is a fixed token, so culture rules must not apply.
+                if (!groupDoc.content.StartsWith(searchTerm, StringComparison.Ordinal))
+                {
+                    continue;
+                }
+
+                if (!groupHeads.ContainsKey(groupDoc.group))
+                {
+                    List<GroupDoc> list = new List<GroupDoc>();
+                    list.Add(groupDoc);
+                    groupHeads[groupDoc.group] = list;
+                    continue;
+                }
+                groupHeads[groupDoc.group].Add(groupDoc);
+            }
+
+            IComparer<GroupDoc> comparer = GetComparator(docSort, sortByScoreOnly, fieldIdToDocID);
+            int[] allGroupHeads = new int[groupHeads.Count];
+            int i = 0;
+            foreach (BytesRef groupValue in groupHeads.Keys)
+            {
+                List<GroupDoc> docs = groupHeads[groupValue];
+
+                // Only the first-ranked document is needed, so select the minimum directly instead of
+                // sorting. List<T>.Sort may hand the comparer the same element on both sides, which
+                // the comparator treats as an incomplete tie-break and fails the test; this scan only
+                // ever compares two different documents.
+                GroupDoc head = docs[0];
+                for (int j = 1; j < docs.Count; j++)
+                {
+                    if (comparer.Compare(docs[j], head) < 0)
+                    {
+                        head = docs[j];
+                    }
+                }
+                allGroupHeads[i++] = head.id;
+            }
+
+            return allGroupHeads;
+        }
+
+        // Builds a random within-group sort. With scoreOnly the sort is just FIELD_SCORE. Otherwise
+        // it is either FIELD_SCORE or zero to two of the sort1/sort2 string fields (random
+        // direction), always followed by a tie-breaker on sort3 or id.
+        private Sort GetRandomSort(bool scoreOnly)
+        {
+            var fields = new List<SortField>();
+
+            bool rolledScore = Random().nextInt(7) == 2;
+            if (rolledScore || scoreOnly)
+            {
+                fields.Add(SortField.FIELD_SCORE);
+            }
+            else if (Random().nextBoolean())
+            {
+                // A single string field, either sort1 or sort2.
+                string fieldName = Random().nextBoolean() ? "sort1" : "sort2";
+                fields.Add(new SortField(fieldName, SortField.Type_e.STRING, Random().nextBoolean()));
+            }
+            else if (Random().nextBoolean())
+            {
+                // Both string fields, sort1 first.
+                fields.Add(new SortField("sort1", SortField.Type_e.STRING, Random().nextBoolean()));
+                fields.Add(new SortField("sort2", SortField.Type_e.STRING, Random().nextBoolean()));
+            }
+
+            // Break ties (the coin is flipped even when scoreOnly makes it irrelevant):
+            bool tieBreakOnSort3 = Random().nextBoolean();
+            if (!scoreOnly)
+            {
+                SortField tieBreaker = tieBreakOnSort3
+                    ? new SortField("sort3", SortField.Type_e.STRING)
+                    : new SortField("id", SortField.Type_e.INT);
+                fields.Add(tieBreaker);
+            }
+
+            return new Sort(fields.ToArray());
+        }
+
+        // Orders GroupDoc instances the way the given sort fields are expected to order the indexed
+        // documents: SCORE compares scores (higher first), "sort1"/"sort2"/"sort3" compare the
+        // corresponding BytesRef values and any other field must be "id". A field's Reverse flag
+        // negates its result. Replaces the anonymous Comparator of the Java original.
+        internal class ComparatorAnonymousHelper : IComparer<GroupDoc>
+        {
+            private readonly AllGroupHeadsCollectorTest outerInstance;
+            private readonly SortField[] sortFields;
+            private readonly bool sortByScoreOnly;
+            private readonly int[] fieldIdToDocID;
+
+            public ComparatorAnonymousHelper(AllGroupHeadsCollectorTest outerInstance, SortField[] sortFields, bool sortByScoreOnly, int[] fieldIdToDocID)
+            {
+                this.outerInstance = outerInstance;
+                this.sortFields = sortFields;
+                this.sortByScoreOnly = sortByScoreOnly;
+                this.fieldIdToDocID = fieldIdToDocID;
+            }
+
+            // Returns a negative value when d1 ranks before d2, a positive value when it ranks after
+            // and 0 only when both arguments are the same instance. Two different documents that
+            // compare equal on every sort field fail the test.
+            public int Compare(GroupDoc d1, GroupDoc d2)
+            {
+                // IComparer requires Compare(x, x) == 0, and List<T>.Sort may compare an element
+                // with itself; that is not an incomplete tie-break, so answer before reaching fail().
+                if (object.ReferenceEquals(d1, d2))
+                {
+                    return 0;
+                }
+
+                foreach (SortField sf in sortFields)
+                {
+                    int cmp;
+                    if (sf.Type == SortField.Type_e.SCORE)
+                    {
+                        if (d1.score > d2.score)
+                        {
+                            cmp = -1;
+                        }
+                        else if (d1.score < d2.score)
+                        {
+                            cmp = 1;
+                        }
+                        else
+                        {
+                            // Equal scores: when sorting by score only, the lower Lucene doc id wins.
+                            cmp = sortByScoreOnly ? fieldIdToDocID[d1.id].CompareTo(fieldIdToDocID[d2.id]) : 0;
+                        }
+                    }
+                    else if (sf.Field.equals("sort1"))
+                    {
+                        cmp = d1.sort1.CompareTo(d2.sort1);
+                    }
+                    else if (sf.Field.equals("sort2"))
+                    {
+                        cmp = d1.sort2.CompareTo(d2.sort2);
+                    }
+                    else if (sf.Field.equals("sort3"))
+                    {
+                        cmp = d1.sort3.CompareTo(d2.sort3);
+                    }
+                    else
+                    {
+                        assertEquals(sf.Field, "id");
+                        cmp = d1.id.CompareTo(d2.id);
+                    }
+                    if (cmp != 0)
+                    {
+                        return sf.Reverse ? -cmp : cmp;
+                    }
+                }
+                // Our sort always fully tie breaks:
+                fail();
+                return 0;
+            }
+        }
+
+        // Creates the comparer that ranks GroupDoc instances according to the fields of the given
+        // sort; see ComparatorAnonymousHelper for the comparison rules.
+        private IComparer<GroupDoc> GetComparator(Sort sort, bool sortByScoreOnly, int[] fieldIdToDocID)
+        {
+            return new ComparatorAnonymousHelper(this, sort.GetSort(), sortByScoreOnly, fieldIdToDocID);
+        }
+
+        // Picks, with equal probability, either the function-based or the term-based all-group-heads
+        // collector for the given group field and within-group sort. canUseIDV and valueType are
+        // accepted but not consulted here.
+        private AbstractAllGroupHeadsCollector CreateRandomCollector(string groupField, Sort sortWithinGroup, bool canUseIDV, DocValuesType_e valueType)
+        {
+            bool useFunctionCollector = Random().nextBoolean();
+
+            AbstractAllGroupHeadsCollector result;
+            if (!useFunctionCollector)
+            {
+                result = TermAllGroupHeadsCollector.Create(groupField, sortWithinGroup);
+            }
+            else
+            {
+                ValueSource groupSource = new BytesRefFieldSource(groupField);
+                result = new FunctionAllGroupHeadsCollector(groupSource, new Hashtable(), sortWithinGroup);
+            }
+
+            if (VERBOSE)
+            {
+                Console.WriteLine("Selected implementation: " + result.GetType().Name);
+            }
+
+            return result;
+        }
+
+        // Adds the group value to doc as a stored text field named groupField and, when canUseIDV
+        // is set, also as a doc-values field named groupField + "_dv" of the requested type.
+        private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV, DocValuesType_e valueType)
+        {
+            doc.Add(new TextField(groupField, value, Field.Store.YES));
+            if (!canUseIDV)
+            {
+                return;
+            }
+
+            // Stays null for any type other than BINARY or SORTED.
+            Field dvField = null;
+            if (valueType == DocValuesType_e.BINARY)
+            {
+                dvField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
+            }
+            else if (valueType == DocValuesType_e.SORTED)
+            {
+                dvField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
+            }
+            doc.Add(dvField);
+        }
+
+        // In-memory mirror of one indexed document, used to compute the expected group heads.
+        // Everything except the score is fixed at construction time.
+        internal class GroupDoc
+        {
+            // content must be "realN ..."
+            internal readonly string content;
+            internal readonly BytesRef sort3;
+            internal readonly BytesRef sort2;
+            internal readonly BytesRef sort1;
+            // null when the document has no group value
+            internal readonly BytesRef group;
+            internal readonly int id;
+            // Filled in after indexing; starts at 0.
+            internal float score;
+
+            public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, BytesRef sort3, string content)
+            {
+                this.content = content;
+                this.sort3 = sort3;
+                this.sort2 = sort2;
+                this.sort1 = sort1;
+                this.group = group;
+                this.id = id;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs
new file mode 100644
index 0000000..c76a4da
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/AllGroupsCollectorTest.cs
@@ -0,0 +1,138 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.Collections;
+using Lucene.Net.Search.Grouping.Terms;
+
+namespace Lucene.Net.Search.Grouping
+{
+    public class AllGroupsCollectorTest : LuceneTestCase
+    {
+        /// <summary>
+        /// Indexes seven small documents (six carrying an author group value, one without),
+        /// with a commit after the third one, and checks the group count reported by a
+        /// randomly chosen all-groups collector for three term queries.
+        /// </summary>
+        [Test]
+        public void TestTotalGroupCount()
+        {
+            string groupField = "author";
+            FieldType customType = new FieldType();
+            customType.Stored = true;
+
+            Directory dir = NewDirectory();
+            RandomIndexWriter w = new RandomIndexWriter(
+                Random(),
+                dir,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
+                    .SetMergePolicy(NewLogMergePolicy()));
+            bool canUseIDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+
+            // Builds and indexes one document; a null author leaves the group field out.
+            Action<string, string, string> addDoc = (author, content, id) =>
+            {
+                Document doc = new Document();
+                if (author != null)
+                {
+                    AddGroupField(doc, groupField, author, canUseIDV);
+                }
+                doc.Add(new TextField("content", content, Field.Store.YES));
+                doc.Add(new Field("id", id, customType));
+                w.AddDocument(doc);
+            };
+
+            addDoc("author1", "random text", "1");                        // 0
+            addDoc("author1", "some more random text blob", "2");         // 1
+            addDoc("author1", "some more random textual data", "3");      // 2
+            w.Commit(); // To ensure a second segment
+            addDoc("author2", "some random text", "4");                   // 3
+            addDoc("author3", "some more random text", "5");              // 4
+            addDoc("author3", "random blob", "6");                        // 5
+            addDoc(null, "random word stuck in alot of other text", "6"); // 6 -- no author field
+
+            IndexSearcher indexSearcher = NewSearcher(w.Reader);
+            w.Dispose();
+
+            // Each content term is paired with the number of groups expected for it.
+            string[] terms = { "random", "some", "blob" };
+            int[] expectedGroupCounts = { 4, 3, 2 };
+            for (int i = 0; i < terms.Length; i++)
+            {
+                AbstractAllGroupsCollector allGroupsCollector = CreateRandomCollector(groupField, canUseIDV);
+                indexSearcher.Search(new TermQuery(new Term("content", terms[i])), allGroupsCollector);
+                assertEquals(expectedGroupCounts[i], allGroupsCollector.GroupCount);
+            }
+
+            indexSearcher.IndexReader.Dispose();
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Adds <paramref name="value"/> to <paramref name="doc"/> as a stored text field named
+        /// <paramref name="groupField"/> and, when <paramref name="canUseIDV"/> is true, also as
+        /// a sorted doc-values field under the same name.
+        /// </summary>
+        private void AddGroupField(Document doc, string groupField, string value, bool canUseIDV)
+        {
+            TextField textField = new TextField(groupField, value, Field.Store.YES);
+            doc.Add(textField);
+            if (!canUseIDV)
+            {
+                return;
+            }
+
+            BytesRef bytes = new BytesRef(value);
+            doc.Add(new SortedDocValuesField(groupField, bytes));
+        }
+
+        /// <summary>
+        /// Flips a coin to return either a term based or a function based all-groups collector
+        /// for <paramref name="groupField"/>. <paramref name="canUseIDV"/> is accepted but not
+        /// consulted. The chosen implementation is printed when VERBOSE is on.
+        /// </summary>
+        private AbstractAllGroupsCollector CreateRandomCollector(string groupField, bool canUseIDV)
+        {
+            AbstractAllGroupsCollector selected;
+            bool useTermImpl = Random().nextBoolean();
+            if (!useTermImpl)
+            {
+                ValueSource source = new BytesRefFieldSource(groupField);
+                selected = new FunctionAllGroupsCollector(source, new Hashtable());
+            }
+            else
+            {
+                selected = new TermAllGroupsCollector(groupField);
+            }
+
+            if (VERBOSE)
+            {
+                string implName = selected.GetType().Name;
+                Console.WriteLine("Selected implementation: " + implName);
+            }
+
+            return selected;
+        }
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs b/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
new file mode 100644
index 0000000..854050b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/DistinctValuesCollectorTest.cs
@@ -0,0 +1,648 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Search.Grouping.Function;
+using Lucene.Net.Search.Grouping.Term;
+using Lucene.Net.Index;
+using Lucene.Net.Queries.Function.ValueSources;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Mutable;
+using NUnit.Framework;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Search.Grouping;
+using Lucene.Net.Search.Grouping.Terms;
+
+namespace Lucene.Net.Search.Grouping
+{
+    public class DistinctValuesCollectorTest : AbstractGroupingTestCase
+    {
+        private readonly static NullComparator nullComparator = new NullComparator();
+
+        private readonly string groupField = "author";
+        private readonly string dvGroupField = "author_dv";
+        private readonly string countField = "publisher";
+        private readonly string dvCountField = "publisher_dv";
+
+        /// <summary>
+        /// Orders group counts by their group value, placing entries whose group value is
+        /// null before all others and treating two null group values as equal.
+        /// </summary>
+        internal class ComparerAnonymousHelper1 : IComparer<AbstractGroupCount<IComparable<object>>>
+        {
+            // Kept for the enclosing test instance; Compare does not read it.
+            private readonly DistinctValuesCollectorTest outerInstance;
+
+            public ComparerAnonymousHelper1(DistinctValuesCollectorTest outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            /// <summary>
+            /// Returns 0 when both group values are null, -1 when only the first is null,
+            /// 1 when only the second is null, otherwise the result of CompareTo.
+            /// </summary>
+            public int Compare(AbstractGroupCount<IComparable<object>> groupCount1, AbstractGroupCount<IComparable<object>> groupCount2)
+            {
+                IComparable<object> left = groupCount1.groupValue;
+                IComparable<object> right = groupCount2.groupValue;
+
+                if (left == null)
+                {
+                    return right == null ? 0 : -1;
+                }
+                if (right == null)
+                {
+                    return 1;
+                }
+                return left.CompareTo(right);
+            }
+        }
+
+        /// <summary>
+        /// Indexes seven hand-written documents (with a commit after the third one, meant to
+        /// ensure a second segment), then for each of the terms "random", "some" and "blob"
+        /// runs a first-pass grouping collector followed by a distinct-values collector and
+        /// verifies the reported groups and the unique count-field values of each group.
+        /// </summary>
+        [Test]
+        public void TestSimple()
+        {
+            Random random = Random();
+            FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
+                FieldInfo.DocValuesType_e.NUMERIC,
+                FieldInfo.DocValuesType_e.BINARY,
+                FieldInfo.DocValuesType_e.SORTED,
+            };
+            Directory dir = NewDirectory();
+            RandomIndexWriter w = new RandomIndexWriter(
+                random,
+                dir,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT,
+                    new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy()));
+            bool canUseDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+            // null means "index no doc-values fields at all".
+            FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;
+
+            // 0
+            Document doc = new Document();
+            addField(doc, groupField, "1", dvType);
+            addField(doc, countField, "1", dvType);
+            doc.Add(new TextField("content", "random text", Field.Store.NO));
+            doc.Add(new StringField("id", "1", Field.Store.NO));
+            w.AddDocument(doc);
+
+            // 1
+            doc = new Document();
+            addField(doc, groupField, "1", dvType);
+            addField(doc, countField, "1", dvType);
+            doc.Add(new TextField("content", "some more random text blob", Field.Store.NO));
+            doc.Add(new StringField("id", "2", Field.Store.NO));
+            w.AddDocument(doc);
+
+            // 2
+            doc = new Document();
+            addField(doc, groupField, "1", dvType);
+            addField(doc, countField, "2", dvType);
+            doc.Add(new TextField("content", "some more random textual data", Field.Store.NO));
+            doc.Add(new StringField("id", "3", Field.Store.NO));
+            w.AddDocument(doc);
+            w.Commit(); // To ensure a second segment
+
+            // 3 -- no count field
+            doc = new Document();
+            addField(doc, groupField, "2", dvType);
+            doc.Add(new TextField("content", "some random text", Field.Store.NO));
+            doc.Add(new StringField("id", "4", Field.Store.NO));
+            w.AddDocument(doc);
+
+            // 4
+            doc = new Document();
+            addField(doc, groupField, "3", dvType);
+            addField(doc, countField, "1", dvType);
+            doc.Add(new TextField("content", "some more random text", Field.Store.NO));
+            doc.Add(new StringField("id", "5", Field.Store.NO));
+            w.AddDocument(doc);
+
+            // 5
+            doc = new Document();
+            addField(doc, groupField, "3", dvType);
+            addField(doc, countField, "1", dvType);
+            doc.Add(new TextField("content", "random blob", Field.Store.NO));
+            doc.Add(new StringField("id", "6", Field.Store.NO));
+            w.AddDocument(doc);
+
+            // 6 -- no author field
+            doc = new Document();
+            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
+            addField(doc, countField, "1", dvType);
+            doc.Add(new StringField("id", "6", Field.Store.NO));
+            w.AddDocument(doc);
+
+            IndexSearcher indexSearcher = NewSearcher(w.Reader);
+            w.Dispose();
+
+            // Sorts group counts by group value with null group values first
+            // (replaces the anonymous Comparator of the Java original).
+            var cmp = new ComparerAnonymousHelper1(this);
+
+            // === Search for content:random
+            // The type argument is spelled out because it cannot be inferred from the arguments.
+            AbstractFirstPassGroupingCollector<IComparable<object>> firstCollector = createRandomFirstPassCollector<IComparable<object>>(dvType, new Sort(), groupField, 10);
+            indexSearcher.Search(new TermQuery(new Term("content", "random")), firstCollector);
+            // Declared with var so the variable keeps the distinct-values collector type
+            // returned by the helper instead of the plain Collector base type.
+            var distinctValuesCollector
+                = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+            indexSearcher.Search(new TermQuery(new Term("content", "random")), distinctValuesCollector);
+
+            var gcs = distinctValuesCollector.GetGroups();
+            gcs.Sort(cmp);
+            assertEquals(4, gcs.Count);
+
+            compareNull(gcs[0].groupValue);
+            List<IComparable> countValues = new List<IComparable>(gcs[0].uniqueValues);
+            assertEquals(1, countValues.Count);
+            compare("1", countValues[0]);
+
+            compare("1", gcs[1].groupValue);
+            countValues = new List<IComparable>(gcs[1].uniqueValues);
+            countValues.Sort(nullComparator);
+            assertEquals(2, countValues.Count);
+            compare("1", countValues[0]);
+            compare("2", countValues[1]);
+
+            compare("2", gcs[2].groupValue);
+            countValues = new List<IComparable>(gcs[2].uniqueValues);
+            assertEquals(1, countValues.Count);
+            compareNull(countValues[0]);
+
+            compare("3", gcs[3].groupValue);
+            countValues = new List<IComparable>(gcs[3].uniqueValues);
+            assertEquals(1, countValues.Count);
+            compare("1", countValues[0]);
+
+            // === Search for content:some
+            firstCollector = createRandomFirstPassCollector<IComparable<object>>(dvType, new Sort(), groupField, 10);
+            indexSearcher.Search(new TermQuery(new Term("content", "some")), firstCollector);
+            distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+            indexSearcher.Search(new TermQuery(new Term("content", "some")), distinctValuesCollector);
+
+            gcs = distinctValuesCollector.GetGroups();
+            gcs.Sort(cmp);
+            assertEquals(3, gcs.Count);
+
+            compare("1", gcs[0].groupValue);
+            countValues = new List<IComparable>(gcs[0].uniqueValues);
+            assertEquals(2, countValues.Count);
+            countValues.Sort(nullComparator);
+            compare("1", countValues[0]);
+            compare("2", countValues[1]);
+
+            compare("2", gcs[1].groupValue);
+            countValues = new List<IComparable>(gcs[1].uniqueValues);
+            assertEquals(1, countValues.Count);
+            compareNull(countValues[0]);
+
+            compare("3", gcs[2].groupValue);
+            countValues = new List<IComparable>(gcs[2].uniqueValues);
+            assertEquals(1, countValues.Count);
+            compare("1", countValues[0]);
+
+            // === Search for content:blob
+            firstCollector = createRandomFirstPassCollector<IComparable<object>>(dvType, new Sort(), groupField, 10);
+            indexSearcher.Search(new TermQuery(new Term("content", "blob")), firstCollector);
+            distinctValuesCollector = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+            indexSearcher.Search(new TermQuery(new Term("content", "blob")), distinctValuesCollector);
+
+            gcs = distinctValuesCollector.GetGroups();
+            gcs.Sort(cmp);
+            assertEquals(2, gcs.Count);
+
+            compare("1", gcs[0].groupValue);
+            countValues = new List<IComparable>(gcs[0].uniqueValues);
+            // B/c the only one document matched with blob inside the author 1 group
+            assertEquals(1, countValues.Count);
+            compare("1", countValues[0]);
+
+            compare("3", gcs[1].groupValue);
+            countValues = new List<IComparable>(gcs[1].uniqueValues);
+            assertEquals(1, countValues.Count);
+            compare("1", countValues[0]);
+
+            indexSearcher.IndexReader.Dispose();
+            dir.Dispose();
+        }
+
+        /// <summary>
+        /// Randomized check: builds between 3 and 6 random indexes and, for each, runs 100
+        /// searches on a randomly picked content term. For every search the groups returned by
+        /// the distinct-values collector are compared, group by group and value by value,
+        /// against the expectation computed by <c>createExpectedResult</c>.
+        /// </summary>
+        [Test]
+        public void testRandom()
+        {
+            Random random = Random();
+            int numberOfRuns = TestUtil.NextInt(random, 3, 6);
+            for (int indexIter = 0; indexIter < numberOfRuns; indexIter++)
+            {
+                IndexContext context = createIndexContext();
+                for (int searchIter = 0; searchIter < 100; searchIter++)
+                {
+                    IndexSearcher searcher = NewSearcher(context.indexReader);
+                    bool useDv = context.dvType != null && random.nextBoolean();
+                    FieldInfo.DocValuesType_e? dvType = useDv ? context.dvType : (FieldInfo.DocValuesType_e?)null;
+                    string term = context.contentStrings[random.nextInt(context.contentStrings.Length)];
+                    Sort groupSort = new Sort(new SortField("id", SortField.Type_e.STRING));
+                    int topN = 1 + random.nextInt(10);
+
+                    List<AbstractGroupCount<IComparable>> expectedResult = createExpectedResult(context, term, groupSort, topN);
+
+                    // The type argument is spelled out because it cannot be inferred from the arguments.
+                    AbstractFirstPassGroupingCollector<IComparable> firstCollector = createRandomFirstPassCollector<IComparable>(dvType, groupSort, groupField, topN);
+                    searcher.Search(new TermQuery(new Term("content", term)), firstCollector);
+                    var distinctValuesCollector
+                        = createDistinctCountCollector(firstCollector, groupField, countField, dvType.GetValueOrDefault());
+                    searcher.Search(new TermQuery(new Term("content", term)), distinctValuesCollector);
+
+                    List<AbstractGroupCount<IComparable>> actualResult = (List<AbstractGroupCount<IComparable>>)distinctValuesCollector.Groups;
+
+                    if (VERBOSE)
+                    {
+                        Console.WriteLine("Index iter=" + indexIter);
+                        Console.WriteLine("Search iter=" + searchIter);
+                        Console.WriteLine("1st pass collector class name=" + firstCollector.GetType().Name);
+                        Console.WriteLine("2nd pass collector class name=" + distinctValuesCollector.GetType().Name);
+                        Console.WriteLine("Search term=" + term);
+                        Console.WriteLine("DVType=" + dvType);
+                        Console.WriteLine("1st pass groups=" + firstCollector.GetTopGroups(0, false));
+                        Console.WriteLine("Expected:");
+                        printGroups(expectedResult);
+                        Console.WriteLine("Actual:");
+                        printGroups(actualResult);
+                    }
+
+                    assertEquals(expectedResult.Count, actualResult.Count);
+                    for (int i = 0; i < expectedResult.Count; i++)
+                    {
+                        AbstractGroupCount<IComparable> expected = expectedResult[i];
+                        AbstractGroupCount<IComparable> actual = actualResult[i];
+                        assertValues(expected.groupValue, actual.groupValue);
+                        assertEquals(expected.uniqueValues.Count, actual.uniqueValues.Count);
+
+                        // Unique values carry no defined order, so both sides are sorted with
+                        // the same comparator before the element-wise comparison.
+                        List<IComparable> expectedUniqueValues = new List<IComparable>(expected.uniqueValues);
+                        expectedUniqueValues.Sort(nullComparator);
+                        List<IComparable> actualUniqueValues = new List<IComparable>(actual.uniqueValues);
+                        actualUniqueValues.Sort(nullComparator);
+                        for (int j = 0; j < expectedUniqueValues.Count; j++)
+                        {
+                            assertValues(expectedUniqueValues[j], actualUniqueValues[j]);
+                        }
+                    }
+                }
+                context.indexReader.Dispose();
+                context.directory.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Writes every group of <paramref name="results"/> to the console: one line with the
+        /// index and group value, followed by one indented line per unique value. BytesRef
+        /// values are decoded with Utf8ToString; anything else is printed as is.
+        /// </summary>
+        /// <param name="results">Group counts to print, in the same list type the callers build.</param>
+        private void printGroups(List<AbstractGroupCount<IComparable>> results)
+        {
+            for (int i = 0; i < results.Count; i++)
+            {
+                var group = results[i];
+                object gv = group.groupValue;
+                BytesRef groupBytes = gv as BytesRef;
+                if (groupBytes != null)
+                {
+                    Console.WriteLine(i + ": groupValue=" + groupBytes.Utf8ToString());
+                }
+                else
+                {
+                    Console.WriteLine(i + ": groupValue=" + gv);
+                }
+                foreach (object o in group.uniqueValues)
+                {
+                    BytesRef valueBytes = o as BytesRef;
+                    if (valueBytes != null)
+                    {
+                        Console.WriteLine("  " + valueBytes.Utf8ToString());
+                    }
+                    else
+                    {
+                        Console.WriteLine("  " + o);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Checks <paramref name="actual"/> against <paramref name="expected"/>: a non-null
+        /// expectation is cast to BytesRef, decoded with Utf8ToString and handed to
+        /// <c>compare</c>; a null expectation is handed to <c>compareNull</c>.
+        /// </summary>
+        private void assertValues(object expected, object actual)
+        {
+            if (expected != null)
+            {
+                string expectedText = ((BytesRef)expected).Utf8ToString();
+                compare(expectedText, actual);
+                return;
+            }
+
+            compareNull(actual);
+        }
+
+        /// <summary>
+        /// Asserts that <paramref name="groupValue"/> represents <paramref name="expected"/>,
+        /// converting the expected text to whatever runtime type the value has: BytesRef
+        /// (compared as UTF-8 text), double or long (parsed with the invariant culture), or
+        /// MutableValue (compared to a MutableValueStr wrapping the expected bytes).
+        /// Any other value, including null, fails the test.
+        /// </summary>
+        /// <param name="expected">Expected value in its textual form.</param>
+        /// <param name="groupValue">Value produced by a collector.</param>
+        private void compare(string expected, object groupValue)
+        {
+            // The "is" operator replaces typeof(X).IsAssignableFrom(groupValue.GetType());
+            // it yields false for null, so a null value reaches fail() below instead of
+            // raising a NullReferenceException.
+            if (groupValue is BytesRef)
+            {
+                assertEquals(expected, ((BytesRef)groupValue).Utf8ToString());
+            }
+            else if (groupValue is double)
+            {
+                assertEquals(double.Parse(expected, CultureInfo.InvariantCulture), groupValue);
+            }
+            else if (groupValue is long)
+            {
+                assertEquals(long.Parse(expected, CultureInfo.InvariantCulture), groupValue);
+            }
+            else if (groupValue is MutableValue)
+            {
+                MutableValueStr mutableValue = new MutableValueStr();
+                mutableValue.Value = new BytesRef(expected);
+                assertEquals(mutableValue, groupValue);
+            }
+            else
+            {
+                fail();
+            }
+        }
+
+        /// <summary>
+        /// Asserts that <paramref name="groupValue"/> is the "missing value" representation
+        /// for its runtime type: null, an empty BytesRef, 0.0, 0L or a MutableValue whose
+        /// Exists flag is false. Any other type fails the test.
+        /// </summary>
+        private void compareNull(object groupValue)
+        {
+            // term based impl...
+            if (groupValue == null)
+            {
+                return;
+            }
+
+            // DV based impls..
+            if (groupValue is BytesRef)
+            {
+                assertEquals("", ((BytesRef)groupValue).Utf8ToString());
+                return;
+            }
+            if (groupValue is double)
+            {
+                assertEquals(0.0d, groupValue);
+                return;
+            }
+            if (groupValue is long)
+            {
+                assertEquals(0L, groupValue);
+                return;
+            }
+
+            // Function based impl
+            if (groupValue is MutableValue)
+            {
+                assertFalse(((MutableValue)groupValue).Exists);
+                return;
+            }
+
+            fail();
+        }
+
+        /// <summary>
+        /// Adds <paramref name="value"/> to <paramref name="doc"/> as a stored string field
+        /// named <paramref name="field"/>. When <paramref name="type"/> is not null, a second
+        /// field named <c>field + "_dv"</c> is added whose kind (numeric, binary or sorted
+        /// doc values) follows <paramref name="type"/>; the numeric kind parses the value as an
+        /// invariant-culture integer.
+        /// </summary>
+        private void addField(Document doc, string field, string value, FieldInfo.DocValuesType_e? type)
+        {
+            doc.Add(new StringField(field, value, Field.Store.YES));
+            if (!type.HasValue)
+            {
+                return;
+            }
+
+            string docValuesName = field + "_dv";
+            Field docValuesField;
+            switch (type.Value)
+            {
+                case FieldInfo.DocValuesType_e.NUMERIC:
+                    docValuesField = new NumericDocValuesField(docValuesName, int.Parse(value, CultureInfo.InvariantCulture));
+                    break;
+                case FieldInfo.DocValuesType_e.BINARY:
+                    docValuesField = new BinaryDocValuesField(docValuesName, new BytesRef(value));
+                    break;
+                case FieldInfo.DocValuesType_e.SORTED:
+                    docValuesField = new SortedDocValuesField(docValuesName, new BytesRef(value));
+                    break;
+                default:
+                    // Any other doc-values type leaves the field unset, as before.
+                    docValuesField = null;
+                    break;
+            }
+            doc.Add(docValuesField);
+        }
+
+        /// <summary>
+        /// Creates the second-pass distinct-values collector matching the implementation of
+        /// <paramref name="firstPassGroupingCollector"/>: a function based collector when the
+        /// first pass was function based, a term based collector otherwise. The top groups of
+        /// the first pass (offset 0, no field filling) are handed to the new collector.
+        /// </summary>
+        /// <param name="firstPassGroupingCollector">First-pass collector that has already been searched with.</param>
+        /// <param name="groupField">Field the documents are grouped by.</param>
+        /// <param name="countField">Field whose distinct values are collected per group.</param>
+        /// <param name="dvType">Not consulted by this method. Nullable so that callers holding a
+        /// nullable doc-values type can pass it through unchanged.</param>
+        private AbstractDistinctValuesCollector<AbstractGroupCount<T>> createDistinctCountCollector<T>(AbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
+                                                                            string groupField,
+                                                                            string countField,
+                                                                            FieldInfo.DocValuesType_e? dvType)
+                  where T : IComparable
+        {
+            ICollection<SearchGroup<T>> searchGroups = firstPassGroupingCollector.GetTopGroups(0, false);
+            if (typeof(FunctionFirstPassGroupingCollector).IsAssignableFrom(firstPassGroupingCollector.GetType()))
+            {
+                return (AbstractDistinctValuesCollector)new FunctionDistinctValuesCollector(new Hashtable(), new BytesRefFieldSource(groupField), new BytesRefFieldSource(countField), searchGroups as ICollection<SearchGroup<MutableValue>>);
+            }
+            else
+            {
+                return (AbstractDistinctValuesCollector)new TermDistinctValuesCollector(groupField, countField, searchGroups as ICollection<SearchGroup<BytesRef>>);
+            }
+        }
+
+        /// <summary>
+        /// Flips a coin to create either a function based or a term based first-pass grouping
+        /// collector for <paramref name="groupField"/>.
+        /// </summary>
+        /// <param name="dvType">Doc-values type in use, or null when none is. Nullable because
+        /// callers pass a nullable value; it does not influence the choice, since both the
+        /// null and the non-null case pick between the same two implementations.</param>
+        /// <param name="groupSort">Sort used to order the groups.</param>
+        /// <param name="groupField">Field the documents are grouped by.</param>
+        /// <param name="topNGroups">Number of top groups the collector keeps.</param>
+        private AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector<T>(FieldInfo.DocValuesType_e? dvType, Sort groupSort, string groupField, int topNGroups)
+        {
+            Random random = Random();
+            // Exactly one coin flip per call, as in the former duplicated branches.
+            if (random.nextBoolean())
+            {
+                return (AbstractFirstPassGroupingCollector<T>)new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new Hashtable(), groupSort, topNGroups);
+            }
+            else
+            {
+                return (AbstractFirstPassGroupingCollector<T>)new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
+            }
+        }
+
+        /// <summary>
+        /// Group count over BytesRef values, used by this test to build its expected results:
+        /// a group value plus the unique values recorded for it.
+        /// </summary>
+        internal class GroupCount : AbstractGroupCount<BytesRef>
+        {
+            /// <summary>
+            /// Passes <paramref name="groupValue"/> to the base constructor and merges
+            /// <paramref name="uniqueValues"/> into the inherited uniqueValues collection.
+            /// </summary>
+            internal GroupCount(BytesRef groupValue, ICollection<BytesRef> uniqueValues)
+                : base(groupValue)
+            {
+                this.uniqueValues.UnionWith(uniqueValues);
+            }
+        }
+
+        /// <summary>
+        /// Builds the expected group counts for <paramref name="term"/> from the bookkeeping in
+        /// <paramref name="context"/>: walks the recorded groups in the dictionary's key order,
+        /// keeps at most <paramref name="topN"/> of them and converts group and count values to
+        /// BytesRef (null stays null). <paramref name="groupSort"/> is not consulted.
+        /// </summary>
+        private List<AbstractGroupCount<IComparable>> createExpectedResult(IndexContext context, string term, Sort groupSort, int topN)
+        {
+            List<AbstractGroupCount<IComparable>> expected = new List<AbstractGroupCount<IComparable>>();
+            IDictionary<string, ISet<string>> countsByGroup = context.searchTermToGroupCounts[term];
+
+            int taken = 0;
+            foreach (string group in countsByGroup.Keys)
+            {
+                // Stop once topN groups have been emitted.
+                if (taken >= topN)
+                {
+                    break;
+                }
+                taken++;
+
+                ISet<BytesRef> uniqueValues = new HashSet<BytesRef>();
+                foreach (string val in countsByGroup[group])
+                {
+                    BytesRef valueBytes = val == null ? null : new BytesRef(val);
+                    uniqueValues.Add(valueBytes);
+                }
+
+                BytesRef groupBytes = group == null ? (BytesRef)null : new BytesRef(group);
+                expected.Add(new GroupCount(groupBytes, uniqueValues));
+            }
+            return expected;
+        }
+
+        /// <summary>
+        /// Builds a random index and, while doing so, records for every content term which
+        /// count values were indexed under each group value. Returns the directory, an open
+        /// reader, the chosen doc-values type, that bookkeeping and the list of content terms
+        /// bundled in an <see cref="IndexContext"/>. The writer is disposed before returning;
+        /// the reader and the directory are left open for the caller.
+        /// </summary>
+        private IndexContext createIndexContext()
+        {
+            Random random = Random();
+            FieldInfo.DocValuesType_e[] dvTypes = new FieldInfo.DocValuesType_e[]{
+        FieldInfo.DocValuesType_e.BINARY,
+        FieldInfo.DocValuesType_e.SORTED
+    };
+
+            Directory dir = NewDirectory();
+            RandomIndexWriter w = new RandomIndexWriter(
+                random,
+                dir,
+                NewIndexWriterConfig(TEST_VERSION_CURRENT,
+                new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())
+              );
+
+            // No doc-values type is picked (dvType stays null) when the codec is named "Lucene3x".
+            bool canUseDV = !"Lucene3x".equals(w.w.Config.Codec.Name);
+            FieldInfo.DocValuesType_e? dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.Length)] : (FieldInfo.DocValuesType_e?)null;
+
+            // Pools of random values: one group value per 5 documents, one count value per 10.
+            int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
+            string[] groupValues = new string[numDocs / 5];
+            string[] countValues = new string[numDocs / 10];
+            for (int i = 0; i < groupValues.Length; i++)
+            {
+                groupValues[i] = GenerateRandomNonEmptyString();
+            }
+            for (int i = 0; i < countValues.Length; i++)
+            {
+                countValues[i] = GenerateRandomNonEmptyString();
+            }
+
+            List<string> contentStrings = new List<string>();
+            // content term -> (group value -> count values seen for that group)
+            IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts = new Dictionary<string, IDictionary<string, ISet<string>>>();
+            for (int i = 1; i <= numDocs; i++)
+            {
+                // Occasionally leave the document without a group value and/or a count value.
+                string groupValue = random.nextInt(23) == 14 ? null : groupValues[random.nextInt(groupValues.Length)];
+                string countValue = random.nextInt(21) == 13 ? null : countValues[random.nextInt(countValues.Length)];
+                string content = "random" + random.nextInt(numDocs / 20);
+                //IDictionary<string, ISet<string>> groupToCounts = searchTermToGroupCounts[content];
+                //      if (groupToCounts == null)
+                IDictionary<string, ISet<string>> groupToCounts;
+                if (!searchTermToGroupCounts.TryGetValue(content, out groupToCounts))
+                {
+                    // Groups sort always DOCID asc...
+                    // NOTE(review): the expected results are read back in this table's key
+                    // order -- confirm LurchTable(16) iterates in insertion order.
+                    searchTermToGroupCounts[content] = groupToCounts = new LurchTable<string, ISet<string>>(16);
+                    contentStrings.Add(content);
+                }
+
+                //ISet<string> countsVals = groupToCounts.get(groupValue);
+                //if (countsVals == null)
+                // NOTE(review): groupValue can be null here -- confirm the table accepts a
+                // null key for TryGetValue and the indexer.
+                ISet<string> countsVals;
+                if (!groupToCounts.TryGetValue(groupValue, out countsVals))
+                {
+                    groupToCounts[groupValue] = countsVals = new HashSet<string>();
+                }
+                // A null countValue is recorded too, marking a document without a count field.
+                countsVals.Add(countValue);
+
+                Document doc = new Document();
+                // Zero-padded to nine digits.
+                doc.Add(new StringField("id", string.Format(CultureInfo.InvariantCulture, "{0:D9}", i), Field.Store.YES));
+                if (groupValue != null)
+                {
+                    addField(doc, groupField, groupValue, dvType);
+                }
+                if (countValue != null)
+                {
+                    addField(doc, countField, countValue, dvType);
+                }
+                doc.Add(new TextField("content", content, Field.Store.YES));
+                w.AddDocument(doc);
+            }
+
+            DirectoryReader reader = w.Reader;
+            if (VERBOSE)
+            {
+                for (int docID = 0; docID < reader.MaxDoc; docID++)
+                {
+                    Document doc = reader.Document(docID);
+                    Console.WriteLine("docID=" + docID + " id=" + doc.Get("id") + " content=" + doc.Get("content") + " author=" + doc.Get("author") + " publisher=" + doc.Get("publisher"));
+                }
+            }
+
+            w.Dispose();
+            // NOTE(review): a null dvType is passed on as the enum's default value here, so the
+            // context cannot tell "no doc values" apart from that value -- confirm this is intended.
+            return new IndexContext(dir, reader, dvType.GetValueOrDefault(), searchTermToGroupCounts, contentStrings.ToArray(/*new String[contentStrings.size()]*/));
+        }
+
+        /// <summary>
+        /// Immutable bundle of everything a test needs after the random index has
+        /// been built: the directory, an open reader over it, the doc values type
+        /// in use, and the bookkeeping collected while the documents were added.
+        /// </summary>
+        internal class IndexContext
+        {
+            /// <summary>Directory the index lives in.</summary>
+            internal readonly Directory directory;
+
+            /// <summary>Reader over <see cref="directory"/>.</summary>
+            internal readonly DirectoryReader indexReader;
+
+            /// <summary>Doc values type this context was created with.</summary>
+            internal readonly FieldInfo.DocValuesType_e dvType;
+
+            /// <summary>
+            /// Nested lookup supplied by the creator of this context; the outer key
+            /// is a search term, the inner key a group value, and the innermost set
+            /// holds the distinct count values recorded for that pair.
+            /// </summary>
+            internal readonly IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts;
+
+            /// <summary>The content strings supplied by the creator of this context.</summary>
+            internal readonly string[] contentStrings;
+
+            /// <summary>
+            /// Stores the given values as-is; no copies are made and nothing is validated.
+            /// </summary>
+            /// <param name="directory">Directory holding the index.</param>
+            /// <param name="indexReader">Reader over the index.</param>
+            /// <param name="dvType">Doc values type to record.</param>
+            /// <param name="searchTermToGroupCounts">Search term to group value to count values lookup.</param>
+            /// <param name="contentStrings">Content strings to record.</param>
+            internal IndexContext(
+                Directory directory,
+                DirectoryReader indexReader,
+                FieldInfo.DocValuesType_e dvType,
+                IDictionary<string, IDictionary<string, ISet<string>>> searchTermToGroupCounts,
+                string[] contentStrings)
+            {
+                // Bookkeeping first, then the index handles; the assignments are independent.
+                this.contentStrings = contentStrings;
+                this.searchTermToGroupCounts = searchTermToGroupCounts;
+                this.dvType = dvType;
+                this.indexReader = indexReader;
+                this.directory = directory;
+            }
+        }
+
+        /// <summary>
+        /// Comparer for <see cref="IComparable"/> values that tolerates <c>null</c>:
+        /// a <c>null</c> operand orders before any non-null operand, and two
+        /// references to the same object (including two <c>null</c>s) are equal.
+        /// </summary>
+        internal class NullComparator : IComparer<IComparable>
+        {
+            /// <summary>
+            /// Compares <paramref name="a"/> with <paramref name="b"/>.
+            /// </summary>
+            /// <param name="a">Left operand; may be <c>null</c>.</param>
+            /// <param name="b">Right operand; may be <c>null</c>.</param>
+            /// <returns>
+            /// <c>0</c> when both are the same reference, <c>-1</c> when only
+            /// <paramref name="a"/> is <c>null</c>, <c>1</c> when only
+            /// <paramref name="b"/> is <c>null</c>, otherwise the result of
+            /// <c>a.CompareTo(b)</c>.
+            /// </returns>
+            public int Compare(IComparable a, IComparable b)
+            {
+                // Identity check also covers the "both null" case.
+                if (ReferenceEquals(a, b))
+                {
+                    return 0;
+                }
+
+                // From here on at most one side can be null.
+                if (a == null)
+                {
+                    return -1;
+                }
+
+                return b == null ? 1 : a.CompareTo(b);
+            }
+        }
+    }
+}


Mime
View raw message