lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [31/58] [abbrv] lucenenet git commit: WIP on Grouping
Date Thu, 10 Nov 2016 11:47:45 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs b/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs
new file mode 100644
index 0000000..fbbec34
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermAllGroupHeadsCollector.cs
@@ -0,0 +1,807 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+    /// <summary>
+    /// A base implementation of <see cref="AbstractAllGroupHeadsCollector{GH}"/> for retrieving the most relevant groups when grouping
+    /// on a string based group field. More specifically, all concrete implementations of this base implementation
+    /// use <see cref="Index.SortedDocValues"/>.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    /// <typeparam name="GH">The type of the group head</typeparam>
+    public abstract class TermAllGroupHeadsCollector<GH> : AbstractAllGroupHeadsCollector<GH> where GH : AbstractGroupHead /*AbstractAllGroupHeadsCollector<GH>.GroupHead*/
+    {
+        internal readonly string groupField;
+        internal readonly BytesRef scratchBytesRef = new BytesRef();
+
+        internal SortedDocValues groupIndex;
+        internal AtomicReaderContext readerContext;
+
+        protected TermAllGroupHeadsCollector(string groupField, int numberOfSorts)
+            : base(numberOfSorts)
+        {
+            this.groupField = groupField;
+        }
+    }
+
+    public class TermAllGroupHeadsCollector
+    {
+        private static readonly int DEFAULT_INITIAL_SIZE = 128;
+
+        // Disallow creation
+        private TermAllGroupHeadsCollector() { }
+
+        /**
+         * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
+         * This factory method decides which implementation is best suited.
+         *
+         * Delegates to {@link #create(String, org.apache.lucene.search.Sort, int)} with an initialSize of 128.
+         *
+         * @param groupField      The field to group by
+         * @param sortWithinGroup The sort within each group
+         * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
+         */
+        public static AbstractAllGroupHeadsCollector Create(string groupField, Sort sortWithinGroup)
+        {
+            return Create(groupField, sortWithinGroup, DEFAULT_INITIAL_SIZE);
+        }
+
+        /**
+         * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
+         * This factory method decides which implementation is best suited.
+         *
+         * @param groupField      The field to group by
+         * @param sortWithinGroup The sort within each group
+         * @param initialSize The initial allocation size of the internal int set and group list which should roughly match
+         *                    the total number of expected unique groups. Be aware that the heap usage is
+         *                    4 bytes * initialSize.
+         * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
+         */
+        public static AbstractAllGroupHeadsCollector Create(string groupField, Sort sortWithinGroup, int initialSize)
+        {
+            bool sortAllScore = true;
+            bool sortAllFieldValue = true;
+
+            foreach (SortField sortField in sortWithinGroup.GetSort())
+            {
+                if (sortField.Type == SortField.Type_e.SCORE)
+                {
+                    sortAllFieldValue = false;
+                }
+                else if (NeedGeneralImpl(sortField))
+                {
+                    return new GeneralAllGroupHeadsCollector(groupField, sortWithinGroup);
+                }
+                else
+                {
+                    sortAllScore = false;
+                }
+            }
+
+            if (sortAllScore)
+            {
+                return new ScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
+            }
+            else if (sortAllFieldValue)
+            {
+                return new OrdAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
+            }
+            else
+            {
+                return new OrdScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
+            }
+        }
+
+        // Returns true when a sort field needs the general impl.
+        private static bool NeedGeneralImpl(SortField sortField)
+        {
+            SortField.Type_e sortType = sortField.Type;
+            // Note (MvG): We can also make an optimized impl when sorting is SortField.DOC
+            return sortType != SortField.Type_e.STRING_VAL && sortType != SortField.Type_e.STRING && sortType != SortField.Type_e.SCORE;
+        }
+    }
+
+    // A general impl that works for any group sort.
+    internal class GeneralAllGroupHeadsCollector : TermAllGroupHeadsCollector<GeneralAllGroupHeadsCollector.GroupHead>
+        {
+
+            private readonly Sort sortWithinGroup;
+            private readonly IDictionary<BytesRef, GroupHead> groups;
+
+            internal Scorer scorer;
+
+            internal GeneralAllGroupHeadsCollector(string groupField, Sort sortWithinGroup)
+                : base(groupField, sortWithinGroup.GetSort().Length)
+            {
+                this.sortWithinGroup = sortWithinGroup;
+                groups = new HashMap<BytesRef, GroupHead>();
+
+                SortField[] sortFields = sortWithinGroup.GetSort();
+                for (int i = 0; i < sortFields.Length; i++)
+                {
+                    reversed[i] = sortFields[i].Reverse ? -1 : 1;
+                }
+            }
+
+            protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+            {
+                int ord = groupIndex.GetOrd(doc);
+                BytesRef groupValue;
+                if (ord == -1)
+                {
+                    groupValue = null;
+                }
+                else
+                {
+                    groupIndex.LookupOrd(ord, scratchBytesRef);
+                    groupValue = scratchBytesRef;
+                }
+                GroupHead groupHead;
+                if (!groups.TryGetValue(groupValue, out groupHead))
+                {
+                    groupHead = new GroupHead(this, groupValue, sortWithinGroup, doc);
+                    groups[groupValue == null ? null : BytesRef.DeepCopyOf(groupValue)] = groupHead;
+                    temporalResult.stop = true;
+                }
+                else
+                {
+                    temporalResult.stop = false;
+                }
+                temporalResult.groupHead = groupHead;
+            }
+
+            protected override ICollection<GroupHead> GetCollectedGroupHeads()
+            {
+                return groups.Values;
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    this.readerContext = value;
+                    groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+                    foreach (GroupHead groupHead in groups.Values)
+                    {
+                        for (int i = 0; i < groupHead.comparators.Length; i++)
+                        {
+                            groupHead.comparators[i] = groupHead.comparators[i].SetNextReader(value);
+                        }
+                    }
+                }
+            }
+
+            public override Scorer Scorer
+            {
+                set
+                {
+                    this.scorer = value;
+                    foreach (GroupHead groupHead in groups.Values)
+                    {
+                        foreach (FieldComparator comparator in groupHead.comparators)
+                        {
+                            comparator.Scorer = value;
+                        }
+                    }
+                }
+            }
+
+            internal class GroupHead : AbstractGroupHead /*AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+            {
+                private readonly GeneralAllGroupHeadsCollector outerInstance;
+                public readonly BytesRef groupValue;
+
+                internal readonly FieldComparator[] comparators;
+
+                internal GroupHead(GeneralAllGroupHeadsCollector outerInstance, BytesRef groupValue, Sort sort, int doc)
+                    : base(doc + outerInstance.readerContext.DocBase)
+                {
+                    this.outerInstance = outerInstance;
+                    SortField[] sortFields = sort.GetSort();
+                    comparators = new FieldComparator[sortFields.Length];
+                    for (int i = 0; i < sortFields.Length; i++)
+                    {
+                        comparators[i] = sortFields[i].GetComparator(1, i).SetNextReader(outerInstance.readerContext);
+                        comparators[i].Scorer = outerInstance.scorer;
+                        comparators[i].Copy(0, doc);
+                        comparators[i].Bottom = 0;
+                    }
+                }
+
+                public override int Compare(int compIDX, int doc)
+                {
+                    return comparators[compIDX].CompareBottom(doc);
+                }
+
+                public override void UpdateDocHead(int doc)
+                {
+                    foreach (FieldComparator comparator in comparators)
+                    {
+                        comparator.Copy(0, doc);
+                        comparator.Bottom = 0;
+                    }
+                    this.Doc = doc + outerInstance.readerContext.DocBase;
+                }
+            }
+        }
+
+
+        // AbstractAllGroupHeadsCollector optimized for ord fields and scores.
+        internal class OrdScoreAllGroupHeadsCollector : TermAllGroupHeadsCollector<OrdScoreAllGroupHeadsCollector.GroupHead>
+        {
+            //private readonly TermAllGroupHeadsCollector<GH> outerInstance;
+            private readonly SentinelIntSet ordSet;
+            private readonly IList<GroupHead> collectedGroups;
+            private readonly SortField[] fields;
+
+            private SortedDocValues[] sortsIndex;
+            private Scorer scorer;
+            private GroupHead[] segmentGroupHeads;
+
+            internal OrdScoreAllGroupHeadsCollector(/*TermAllGroupHeadsCollector<GH> outerInstance,*/ string groupField, Sort sortWithinGroup, int initialSize)
+                : base(groupField, sortWithinGroup.GetSort().Length)
+            {
+                //this.outerInstance = outerInstance;
+                ordSet = new SentinelIntSet(initialSize, -2);
+                collectedGroups = new List<GroupHead>(initialSize);
+
+                SortField[] sortFields = sortWithinGroup.GetSort();
+                fields = new SortField[sortFields.Length];
+                sortsIndex = new SortedDocValues[sortFields.Length];
+                for (int i = 0; i < sortFields.Length; i++)
+                {
+                    reversed[i] = sortFields[i].Reverse ? -1 : 1;
+                    fields[i] = sortFields[i];
+                }
+            }
+
+            protected override ICollection<GroupHead> GetCollectedGroupHeads()
+            {
+                return collectedGroups;
+            }
+
+            public override Scorer Scorer
+            {
+                set
+                {
+                    this.scorer = value;
+                }
+            }
+
+
+            protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+            {
+                int key = groupIndex.GetOrd(doc);
+                GroupHead groupHead;
+                if (!ordSet.Exists(key))
+                {
+                    ordSet.Put(key);
+                    BytesRef term;
+                    if (key == -1)
+                    {
+                        term = null;
+                    }
+                    else
+                    {
+                        term = new BytesRef();
+                        groupIndex.LookupOrd(key, term);
+                    }
+                    groupHead = new GroupHead(this, doc, term);
+                    collectedGroups.Add(groupHead);
+                    segmentGroupHeads[key + 1] = groupHead;
+                    temporalResult.stop = true;
+                }
+                else
+                {
+                    temporalResult.stop = false;
+                    groupHead = segmentGroupHeads[key + 1];
+                }
+                temporalResult.groupHead = groupHead;
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    this.readerContext = value;
+                    groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+                    for (int i = 0; i < fields.Length; i++)
+                    {
+                        if (fields[i].Type == SortField.Type_e.SCORE)
+                        {
+                            continue;
+                        }
+
+                        sortsIndex[i] = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, fields[i].Field);
+                    }
+
+                    // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+                    ordSet.Clear();
+                    segmentGroupHeads = new GroupHead[groupIndex.ValueCount + 1];
+                    foreach (GroupHead collectedGroup in collectedGroups)
+                    {
+                        int ord;
+                        if (collectedGroup.groupValue == null)
+                        {
+                            ord = -1;
+                        }
+                        else
+                        {
+                            ord = groupIndex.LookupTerm(collectedGroup.groupValue);
+                        }
+                        if (collectedGroup.groupValue == null || ord >= 0)
+                        {
+                            ordSet.Put(ord);
+                            segmentGroupHeads[ord + 1] = collectedGroup;
+
+                            for (int i = 0; i < sortsIndex.Length; i++)
+                            {
+                                if (fields[i].Type == SortField.Type_e.SCORE)
+                                {
+                                    continue;
+                                }
+                                int sortOrd;
+                                if (collectedGroup.sortValues[i] == null)
+                                {
+                                    sortOrd = -1;
+                                }
+                                else
+                                {
+                                    sortOrd = sortsIndex[i].LookupTerm(collectedGroup.sortValues[i]);
+                                }
+                                collectedGroup.sortOrds[i] = sortOrd;
+                            }
+                        }
+                    }
+                }
+
+            }
+
+            internal class GroupHead : AbstractGroupHead /*AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+            {
+                private readonly OrdScoreAllGroupHeadsCollector outerInstance;
+                public readonly BytesRef groupValue;
+
+                internal BytesRef[] sortValues;
+                internal int[] sortOrds;
+                internal float[] scores;
+
+                internal GroupHead(OrdScoreAllGroupHeadsCollector outerInstance, int doc, BytesRef groupValue)
+                    : base(doc + outerInstance.readerContext.DocBase)
+                {
+                    this.outerInstance = outerInstance;
+                    this.groupValue = groupValue;
+
+                    sortValues = new BytesRef[outerInstance.sortsIndex.Length];
+                    sortOrds = new int[outerInstance.sortsIndex.Length];
+                    scores = new float[outerInstance.sortsIndex.Length];
+                    for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+                    {
+                        if (outerInstance.fields[i].Type == SortField.Type_e.SCORE)
+                        {
+                            scores[i] = outerInstance.scorer.Score();
+                        }
+                        else
+                        {
+                            sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+                            sortValues[i] = new BytesRef();
+                            if (sortOrds[i] != -1)
+                            {
+                                outerInstance.sortsIndex[i].Get(doc, sortValues[i]);
+                            }
+                        }
+                    }
+                }
+
+                public override int Compare(int compIDX, int doc)
+                {
+                    if (outerInstance.fields[compIDX].Type == SortField.Type_e.SCORE)
+                    {
+                        float score = outerInstance.scorer.Score();
+                        if (scores[compIDX] < score)
+                        {
+                            return 1;
+                        }
+                        else if (scores[compIDX] > score)
+                        {
+                            return -1;
+                        }
+                        return 0;
+                    }
+                    else
+                    {
+                        if (sortOrds[compIDX] < 0)
+                        {
+                            // The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
+                            if (outerInstance.sortsIndex[compIDX].GetOrd(doc) == -1)
+                            {
+                                outerInstance.scratchBytesRef.Length = 0;
+                            }
+                            else
+                            {
+                                outerInstance.sortsIndex[compIDX].Get(doc, outerInstance.scratchBytesRef);
+                            }
+                            return sortValues[compIDX].CompareTo(outerInstance.scratchBytesRef);
+                        }
+                        else
+                        {
+                            return sortOrds[compIDX] - outerInstance.sortsIndex[compIDX].GetOrd(doc);
+                        }
+                    }
+                }
+
+                public override void UpdateDocHead(int doc)
+                {
+                    for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+                    {
+                        if (outerInstance.fields[i].Type == Search.SortField.Type_e.SCORE)
+                        {
+                            scores[i] = outerInstance.scorer.Score();
+                        }
+                        else
+                        {
+                            sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+                            if (sortOrds[i] == -1)
+                            {
+                                sortValues[i].Length = 0;
+                            }
+                            else
+                            {
+                                outerInstance.sortsIndex[i].Get(doc, sortValues[i]);
+                            }
+                        }
+                    }
+                    this.Doc = doc + outerInstance.readerContext.DocBase;
+                }
+            }
+        }
+
+
+        // AbstractAllGroupHeadsCollector optimized for ord fields.
+        internal class OrdAllGroupHeadsCollector : TermAllGroupHeadsCollector<OrdAllGroupHeadsCollector.GroupHead>
+        {
+            //private readonly TermAllGroupHeadsCollector<GH> outerInstance;
+            private readonly SentinelIntSet ordSet;
+            private readonly IList<GroupHead> collectedGroups;
+            private readonly SortField[] fields;
+
+            private SortedDocValues[] sortsIndex;
+            private GroupHead[] segmentGroupHeads;
+
+            internal OrdAllGroupHeadsCollector(/*TermAllGroupHeadsCollector<GH> outerInstance,*/ string groupField, Sort sortWithinGroup, int initialSize)
+                        : base(groupField, sortWithinGroup.GetSort().Length)
+            {
+                //this.outerInstance = outerInstance;
+                ordSet = new SentinelIntSet(initialSize, -2);
+                collectedGroups = new List<GroupHead>(initialSize);
+
+                SortField[] sortFields = sortWithinGroup.GetSort();
+                fields = new SortField[sortFields.Length];
+                sortsIndex = new SortedDocValues[sortFields.Length];
+                for (int i = 0; i < sortFields.Length; i++)
+                {
+                    reversed[i] = sortFields[i].Reverse ? -1 : 1;
+                    fields[i] = sortFields[i];
+                }
+            }
+
+            protected override ICollection<GroupHead> GetCollectedGroupHeads()
+            {
+                return collectedGroups;
+            }
+
+            public override Scorer Scorer
+            {
+                set
+                {
+                }
+            }
+
+
+            protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+            {
+                int key = groupIndex.GetOrd(doc);
+                GroupHead groupHead;
+                if (!ordSet.Exists(key))
+                {
+                    ordSet.Put(key);
+                    BytesRef term;
+                    if (key == -1)
+                    {
+                        term = null;
+                    }
+                    else
+                    {
+                        term = new BytesRef();
+                        groupIndex.LookupOrd(key, term);
+                    }
+                    groupHead = new GroupHead(this, doc, term);
+                    collectedGroups.Add(groupHead);
+                    segmentGroupHeads[key + 1] = groupHead;
+                    temporalResult.stop = true;
+                }
+                else
+                {
+                    temporalResult.stop = false;
+                    groupHead = segmentGroupHeads[key + 1];
+                }
+                temporalResult.groupHead = groupHead;
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    this.readerContext = value;
+                    groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+                    for (int i = 0; i < fields.Length; i++)
+                    {
+                        sortsIndex[i] = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, fields[i].Field);
+                    }
+
+                    // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+                    ordSet.Clear();
+                    segmentGroupHeads = new GroupHead[groupIndex.ValueCount + 1];
+                    foreach (GroupHead collectedGroup in collectedGroups)
+                    {
+                        int groupOrd;
+                        if (collectedGroup.groupValue == null)
+                        {
+                            groupOrd = -1;
+                        }
+                        else
+                        {
+                            groupOrd = groupIndex.LookupTerm(collectedGroup.groupValue);
+                        }
+                        if (collectedGroup.groupValue == null || groupOrd >= 0)
+                        {
+                            ordSet.Put(groupOrd);
+                            segmentGroupHeads[groupOrd + 1] = collectedGroup;
+
+                            for (int i = 0; i < sortsIndex.Length; i++)
+                            {
+                                int sortOrd;
+                                if (collectedGroup.sortOrds[i] == -1)
+                                {
+                                    sortOrd = -1;
+                                }
+                                else
+                                {
+                                    sortOrd = sortsIndex[i].LookupTerm(collectedGroup.sortValues[i]);
+                                }
+                                collectedGroup.sortOrds[i] = sortOrd;
+                            }
+                        }
+                    }
+                }
+            }
+
+            internal class GroupHead : AbstractGroupHead /* AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+            {
+                private readonly OrdAllGroupHeadsCollector outerInstance;
+                public readonly BytesRef groupValue;
+                internal BytesRef[] sortValues;
+                internal int[] sortOrds;
+
+                internal GroupHead(OrdAllGroupHeadsCollector outerInstance, int doc, BytesRef groupValue)
+                    : base(doc + outerInstance.readerContext.DocBase)
+                {
+                    this.outerInstance = outerInstance;
+                    this.groupValue = groupValue;
+
+                    sortValues = new BytesRef[outerInstance.sortsIndex.Length];
+                    sortOrds = new int[outerInstance.sortsIndex.Length];
+                    for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+                    {
+                        sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+                        sortValues[i] = new BytesRef();
+                        if (sortOrds[i] != -1)
+                        {
+                            outerInstance.sortsIndex[i].Get(doc, sortValues[i]);
+                        }
+                    }
+                }
+
+                public override int Compare(int compIDX, int doc)
+                {
+                    if (sortOrds[compIDX] < 0)
+                    {
+                        // The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
+                        if (outerInstance.sortsIndex[compIDX].GetOrd(doc) == -1)
+                        {
+                            outerInstance.scratchBytesRef.Length = 0;
+                        }
+                        else
+                        {
+                            outerInstance.sortsIndex[compIDX].Get(doc, outerInstance.scratchBytesRef);
+                        }
+                        return sortValues[compIDX].CompareTo(outerInstance.scratchBytesRef);
+                    }
+                    else
+                    {
+                        return sortOrds[compIDX] - outerInstance.sortsIndex[compIDX].GetOrd(doc);
+                    }
+                }
+
+                public override void UpdateDocHead(int doc)
+                {
+                    for (int i = 0; i < outerInstance.sortsIndex.Length; i++)
+                    {
+                        sortOrds[i] = outerInstance.sortsIndex[i].GetOrd(doc);
+                        if (sortOrds[i] == -1)
+                        {
+                            sortValues[i].Length = 0;
+                        }
+                        else
+                        {
+                            outerInstance.sortsIndex[i].LookupOrd(sortOrds[i], sortValues[i]);
+                        }
+                    }
+                    this.Doc = doc + outerInstance.readerContext.DocBase;
+                }
+
+            }
+
+        }
+
+
+        // AbstractAllGroupHeadsCollector optimized for scores.
+        internal class ScoreAllGroupHeadsCollector : TermAllGroupHeadsCollector<ScoreAllGroupHeadsCollector.GroupHead>
+        {
+            //private readonly TermAllGroupHeadsCollector<GH> outerInstance;
+            private readonly SentinelIntSet ordSet;
+            private readonly IList<GroupHead> collectedGroups;
+            private readonly SortField[] fields;
+
+            private Scorer scorer;
+            private GroupHead[] segmentGroupHeads;
+
+            internal ScoreAllGroupHeadsCollector(/*TermAllGroupHeadsCollector<GH> outerInstance,*/ string groupField, Sort sortWithinGroup, int initialSize)
+                        : base(groupField, sortWithinGroup.GetSort().Length)
+            {
+                //this.outerInstance = outerInstance;
+                ordSet = new SentinelIntSet(initialSize, -2);
+                collectedGroups = new List<GroupHead>(initialSize);
+
+                SortField[] sortFields = sortWithinGroup.GetSort();
+                fields = new SortField[sortFields.Length];
+                for (int i = 0; i < sortFields.Length; i++)
+                {
+                    reversed[i] = sortFields[i].Reverse ? -1 : 1;
+                    fields[i] = sortFields[i];
+                }
+            }
+
+            protected override ICollection<GroupHead> GetCollectedGroupHeads()
+            {
+                return collectedGroups;
+            }
+
+            public override Scorer Scorer
+            {
+                set
+                {
+                    this.scorer = value;
+                }
+            }
+
+            protected override void RetrieveGroupHeadAndAddIfNotExist(int doc)
+            {
+                int key = groupIndex.GetOrd(doc);
+                GroupHead groupHead;
+                if (!ordSet.Exists(key))
+                {
+                    ordSet.Put(key);
+                    BytesRef term;
+                    if (key == -1)
+                    {
+                        term = null;
+                    }
+                    else
+                    {
+                        term = new BytesRef();
+                        groupIndex.LookupOrd(key, term);
+                    }
+                    groupHead = new GroupHead(this, doc, term);
+                    collectedGroups.Add(groupHead);
+                    segmentGroupHeads[key + 1] = groupHead;
+                    temporalResult.stop = true;
+                }
+                else
+                {
+                    temporalResult.stop = false;
+                    groupHead = segmentGroupHeads[key + 1];
+                }
+                temporalResult.groupHead = groupHead;
+            }
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    this.readerContext = value;
+                    groupIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+                    // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+                    ordSet.Clear();
+                    segmentGroupHeads = new GroupHead[groupIndex.ValueCount + 1];
+                    foreach (GroupHead collectedGroup in collectedGroups)
+                    {
+                        int ord;
+                        if (collectedGroup.groupValue == null)
+                        {
+                            ord = -1;
+                        }
+                        else
+                        {
+                            ord = groupIndex.LookupTerm(collectedGroup.groupValue);
+                        }
+                        if (collectedGroup.groupValue == null || ord >= 0)
+                        {
+                            ordSet.Put(ord);
+                            segmentGroupHeads[ord + 1] = collectedGroup;
+                        }
+                    }
+                }
+            }
+
+            internal class GroupHead : AbstractGroupHead /*AbstractAllGroupHeadsCollector.GroupHead<BytesRef>*/
+            {
+                private readonly ScoreAllGroupHeadsCollector outerInstance;
+                public readonly BytesRef groupValue;
+                internal float[] scores;
+
+                internal GroupHead(ScoreAllGroupHeadsCollector outerInstance, int doc, BytesRef groupValue)
+                    : base(doc + outerInstance.readerContext.DocBase)
+                {
+                    this.outerInstance = outerInstance;
+                    this.groupValue = groupValue;
+
+                    scores = new float[outerInstance.fields.Length];
+                    float score = outerInstance.scorer.Score();
+                    for (int i = 0; i < scores.Length; i++)
+                    {
+                        scores[i] = score;
+                    }
+                }
+
+                public override int Compare(int compIDX, int doc)
+                {
+                    float score = outerInstance.scorer.Score();
+                    if (scores[compIDX] < score)
+                    {
+                        return 1;
+                    }
+                    else if (scores[compIDX] > score)
+                    {
+                        return -1;
+                    }
+                    return 0;
+                }
+
+                public override void UpdateDocHead(int doc)
+                {
+                    float score = outerInstance.scorer.Score();
+                    for (int i = 0; i < scores.Length; i++)
+                    {
+                        scores[i] = score;
+                    }
+                    this.Doc = doc + outerInstance.readerContext.DocBase;
+                }
+
+            }
+
+        
+
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs b/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs
new file mode 100644
index 0000000..7693d93
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermAllGroupsCollector.cs
@@ -0,0 +1,120 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+    /// <summary>
+    /// A collector that collects all groups that match the
+    /// query. Only the group value is collected, and the order
+    /// is undefined.  This collector does not determine
+    /// the most relevant document of a group.
+    /// 
+    /// <para>
+    /// Implementation detail: an int hash set (SentinelIntSet)
+    /// is used to detect if a group is already added to the
+    /// total count.  For each segment the int set is cleared and filled
+    /// with previous counted groups that occur in the new
+    /// segment.
+    /// </para>
+    /// @lucene.experimental
+    /// </summary>
+    public class TermAllGroupsCollector : AbstractAllGroupsCollector<BytesRef>
+    {
+        // True compile-time constant (was "static readonly"); private, so inlining is not a concern.
+        private const int DEFAULT_INITIAL_SIZE = 128;
+
+        private readonly string groupField; // was "String"; use the C# keyword alias for consistency with the rest of the file
+        private readonly SentinelIntSet ordSet;
+        private readonly IList<BytesRef> groups;
+
+        // Per-segment ord -> term index for the group field; refreshed in NextReader.
+        private SortedDocValues index;
+
+        /// <summary>
+        /// Expert: Constructs a <see cref="TermAllGroupsCollector"/>
+        /// </summary>
+        /// <param name="groupField">The field to group by</param>
+        /// <param name="initialSize">
+        /// The initial allocation size of the
+        /// internal int set and group list
+        /// which should roughly match the total
+        /// number of expected unique groups. Be aware that the
+        /// heap usage is 4 bytes * initialSize.
+        /// </param>
+        public TermAllGroupsCollector(string groupField, int initialSize)
+        {
+            // -2 is the sentinel ("empty slot") value; it can never collide with a
+            // real ord (>= 0) or the "no value" ord (-1).
+            ordSet = new SentinelIntSet(initialSize, -2);
+            groups = new List<BytesRef>(initialSize);
+            this.groupField = groupField;
+        }
+
+        /// <summary>
+        /// Constructs a <see cref="TermAllGroupsCollector"/>. This sets the
+        /// initial allocation size for the internal int set and group
+        /// list to 128.
+        /// </summary>
+        /// <param name="groupField">The field to group by</param>
+        public TermAllGroupsCollector(string groupField)
+            : this(groupField, DEFAULT_INITIAL_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Records the group value of <paramref name="doc"/> the first time its
+        /// ord is seen in the current segment.
+        /// </summary>
+        public override void Collect(int doc)
+        {
+            int key = index.GetOrd(doc);
+            if (!ordSet.Exists(key))
+            {
+                ordSet.Put(key);
+                // Ord -1 means the document has no value in the group field; such
+                // docs are counted as a single "null" group.
+                BytesRef term;
+                if (key == -1)
+                {
+                    term = null;
+                }
+                else
+                {
+                    term = new BytesRef();
+                    index.LookupOrd(key, term);
+                }
+                groups.Add(term);
+            }
+        }
+
+        /// <summary>
+        /// Gets the collected group values (order undefined; may contain a null
+        /// entry for documents without a value in the group field).
+        /// </summary>
+        public override ICollection<BytesRef> Groups
+        {
+            get
+            {
+                return groups;
+            }
+        }
+
+        public override AtomicReaderContext NextReader
+        {
+            set
+            {
+                index = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+                // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+                ordSet.Clear();
+                foreach (BytesRef countedGroup in groups)
+                {
+                    if (countedGroup == null)
+                    {
+                        ordSet.Put(-1);
+                    }
+                    else
+                    {
+                        int ord = index.LookupTerm(countedGroup);
+                        if (ord >= 0)
+                        {
+                            ordSet.Put(ord);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs b/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
new file mode 100644
index 0000000..d6f6bab
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermDistinctValuesCollector.cs
@@ -0,0 +1,144 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+    /// <summary>
+    /// A term based implementation of <c>AbstractDistinctValuesCollector</c> that relies
+    /// on <see cref="SortedDocValues"/> to count the distinct values per group.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public class TermDistinctValuesCollector : AbstractDistinctValuesCollector<TermDistinctValuesCollector.GroupCount>
+    {
+        private readonly string groupField;
+        private readonly string countField;
+        private readonly List<GroupCount> groups;
+        private readonly SentinelIntSet ordSet;
+        private readonly GroupCount[] groupCounts;
+
+        // Per-segment ord indexes for the two fields; refreshed in NextReader.
+        private SortedDocValues groupFieldTermIndex;
+        private SortedDocValues countFieldTermIndex;
+
+        /// <summary>
+        /// Constructs a <see cref="TermDistinctValuesCollector"/> instance.
+        /// </summary>
+        /// <param name="groupField">The field to group by</param>
+        /// <param name="countField">The field to count distinct values for</param>
+        /// <param name="groups">The top N groups, collected during the first phase search</param>
+        public TermDistinctValuesCollector(string groupField, string countField, ICollection<SearchGroup<BytesRef>> groups)
+        {
+            this.groupField = groupField;
+            this.countField = countField;
+            this.groups = new List<GroupCount>(groups.Count);
+            foreach (SearchGroup<BytesRef> group in groups)
+            {
+                this.groups.Add(new GroupCount(group.groupValue));
+            }
+            // -2 is the sentinel ("empty slot") value; real ords are >= -1.
+            ordSet = new SentinelIntSet(groups.Count, -2);
+            groupCounts = new GroupCount[ordSet.Keys.Length];
+        }
+
+        public override void Collect(int doc)
+        {
+            // Only documents belonging to one of the requested groups are counted.
+            int slot = ordSet.Find(groupFieldTermIndex.GetOrd(doc));
+            if (slot < 0)
+            {
+                return;
+            }
+
+            GroupCount gc = groupCounts[slot];
+            int countOrd = countFieldTermIndex.GetOrd(doc);
+            if (DoesNotContainOrd(countOrd, gc.ords))
+            {
+                // countOrd -1 means the doc has no value in the count field; it
+                // is tracked as one distinct "null" value.
+                if (countOrd == -1)
+                {
+                    gc.uniqueValues.Add(null);
+                }
+                else
+                {
+                    BytesRef br = new BytesRef();
+                    countFieldTermIndex.LookupOrd(countOrd, br);
+                    gc.uniqueValues.Add(br);
+                }
+
+                // Grow the ord cache by one and keep it sorted so that
+                // DoesNotContainOrd can binary-search it.
+                gc.ords = Arrays.CopyOf(gc.ords, gc.ords.Length + 1);
+                gc.ords[gc.ords.Length - 1] = countOrd;
+                if (gc.ords.Length > 1)
+                {
+                    Array.Sort(gc.ords);
+                }
+            }
+        }
+
+        // Returns true when ord is absent from the (sorted) ords array.
+        private bool DoesNotContainOrd(int ord, int[] ords)
+        {
+            if (ords.Length == 0)
+            {
+                return true;
+            }
+            else if (ords.Length == 1)
+            {
+                return ord != ords[0];
+            }
+            return Array.BinarySearch(ords, ord) < 0;
+        }
+
+        /// <summary>
+        /// Returns the top-N groups passed to the constructor, with their
+        /// distinct-value state accumulated so far.
+        /// </summary>
+        public override List<GroupCount> GetGroups()
+        {
+            return groups;
+        }
+
+        public override AtomicReaderContext NextReader
+        {
+            set
+            {
+                groupFieldTermIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+                countFieldTermIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, countField);
+                // Remap the requested groups to this segment's ords; groups whose
+                // term does not occur in this segment are skipped.
+                ordSet.Clear();
+                foreach (GroupCount group in groups)
+                {
+                    int groupOrd = group.groupValue == null ? -1 : groupFieldTermIndex.LookupTerm(group.groupValue);
+                    if (group.groupValue != null && groupOrd < 0)
+                    {
+                        continue;
+                    }
+
+                    groupCounts[ordSet.Put(groupOrd)] = group;
+                    // Seed the per-group ord cache with this segment's ords for the
+                    // values counted so far; -2 marks values not present in this segment.
+                    // NOTE(review): the array is not re-sorted after seeding even though
+                    // DoesNotContainOrd binary-searches it; this mirrors the upstream
+                    // Lucene code — confirm the iteration cannot yield unsorted ords.
+                    group.ords = new int[group.uniqueValues.Count];
+                    Arrays.Fill(group.ords, -2);
+                    int i = 0;
+                    foreach (BytesRef value2 in group.uniqueValues)
+                    {
+                        int countOrd = value2 == null ? -1 : countFieldTermIndex.LookupTerm(value2);
+                        if (value2 == null || countOrd >= 0)
+                        {
+                            group.ords[i++] = countOrd;
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Holds distinct values for a single group.
+        /// 
+        /// @lucene.experimental
+        /// </summary>
+        public class GroupCount : AbstractGroupCount<BytesRef> /*AbstractDistinctValuesCollector.GroupCount<BytesRef>*/
+        {
+            internal int[] ords;
+
+            internal GroupCount(BytesRef groupValue)
+                    : base(groupValue)
+            {
+            }
+        }
+    }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs b/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs
new file mode 100644
index 0000000..17003ba
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermFirstPassGroupingCollector.cs
@@ -0,0 +1,88 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+    /// <summary>
+    /// Concrete implementation of <c>AbstractFirstPassGroupingCollector&lt;BytesRef&gt;</c> that groups based on
+    /// field values and more specifically uses <see cref="SortedDocValues"/>
+    /// to collect groups.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public class TermFirstPassGroupingCollector : AbstractFirstPassGroupingCollector<BytesRef>
+    {
+        // Shared lookup buffer; GetDocGroupValue returns it, so callers must copy
+        // via CopyDocGroupValue before holding on to the value.
+        private readonly BytesRef scratchBytesRef = new BytesRef();
+        // Per-segment ord -> term index for the group field; refreshed in NextReader.
+        private SortedDocValues index;
+
+        // Set only in the constructor; marked readonly for consistency with the
+        // other collectors in this namespace.
+        private readonly string groupField;
+
+        /// <summary>
+        /// Create the first pass collector.
+        /// </summary>
+        /// <param name="groupField">
+        /// The field used to group
+        /// documents. This field must be single-valued and
+        /// indexed (<see cref="FieldCache"/> is used to access its value
+        /// per-document).
+        /// </param>
+        /// <param name="groupSort">
+        /// The <see cref="Sort"/> used to sort the
+        /// groups.  The top sorted document within each group
+        /// according to groupSort, determines how that group
+        /// sorts against other groups.  This must be non-null,
+        /// ie, if you want to groupSort by relevance use
+        /// <see cref="Sort.RELEVANCE"/>.
+        /// </param>
+        /// <param name="topNGroups">
+        /// How many top groups to keep.
+        /// </param>
+        /// <exception cref="System.IO.IOException">When I/O related errors occur</exception>
+        public TermFirstPassGroupingCollector(string groupField, Sort groupSort, int topNGroups)
+            : base(groupSort, topNGroups)
+        {
+            this.groupField = groupField;
+        }
+
+        /// <summary>
+        /// Returns the group value for <paramref name="doc"/>: null when the doc
+        /// has no value in the group field (ord -1), otherwise the term bytes in
+        /// the shared scratch buffer (valid only until the next call).
+        /// </summary>
+        protected override BytesRef GetDocGroupValue(int doc)
+        {
+            int ord = index.GetOrd(doc);
+            if (ord == -1)
+            {
+                return null;
+            }
+            else
+            {
+                index.LookupOrd(ord, scratchBytesRef);
+                return scratchBytesRef;
+            }
+        }
+
+        /// <summary>
+        /// Produces a privately-owned copy of <paramref name="groupValue"/>,
+        /// reusing <paramref name="reuse"/> when available; null stays null.
+        /// </summary>
+        protected override BytesRef CopyDocGroupValue(BytesRef groupValue, BytesRef reuse)
+        {
+            if (groupValue == null)
+            {
+                return null;
+            }
+            else if (reuse != null)
+            {
+                reuse.CopyBytes(groupValue);
+                return reuse;
+            }
+            else
+            {
+                return BytesRef.DeepCopyOf(groupValue);
+            }
+        }
+
+        public override AtomicReaderContext NextReader
+        {
+            set
+            {
+                base.NextReader = value;
+                index = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+            }
+        }
+    }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs b/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs
new file mode 100644
index 0000000..08fbb70
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermGroupFacetCollector.cs
@@ -0,0 +1,444 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+    /// <summary>
+    /// An implementation of <see cref="AbstractGroupFacetCollector"/> that computes grouped facets based on the indexed terms
+    /// from the <see cref="FieldCache"/>.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public abstract class TermGroupFacetCollector : AbstractGroupFacetCollector
+    {
+        // All (group, facet) pairs seen so far, carried across segments.
+        internal readonly List<GroupedFacetHit> groupedFacetHits;
+        // Per-segment dedup set of encoded (group ord, facet ord) pairs.
+        internal readonly SentinelIntSet segmentGroupedFacetHits;
+
+        internal SortedDocValues groupFieldTermsIndex;
+
+        /// <summary>
+        /// Factory method for creating the right implementation based on the fact whether the facet field contains
+        /// multiple tokens per documents.
+        /// </summary>
+        /// <param name="groupField">The group field</param>
+        /// <param name="facetField">The facet field</param>
+        /// <param name="facetFieldMultivalued">Whether the facet field has multiple tokens per document</param>
+        /// <param name="facetPrefix">The facet prefix a facet entry should start with to be included.</param>
+        /// <param name="initialSize">
+        /// The initial allocation size of the internal int set and group facet list which should roughly
+        /// match the total number of expected unique groups. Be aware that the heap usage is
+        /// 4 bytes * initialSize.
+        /// </param>
+        /// <returns><see cref="TermGroupFacetCollector"/> implementation</returns>
+        public static TermGroupFacetCollector CreateTermGroupFacetCollector(string groupField,
+                                                                            string facetField,
+                                                                            bool facetFieldMultivalued,
+                                                                            BytesRef facetPrefix,
+                                                                            int initialSize)
+        {
+            if (facetFieldMultivalued)
+            {
+                return new MV(groupField, facetField, facetPrefix, initialSize);
+            }
+            else
+            {
+                return new SV(groupField, facetField, facetPrefix, initialSize);
+            }
+        }
+
+        internal TermGroupFacetCollector(string groupField, string facetField, BytesRef facetPrefix, int initialSize)
+            : base(groupField, facetField, facetPrefix)
+        {
+            groupedFacetHits = new List<GroupedFacetHit>(initialSize);
+            // int.MinValue is the sentinel ("empty slot") value; encoded pair
+            // indexes are computed from ords >= -1 and cannot collide with it.
+            segmentGroupedFacetHits = new SentinelIntSet(initialSize, int.MinValue);
+        }
+
+        // Implementation for single valued facet fields.
+        internal class SV : TermGroupFacetCollector
+        {
+
+            private SortedDocValues facetFieldTermsIndex;
+
+            internal SV(string groupField, string facetField, BytesRef facetPrefix, int initialSize)
+                        : base(groupField, facetField, facetPrefix, initialSize)
+            {
+            }
+
+            public override void Collect(int doc)
+            {
+                int facetOrd = facetFieldTermsIndex.GetOrd(doc);
+                if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd)
+                {
+                    return;
+                }
+
+                // Encode the (group ord, facet ord) pair into one int so it can be
+                // deduplicated via the sentinel int set. The stride ValueCount + 1
+                // leaves room for facetOrd == -1 (doc without a facet value).
+                int groupOrd = groupFieldTermsIndex.GetOrd(doc);
+                int segmentGroupedFacetsIndex = groupOrd * (facetFieldTermsIndex.ValueCount + 1) + facetOrd;
+                if (segmentGroupedFacetHits.Exists(segmentGroupedFacetsIndex))
+                {
+                    return;
+                }
+
+                segmentTotalCount++;
+                // +1 shifts ord -1 (missing facet value) into slot 0.
+                segmentFacetCounts[facetOrd + 1]++;
+
+                segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+
+                BytesRef groupKey;
+                if (groupOrd == -1)
+                {
+                    groupKey = null;
+                }
+                else
+                {
+                    groupKey = new BytesRef();
+                    groupFieldTermsIndex.LookupOrd(groupOrd, groupKey);
+                }
+
+                BytesRef facetKey;
+                if (facetOrd == -1)
+                {
+                    facetKey = null;
+                }
+                else
+                {
+                    facetKey = new BytesRef();
+                    facetFieldTermsIndex.LookupOrd(facetOrd, facetKey);
+                }
+
+                groupedFacetHits.Add(new GroupedFacetHit(groupKey, facetKey));
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    // Flush the counts of the previous segment before switching.
+                    if (segmentFacetCounts != null)
+                    {
+                        segmentResults.Add(CreateSegmentResult());
+                    }
+
+                    groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+                    facetFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, facetField);
+
+                    // 1+ to allow for the -1 "not set":
+                    segmentFacetCounts = new int[facetFieldTermsIndex.ValueCount + 1];
+                    segmentTotalCount = 0;
+
+                    // Re-encode previously seen (group, facet) pairs with this
+                    // segment's ords so they are not double-counted here.
+                    segmentGroupedFacetHits.Clear();
+                    foreach (GroupedFacetHit groupedFacetHit in groupedFacetHits)
+                    {
+                        int facetOrd = groupedFacetHit.facetValue == null ? -1 : facetFieldTermsIndex.LookupTerm(groupedFacetHit.facetValue);
+                        if (groupedFacetHit.facetValue != null && facetOrd < 0)
+                        {
+                            continue;
+                        }
+
+                        int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.LookupTerm(groupedFacetHit.groupValue);
+                        if (groupedFacetHit.groupValue != null && groupOrd < 0)
+                        {
+                            continue;
+                        }
+
+                        int segmentGroupedFacetsIndex = groupOrd * (facetFieldTermsIndex.ValueCount + 1) + facetOrd;
+                        segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+                    }
+
+                    // Translate the facet prefix (if any) into this segment's ord range.
+                    if (facetPrefix != null)
+                    {
+                        startFacetOrd = facetFieldTermsIndex.LookupTerm(facetPrefix);
+                        if (startFacetOrd < 0)
+                        {
+                            // Points to the ord one higher than facetPrefix
+                            startFacetOrd = -startFacetOrd - 1;
+                        }
+                        BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
+                        facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
+                        endFacetOrd = facetFieldTermsIndex.LookupTerm(facetEndPrefix);
+                        Debug.Assert(endFacetOrd < 0);
+                        endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
+                    }
+                    else
+                    {
+                        startFacetOrd = -1;
+                        endFacetOrd = facetFieldTermsIndex.ValueCount;
+                    }
+                }
+            }
+
+
+            protected override AbstractSegmentResult CreateSegmentResult()
+            {
+                return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.TermsEnum(), startFacetOrd, endFacetOrd);
+            }
+
+            internal class SegmentResult : AbstractGroupFacetCollector.AbstractSegmentResult
+            {
+
+                internal readonly TermsEnum tenum;
+
+                // counts[0] holds the "missing facet value" count (ord -1 shifted by +1).
+                internal SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd)
+                                : base(counts, total - counts[0], counts[0], endFacetOrd + 1)
+                {
+                    this.tenum = tenum;
+                    this.mergePos = startFacetOrd == -1 ? 1 : startFacetOrd + 1;
+                    if (mergePos < maxTermPos)
+                    {
+                        Debug.Assert(tenum != null);
+                        tenum.SeekExact(startFacetOrd == -1 ? 0 : startFacetOrd);
+                        mergeTerm = tenum.Term();
+                    }
+                }
+
+                protected internal override void NextTerm()
+                {
+                    mergeTerm = tenum.Next();
+                }
+            }
+        }
+
+        // Implementation for multi valued facet fields.
+        internal class MV : TermGroupFacetCollector
+        {
+
+            private SortedSetDocValues facetFieldDocTermOrds;
+            private TermsEnum facetOrdTermsEnum;
+            private int facetFieldNumTerms;
+            private readonly BytesRef scratch = new BytesRef();
+
+            internal MV(string groupField, string facetField, BytesRef facetPrefix, int initialSize)
+                         : base(groupField, facetField, facetPrefix, initialSize)
+            {
+            }
+
+            public override void Collect(int doc)
+            {
+                int groupOrd = groupFieldTermsIndex.GetOrd(doc);
+                // No facet terms in this segment: every doc counts toward the
+                // "missing facet" slot, unless a prefix filter excludes it.
+                if (facetFieldNumTerms == 0)
+                {
+                    int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1);
+                    if (facetPrefix != null || segmentGroupedFacetHits.Exists(segmentGroupedFacetsIndex))
+                    {
+                        return;
+                    }
+
+                    segmentTotalCount++;
+                    segmentFacetCounts[facetFieldNumTerms]++;
+
+                    segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+                    BytesRef groupKey;
+                    if (groupOrd == -1)
+                    {
+                        groupKey = null;
+                    }
+                    else
+                    {
+                        groupKey = new BytesRef();
+                        groupFieldTermsIndex.LookupOrd(groupOrd, groupKey);
+                    }
+                    groupedFacetHits.Add(new GroupedFacetHit(groupKey, null));
+                    return;
+                }
+
+                facetFieldDocTermOrds.Document = doc;
+                long ord;
+                bool empty = true;
+                while ((ord = facetFieldDocTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
+                {
+                    Process(groupOrd, (int)ord);
+                    empty = false;
+                }
+
+                if (empty)
+                {
+                    Process(groupOrd, facetFieldNumTerms); // this facet ord is reserved for docs not containing facet field.
+                }
+            }
+
+            // Counts a single (group, facet) pair, at most once per segment.
+            private void Process(int groupOrd, int facetOrd)
+            {
+                if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd)
+                {
+                    return;
+                }
+
+                // Encode the pair into one int; stride facetFieldNumTerms + 1
+                // reserves ord facetFieldNumTerms for the "missing facet" case.
+                int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
+                if (segmentGroupedFacetHits.Exists(segmentGroupedFacetsIndex))
+                {
+                    return;
+                }
+
+                segmentTotalCount++;
+                segmentFacetCounts[facetOrd]++;
+
+                segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+
+                BytesRef groupKey;
+                if (groupOrd == -1)
+                {
+                    groupKey = null;
+                }
+                else
+                {
+                    groupKey = new BytesRef();
+                    groupFieldTermsIndex.LookupOrd(groupOrd, groupKey);
+                }
+
+                BytesRef facetValue;
+                if (facetOrd == facetFieldNumTerms)
+                {
+                    facetValue = null;
+                }
+                else
+                {
+                    facetFieldDocTermOrds.LookupOrd(facetOrd, scratch);
+                    facetValue = BytesRef.DeepCopyOf(scratch); // must we?
+                }
+                groupedFacetHits.Add(new GroupedFacetHit(groupKey, facetValue));
+            }
+
+            public override AtomicReaderContext NextReader
+            {
+                set
+                {
+                    // Flush the counts of the previous segment before switching.
+                    if (segmentFacetCounts != null)
+                    {
+                        segmentResults.Add(CreateSegmentResult());
+                    }
+
+                    groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+                    facetFieldDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(value.AtomicReader, facetField);
+                    facetFieldNumTerms = (int)facetFieldDocTermOrds.ValueCount;
+                    if (facetFieldNumTerms == 0)
+                    {
+                        facetOrdTermsEnum = null;
+                    }
+                    else
+                    {
+                        facetOrdTermsEnum = facetFieldDocTermOrds.TermsEnum();
+                    }
+                    // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
+                    segmentFacetCounts = new int[facetFieldNumTerms + 1];
+                    segmentTotalCount = 0;
+
+                    // Re-encode previously seen (group, facet) pairs with this
+                    // segment's ords so they are not double-counted here.
+                    segmentGroupedFacetHits.Clear();
+                    foreach (GroupedFacetHit groupedFacetHit in groupedFacetHits)
+                    {
+                        int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.LookupTerm(groupedFacetHit.groupValue);
+                        if (groupedFacetHit.groupValue != null && groupOrd < 0)
+                        {
+                            continue;
+                        }
+
+                        int facetOrd;
+                        if (groupedFacetHit.facetValue != null)
+                        {
+                            if (facetOrdTermsEnum == null || !facetOrdTermsEnum.SeekExact(groupedFacetHit.facetValue))
+                            {
+                                continue;
+                            }
+                            facetOrd = (int)facetOrdTermsEnum.Ord();
+                        }
+                        else
+                        {
+                            facetOrd = facetFieldNumTerms;
+                        }
+
+                        // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
+                        int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
+                        segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
+                    }
+
+                    // Translate the facet prefix (if any) into this segment's ord range.
+                    if (facetPrefix != null)
+                    {
+                        TermsEnum.SeekStatus seekStatus;
+                        if (facetOrdTermsEnum != null)
+                        {
+                            seekStatus = facetOrdTermsEnum.SeekCeil(facetPrefix);
+                        }
+                        else
+                        {
+                            seekStatus = TermsEnum.SeekStatus.END;
+                        }
+
+                        if (seekStatus != TermsEnum.SeekStatus.END)
+                        {
+                            startFacetOrd = (int)facetOrdTermsEnum.Ord();
+                        }
+                        else
+                        {
+                            // No terms at or after the prefix: empty range.
+                            startFacetOrd = 0;
+                            endFacetOrd = 0;
+                            return;
+                        }
+
+                        BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
+                        facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
+                        seekStatus = facetOrdTermsEnum.SeekCeil(facetEndPrefix);
+                        if (seekStatus != TermsEnum.SeekStatus.END)
+                        {
+                            endFacetOrd = (int)facetOrdTermsEnum.Ord();
+                        }
+                        else
+                        {
+                            endFacetOrd = facetFieldNumTerms; // Don't include null...
+                        }
+                    }
+                    else
+                    {
+                        startFacetOrd = 0;
+                        endFacetOrd = facetFieldNumTerms + 1;
+                    }
+                }
+            }
+
+            protected override AbstractSegmentResult CreateSegmentResult()
+            {
+                return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldNumTerms, facetOrdTermsEnum, startFacetOrd, endFacetOrd);
+            }
+
+            internal class SegmentResult : AbstractGroupFacetCollector.AbstractSegmentResult
+            {
+
+                internal readonly TermsEnum tenum;
+
+                // counts[missingCountIndex] holds the "missing facet value" count.
+                internal SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd)
+                                : base(counts, total - counts[missingCountIndex], counts[missingCountIndex],
+                        endFacetOrd == missingCountIndex + 1 ? missingCountIndex : endFacetOrd)
+                {
+                    this.tenum = tenum;
+                    this.mergePos = startFacetOrd;
+                    if (tenum != null)
+                    {
+                        tenum.SeekExact(mergePos);
+                        mergeTerm = tenum.Term();
+                    }
+                }
+
+                protected internal override void NextTerm()
+                {
+                    mergeTerm = tenum.Next();
+                }
+            }
+        }
+    }
+
+
+    /// <summary>
+    /// Immutable pairing of a group value with one facet value observed for that
+    /// group. Either value may be null (document had no value in that field).
+    /// </summary>
+    internal class GroupedFacetHit
+    {
+        internal readonly BytesRef groupValue;
+        internal readonly BytesRef facetValue;
+
+        internal GroupedFacetHit(BytesRef groupValue, BytesRef facetValue)
+        {
+            this.facetValue = facetValue;
+            this.groupValue = groupValue;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs b/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs
new file mode 100644
index 0000000..da70372
--- /dev/null
+++ b/src/Lucene.Net.Grouping/Term/TermSecondPassGroupingCollector.cs
@@ -0,0 +1,65 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping.Terms
+{
+    /// <summary>
+    /// Concrete implementation of <see cref="AbstractSecondPassGroupingCollector{BytesRef}"/> that groups based on
+    /// field values and more specifically uses <see cref="SortedDocValues"/>
+    /// to collect grouped docs.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public class TermSecondPassGroupingCollector : AbstractSecondPassGroupingCollector<BytesRef>
+    {
+        // Maps segment-local group ordinals to slots in groupDocs.
+        // Sentinel is -2 so that -1 (the ord used for a null group value) can be
+        // stored as a regular key.
+        private readonly SentinelIntSet ordSet;
+        // Per-segment doc-values index for groupField; refreshed in NextReader.
+        private SortedDocValues index;
+        private readonly string groupField;
+
+        /// <summary>
+        /// Creates the collector for the given top groups from the first pass.
+        /// </summary>
+        /// <param name="groupField">field to group by (read via <see cref="FieldCache"/> sorted doc values)</param>
+        /// <param name="groups">groups selected by the first pass</param>
+        /// <param name="groupSort">sort order between groups</param>
+        /// <param name="withinGroupSort">sort order of docs inside each group</param>
+        /// <param name="maxDocsPerGroup">max docs to collect per group</param>
+        /// <param name="getScores">whether to keep scores for collected docs</param>
+        /// <param name="getMaxScores">whether to track the max score per group</param>
+        /// <param name="fillSortFields">whether to fill sort field values in the results</param>
+        public TermSecondPassGroupingCollector(string groupField, ICollection<SearchGroup<BytesRef>> groups, Sort groupSort, Sort withinGroupSort,
+                                               int maxDocsPerGroup, bool getScores, bool getMaxScores, bool fillSortFields)
+                  : base(groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields)
+        {
+            ordSet = new SentinelIntSet(groupMap.Count, -2);
+            this.groupField = groupField;
+            groupDocs = /*(SearchGroupDocs<BytesRef>[])*/ new AbstractSecondPassGroupingCollector.SearchGroupDocs<BytesRef>[ordSet.Keys.Length];
+        }
+
+        /// <summary>
+        /// Switches the collector to a new segment: reloads the doc-values index
+        /// for the group field and rebuilds the ord-to-group mapping, since group
+        /// ordinals are segment-local.
+        /// </summary>
+        public override AtomicReaderContext NextReader
+        {
+            set
+            {
+                base.NextReader = value;
+                index = FieldCache.DEFAULT.GetTermsIndex(value.AtomicReader, groupField);
+
+                // Rebuild ordSet
+                ordSet.Clear();
+                foreach (AbstractSecondPassGroupingCollector.SearchGroupDocs<BytesRef> group in groupMap.Values)
+                {
+                    // A null group value maps to ord -1; a non-null value that is
+                    // absent from this segment (ord < 0) is skipped entirely.
+                    int ord = group.groupValue == null ? -1 : index.LookupTerm(group.groupValue);
+                    if (group.groupValue == null || ord >= 0)
+                    {
+                        groupDocs[ordSet.Put(ord)] = group;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the group the given doc belongs to, or null when the doc's
+        /// group ordinal is not one of the tracked top groups.
+        /// </summary>
+        protected override AbstractSecondPassGroupingCollector.SearchGroupDocs<BytesRef> RetrieveGroup(int doc)
+        {
+            int slot = ordSet.Find(index.GetOrd(doc));
+            if (slot >= 0)
+            {
+                return groupDocs[slot];
+            }
+            return null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Grouping/TopGroups.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Grouping/TopGroups.cs b/src/Lucene.Net.Grouping/TopGroups.cs
index 017c975..091103d 100644
--- a/src/Lucene.Net.Grouping/TopGroups.cs
+++ b/src/Lucene.Net.Grouping/TopGroups.cs
@@ -1,7 +1,6 @@
 using System;
-using Lucene.Net.Search;
 
-namespace Lucene.Net.Grouping
+namespace Lucene.Net.Search.Grouping
 {
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,7 +24,7 @@ namespace Lucene.Net.Grouping
     /// 
     /// @lucene.experimental 
     /// </summary>
-    public class TopGroups<TGroupValueType>
+    public class TopGroups<TGroupValue>
     {
         /// <summary>
         /// Number of documents matching the search </summary>
@@ -41,7 +40,7 @@ namespace Lucene.Net.Grouping
 
         /// <summary>
         /// Group results in groupSort order </summary>
-        public readonly GroupDocs<TGroupValueType>[] Groups;
+        public readonly GroupDocs<TGroupValue>[] Groups;
 
         /// <summary>
         /// How groups are sorted against each other </summary>
@@ -57,7 +56,7 @@ namespace Lucene.Net.Grouping
         /// </summary>
         public readonly float MaxScore;
 
-        public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValueType>[] groups, float maxScore)
+        public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs<TGroupValue>[] groups, float maxScore)
         {
             GroupSort = groupSort;
             WithinGroupSort = withinGroupSort;
@@ -68,7 +67,7 @@ namespace Lucene.Net.Grouping
             MaxScore = maxScore;
         }
 
-        public TopGroups(TopGroups<TGroupValueType> oldTopGroups, int? totalGroupCount)
+        public TopGroups(TopGroups<TGroupValue> oldTopGroups, int? totalGroupCount)
         {
             GroupSort = oldTopGroups.GroupSort;
             WithinGroupSort = oldTopGroups.WithinGroupSort;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/9d72bcb3/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs b/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs
new file mode 100644
index 0000000..4cc29e3
--- /dev/null
+++ b/src/Lucene.Net.Tests.Grouping/AbstractGroupingTestCase.cs
@@ -0,0 +1,30 @@
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Lucene.Net.Search.Grouping
+{
+    /// <summary>
+    /// Base class for grouping related tests.
+    /// </summary>
+    // TODO (MvG) : The grouping tests contain a lot of code duplication. Try to move the common code to this class..
+    public abstract class AbstractGroupingTestCase : LuceneTestCase
+    {
+        /// <summary>
+        /// Generates a random realistic unicode string that is guaranteed to be
+        /// non-empty, for use as a group value.
+        /// </summary>
+        /// <returns>a randomly generated, non-empty string</returns>
+        protected string GenerateRandomNonEmptyString()
+        {
+            string randomValue;
+            do
+            {
+                // B/c of DV based impl we can't see the difference between an empty string and a null value.
+                // For that reason we don't generate empty string
+                // groups.
+                randomValue = TestUtil.RandomRealisticUnicodeString(Random());
+                //randomValue = TestUtil.randomSimpleString(random());
+            } while (randomValue.Length == 0); // was Java `"".equals(...)`, which does not compile in C#
+            return randomValue;
+        }
+    }
+}


Mime
View raw message