lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [05/16] lucenenet git commit: Move facets into src folder
Date Tue, 25 Nov 2014 18:52:06 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Range/LongRange.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Range/LongRange.cs b/src/Lucene.Net.Facet/Range/LongRange.cs
new file mode 100644
index 0000000..52da204
--- /dev/null
+++ b/src/Lucene.Net.Facet/Range/LongRange.cs
@@ -0,0 +1,239 @@
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Range
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+    using FunctionValues = Lucene.Net.Queries.Function.FunctionValues;
+    using ValueSource = Lucene.Net.Queries.Function.ValueSource;
+    using DocIdSet = Lucene.Net.Search.DocIdSet;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using Filter = Lucene.Net.Search.Filter;
+    using Bits = Lucene.Net.Util.Bits;
+
+    /// <summary>
+    /// Represents a range over long values.
+    /// 
+    /// @lucene.experimental 
+    /// </summary>
+    public sealed class LongRange : Range
+    {
+        // Normalized inclusive endpoints actually used for matching; the
+        // constructor folds exclusive bounds into these.
+        internal readonly long minIncl;
+        internal readonly long maxIncl;
+
+        /// <summary>
+        /// Minimum. </summary>
+        public readonly long min;
+
+        /// <summary>
+        /// Maximum. </summary>
+        public readonly long max;
+
+        /// <summary>
+        /// True if the minimum value is inclusive. </summary>
+        public readonly bool minInclusive;
+
+        /// <summary>
+        /// True if the maximum value is inclusive. </summary>
+        public readonly bool maxInclusive;
+
+        // TODO: can we require fewer args? (same for
+        // Double/FloatRange too)
+
+        /// <summary>
+        /// Create a LongRange. </summary>
+        public LongRange(string label, long minIn, bool minInclusive, long maxIn, bool maxInclusive)
+            : base(label)
+        {
+            this.min = minIn;
+            this.max = maxIn;
+            this.minInclusive = minInclusive;
+            this.maxInclusive = maxInclusive;
+
+            // Convert an exclusive lower bound into its inclusive equivalent;
+            // an exclusive bound at long.MaxValue can never match anything.
+            if (!minInclusive)
+            {
+                if (minIn != long.MaxValue)
+                {
+                    minIn++;
+                }
+                else
+                {
+                    FailNoMatch();
+                }
+            }
+
+            // Same for the upper bound, mirrored at long.MinValue.
+            if (!maxInclusive)
+            {
+                if (maxIn != long.MinValue)
+                {
+                    maxIn--;
+                }
+                else
+                {
+                    FailNoMatch();
+                }
+            }
+
+            // After normalization an empty interval is a caller error.
+            if (minIn > maxIn)
+            {
+                FailNoMatch();
+            }
+
+            this.minIncl = minIn;
+            this.maxIncl = maxIn;
+        }
+
+        /// <summary>
+        /// True if this range accepts the provided value. </summary>
+        // NOTE(review): lower-case method name breaks the PascalCase
+        // convention used elsewhere in this port — presumably kept close to
+        // the Java source; confirm callers before renaming (the filter below
+        // and LongRangeFacetCounts both rely on this API).
+        public bool accept(long value)
+        {
+            return value >= minIncl && value <= maxIncl;
+        }
+
+        public override string ToString()
+        {
+            return "LongRange(" + minIncl + " to " + maxIncl + ")";
+        }
+
+        public override Filter GetFilter(Filter fastMatchFilter, ValueSource valueSource)
+        {
+            return new FilterAnonymousInnerClassHelper(this, fastMatchFilter, valueSource);
+        }
+
+        // Stand-in for the Java anonymous Filter: accepts documents whose
+        // value (per valueSource) falls inside this range, optionally
+        // pre-screened by fastMatchFilter.
+        private class FilterAnonymousInnerClassHelper : Filter
+        {
+            private readonly LongRange outerInstance;
+
+            private Filter fastMatchFilter;
+            private ValueSource valueSource;
+
+            public FilterAnonymousInnerClassHelper(LongRange outerInstance, Filter fastMatchFilter, ValueSource valueSource)
+            {
+                this.outerInstance = outerInstance;
+                this.fastMatchFilter = fastMatchFilter;
+                this.valueSource = valueSource;
+            }
+
+
+            public override string ToString()
+            {
+                return "Filter(" + outerInstance.ToString() + ")";
+            }
+
+            public override DocIdSet GetDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+            {
+
+                // TODO: this is just like ValueSourceScorer,
+                // ValueSourceFilter (spatial),
+                // ValueSourceRangeFilter (solr); also,
+                // https://issues.apache.org/jira/browse/LUCENE-4251
+
+                // Empty context map: this caller has no per-request state to pass.
+                FunctionValues values = valueSource.GetValues(new Dictionary<string,object>(), context);
+
+                int maxDoc = context.Reader.MaxDoc;
+
+                // The fast-match filter must be random access (expose GetBits);
+                // a null DocIdSet from it means nothing can match at all.
+                Bits fastMatchBits;
+                if (fastMatchFilter != null)
+                {
+                    DocIdSet dis = fastMatchFilter.GetDocIdSet(context, null);
+                    if (dis == null)
+                    {
+                        // No documents match
+                        return null;
+                    }
+                    fastMatchBits = dis.GetBits();
+                    if (fastMatchBits == null)
+                    {
+                        throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+                    }
+                }
+                else
+                {
+                    fastMatchBits = null;
+                }
+
+                return new DocIdSetAnonymousInnerClassHelper(this, acceptDocs, values, maxDoc, fastMatchBits);
+            }
+
+            // DocIdSet usable only via GetBits() (random access per doc);
+            // GetIterator() deliberately throws — see below.
+            private class DocIdSetAnonymousInnerClassHelper : DocIdSet
+            {
+                private readonly FilterAnonymousInnerClassHelper outerInstance;
+
+                private Bits acceptDocs;
+                private FunctionValues values;
+                private int maxDoc;
+                private Bits fastMatchBits;
+
+                public DocIdSetAnonymousInnerClassHelper(FilterAnonymousInnerClassHelper outerInstance, Bits acceptDocs, FunctionValues values, int maxDoc, Bits fastMatchBits)
+                {
+                    this.outerInstance = outerInstance;
+                    this.acceptDocs = acceptDocs;
+                    this.values = values;
+                    this.maxDoc = maxDoc;
+                    this.fastMatchBits = fastMatchBits;
+                }
+
+
+                public override Bits GetBits()
+                {
+                    return new BitsAnonymousInnerClassHelper(this);
+                }
+
+                private class BitsAnonymousInnerClassHelper : Bits
+                {
+                    private readonly DocIdSetAnonymousInnerClassHelper outerInstance;
+
+                    public BitsAnonymousInnerClassHelper(DocIdSetAnonymousInnerClassHelper outerInstance)
+                    {
+                        this.outerInstance = outerInstance;
+                    }
+
+                    public virtual bool Get(int docID)
+                    {
+                        // Docs rejected by the live-docs / accept set never match.
+                        if (outerInstance.acceptDocs != null && outerInstance.acceptDocs.Get(docID) == false)
+                        {
+                            return false;
+                        }
+                        // Cheap pre-filter before the per-doc value lookup.
+                        if (outerInstance.fastMatchBits != null && outerInstance.fastMatchBits.Get(docID) == false)
+                        {
+                            return false;
+                        }
+                        // outerInstance.outerInstance.outerInstance is the LongRange itself.
+                        return outerInstance.outerInstance.outerInstance.accept(outerInstance.values.LongVal(docID));
+                    }
+
+                    
+                    public virtual int Length()
+                    {
+                        return outerInstance.maxDoc;
+                    }
+                }
+
+                public override DocIdSetIterator GetIterator()
+                {
+                    throw new System.NotSupportedException("this filter can only be accessed via bits()");
+                }
+
+            }
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Range/LongRangeCounter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Range/LongRangeCounter.cs b/src/Lucene.Net.Facet/Range/LongRangeCounter.cs
new file mode 100644
index 0000000..98123c2
--- /dev/null
+++ b/src/Lucene.Net.Facet/Range/LongRangeCounter.cs
@@ -0,0 +1,385 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.Facet.Range
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    /// <summary>
+    /// Counts how many times each range was seen;
+    ///  per-hit it's just a binary search (<seealso cref="#add"/>)
+    ///  against the elementary intervals, and in the end we
+    ///  rollup back to the original ranges. 
+    /// </summary>
+
+    internal sealed class LongRangeCounter
+    {
+
+        // Segment tree built over the elementary intervals; rollup() walks it
+        // to map leaf counts back onto the original (possibly overlapping) ranges.
+        internal readonly LongRangeNode root;
+        // Inclusive end of each elementary interval, ascending; the last entry
+        // is long.MaxValue so add() is guaranteed to find a slot.
+        internal readonly long[] boundaries;
+        // Per-elementary-interval hit counts, parallel to boundaries.
+        internal readonly int[] leafCounts;
+
+        // Used during rollup
+        private int leafUpto;
+        private int missingCount;
+
+        public LongRangeCounter(LongRange[] ranges)
+        {
+            // Maps all range inclusive endpoints to int flags; 1
+            // = start of interval, 2 = end of interval.  We need to
+            // track the start vs end case separately because if a
+            // given point is both, then it must be its own
+            // elementary interval:
+            IDictionary<long?, int?> endsMap = new Dictionary<long?, int?>();
+
+            endsMap[long.MinValue] = 1;
+            endsMap[long.MaxValue] = 2;
+
+            foreach (LongRange range in ranges)
+            {
+                int? cur;
+                if (!endsMap.TryGetValue(range.minIncl,out cur))
+                {
+                    endsMap[range.minIncl] = 1;
+                }
+                else
+                {
+                    endsMap[range.minIncl] = (int)cur | 1;
+                }
+                
+                if (!endsMap.TryGetValue(range.maxIncl,out cur))
+                {
+                    endsMap[range.maxIncl] = 2;
+                }
+                else
+                {
+                    endsMap[range.maxIncl] = (int)cur | 2;
+                }
+            }
+
+            var endsList = new List<long?>(endsMap.Keys);
+            endsList.Sort();
+
+            // Build elementaryIntervals (a 1D Venn diagram):
+            IList<InclusiveRange> elementaryIntervals = new List<InclusiveRange>();
+            int upto0 = 1;
+            // NOTE(review): every key placed in endsMap comes from a long, so
+            // the HasValue fallbacks to 0 here and below should be unreachable
+            // — an artifact of the mechanical Java-to-C# port; confirm.
+            long v = endsList[0].HasValue ? endsList[0].Value : 0;
+            long prev;
+            // flags == 3: the point is both a start and an end, so it forms
+            // its own single-value elementary interval.
+            if (endsMap[v] == 3)
+            {
+                elementaryIntervals.Add(new InclusiveRange(v, v));
+                prev = v + 1;
+            }
+            else
+            {
+                prev = v;
+            }
+
+            while (upto0 < endsList.Count)
+            {
+                v = endsList[upto0].HasValue ?  endsList[upto0].Value : 0;
+                int flags = endsMap[v].HasValue ? endsMap[v].Value : 0;
+                //System.out.println("  v=" + v + " flags=" + flags);
+                if (flags == 3)
+                {
+                    // This point is both an end and a start; we need to
+                    // separate it:
+                    if (v > prev)
+                    {
+                        elementaryIntervals.Add(new InclusiveRange(prev, v - 1));
+                    }
+                    elementaryIntervals.Add(new InclusiveRange(v, v));
+                    prev = v + 1;
+                }
+                else if (flags == 1)
+                {
+                    // This point is only the start of an interval;
+                    // attach it to next interval:
+                    if (v > prev)
+                    {
+                        elementaryIntervals.Add(new InclusiveRange(prev, v - 1));
+                    }
+                    prev = v;
+                }
+                else
+                {
+                    Debug.Assert(flags == 2);
+                    // This point is only the end of an interval; attach
+                    // it to last interval:
+                    elementaryIntervals.Add(new InclusiveRange(prev, v));
+                    prev = v + 1;
+                }
+                //System.out.println("    ints=" + elementaryIntervals);
+                upto0++;
+            }
+
+            // Build binary tree on top of intervals:
+            root = Split(0, elementaryIntervals.Count, elementaryIntervals);
+
+            // Set outputs, so we know which range to output for
+            // each node in the tree:
+            for (int i = 0; i < ranges.Length; i++)
+            {
+                root.addOutputs(i, ranges[i]);
+            }
+
+            // Set boundaries (ends of each elementary interval):
+            boundaries = new long[elementaryIntervals.Count];
+            for (int i = 0; i < boundaries.Length; i++)
+            {
+                boundaries[i] = elementaryIntervals[i].end;
+            }
+
+            leafCounts = new int[boundaries.Length];
+
+            //System.out.println("ranges: " + Arrays.toString(ranges));
+            //System.out.println("intervals: " + elementaryIntervals);
+            //System.out.println("boundaries: " + Arrays.toString(boundaries));
+            //System.out.println("root:\n" + root);
+        }
+
+        // Record one hit value: binary-search the elementary interval that
+        // contains v and bump its leaf count.
+        public void add(long v)
+        {
+            //System.out.println("add v=" + v);
+
+            // NOTE: this works too, but it's ~6% slower on a simple
+            // test with a high-freq TermQuery w/ range faceting on
+            // wikimediumall:
+            /*
+            int index = Arrays.binarySearch(boundaries, v);
+            if (index < 0) {
+              index = -index-1;
+            }
+            leafCounts[index]++;
+            */
+
+            // Binary search to find matched elementary range; we
+            // are guaranteed to find a match because the last
+            // boundary is Long.MAX_VALUE:
+
+            int lo = 0;
+            int hi = boundaries.Length - 1;
+            while (true)
+            {
+                // (uint) cast + >> reproduces Java's unsigned shift (>>> 1),
+                // avoiding overflow when lo + hi exceeds int.MaxValue.
+                int mid = (int)((uint)(lo + hi) >> 1);
+                //System.out.println("  cycle lo=" + lo + " hi=" + hi + " mid=" + mid + " boundary=" + boundaries[mid] + " to " + boundaries[mid+1]);
+                if (v <= boundaries[mid])
+                {
+                    if (mid == 0)
+                    {
+                        leafCounts[0]++;
+                        return;
+                    }
+                    else
+                    {
+                        hi = mid - 1;
+                    }
+                }
+                else if (v > boundaries[mid + 1])
+                {
+                    lo = mid + 1;
+                }
+                else
+                {
+                    leafCounts[mid + 1]++;
+                    //System.out.println("  incr @ " + (mid+1) + "; now " + leafCounts[mid+1]);
+                    return;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Fills counts corresponding to the original input
+        ///  ranges, returning the missing count (how many hits
+        ///  didn't match any ranges). 
+        /// </summary>
+        public int fillCounts(int[] counts)
+        {
+            //System.out.println("  rollup");
+            missingCount = 0;
+            leafUpto = 0;
+            rollup(root, counts, false);
+            return missingCount;
+        }
+
+        // Depth-first rollup: leaves consume leafCounts in order (leafUpto);
+        // a leaf with no output range anywhere above it contributes to
+        // missingCount.  Returns the total count under this node.
+        private int rollup(LongRangeNode node, int[] counts, bool sawOutputs)
+        {
+            int count;
+            sawOutputs |= node.outputs != null;
+            if (node.left != null)
+            {
+                count = rollup(node.left, counts, sawOutputs);
+                count += rollup(node.right, counts, sawOutputs);
+            }
+            else
+            {
+                // Leaf:
+                count = leafCounts[leafUpto];
+                leafUpto++;
+                if (!sawOutputs)
+                {
+                    // This is a missing count (no output ranges were
+                    // seen "above" us):
+                    missingCount += count;
+                }
+            }
+            if (node.outputs != null)
+            {
+                foreach (int rangeIndex in node.outputs)
+                {
+                    counts[rangeIndex] += count;
+                }
+            }
+            //System.out.println("  rollup node=" + node.start + " to " + node.end + ": count=" + count);
+            return count;
+        }
+
+        // Recursively builds a balanced segment tree over
+        // elementaryIntervals[start, end); leaves carry their interval index.
+        private static LongRangeNode Split(int start, int end, IList<InclusiveRange> elementaryIntervals)
+        {
+            if (start == end - 1)
+            {
+                // leaf
+                InclusiveRange range = elementaryIntervals[start];
+                return new LongRangeNode(range.start, range.end, null, null, start);
+            }
+            else
+            {
+                // Unsigned midpoint, as in add() above.
+                int mid = (int)((uint)(start + end) >> 1);
+                LongRangeNode left = Split(start, mid, elementaryIntervals);
+                LongRangeNode right = Split(mid, end, elementaryIntervals);
+                return new LongRangeNode(left.start, right.end, left, right, -1);
+            }
+        }
+
+        // Simple immutable [start, end] interval (both ends inclusive).
+        private sealed class InclusiveRange
+        {
+            public readonly long start;
+            public readonly long end;
+
+            public InclusiveRange(long start, long end)
+            {
+                Debug.Assert(end >= start);
+                this.start = start;
+                this.end = end;
+            }
+
+            public override string ToString()
+            {
+                return start + " to " + end;
+            }
+        }
+
+        /// <summary>
+        /// Holds one node of the segment tree. </summary>
+        public sealed class LongRangeNode
+        {
+            internal readonly LongRangeNode left;
+            internal readonly LongRangeNode right;
+
+            // Our range, inclusive:
+            internal readonly long start;
+            internal readonly long end;
+
+            // If we are a leaf, the index into elementary ranges that
+            // we point to:
+            internal readonly int leafIndex;
+
+            // Which range indices to output when a query goes
+            // through this node:
+            internal IList<int?> outputs;
+
+            public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right, int leafIndex)
+            {
+                this.start = start;
+                this.end = end;
+                this.left = left;
+                this.right = right;
+                this.leafIndex = leafIndex;
+            }
+
+            public override string ToString()
+            {
+                StringBuilder sb = new StringBuilder();
+                ToString(sb, 0);
+                return sb.ToString();
+            }
+
+            internal static void indent(StringBuilder sb, int depth)
+            {
+                for (int i = 0; i < depth; i++)
+                {
+                    sb.Append("  ");
+                }
+            }
+
+            /// <summary>
+            /// Recursively assigns range outputs to each node. </summary>
+            internal void addOutputs(int index, LongRange range)
+		{
+		  if (start >= range.minIncl && end <= range.maxIncl)
+		  {
+			// Our range is fully included in the incoming
+			// range; add to our output list:
+			if (outputs == null)
+			{
+			  outputs = new List<int?>();
+			}
+			outputs.Add(index);
+		  }
+		  else if (left != null)
+		  {
+			Debug.Assert(right != null);
+			// Recurse:
+			left.addOutputs(index, range);
+			right.addOutputs(index, range);
+		  }
+		}
+
+            internal void ToString(StringBuilder sb, int depth)
+            {
+                indent(sb, depth);
+                if (left == null)
+                {
+                    Debug.Assert(right == null);
+                    sb.Append("leaf: " + start + " to " + end);
+                }
+                else
+                {
+                    sb.Append("node: " + start + " to " + end);
+                }
+                if (outputs != null)
+                {
+                    sb.Append(" outputs=");
+                    sb.Append(outputs);
+                }
+                sb.Append('\n');
+
+                if (left != null)
+                {
+                    Debug.Assert(right != null);
+                    left.ToString(sb, depth + 1);
+                    right.ToString(sb, depth + 1);
+                }
+            }
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Range/LongRangeFacetCounts.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Range/LongRangeFacetCounts.cs b/src/Lucene.Net.Facet/Range/LongRangeFacetCounts.cs
new file mode 100644
index 0000000..8435bbe
--- /dev/null
+++ b/src/Lucene.Net.Facet/Range/LongRangeFacetCounts.cs
@@ -0,0 +1,143 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.Range
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    using MatchingDocs = FacetsCollector.MatchingDocs;
+    using FunctionValues = Lucene.Net.Queries.Function.FunctionValues;
+    using ValueSource = Lucene.Net.Queries.Function.ValueSource;
+    using LongFieldSource = Lucene.Net.Queries.Function.ValueSources.LongFieldSource;
+    using DocIdSet = Lucene.Net.Search.DocIdSet;
+    using Filter = Lucene.Net.Search.Filter;
+    using Bits = Lucene.Net.Util.Bits;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+
+    /// <summary>
+    /// <seealso cref="Facets"/> implementation that computes counts for
+    ///  dynamic long ranges from a provided <seealso cref="ValueSource"/>,
+    ///  using <seealso cref="FunctionValues#longVal"/>.  Use
+    ///  this for dimensions that change in real-time (e.g. a
+    ///  relative time based dimension like "Past day", "Past 2
+    ///  days", etc.) or that change for each request (e.g. 
+    ///  distance from the user's location, "< 1 km", "< 2 km",
+    ///  etc.).
+    /// 
+    ///  @lucene.experimental 
+    /// </summary>
+    public class LongRangeFacetCounts : RangeFacetCounts
+    {
+
+        /// <summary>
+        /// Create {@code LongRangeFacetCounts}, using {@link
+        ///  LongFieldSource} from the specified field. 
+        /// </summary>
+        public LongRangeFacetCounts(string field, FacetsCollector hits, params LongRange[] ranges)
+            : this(field, new LongFieldSource(field), hits, ranges)
+        {
+        }
+
+        /// <summary>
+        /// Create {@code RangeFacetCounts}, using the provided
+        ///  <seealso cref="ValueSource"/>. 
+        /// </summary>
+        public LongRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, params LongRange[] ranges)
+            : this(field, valueSource, hits, null, ranges)
+        {
+        }
+
+        /// <summary>
+        /// Create {@code RangeFacetCounts}, using the provided
+        ///  <seealso cref="ValueSource"/>, and using the provided Filter as
+        ///  a fastmatch: only documents passing the filter are
+        ///  checked for the matching ranges.  The filter must be
+        ///  random access (implement <seealso cref="DocIdSet#bits"/>). 
+        /// </summary>
+        public LongRangeFacetCounts(string field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, params LongRange[] ranges)
+            : base(field, ranges, fastMatchFilter)
+        {
+            // NOTE(review): GetMatchingDocs is referenced without parentheses
+            // (property access) — verify against FacetsCollector's declaration.
+            Count(valueSource, hits.GetMatchingDocs);
+        }
+
+        // Counts each matching doc's value into the per-range counters, then
+        // subtracts docs that matched no range (or had no value) from TotCount.
+        private void Count(ValueSource valueSource, IList<MatchingDocs> matchingDocs)
+        {
+
+            LongRange[] ranges = (LongRange[])this.Ranges;
+
+            LongRangeCounter counter = new LongRangeCounter(ranges);
+
+            int missingCount = 0;
+            foreach (MatchingDocs hits in matchingDocs)
+            {
+                FunctionValues fv = valueSource.GetValues(new Dictionary<string, object>(), hits.context);
+
+                TotCount += hits.totalHits;
+                // Resolve the optional random-access fast-match filter for
+                // this segment; null bits means "check every doc".
+                Bits bits;
+                if (FastMatchFilter != null)
+                {
+                    DocIdSet dis = FastMatchFilter.GetDocIdSet(hits.context, null);
+                    if (dis == null)
+                    {
+                        // No documents match
+                        continue;
+                    }
+                    bits = dis.GetBits();
+                    if (bits == null)
+                    {
+                        throw new System.ArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+                    }
+                }
+                else
+                {
+                    bits = null;
+                }
+
+                DocIdSetIterator docs = hits.bits.GetIterator();
+                int doc;
+                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    if (bits != null && bits.Get(doc) == false)
+                    {
+                        // NOTE(review): this increment has no effect — doc is
+                        // overwritten by NextDoc() on the next iteration
+                        // (likely a leftover from the upstream Java loop);
+                        // confirm and consider removing.
+                        doc++;
+                        continue;
+                    }
+                    // Skip missing docs:
+                    if (fv.Exists(doc))
+                    {
+                        counter.add(fv.LongVal(doc));
+                    }
+                    else
+                    {
+                        missingCount++;
+                    }
+                }
+            }
+
+            // Roll leaf counts up into the requested ranges; the return value
+            // is how many counted hits fell outside every range.
+            int x = counter.fillCounts(Counts);
+
+            missingCount += x;
+
+            //System.out.println("totCount " + totCount + " missingCount " + counter.missingCount);
+            TotCount -= missingCount;
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Range/Range.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Range/Range.cs b/src/Lucene.Net.Facet/Range/Range.cs
new file mode 100644
index 0000000..548b915
--- /dev/null
+++ b/src/Lucene.Net.Facet/Range/Range.cs
@@ -0,0 +1,90 @@
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.Range
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using ValueSource = Lucene.Net.Queries.Function.ValueSource;
+    using Filter = Lucene.Net.Search.Filter;
+    using FilteredQuery = Lucene.Net.Search.FilteredQuery; // javadocs
+    using Lucene.Net.Search; // javadocs
+
+    /// <summary>
+    /// Base class for a single labeled range.
+    /// 
+    ///  @lucene.experimental 
+    /// </summary>
+    public abstract class Range
+    {
+
+        /// <summary>
+        /// Label that identifies this range. </summary>
+        public readonly string Label;
+
+        /// <summary>
+        /// Sole constructor. </summary>
+        protected internal Range(string label)
+        {
+            if (label == null)
+            {
+                // NOTE(review): throwing NullReferenceException explicitly is
+                // unidiomatic C# — ArgumentNullException is the conventional
+                // type for a null argument (the upstream Java throws
+                // NullPointerException); confirm no callers catch NRE before
+                // changing.
+                throw new System.NullReferenceException("label cannot be null");
+            }
+            this.Label = label;
+        }
+
+        /// <summary>
+        /// Returns a new <seealso cref="Filter"/> accepting only documents
+        ///  in this range.  This filter is not general-purpose;
+        ///  you should either use it with <seealso cref="DrillSideways"/> by
+        ///  adding it to <seealso cref="DrillDownQuery#add"/>, or pass it to
+        ///  <seealso cref="FilteredQuery"/> using its {@link
+        ///  FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}.  If the
+        ///  <seealso cref="ValueSource"/> is static, e.g. an indexed numeric
+        ///  field, then it may be more efficient to use {@link
+        ///  NumericRangeFilter}.  The provided fastMatchFilter,
+        ///  if non-null, will first be consulted, and only if
+        ///  that is set for each document will the range then be
+        ///  checked. 
+        /// </summary>
+        public abstract Filter GetFilter(Filter fastMatchFilter, ValueSource valueSource);
+
+        /// <summary>
+        /// Returns a new <seealso cref="Filter"/> accepting only documents
+        ///  in this range.  This filter is not general-purpose;
+        ///  you should either use it with <seealso cref="DrillSideways"/> by
+        ///  adding it to <seealso cref="DrillDownQuery#add"/>, or pass it to
+        ///  <seealso cref="FilteredQuery"/> using its {@link
+        ///  FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}.  If the
+        ///  <seealso cref="ValueSource"/> is static, e.g. an indexed numeric
+        ///  field, then it may be more efficient to use <seealso cref="NumericRangeFilter"/>. 
+        /// </summary>
+        // Convenience overload: no fast-match pre-filter.
+        public virtual Filter GetFilter(ValueSource valueSource)
+        {
+            return GetFilter(null, valueSource);
+        }
+
+        /// <summary>
+        /// Invoke this for a useless range. </summary>
+        protected internal virtual void FailNoMatch()
+        {
+            throw new System.ArgumentException("range \"" + Label + "\" matches nothing");
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Range/RangeFacetCounts.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Range/RangeFacetCounts.cs b/src/Lucene.Net.Facet/Range/RangeFacetCounts.cs
new file mode 100644
index 0000000..29e02fb
--- /dev/null
+++ b/src/Lucene.Net.Facet/Range/RangeFacetCounts.cs
@@ -0,0 +1,99 @@
+using System.Collections.Generic;
+using Lucene.Net.Facet;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Range
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    using Filter = Lucene.Net.Search.Filter;
+
+    /// <summary>
+    /// Base class for range faceting.
+    /// 
+    ///  @lucene.experimental 
+    /// </summary>
+    public abstract class RangeFacetCounts : Facets
+    {
+        /// <summary>
+        /// Ranges passed to constructor. </summary>
+        protected internal readonly Range[] Ranges;
+
+        /// <summary>
+        /// Counts, initialized in by subclass. </summary>
+        protected internal readonly int[] Counts;
+
+        /// <summary>
+        /// Optional: if specified, we first test this Filter to
+        ///  see whether the document should be checked for
+        ///  matching ranges.  If this is null, all documents are
+        ///  checked. 
+        /// </summary>
+        protected internal readonly Filter FastMatchFilter;
+
+        /// <summary>
+        /// Our field name. </summary>
+        protected internal readonly string Field;
+
+        /// <summary>
+        /// Total number of hits. </summary>
+        protected internal int TotCount;
+
+        /// <summary>
+        /// Create <c>RangeFacetCounts</c>; <paramref name="fastMatchFilter"/>
+        /// may be null, in which case all documents are checked. </summary>
+        protected internal RangeFacetCounts(string field, Range[] ranges, Filter fastMatchFilter)
+        {
+            this.Field = field;
+            this.Ranges = ranges;
+            this.FastMatchFilter = fastMatchFilter;
+            Counts = new int[ranges.Length];
+        }
+
+        /// <summary>
+        /// Returns one label/count pair per range.  <paramref name="dim"/>
+        /// must equal the field passed to the constructor and
+        /// <paramref name="path"/> must be empty (range facets are flat);
+        /// otherwise throws <see cref="System.ArgumentException"/>. </summary>
+        public override FacetResult GetTopChildren(int topN, string dim, params string[] path)
+        {
+            if (dim.Equals(Field) == false)
+            {
+                throw new System.ArgumentException("invalid dim \"" + dim + "\"; should be \"" + Field + "\"");
+            }
+            if (path.Length != 0)
+            {
+                throw new System.ArgumentException("path.length should be 0");
+            }
+            LabelAndValue[] labelValues = new LabelAndValue[Counts.Length];
+            for (int i = 0; i < Counts.Length; i++)
+            {
+                labelValues[i] = new LabelAndValue(Ranges[i].Label, Counts[i]);
+            }
+            return new FacetResult(dim, path, TotCount, labelValues, labelValues.Length);
+        }
+
+        public override float GetSpecificValue(string dim, params string[] path)
+        {
+            // TODO: should we impl this?
+            throw new System.NotSupportedException();
+        }
+
+        /// <summary>
+        /// Returns the single dimension (our field). </summary>
+        public override IList<FacetResult> GetAllDims(int topN)
+        {
+            // BUGFIX: pass the field name rather than null; GetTopChildren
+            // dereferences dim (dim.Equals(Field)), so a null dim would throw
+            // NullReferenceException instead of returning a result.
+            return new[] { GetTopChildren(topN, Field) };
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs b/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs
new file mode 100644
index 0000000..8113d80
--- /dev/null
+++ b/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs
@@ -0,0 +1,158 @@
+using System.Collections.Generic;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.SortedSet
+{
+    /*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+    /// <summary>
+    /// Default implementation of <seealso cref="SortedSetDocValuesReaderState"/>.
+    /// </summary>
+    public class DefaultSortedSetDocValuesReaderState : SortedSetDocValuesReaderState
+    {
+
+        // Field the doc values were indexed into.
+        private readonly string field;
+        // Slow-wrapped composite view; used to create thread-safe
+        // MultiSortedSetDocValues per collector.
+        private readonly AtomicReader topReader;
+        // Total number of unique labels (ords) in the field.
+        private readonly int valueCount;
+
+        /// <summary>
+        /// <seealso cref="IndexReader"/> passed to the constructor. </summary>
+        public readonly IndexReader origReader;
+
+        // Maps each dimension (path prefix) to its contiguous ord range.
+        private readonly IDictionary<string, OrdRange> prefixToOrdRange = new Dictionary<string, OrdRange>();
+
+        /// <summary>
+        /// Creates this, pulling doc values from the specified
+        ///  field. 
+        /// </summary>
+        /// <exception cref="System.ArgumentException"> if the field was not
+        ///  indexed with SortedSetDocValues, holds more than
+        ///  <c>int.MaxValue</c> values, or contains a term that is not a
+        ///  2-level dim/value path. </exception>
+        public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
+        {
+            this.field = field;
+            this.origReader = reader;
+
+            // We need this to create thread-safe MultiSortedSetDV
+            // per collector:
+            topReader = SlowCompositeReaderWrapper.Wrap(reader);
+            SortedSetDocValues dv = topReader.GetSortedSetDocValues(field);
+            if (dv == null)
+            {
+                throw new System.ArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
+            }
+            if (dv.ValueCount > int.MaxValue)
+            {
+                throw new System.ArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.ValueCount);
+            }
+            valueCount = (int)dv.ValueCount;
+
+            // TODO: we can make this more efficient if eg we can be
+            // "involved" when OrdinalMap is being created?  Ie see
+            // each term/ord it's assigning as it goes...
+            string lastDim = null;
+            int startOrd = -1;
+
+            // TODO: this approach can work for full hierarchy?;
+            // TaxoReader can't do this since ords are not in
+            // "sorted order" ... but we should generalize this to
+            // support arbitrary hierarchy:
+
+            // Terms are in sorted order, so each dimension's ords are
+            // contiguous; record [startOrd, endOrd] per dimension.  Reuse a
+            // single BytesRef across lookups to avoid a per-ord allocation.
+            BytesRef term = new BytesRef();
+            for (int ord = 0; ord < valueCount; ord++)
+            {
+                dv.LookupOrd(ord, term);
+                string[] components = FacetsConfig.StringToPath(term.Utf8ToString());
+                if (components.Length != 2)
+                {
+                    throw new System.ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + term.Utf8ToString());
+                }
+                if (!components[0].Equals(lastDim))
+                {
+                    if (lastDim != null)
+                    {
+                        prefixToOrdRange[lastDim] = new OrdRange(startOrd, ord - 1);
+                    }
+                    startOrd = ord;
+                    lastDim = components[0];
+                }
+            }
+
+            // Close out the range for the final dimension:
+            if (lastDim != null)
+            {
+                prefixToOrdRange[lastDim] = new OrdRange(startOrd, valueCount - 1);
+            }
+        }
+
+        /// <summary>
+        /// Return top-level doc values. </summary>
+        public override SortedSetDocValues DocValues
+        {
+            get
+            {
+                return topReader.GetSortedSetDocValues(field);
+            }
+        }
+
+        /// <summary>
+        /// Returns mapping from prefix to <seealso cref="SortedSetDocValuesReaderState.OrdRange"/>. </summary>
+        public override IDictionary<string, OrdRange> PrefixToOrdRange
+        {
+            get
+            {
+                return prefixToOrdRange;
+            }
+        }
+
+        /// <summary>
+        /// Returns the <seealso cref="SortedSetDocValuesReaderState.OrdRange"/> for this
+        /// dimension, or null if the dimension was not indexed. </summary>
+        public override OrdRange GetOrdRange(string dim)
+        {
+            // BUGFIX: the dictionary indexer throws KeyNotFoundException for an
+            // unknown dim, but callers null-check the result (Java HashMap.get
+            // semantics) -- use TryGetValue so a missing dim yields null.
+            OrdRange result;
+            prefixToOrdRange.TryGetValue(dim, out result);
+            return result;
+        }
+
+        /// <summary>
+        /// Indexed field we are reading. </summary>
+        public override string Field
+        {
+            get
+            {
+                return field;
+            }
+        }
+
+        /// <summary>
+        /// Returns the top-level index reader passed to the constructor. </summary>
+        public override IndexReader OrigReader
+        {
+            get
+            {
+                return origReader;
+            }
+        }
+
+        /// <summary>
+        /// Number of unique labels. </summary>
+        public override int Size
+        {
+            get
+            {
+                return valueCount;
+            }
+        }
+
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetCounts.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetCounts.cs b/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetCounts.cs
new file mode 100644
index 0000000..8ccb190
--- /dev/null
+++ b/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetCounts.cs
@@ -0,0 +1,350 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using Lucene.Net.Facet;
+
+namespace Lucene.Net.Facet.SortedSet
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    using MatchingDocs = FacetsCollector.MatchingDocs;
+    using OrdRange = Lucene.Net.Facet.SortedSet.SortedSetDocValuesReaderState.OrdRange;
+    using AtomicReader = Lucene.Net.Index.AtomicReader;
+    using IndexReader = Lucene.Net.Index.IndexReader;
+    using MultiDocValues = Lucene.Net.Index.MultiDocValues;
+    using MultiSortedSetDocValues = Lucene.Net.Index.MultiDocValues.MultiSortedSetDocValues;
+    using ReaderUtil = Lucene.Net.Index.ReaderUtil;
+    using SortedSetDocValues = Lucene.Net.Index.SortedSetDocValues;
+    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+    using LongValues = Lucene.Net.Util.LongValues;
+
+    /// <summary>
+    /// Compute facets counts from previously
+    ///  indexed <seealso cref="SortedSetDocValuesFacetField"/>,
+    ///  without requiring a separate taxonomy index.  Faceting is
+    ///  a bit slower (~25%), and there is added cost on every
+    ///  <seealso cref="IndexReader"/> open to create a new
+    ///  <seealso cref="SortedSetDocValuesReaderState"/>.  Furthermore, this does
+    ///  not support hierarchical facets; only flat (dimension +
+    ///  label) facets, but it uses quite a bit less RAM to do
+    ///  so.
+    /// 
+    ///  <para><b>NOTE</b>: this class should be instantiated and
+    ///  then used from a single thread, because it holds a
+    ///  thread-private instance of <seealso cref="SortedSetDocValues"/>.
+    /// 
+    /// </para>
+    /// <para><b>NOTE</b>: tie-break is by unicode sort order
+    /// 
+    /// @lucene.experimental 
+    /// </para>
+    /// </summary>
+    public class SortedSetDocValuesFacetCounts : Facets
+    {
+
+        // Reader state providing the dim -> OrdRange mapping and doc values.
+        internal readonly SortedSetDocValuesReaderState state;
+        // Top-level doc values pulled from the state; thread-private (see class docs).
+        internal readonly SortedSetDocValues dv;
+        // Indexed field name being faceted.
+        internal readonly string field;
+        // Hit count per global ordinal, filled once by Count().
+        internal readonly int[] counts;
+
+        /// <summary>
+        /// Sparse faceting: returns any dimension that had any
+        ///  hits, topCount labels per dimension. 
+        /// </summary>
+        public SortedSetDocValuesFacetCounts(SortedSetDocValuesReaderState state, FacetsCollector hits)
+        {
+            this.state = state;
+            this.field = state.Field;
+            dv = state.DocValues;
+            counts = new int[state.Size];
+            //System.out.println("field=" + field);
+            // Tally all matching docs up front; later queries only read counts[].
+            Count(hits.GetMatchingDocs);
+        }
+
+        /// <summary>
+        /// Returns the topN labels (by count) under the given flat dimension;
+        /// path must be empty.  Throws <see cref="System.ArgumentException"/>
+        /// for non-positive topN, a non-empty path, or an unindexed dim. </summary>
+        public override FacetResult GetTopChildren(int topN, string dim, params string[] path)
+        {
+            if (topN <= 0)
+            {
+                throw new System.ArgumentException("topN must be > 0 (got: " + topN + ")");
+            }
+            if (path.Length > 0)
+            {
+                throw new System.ArgumentException("path should be 0 length");
+            }
+            OrdRange ordRange = state.GetOrdRange(dim);
+            if (ordRange == null)
+            {
+                throw new System.ArgumentException("dimension \"" + dim + "\" was not indexed");
+            }
+            return GetDim(dim, ordRange, topN);
+        }
+
+        /// <summary>
+        /// Builds the FacetResult for one dimension by scanning its ord range
+        /// and keeping the topN counts in a priority queue; returns null when
+        /// no ord in the range had any hits. </summary>
+        private FacetResult GetDim(string dim, OrdRange ordRange, int topN)
+        {
+
+            TopOrdAndIntQueue q = null;
+
+            // Smallest count currently in the queue, once it is full.
+            int bottomCount = 0;
+
+            int dimCount = 0;
+            int childCount = 0;
+
+            TopOrdAndIntQueue.OrdAndValue reuse = null;
+            //System.out.println("getDim : " + ordRange.start + " - " + ordRange.end);
+            for (int ord = ordRange.Start; ord <= ordRange.End; ord++)
+            {
+                //System.out.println("  ord=" + ord + " count=" + counts[ord]);
+                if (counts[ord] > 0)
+                {
+                    dimCount += counts[ord];
+                    childCount++;
+                    if (counts[ord] > bottomCount)
+                    {
+                        if (reuse == null)
+                        {
+                            reuse = new TopOrdAndIntQueue.OrdAndValue();
+                        }
+                        reuse.Ord = ord;
+                        reuse.Value = counts[ord];
+                        if (q == null)
+                        {
+                            // Lazy init, so we don't create this for the
+                            // sparse case unnecessarily
+                            q = new TopOrdAndIntQueue(topN);
+                        }
+                        // InsertWithOverflow hands back the evicted entry for reuse.
+                        reuse = q.InsertWithOverflow(reuse);
+                        if (q.Size() == topN)
+                        {
+                            bottomCount = q.Top().Value;
+                        }
+                    }
+                }
+            }
+
+            if (q == null)
+            {
+                // No hits at all for this dimension.
+                return null;
+            }
+
+            // Pop in ascending order, filling the array back-to-front so the
+            // result ends up sorted by descending count.
+            LabelAndValue[] labelValues = new LabelAndValue[q.Size()];
+            for (int i = labelValues.Length - 1; i >= 0; i--)
+            {
+                TopOrdAndIntQueue.OrdAndValue ordAndValue = q.Pop();
+                var term = new BytesRef();
+                dv.LookupOrd(ordAndValue.Ord, term);
+                string[] parts = FacetsConfig.StringToPath(term.Utf8ToString());
+                // parts[0] is the dim, parts[1] the label (flat 2-level hierarchy).
+                labelValues[i] = new LabelAndValue(parts[1], ordAndValue.Value);
+            }
+
+            return new FacetResult(dim, new string[0], dimCount, labelValues, childCount);
+        }
+
+        /// <summary>
+        /// Does all the "real work" of tallying up the counts. </summary>
+        private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
+        {
+            //System.out.println("ssdv count");
+
+            MultiDocValues.OrdinalMap ordinalMap;
+
+            // TODO: is this right?  really, we need a way to
+            // verify that this ordinalMap "matches" the leaves in
+            // matchingDocs...
+            if (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
+            {
+                ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping;
+            }
+            else
+            {
+                // Single segment (or single MatchingDocs): segment ords are
+                // already global, no remapping needed.
+                ordinalMap = null;
+            }
+
+            IndexReader origReader = state.OrigReader;
+
+            foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
+            {
+
+                var reader = hits.context.AtomicReader;
+                //System.out.println("  reader=" + reader);
+                // LUCENE-5090: make sure the provided reader context "matches"
+                // the top-level reader passed to the
+                // SortedSetDocValuesReaderState, else cryptic
+                // AIOOBE can happen:
+                if (!Equals(ReaderUtil.GetTopLevelContext(hits.context).Reader, origReader))
+                {
+                    throw new ThreadStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
+                }
+
+                SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
+                if (segValues == null)
+                {
+                    // This segment has no values for the field.
+                    continue;
+                }
+
+                DocIdSetIterator docs = hits.bits.GetIterator();
+
+                // TODO: yet another option is to count all segs
+                // first, only in seg-ord space, and then do a
+                // merge-sort-PQ in the end to only "resolve to
+                // global" those seg ords that can compete, if we know
+                // we just want top K?  ie, this is the same algo
+                // that'd be used for merging facets across shards
+                // (distributed faceting).  but this has much higher
+                // temp ram req'ts (sum of number of ords across all
+                // segs)
+                if (ordinalMap != null)
+                {
+                    int segOrd = hits.context.Ord;
+
+                    int numSegOrds = (int)segValues.ValueCount;
+
+                    // Heuristic: with few hits, remap each ord to global as we
+                    // iterate; with many hits, count in segment-ord space first
+                    // and remap each distinct ord only once at the end.
+                    if (hits.totalHits < numSegOrds / 10)
+                    {
+                        //System.out.println("    remap as-we-go");
+                        // Remap every ord to global ord as we iterate:
+                        int doc;
+                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                        {
+                            //System.out.println("    doc=" + doc);
+                            segValues.Document = doc;
+                            int term = (int)segValues.NextOrd();
+                            while (term != SortedSetDocValues.NO_MORE_ORDS)
+                            {
+                                //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
+                                counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
+                                term = (int)segValues.NextOrd();
+                            }
+                        }
+                    }
+                    else
+                    {
+                        //System.out.println("    count in seg ord first");
+
+                        // First count in seg-ord space:
+                        int[] segCounts = new int[numSegOrds];
+                        int doc;
+                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                        {
+                            //System.out.println("    doc=" + doc);
+                            segValues.Document = doc;
+                            int term = (int)segValues.NextOrd();
+                            while (term != SortedSetDocValues.NO_MORE_ORDS)
+                            {
+                                //System.out.println("      ord=" + term);
+                                segCounts[term]++;
+                                term = (int)segValues.NextOrd();
+                            }
+                        }
+
+                        // Then, migrate to global ords:
+                        for (int ord = 0; ord < numSegOrds; ord++)
+                        {
+                            int count = segCounts[ord];
+                            if (count != 0)
+                            {
+                                //System.out.println("    migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord));
+                                counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += count;
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    // No ord mapping (e.g., single segment index):
+                    // just aggregate directly into counts:
+                    int doc;
+                    while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                    {
+                        segValues.Document = doc;
+                        int term = (int)segValues.NextOrd();
+                        while (term != SortedSetDocValues.NO_MORE_ORDS)
+                        {
+                            counts[term]++;
+                            term = (int)segValues.NextOrd();
+                        }
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the count for the given dim + single-element path, or -1
+        /// if that label was not indexed; throws for any other path length. </summary>
+        public override float GetSpecificValue(string dim, params string[] path)
+        {
+            if (path.Length != 1)
+            {
+                throw new System.ArgumentException("path must be length=1");
+            }
+            int ord = (int)dv.LookupTerm(new BytesRef(FacetsConfig.PathToString(dim, path)));
+            if (ord < 0)
+            {
+                // Term not present in the doc values.
+                return -1;
+            }
+
+            return counts[ord];
+        }
+
+        /// <summary>
+        /// Returns a result for every indexed dimension that had hits,
+        /// sorted by highest count, ties broken by dim name. </summary>
+        public override IList<FacetResult> GetAllDims(int topN)
+        {
+
+            IList<FacetResult> results = new List<FacetResult>();
+            foreach (KeyValuePair<string, OrdRange> ent in state.PrefixToOrdRange)
+            {
+                FacetResult fr = GetDim(ent.Key, ent.Value, topN);
+                if (fr != null)
+                {
+                    results.Add(fr);
+                }
+            }
+
+            var resultArray = results.ToArray();
+            // Sort by highest count:
+            Array.Sort(resultArray, new ComparatorAnonymousInnerClassHelper(this));
+            return resultArray;
+        }
+
+        /// <summary>
+        /// Orders FacetResults by descending Value, ties broken by Dim name. </summary>
+        private class ComparatorAnonymousInnerClassHelper : IComparer<FacetResult>
+        {
+            private readonly SortedSetDocValuesFacetCounts outerInstance;
+
+            public ComparatorAnonymousInnerClassHelper(SortedSetDocValuesFacetCounts outerInstance)
+            {
+                this.outerInstance = outerInstance;
+            }
+
+            public virtual int Compare(FacetResult a, FacetResult b)
+            {
+                if ((int)a.Value > (int)b.Value)
+                {
+                    return -1;
+                }
+                else if ((int)b.Value > (int)a.Value)
+                {
+                    return 1;
+                }
+                else
+                {
+                    return a.Dim.CompareTo(b.Dim);
+                }
+            }
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetField.cs b/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetField.cs
new file mode 100644
index 0000000..c860d56
--- /dev/null
+++ b/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesFacetField.cs
@@ -0,0 +1,65 @@
+namespace Lucene.Net.Facet.SortedSet
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using Field = Lucene.Net.Documents.Field;
+    using FieldType = Lucene.Net.Documents.FieldType;
+
+    /// <summary>
+    /// Add an instance of this to your Document for every facet
+    ///  label to be indexed via SortedSetDocValues. 
+    /// </summary>
+    public class SortedSetDocValuesFacetField : Field
+    {
+
+        /// <summary>
+        /// Indexed <seealso cref="FieldType"/> shared by every instance. </summary>
+        public static readonly FieldType TYPE = new FieldType();
+
+        static SortedSetDocValuesFacetField()
+        {
+            TYPE.Indexed = true;
+            TYPE.Freeze();
+        }
+
+        /// <summary>
+        /// Dimension. </summary>
+        public readonly string Dim;
+
+        /// <summary>
+        /// Label. </summary>
+        public readonly string Label;
+
+        /// <summary>
+        /// Sole constructor; both label and dim are validated via
+        /// <seealso cref="FacetField"/>. </summary>
+        public SortedSetDocValuesFacetField(string dim, string label)
+            : base("dummy", TYPE)
+        {
+            // Validate in the same order as before: label first, then dim.
+            FacetField.VerifyLabel(label);
+            FacetField.VerifyLabel(dim);
+            Dim = dim;
+            Label = label;
+        }
+
+        public override string ToString()
+        {
+            return string.Concat("SortedSetDocValuesFacetField(dim=", Dim, " label=", Label, ")");
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesReaderState.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesReaderState.cs b/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesReaderState.cs
new file mode 100644
index 0000000..ef7e0fe
--- /dev/null
+++ b/src/Lucene.Net.Facet/SortedSet/SortedSetDocValuesReaderState.cs
@@ -0,0 +1,103 @@
+using System.Collections.Generic;
+
+namespace Lucene.Net.Facet.SortedSet
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    using IndexReader = Lucene.Net.Index.IndexReader;
+    using SortedSetDocValues = Lucene.Net.Index.SortedSetDocValues;
+
+    /// <summary>
+    /// Wraps a <seealso cref="IndexReader"/> and resolves ords
+    ///  using existing <seealso cref="SortedSetDocValues"/> APIs without a
+    ///  separate taxonomy index.  This only supports flat facets
+    ///  (dimension + label), and it makes faceting a bit
+    ///  slower, adds some cost at reopen time, but avoids
+    ///  managing the separate taxonomy index.  It also requires
+    ///  less RAM than the taxonomy index, as it manages the flat
+    ///  (2-level) hierarchy more efficiently.  In addition, the
+    ///  tie-break during faceting is now meaningful (in label
+    ///  sorted order).
+    /// 
+    ///  <para><b>NOTE</b>: creating an instance of this class is
+    ///  somewhat costly, as it computes per-segment ordinal maps,
+    ///  so you should create it once and re-use that one instance
+    ///  for a given <seealso cref="IndexReader"/>. 
+    /// </para>
+    /// </summary>
+
+    public abstract class SortedSetDocValuesReaderState
+    {
+
+        /// <summary>
+        /// Holds start/end range of ords, which maps to one
+        ///  dimension (someday we may generalize it to map to
+        ///  hierarchies within one dimension). 
+        /// </summary>
+        public sealed class OrdRange
+        {
+            /// <summary>
+            /// Start of range, inclusive: </summary>
+            public readonly int Start;
+            /// <summary>
+            /// End of range, inclusive: </summary>
+            public readonly int End;
+
+            /// <summary>
+            /// Start and end are inclusive. </summary>
+            public OrdRange(int start, int end)
+            {
+                this.Start = start;
+                this.End = end;
+            }
+        }
+
+        /// <summary>
+        /// Sole constructor. </summary>
+        protected internal SortedSetDocValuesReaderState()
+        {
+        }
+
+        /// <summary>
+        /// Return top-level doc values. </summary>
+        public abstract SortedSetDocValues DocValues { get; }
+
+        /// <summary>
+        /// Indexed field we are reading. </summary>
+        public abstract string Field { get; }
+
+        /// <summary>
+        /// Returns the <seealso cref="OrdRange"/> for this dimension.
+        /// NOTE(review): facet-counting callers null-check the result, so
+        /// implementations should return null for an unknown dim rather than
+        /// throwing — confirm each implementation honors this. </summary>
+        public abstract OrdRange GetOrdRange(string dim);
+
+        /// <summary>
+        /// Returns mapping from prefix to <seealso cref="OrdRange"/>. </summary>
+        public abstract IDictionary<string, OrdRange> PrefixToOrdRange { get; }
+
+        /// <summary>
+        /// Returns top-level index reader. </summary>
+        public abstract IndexReader OrigReader { get; }
+
+        /// <summary>
+        /// Number of unique labels. </summary>
+        public abstract int Size { get; }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/AssociationFacetField.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/AssociationFacetField.cs b/src/Lucene.Net.Facet/Taxonomy/AssociationFacetField.cs
new file mode 100644
index 0000000..6113eef
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/AssociationFacetField.cs
@@ -0,0 +1,91 @@
+using Lucene.Net.Facet;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    using Document = Lucene.Net.Documents.Document; // javadocs
+    using Field = Lucene.Net.Documents.Field;
+    using FieldType = Lucene.Net.Documents.FieldType;
+    using BytesRef = Lucene.Net.Util.BytesRef;
+
+    /// <summary>
+    /// Add an instance of this to your <seealso cref="Document"/> to add
+    ///  a facet label associated with an arbitrary byte[].
+    ///  This will require a custom <seealso cref="Facets"/>
+    ///  implementation at search time; see <seealso cref="IntAssociationFacetField"/>
+    ///  and <seealso cref="FloatAssociationFacetField"/> to use existing
+    ///  <seealso cref="Facets"/> implementations.
+    /// 
+    ///  @lucene.experimental 
+    /// </summary>
+    public class AssociationFacetField : Field
+    {
+
+        /// <summary>
+        /// Indexed <seealso cref="FieldType"/> shared by all association facet fields. </summary>
+        public static readonly FieldType TYPE = new FieldType();
+        static AssociationFacetField()
+        {
+            TYPE.Indexed = true;
+            TYPE.Freeze();
+        }
+
+        /// <summary>
+        /// Dimension for this field. </summary>
+        public readonly string dim;
+
+        /// <summary>
+        /// Facet path for this field. </summary>
+        public readonly string[] path;
+
+        /// <summary>
+        /// Associated value. </summary>
+        public readonly BytesRef assoc;
+
+        /// <summary>
+        /// Creates this from {@code dim} and {@code path} and an
+        ///  association 
+        /// </summary>
+        /// <param name="assoc"> arbitrary bytes to associate with this facet label </param>
+        /// <param name="dim"> dimension name; must be a valid facet label </param>
+        /// <param name="path"> facet path under the dimension; must be non-empty and
+        ///  every element must be a valid facet label </param>
+        public AssociationFacetField(BytesRef assoc, string dim, params string[] path)
+            : base("dummy", TYPE)
+        {
+            // Fail fast on an empty path before doing any per-label validation
+            // or field assignment (same exception type and message as before;
+            // previously this was only checked after validating the labels).
+            if (path.Length == 0)
+            {
+                throw new System.ArgumentException("path must have at least one element");
+            }
+            FacetField.VerifyLabel(dim);
+            foreach (string label in path)
+            {
+                FacetField.VerifyLabel(label);
+            }
+            this.dim = dim;
+            this.assoc = assoc;
+            this.path = path;
+        }
+
+        public override string ToString()
+        {
+            return "AssociationFacetField(dim=" + dim + " path=" + Arrays.ToString(path) + " bytes=" + assoc + ")";
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/CachedOrdinalsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/CachedOrdinalsReader.cs b/src/Lucene.Net.Facet/Taxonomy/CachedOrdinalsReader.cs
new file mode 100644
index 0000000..0471f65
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/CachedOrdinalsReader.cs
@@ -0,0 +1,208 @@
+using System;
+using System.Collections.Generic;
+using System.Threading;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    using DocValuesFormat = Lucene.Net.Codecs.DocValuesFormat;
+    using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
+    using BinaryDocValues = Lucene.Net.Index.BinaryDocValues;
+    using Accountable = Lucene.Net.Util.Accountable;
+    using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+    using IntsRef = Lucene.Net.Util.IntsRef;
+    using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator;
+
+    /// <summary>
+    /// A per-segment cache of documents' facet ordinals. Every
+    /// <seealso cref="CachedOrds"/> holds the ordinals in a raw
+    /// <c>int[]</c>, and therefore consumes as much RAM as the total
+    /// number of ordinals found in the segment, but saves the
+    /// CPU cost of decoding ordinals during facet counting.
+    /// 
+    /// <para>
+    /// <b>NOTE:</b> every <seealso cref="CachedOrds"/> is limited to 2.1B
+    /// total ordinals. If that is a limitation for you then
+    /// consider limiting the segment size to fewer documents, or
+    /// use an alternative cache which pages through the category
+    /// ordinals.
+    /// 
+    /// </para>
+    /// <para>
+    /// <b>NOTE:</b> when using this cache, it is advised to use
+    /// a <seealso cref="DocValuesFormat"/> that does not cache the data in
+    /// memory, at least for the category lists fields, or
+    /// otherwise you'll be doing double-caching.
+    /// 
+    /// </para>
+    /// <para>
+    /// <b>NOTE:</b> create one instance of this and re-use it
+    /// for all facet implementations (the cache is per-instance,
+    /// not static).
+    /// </para>
+    /// </summary>
+    public class CachedOrdinalsReader : OrdinalsReader, Accountable
+    {
+
+        private readonly OrdinalsReader source;
+
+        // Keyed by the segment core's cache key; a weak dictionary lets entries
+        // be collected once the segment reader is no longer referenced.
+        private readonly IDictionary<object, CachedOrds> ordsCache = new WeakDictionary<object, CachedOrds>();
+
+        /// <summary>
+        /// Sole constructor. </summary>
+        /// <param name="source"> the non-cached reader whose decoded ordinals are cached per segment </param>
+        public CachedOrdinalsReader(OrdinalsReader source)
+        {
+            this.source = source;
+        }
+
+        // Returns the cached ordinals for this segment, decoding and caching
+        // them on first access. Synchronized so concurrent readers do not
+        // decode the same segment twice.
+        private CachedOrds GetCachedOrds(AtomicReaderContext context)
+        {
+            lock (this)
+            {
+                object cacheKey = context.Reader.CoreCacheKey;
+                CachedOrds ords;
+                // Use TryGetValue rather than the indexer: the standard
+                // IDictionary indexer throws KeyNotFoundException on a miss,
+                // so the previous "indexer then null-check" pattern only worked
+                // against the custom WeakDictionary implementation.
+                if (!ordsCache.TryGetValue(cacheKey, out ords) || ords == null)
+                {
+                    ords = new CachedOrds(source.GetReader(context), context.Reader.MaxDoc);
+                    ordsCache[cacheKey] = ords;
+                }
+
+                return ords;
+            }
+        }
+
+        /// <summary>
+        /// Indexed field name, delegated to the underlying reader. </summary>
+        public override string IndexFieldName
+        {
+            get
+            {
+                return source.IndexFieldName;
+            }
+        }
+
+        public override OrdinalsSegmentReader GetReader(AtomicReaderContext context)
+        {
+            CachedOrds cachedOrds = GetCachedOrds(context);
+            return new OrdinalsSegmentReaderAnonymousInnerClassHelper(this, cachedOrds);
+        }
+
+        // Segment reader that serves ordinals straight out of the cached arrays,
+        // without re-decoding the underlying BinaryDocValues.
+        private class OrdinalsSegmentReaderAnonymousInnerClassHelper : OrdinalsSegmentReader
+        {
+            private readonly CachedOrdinalsReader outerInstance;
+
+            private readonly CachedOrds cachedOrds;
+
+            public OrdinalsSegmentReaderAnonymousInnerClassHelper(CachedOrdinalsReader outerInstance, CachedOrds cachedOrds)
+            {
+                this.outerInstance = outerInstance;
+                this.cachedOrds = cachedOrds;
+            }
+
+            public override void Get(int docID, IntsRef ordinals)
+            {
+                // Point the caller's IntsRef at the shared cached array; the
+                // doc's ordinals are the slice [offsets[docID], offsets[docID + 1]).
+                ordinals.Ints = cachedOrds.ordinals;
+                ordinals.Offset = cachedOrds.offsets[docID];
+                ordinals.Length = cachedOrds.offsets[docID + 1] - ordinals.Offset;
+            }
+        }
+
+        /// <summary>
+        /// Holds the cached ordinals in two parallel <c>int[]</c> arrays. </summary>
+        public sealed class CachedOrds : Accountable
+        {
+
+            /// <summary>
+            /// Index into <seealso cref="#ordinals"/> for each document. </summary>
+            public readonly int[] offsets;
+
+            /// <summary>
+            /// Holds ords for all docs. </summary>
+            public readonly int[] ordinals;
+
+            /// <summary>
+            /// Creates a new <seealso cref="CachedOrds"/> from the <seealso cref="BinaryDocValues"/>.
+            /// Assumes that the <seealso cref="BinaryDocValues"/> is not <c>null</c>.
+            /// </summary>
+            public CachedOrds(OrdinalsSegmentReader source, int maxDoc)
+            {
+                offsets = new int[maxDoc + 1];
+                int[] ords = new int[maxDoc]; // let's assume one ordinal per-document as an initial size
+
+                // this aggregator is limited to Integer.MAX_VALUE total ordinals.
+                long totOrds = 0;
+                IntsRef values = new IntsRef(32);
+                for (int docID = 0; docID < maxDoc; docID++)
+                {
+                    offsets[docID] = (int)totOrds;
+                    source.Get(docID, values);
+                    long nextLength = totOrds + values.Length;
+                    if (nextLength > ords.Length)
+                    {
+                        if (nextLength > ArrayUtil.MAX_ARRAY_LENGTH)
+                        {
+                            // Was ThreadStateException, which signals an invalid
+                            // System.Threading.Thread operation; InvalidOperationException
+                            // is the .NET analog of Java's IllegalStateException here.
+                            throw new InvalidOperationException("too many ordinals (>= " + nextLength + ") to cache");
+                        }
+                        ords = ArrayUtil.Grow(ords, (int)nextLength);
+                    }
+                    Array.Copy(values.Ints, 0, ords, (int)totOrds, values.Length);
+                    totOrds = nextLength;
+                }
+                offsets[maxDoc] = (int)totOrds;
+
+                // if ords array is bigger by more than 10% of what we really need, shrink it
+                if ((double)totOrds / ords.Length < 0.9)
+                {
+                    this.ordinals = new int[(int)totOrds];
+                    Array.Copy(ords, 0, this.ordinals, 0, (int)totOrds);
+                }
+                else
+                {
+                    this.ordinals = ords;
+                }
+            }
+
+            public long RamBytesUsed()
+            {
+                long mem = RamUsageEstimator.ShallowSizeOf(this) + RamUsageEstimator.SizeOf(offsets);
+                if (offsets != ordinals)
+                {
+                    mem += RamUsageEstimator.SizeOf(ordinals);
+                }
+                return mem;
+            }
+        }
+
+        /// <summary>
+        /// Total RAM consumed by all cached segments; synchronized with the cache lock. </summary>
+        public virtual long RamBytesUsed()
+        {
+            lock (this)
+            {
+                long bytes = 0;
+                foreach (CachedOrds ords in ordsCache.Values)
+                {
+                    bytes += ords.RamBytesUsed();
+                }
+
+                return bytes;
+            }
+        }
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/eea269f3/src/Lucene.Net.Facet/Taxonomy/CategoryPath.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Facet/Taxonomy/CategoryPath.cs b/src/Lucene.Net.Facet/Taxonomy/CategoryPath.cs
new file mode 100644
index 0000000..df69862
--- /dev/null
+++ b/src/Lucene.Net.Facet/Taxonomy/CategoryPath.cs
@@ -0,0 +1,316 @@
+using System;
+using System.Diagnostics;
+using System.Text;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Facet.Taxonomy
+{
+
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+
+    /// <summary>
+    /// Holds a sequence of string components, specifying the hierarchical name of a
+    /// category.
+    /// 
+    /// @lucene.experimental
+    /// </summary>
+    public class CategoryPath : IComparable<CategoryPath>
+    {
+
+        /// <summary>
+        /// An empty <seealso cref="CategoryPath"/> </summary>
+        public static readonly CategoryPath EMPTY = new CategoryPath();
+
+        /// <summary>
+        /// The components of this <seealso cref="CategoryPath"/>. Note that this array may be
+        /// shared with other <seealso cref="CategoryPath"/> instances, e.g. as a result of
+        /// <seealso cref="#subpath(int)"/>, therefore you should traverse the array up to
+        /// <seealso cref="#length"/> for this path's components.
+        /// </summary>
+        public readonly string[] components;
+
+        /// <summary>
+        /// The number of components of this <seealso cref="CategoryPath"/>. </summary>
+        public readonly int length;
+
+        // Used only by the EMPTY singleton.
+        private CategoryPath()
+        {
+            components = null;
+            length = 0;
+        }
+
+        // Used by Subpath: shares the components array, truncating the logical length.
+        private CategoryPath(CategoryPath copyFrom, int prefixLen)
+        {
+            // while the code which calls this method is safe, at some point a test
+            // tripped on AIOOBE in toString, but we failed to reproduce. adding the
+            // assert as a safety check.
+            Debug.Assert(prefixLen > 0 && prefixLen <= copyFrom.components.Length, "prefixLen cannot be negative nor larger than the given components' length: prefixLen=" + prefixLen + " components.length=" + copyFrom.components.Length);
+            this.components = copyFrom.components;
+            length = prefixLen;
+        }
+
+        /// <summary>
+        /// Construct from the given path components. </summary>
+        /// <exception cref="System.ArgumentException"> if any component is null or empty </exception>
+        public CategoryPath(params string[] components)
+        {
+            Debug.Assert(components.Length > 0, "use CategoryPath.EMPTY to create an empty path");
+            foreach (string comp in components)
+            {
+                if (string.IsNullOrEmpty(comp))
+                {
+                    throw new System.ArgumentException("empty or null components not allowed: " + Arrays.ToString(components));
+                }
+            }
+            this.components = components;
+            length = components.Length;
+        }
+
+        /// <summary>
+        /// Construct from a given path, separating path components with {@code delimiter}. </summary>
+        public CategoryPath(string pathString, char delimiter)
+        {
+            string[] comps = pathString.Split(new[] { delimiter }, StringSplitOptions.RemoveEmptyEntries);
+            if (comps.Length == 1 && comps[0].Length == 0)
+            {
+                // NOTE(review): with RemoveEmptyEntries this branch appears
+                // unreachable (splitting "" yields an empty array, never [""]);
+                // kept for parity with the original -- confirm before removing.
+                components = null;
+                length = 0;
+            }
+            else
+            {
+                foreach (string comp in comps)
+                {
+                    if (string.IsNullOrEmpty(comp))
+                    {
+                        throw new System.ArgumentException("empty or null components not allowed: " + Arrays.ToString(comps));
+                    }
+                }
+                components = comps;
+                length = components.Length;
+            }
+        }
+
+        /// <summary>
+        /// Returns the number of characters needed to represent the path, including
+        /// delimiter characters, for using with
+        /// <seealso cref="#copyFullPath(char[], int, char)"/>.
+        /// </summary>
+        public virtual int FullPathLength()
+        {
+            if (length == 0)
+            {
+                return 0;
+            }
+
+            int charsNeeded = 0;
+            for (int i = 0; i < length; i++)
+            {
+                charsNeeded += components[i].Length;
+            }
+            charsNeeded += length - 1; // num delimiter chars
+            return charsNeeded;
+        }
+
+        /// <summary>
+        /// Compares this path with another <seealso cref="CategoryPath"/> for lexicographic
+        /// order. Components are compared ordinally (culture-insensitive), consistent
+        /// with <seealso cref="#equals(object)"/> and <seealso cref="#hashCode()"/>.
+        /// </summary>
+        public virtual int CompareTo(CategoryPath other)
+        {
+            int len = length < other.length ? length : other.length;
+            for (int i = 0; i < len; i++)
+            {
+                // string.CompareOrdinal: the culture-sensitive string.CompareTo
+                // could order paths differently per locale, disagreeing with the
+                // ordinal semantics of Equals/GetHashCode.
+                int cmp = string.CompareOrdinal(components[i], other.components[i]);
+                if (cmp < 0) // this is 'before'
+                {
+                    return -1;
+                }
+                if (cmp > 0) // this is 'after'
+                {
+                    return 1;
+                }
+            }
+
+            // one is a prefix of the other
+            return length - other.length;
+        }
+
+        // Throws, reporting the offending component and the delimiter's code point.
+        private void hasDelimiter(string offender, char delimiter)
+        {
+            // Report the code point in hex (e.g. U+2f) so unprintable delimiters
+            // are still identifiable; delimiter.ToString() printed the character
+            // itself, making the "U+" prefix meaningless.
+            throw new System.ArgumentException("delimiter character '" + delimiter + "' (U+" + ((int)delimiter).ToString("x") + ") appears in path component \"" + offender + "\"");
+        }
+
+        // Verifies that buf[offset..offset+len) contains no delimiter character.
+        private void noDelimiter(char[] buf, int offset, int len, char delimiter)
+        {
+            for (int idx = 0; idx < len; idx++)
+            {
+                if (buf[offset + idx] == delimiter)
+                {
+                    hasDelimiter(new string(buf, offset, len), delimiter);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Copies the path components to the given {@code char[]}, starting at index
+        /// {@code start}. {@code delimiter} is copied between the path components.
+        /// Returns the number of chars copied.
+        /// 
+        /// <para>
+        /// <b>NOTE:</b> this method relies on the array being large enough to hold the
+        /// components and separators - the amount of needed space can be calculated
+        /// with <seealso cref="#fullPathLength()"/>.
+        /// </para>
+        /// </summary>
+        public virtual int CopyFullPath(char[] buf, int start, char delimiter)
+        {
+            if (length == 0)
+            {
+                return 0;
+            }
+
+            int idx = start;
+            int upto = length - 1;
+            for (int i = 0; i < upto; i++)
+            {
+                int len = components[i].Length;
+                components[i].CopyTo(0, buf, idx, len);
+                noDelimiter(buf, idx, len, delimiter);
+                idx += len;
+                buf[idx++] = delimiter;
+            }
+            components[upto].CopyTo(0, buf, idx, components[upto].Length);
+            noDelimiter(buf, idx, components[upto].Length, delimiter);
+
+            return idx + components[upto].Length - start;
+        }
+
+        public override bool Equals(object obj)
+        {
+            if (!(obj is CategoryPath))
+            {
+                return false;
+            }
+
+            CategoryPath other = (CategoryPath)obj;
+            if (length != other.length)
+            {
+                return false; // not same length, cannot be equal
+            }
+
+            // CategoryPaths are more likely to differ at the last components, so start
+            // from last-first
+            for (int i = length - 1; i >= 0; i--)
+            {
+                if (!components[i].Equals(other.components[i]))
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public override int GetHashCode()
+        {
+            if (length == 0)
+            {
+                return 0;
+            }
+
+            int hash = length;
+            for (int i = 0; i < length; i++)
+            {
+                hash = hash * 31 + components[i].GetHashCode();
+            }
+            return hash;
+        }
+
+        /// <summary>
+        /// Calculate a 64-bit hash function for this path. </summary>
+        public virtual long LongHashCode()
+        {
+            if (length == 0)
+            {
+                return 0;
+            }
+
+            long hash = length;
+            for (int i = 0; i < length; i++)
+            {
+                hash = hash * 65599 + components[i].GetHashCode();
+            }
+            return hash;
+        }
+
+        /// <summary>
+        /// Returns a sub-path of this path up to {@code length} components. </summary>
+        public virtual CategoryPath Subpath(int length)
+        {
+            if (length >= this.length || length < 0)
+            {
+                return this;
+            }
+            else if (length == 0)
+            {
+                return EMPTY;
+            }
+            else
+            {
+                return new CategoryPath(this, length);
+            }
+        }
+
+        /// <summary>
+        /// Returns a string representation of the path, separating components with
+        /// '/'.
+        /// </summary>
+        /// <seealso cref= #toString(char) </seealso>
+        public override string ToString()
+        {
+            return ToString('/');
+        }
+
+        /// <summary>
+        /// Returns a string representation of the path, separating components with the
+        /// given delimiter.
+        /// </summary>
+        /// <exception cref="System.ArgumentException"> if any component contains the delimiter </exception>
+        public virtual string ToString(char delimiter)
+        {
+            if (length == 0)
+            {
+                return "";
+            }
+
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < length; i++)
+            {
+                if (components[i].IndexOf(delimiter) != -1)
+                {
+                    hasDelimiter(components[i], delimiter);
+                }
+                sb.Append(components[i]).Append(delimiter);
+            }
+            sb.Length = sb.Length - 1; // remove last delimiter
+            return sb.ToString();
+        }
+
+    }
+
+}
\ No newline at end of file


Mime
View raw message