lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mhern...@apache.org
Subject [10/50] [abbrv] git commit: nearing completion - this file 50%, overall 90% on namespace
Date Tue, 24 Sep 2013 18:32:46 GMT
nearing completion - this file 50%, overall 90% on namespace


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/d9ad1fea
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/d9ad1fea
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/d9ad1fea

Branch: refs/heads/branch_4x
Commit: d9ad1fea5fe5fb1a72a7248fee1e2c04d0a20253
Parents: e47e663
Author: Mike Potts <mike@feature23.com>
Authored: Sun Jul 14 12:03:09 2013 -0400
Committer: Mike Potts <mike@feature23.com>
Committed: Sun Jul 14 12:03:09 2013 -0400

----------------------------------------------------------------------
 .../CompressingStoredFieldsIndexReader.cs       |   2 +-
 .../CompressingStoredFieldsIndexWriter.cs       | 167 ++++++
 .../Compressing/CompressingTermVectorsFormat.cs |  28 +
 .../Compressing/CompressingTermVectorsReader.cs | 507 +++++++++++++++++++
 src/core/Lucene.Net.csproj                      |   3 +
 5 files changed, 706 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
index f981b32..d5a16df 100644
--- a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexReader.cs
@@ -147,7 +147,7 @@ namespace Lucene.Net.Codecs.Compressing
             return hi;
           }
 
-          private long getStartPointer(int docID) 
+          public long GetStartPointer(int docID) 
           {
             if (docID < 0 || docID >= maxDoc) {
               throw new ArgumentException("docID out of range [0-" + maxDoc + "]: " + docID);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
new file mode 100644
index 0000000..ece363a
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingStoredFieldsIndexWriter.cs
@@ -0,0 +1,167 @@
using Lucene.Net.Store;
using Lucene.Net.Util.Packed;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Lucene.Net.Codecs.Compressing
{
    /// <summary>
    /// Writes the stored-fields index file: buffers doc counts and file start
    /// pointers for up to <c>BLOCK_SIZE</c> chunks, then serializes each block as
    /// packed deltas from a running average so that a few outlier chunks do not
    /// inflate the bits-per-value for the whole block (see LUCENE-4512).
    /// </summary>
    public sealed class CompressingStoredFieldsIndexWriter : IDisposable
    {
        static readonly int BLOCK_SIZE = 1024; // number of chunks to serialize at once

        private readonly IndexOutput fieldsIndexOut;
        private int totalDocs;          // docs registered so far, across all blocks
        private int blockDocs;          // docs buffered in the current block
        private int blockChunks;        // chunks buffered in the current block
        private long firstStartPointer; // start pointer of the first chunk in the block; -1 = unset
        private long maxStartPointer;   // start pointer of the most recent chunk
        private readonly int[] docBaseDeltas;       // docs per buffered chunk
        private readonly long[] startPointerDeltas; // start-pointer delta per buffered chunk

        // Zig-zag encoding: moves the sign to the low-order bit so that small
        // negative deltas still pack into few bits.
        static long MoveSignToLowOrderBit(long n)
        {
            return (n >> 63) ^ (n << 1);
        }

        // internal (not private): the companion stored-fields writer in this
        // namespace must be able to construct this type.
        internal CompressingStoredFieldsIndexWriter(IndexOutput indexOutput)
        {
            this.fieldsIndexOut = indexOutput;
            Reset();
            totalDocs = 0;
            docBaseDeltas = new int[BLOCK_SIZE];
            startPointerDeltas = new long[BLOCK_SIZE];
            fieldsIndexOut.WriteVInt(PackedInts.VERSION_CURRENT);
        }

        // Clears per-block state so a new block can start buffering.
        private void Reset()
        {
            blockChunks = 0;
            blockDocs = 0;
            firstStartPointer = -1; // means unset
        }

        // Flushes the buffered chunks of the current block to the index output.
        private void WriteBlock()
        {
            fieldsIndexOut.WriteVInt(blockChunks);

            // The trick here is that we only store the difference from the average start
            // pointer or doc base, this helps save bits per value.
            // And in order to prevent a few chunks that would be far from the average to
            // raise the number of bits per value for all of them, we only encode blocks
            // of 1024 chunks at once
            // See LUCENE-4512

            // doc bases
            int avgChunkDocs;
            if (blockChunks == 1)
            {
                avgChunkDocs = 0;
            }
            else
            {
                // Average docs per chunk, excluding the last (possibly partial) chunk.
                // The float math mirrors the Java original; the result fits an int
                // because blockDocs is an int.
                avgChunkDocs = (int)Math.Round((float)(blockDocs - docBaseDeltas[blockChunks - 1]) / (blockChunks - 1));
            }
            fieldsIndexOut.WriteVInt(totalDocs - blockDocs); // docBase
            fieldsIndexOut.WriteVInt(avgChunkDocs);
            int docBase = 0;
            long maxDelta = 0;
            // First pass: find the widest zig-zag-encoded delta so we know how many
            // bits per value the packed writer needs.
            for (int i = 0; i < blockChunks; ++i)
            {
                int delta = docBase - avgChunkDocs * i;
                maxDelta |= MoveSignToLowOrderBit(delta);
                docBase += docBaseDeltas[i];
            }

            int bitsPerDocBase = PackedInts.BitsRequired(maxDelta);
            fieldsIndexOut.WriteVInt(bitsPerDocBase);
            PackedInts.Writer writer = PackedInts.GetWriterNoHeader(fieldsIndexOut,
                PackedInts.Format.PACKED, blockChunks, bitsPerDocBase, 1);
            docBase = 0;
            // Second pass: emit the deltas.
            for (int i = 0; i < blockChunks; ++i)
            {
                long delta = docBase - avgChunkDocs * i;
                writer.Add(MoveSignToLowOrderBit(delta));
                docBase += docBaseDeltas[i];
            }
            writer.Finish();

            // start pointers: same two-pass scheme as the doc bases above
            fieldsIndexOut.WriteVLong(firstStartPointer);
            long avgChunkSize;
            if (blockChunks == 1)
            {
                avgChunkSize = 0;
            }
            else
            {
                avgChunkSize = (maxStartPointer - firstStartPointer) / (blockChunks - 1);
            }
            fieldsIndexOut.WriteVLong(avgChunkSize);
            long startPointer = 0;
            maxDelta = 0;
            for (int i = 0; i < blockChunks; ++i)
            {
                startPointer += startPointerDeltas[i];
                long delta = startPointer - avgChunkSize * i;
                maxDelta |= MoveSignToLowOrderBit(delta);
            }

            int bitsPerStartPointer = PackedInts.BitsRequired(maxDelta);
            fieldsIndexOut.WriteVInt(bitsPerStartPointer);
            writer = PackedInts.GetWriterNoHeader(fieldsIndexOut, PackedInts.Format.PACKED,
                blockChunks, bitsPerStartPointer, 1);
            startPointer = 0;
            for (int i = 0; i < blockChunks; ++i)
            {
                startPointer += startPointerDeltas[i];
                long delta = startPointer - avgChunkSize * i;
                writer.Add(MoveSignToLowOrderBit(delta));
            }
            writer.Finish();
        }

        /// <summary>
        /// Registers a chunk of <paramref name="numDocs"/> documents starting at
        /// file offset <paramref name="startPointer"/>, flushing a block first if
        /// the buffer is full.
        /// </summary>
        internal void WriteIndex(int numDocs, long startPointer)
        {
            if (blockChunks == BLOCK_SIZE)
            {
                WriteBlock();
                Reset();
            }

            if (firstStartPointer == -1)
            {
                firstStartPointer = maxStartPointer = startPointer;
            }

            docBaseDeltas[blockChunks] = numDocs;
            startPointerDeltas[blockChunks] = startPointer - maxStartPointer;

            ++blockChunks;
            blockDocs += numDocs;
            totalDocs += numDocs;
            maxStartPointer = startPointer;
        }

        /// <summary>
        /// Flushes any pending block and writes the end marker.
        /// </summary>
        /// <param name="numDocs">Expected total document count; must match the
        /// number of docs registered via <see cref="WriteIndex"/>.</param>
        /// <exception cref="ArgumentOutOfRangeException">If <paramref name="numDocs"/>
        /// does not match the registered total.</exception>
        internal void Finish(int numDocs)
        {
            if (numDocs != totalDocs)
            {
                // The single-string ArgumentOutOfRangeException constructor treats its
                // argument as a *parameter name*, so use the (paramName, message)
                // overload to keep the diagnostic text visible.
                throw new ArgumentOutOfRangeException("numDocs",
                    "Expected " + numDocs + " docs, but got " + totalDocs);
            }
            if (blockChunks > 0)
            {
                WriteBlock();
            }
            fieldsIndexOut.WriteVInt(0); // end marker
        }

        public void Dispose()
        {
            fieldsIndexOut.Dispose();
        }
    }
}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs b/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs
new file mode 100644
index 0000000..0a2afd1
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingTermVectorsFormat.cs
@@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Lucene.Net.Codecs.Compressing
{
    /// <summary>
    /// A <see cref="TermVectorsFormat"/> configured with a format name, segment
    /// suffix, compression mode and chunk size for compressed term vectors.
    /// </summary>
    public class CompressingTermVectorsFormat : TermVectorsFormat
    {
        private readonly string formatName;
        private readonly string segmentSuffix;
        private readonly CompressionMode compressionMode;
        private readonly int chunkSize;

        /// <summary>
        /// Creates a new format instance.
        /// </summary>
        /// <param name="formatName">Name of the format, used in codec headers.</param>
        /// <param name="segmentSuffix">Suffix appended to segment file names.</param>
        /// <param name="compressionMode">Compression mode for term vector data.</param>
        /// <param name="chunkSize">Chunk size used when compressing; must be &gt;= 1.</param>
        /// <exception cref="ArgumentOutOfRangeException">If <paramref name="chunkSize"/>
        /// is less than 1. (Derives from <see cref="ArgumentException"/>, so existing
        /// handlers keep working.)</exception>
        public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
            CompressionMode compressionMode, int chunkSize)
        {
            // Validate first so a failed construction leaves no half-initialized state.
            if (chunkSize < 1)
            {
                throw new ArgumentOutOfRangeException("chunkSize", "chunkSize must be >= 1");
            }
            this.formatName = formatName;
            this.segmentSuffix = segmentSuffix;
            this.compressionMode = compressionMode;
            this.chunkSize = chunkSize;
        }
    }
}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs
----------------------------------------------------------------------
diff --git a/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs b/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs
new file mode 100644
index 0000000..0de0f4f
--- /dev/null
+++ b/src/core/Codecs/Compressing/CompressingTermVectorsReader.cs
@@ -0,0 +1,507 @@
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Support;
using Lucene.Net.Util;
using Lucene.Net.Util.Packed;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace Lucene.Net.Codecs.Compressing
{
    // Reader for term vectors that were written in compressed chunks of documents
    // (port of Lucene's CompressingTermVectorsReader).
    //
    // NOTE(review): in C# a base class must precede interfaces in the base list,
    // so this should read ": TermVectorsReader, IDisposable" — confirm and fix.
    // NOTE(review): BLOCK_SIZE, VECTORS_EXTENSION, VECTORS_INDEX_EXTENSION,
    // CODEC_SFX_IDX, CODEC_SFX_DAT, VERSION_START, VERSION_CURRENT, POSITIONS,
    // OFFSETS, PAYLOADS, FLAGS_BITS and the helpers sum(), positionIndex(),
    // readPositions() and the TVFields type are referenced but not defined in this
    // file — presumably still to be ported; verify before building.
    public sealed class CompressingTermVectorsReader: IDisposable, TermVectorsReader
    {
        private FieldInfos fieldInfos;
        CompressingStoredFieldsIndexReader indexReader;
        IndexInput vectorsStream;
        private int packedIntsVersion;
        private CompressionMode compressionMode;
        private Decompressor decompressor;
        private int chunkSize;
        private int numDocs;
        private bool closed;
        private BlockPackedReaderIterator reader;
        
        // Cloning constructor: copies shared state and clones the streams so the
        // new reader can be positioned independently of the original.
        private CompressingTermVectorsReader(CompressingTermVectorsReader reader)
        {
            this.fieldInfos = reader.fieldInfos;
            this.vectorsStream = (IndexInput)reader.vectorsStream.Clone();
            // NOTE(review): lowercase clone() — confirm against the ported
            // CompressingStoredFieldsIndexReader API (elsewhere the port uses PascalCase).
            this.indexReader = reader.indexReader.clone();
            this.packedIntsVersion = reader.packedIntsVersion;
            this.compressionMode = reader.compressionMode;
            this.decompressor = (Decompressor)reader.decompressor.Clone();
            this.chunkSize = reader.chunkSize;
            this.numDocs = reader.numDocs;
            this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
            this.closed = false;
        }

        /** Sole constructor: opens the vectors data and index files, validates the
            codec headers, and hands the index stream off to the chunk index reader. */
        public CompressingTermVectorsReader(Directory d, SegmentInfo si, String segmentSuffix, FieldInfos fn,
            IOContext context, String formatName, CompressionMode compressionMode) 
        {
            this.compressionMode = compressionMode;
            string segment = si.name;
            bool success = false;
            fieldInfos = fn;
            numDocs = si.DocCount;
            IndexInput indexStream = null;
            try {
                vectorsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
                string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
                indexStream = d.OpenInput(indexStreamFN, context);

                string codecNameIdx = formatName + CODEC_SFX_IDX;
                string codecNameDat = formatName + CODEC_SFX_DAT;
                CodecUtil.CheckHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
                CodecUtil.CheckHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);

                // Ownership of indexStream transfers to the index reader; null it out
                // so the finally block does not close it a second time.
                indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
                indexStream = null;

                packedIntsVersion = vectorsStream.ReadVInt();
                chunkSize = vectorsStream.ReadVInt();
                decompressor = compressionMode.newDecompressor();
                this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);

                success = true;
            } finally {
                if (!success) {
                // On failure, close whatever was opened without masking the original exception.
                IOUtils.CloseWhileHandlingException(this, indexStream);
                }
            }
        }

        // Accessors used by the companion writer/format classes (no modifier = private;
        // NOTE(review): confirm intended visibility — callers elsewhere may need internal).
        CompressionMode getCompressionMode() 
        {
            return compressionMode;
        }

        int getChunkSize() {
            return chunkSize;
        }

        int getPackedIntsVersion() {
            return packedIntsVersion;
        }

        CompressingStoredFieldsIndexReader getIndex() {
            return indexReader;
        }

        IndexInput getVectorsStream() {
            return vectorsStream;
        }

        /**
        * @throws AlreadyClosedException if this TermVectorsReader is closed
        */
        private void ensureOpen()
        {
            if (closed) {
                throw new AlreadyClosedException("this FieldsReader is closed");
            }
        }



        public void Dispose()
        {
            if (!closed)
            {
                IOUtils.Close(vectorsStream, indexReader);
                closed = true;
            }
        }

        // Decodes and returns the term vectors for a single document, or null if
        // the document stored no vectors.
        public override Index.Fields Get(int doc)
        {
            ensureOpen();

            // seek to the right place
            {
              long startPointer = indexReader.GetStartPointer(doc);
              vectorsStream.Seek(startPointer);
            }

            // decode
            // - docBase: first doc ID of the chunk
            // - chunkDocs: number of docs of the chunk
            int docBase = vectorsStream.ReadVInt();
            int chunkDocs = vectorsStream.ReadVInt();
            if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
              throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc);
            }

            long skip; // number of fields to skip
            long numFields; // number of fields of the document we're looking for
            long totalFields; // total number of fields of the chunk (sum for all docs)
            if (chunkDocs == 1) {
              skip = 0;
              numFields = totalFields = vectorsStream.ReadVInt();
            } else {
              // Walk the per-doc field counts of the whole chunk to locate our doc.
              reader.Reset(vectorsStream, chunkDocs);
              long sum = 0;
              for (int i = docBase; i < doc; ++i) {
                sum += reader.Next();
              }
              skip = sum;
              numFields = (int) reader.Next();
              sum += numFields;
              for (int i = doc + 1; i < docBase + chunkDocs; ++i) {
                sum += reader.Next();
              }
              totalFields = sum;
            }

            if (numFields == 0) {
              // no vectors
              return null;
            }

            // read field numbers that have term vectors
            int[] fieldNums;
            {
              int token = vectorsStream.ReadByte() & 0xFF;
              // low 5 bits: bits per field number; high 3 bits: distinct-field count,
              // where 0x07 means an extra VInt follows
              int bitsPerFieldNum = token & 0x1F;
              int totalDistinctFields = Number.URShift(token, 5);
              if (totalDistinctFields == 0x07) {
                totalDistinctFields += vectorsStream.ReadVInt();
              }
              ++totalDistinctFields;
              PackedInts.ReaderIterator it = PackedInts.GetReaderIteratorNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalDistinctFields, bitsPerFieldNum, 1);
              fieldNums = new int[totalDistinctFields];
              for (int i = 0; i < totalDistinctFields; ++i) {
                fieldNums[i] = (int) it.Next();
              }
            }

            // read field numbers and flags
            int[] fieldNumOffs = new int[numFields];
            PackedInts.Reader flags;
            {
              int bitsPerOff = PackedInts.BitsRequired(fieldNums.Length - 1);
              PackedInts.Reader allFieldNumOffs = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
              switch (vectorsStream.ReadVInt()) {
                case 0:
                  // Flags stored once per distinct field: expand to one entry per field.
                  // NOTE(review): lowercase getReaderNoHeader is inconsistent with the
                  // GetReaderNoHeader calls above — confirm the ported API name.
                  PackedInts.Reader fieldFlags = PackedInts.getReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, fieldNums.Length, FLAGS_BITS);
                  PackedInts.Mutable f = PackedInts.GetMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
                  for (int i = 0; i < totalFields; ++i) {
                    int fieldNumOff = (int) allFieldNumOffs.Get(i);
                    int fgs = (int) fieldFlags.Get(fieldNumOff);
                    f.Set(i, fgs);
                  }
                  flags = f;
                  break;
                case 1:
                  // Flags stored per field, read directly.
                  flags = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, FLAGS_BITS);
                  break;
                default:
                  throw new AssertionError();
              }
              for (int i = 0; i < numFields; ++i) {
                //hackmp - TODO - NEEDS REVIEW
                //Here again, seems to be a larger impact to change all ints to long, than simply cast.  Will need Paul to review..
                fieldNumOffs[i] = (int) allFieldNumOffs.Get((int)skip + i);
              }
            }

            // number of terms per field for all fields
            PackedInts.Reader numTerms;
            long totalTerms;
            {
              int bitsRequired = vectorsStream.ReadVInt();
              numTerms = PackedInts.GetReaderNoHeader(vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsRequired);
              long sum = 0;
              for (int i = 0; i < totalFields; ++i) {
                sum += numTerms.Get(i);
              }
              totalTerms = sum;
            }

            // term lengths
            long docOff = 0, docLen = 0, totalLen;
            int[] fieldLengths = new int[numFields];
            int[][] prefixLengths = new int[numFields][];
            int[][] suffixLengths = new int[numFields][];
            {
              reader.Reset(vectorsStream, totalTerms);
              // skip
              long toSkip = 0;
              for (int i = 0; i < skip; ++i) {
                toSkip += numTerms.Get(i);
              }
              reader.Skip(toSkip);
              // read prefix lengths
              for (int i = 0; i < numFields; ++i) {
                //hackmp - TODO - NEEDS REVIEW
                //casting long to int
                long termCount = (int) numTerms.Get((int)skip + i);
                int[] fieldPrefixLengths = new int[termCount];
                prefixLengths[i] = fieldPrefixLengths;
                for (int j = 0; j < termCount; ) {
                  //hackmp - TODO - NEEDS REVIEW
                  //casting long to int..
                  LongsRef next = reader.Next((int)termCount - j);
                  for (int k = 0; k < next.length; ++k) {
                    fieldPrefixLengths[j++] = (int) next.longs[next.offset + k];
                  }
                }
              }
              reader.Skip(totalTerms - reader.Ord);

              // second pass over the same packed stream: suffix lengths
              reader.Reset(vectorsStream, totalTerms);
              // skip
              toSkip = 0;
              for (int i = 0; i < skip; ++i) {
                for (int j = 0; j < numTerms.Get(i); ++j) {
                  docOff += reader.Next();
                }
              }
              for (int i = 0; i < numFields; ++i) {
                  //HACKMP - TODO - NEEDS REVIEW
                  //..and again, casting long to int
                int termCount = (int) numTerms.Get((int)skip + i);
                int[] fieldSuffixLengths = new int[termCount];
                suffixLengths[i] = fieldSuffixLengths;
                for (int j = 0; j < termCount; ) {
                  LongsRef next = reader.Next(termCount - j);
                  for (int k = 0; k < next.length; ++k) {
                    fieldSuffixLengths[j++] = (int) next.longs[next.offset + k];
                  }
                }
                fieldLengths[i] = sum(suffixLengths[i]);
                docLen += fieldLengths[i];
              }     
              totalLen = docOff + docLen;
              for (long i = skip + numFields; i < totalFields; ++i) {
                  //hackmp - TODO - NEEDS REVIEW
                  //long > int
                for (int j = 0; j < numTerms.Get((int)i); ++j) 
                {
                  totalLen += reader.Next();
                }
              }
            }

            // term freqs
            int[] termFreqs = new int[totalTerms];
            {
              reader.Reset(vectorsStream, totalTerms);
              for (int i = 0; i < totalTerms; ) {
                //hackmp - TODO - NEEDS REVIEW
                //long > int
                LongsRef next = reader.Next((int)totalTerms - i);
                for (int k = 0; k < next.length; ++k) {
                  // freqs are stored minus one
                  termFreqs[i++] = 1 + (int) next.longs[next.offset + k];
                }
              }
            }

            // total number of positions, offsets and payloads
            int totalPositions = 0, totalOffsets = 0, totalPayloads = 0;
            for (int i = 0, termIndex = 0; i < totalFields; ++i) 
            {
              int f = (int) flags.Get(i);
              int termCount = (int) numTerms.Get(i);
              for (int j = 0; j < termCount; ++j) {
                int freq = termFreqs[termIndex++];
                if ((f & POSITIONS) != 0) {
                  totalPositions += freq;
                }
                if ((f & OFFSETS) != 0) {
                  totalOffsets += freq;
                }
                if ((f & PAYLOADS) != 0) {
                  totalPayloads += freq;
                }
              }
            }

            // NOTE(review): this local shadows the positionIndex(...) helper it calls
            // on the same line — legal in Java, but a compile error in C#; rename one.
            int[][] positionIndex = positionIndex(skip, numFields, numTerms, termFreqs);
            int[][] positions, startOffsets, lengths;
            if (totalPositions > 0) {
              positions = readPositions(skip, numFields, flags, numTerms, termFreqs, POSITIONS, totalPositions, positionIndex);
            } else {
              positions = new int[numFields][];
            }

            if (totalOffsets > 0) {
              // average number of chars per term
              float[] charsPerTerm = new float[fieldNums.Length];
              for (int i = 0; i < charsPerTerm.Length; ++i) {
                charsPerTerm[i] = Number.IntBitsToFloat(vectorsStream.ReadInt());
              }
              startOffsets = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);
              lengths = readPositions(skip, numFields, flags, numTerms, termFreqs, OFFSETS, totalOffsets, positionIndex);

              for (int i = 0; i < numFields; ++i) {
                int[] fStartOffsets = startOffsets[i];
                int[] fPositions = positions[i];
                // patch offsets from positions
                if (fStartOffsets != null && fPositions != null) {
                  float fieldCharsPerTerm = charsPerTerm[fieldNumOffs[i]];
                  for (int j = 0; j < startOffsets[i].Length; ++j) {
                    fStartOffsets[j] += (int) (fieldCharsPerTerm * fPositions[j]);
                  }
                }
                if (fStartOffsets != null) {
                  int[] fPrefixLengths = prefixLengths[i];
                  int[] fSuffixLengths = suffixLengths[i];
                  int[] fLengths = lengths[i];
                    //hackmp - TODO - NEEDS REVIEW
                    //long > int
                  for (int j = 0, end = (int) numTerms.Get((int)skip + i); j < end; ++j) {
                    // delta-decode start offsets and  patch lengths using term lengths
                    int termLength = fPrefixLengths[j] + fSuffixLengths[j];
                    lengths[i][positionIndex[i][j]] += termLength;
                    for (int k = positionIndex[i][j] + 1; k < positionIndex[i][j + 1]; ++k) {
                      fStartOffsets[k] += fStartOffsets[k - 1];
                      fLengths[k] += termLength;
                    }
                  }
                }
              }
            } else {
              startOffsets = lengths = new int[numFields][];
            }
            if (totalPositions > 0) {
              // delta-decode positions
              for (int i = 0; i < numFields; ++i) {
                int[] fPositions = positions[i];
                int[] fpositionIndex = positionIndex[i];
                if (fPositions != null) {
                    //hackmp - TODO - NEED REVIEW
                    //long > int
                  for (int j = 0, end = (int) numTerms.Get((int)skip + i); j < end; ++j) {
                    // delta-decode start offsets
                    for (int k = fpositionIndex[j] + 1; k < fpositionIndex[j + 1]; ++k) {
                      fPositions[k] += fPositions[k - 1];
                    }
                  }
                }
              }
            }

            // payload lengths
            int[][] payloadIndex = new int[numFields][];
            long totalPayloadLength = 0;
            int payloadOff = 0;
            int payloadLen = 0;
            if (totalPayloads > 0) {
              reader.Reset(vectorsStream, totalPayloads);
              // skip: accumulate payload bytes belonging to the preceding fields
              int termIndex = 0;
              for (int i = 0; i < skip; ++i) {
                int f = (int) flags.Get(i);
                int termCount = (int) numTerms.Get(i);
                if ((f & PAYLOADS) != 0) {
                  for (int j = 0; j < termCount; ++j) {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k) {
                      int l = (int) reader.Next();
                      payloadOff += l;
                    }
                  }
                }
                termIndex += termCount;
              }
              totalPayloadLength = payloadOff;
              // read doc payload lengths
              for (int i = 0; i < numFields; ++i) {
                  //hackmp - TODO - NEEDS REVIEW
                  //long > int
                int f = (int) flags.Get((int)skip + i);
                int termCount = (int) numTerms.Get((int)skip + i);
                if ((f & PAYLOADS) != 0) {
                  int totalFreq = positionIndex[i][termCount];
                  payloadIndex[i] = new int[totalFreq + 1];
                  int posIdx = 0;
                  payloadIndex[i][posIdx] = payloadLen;
                  for (int j = 0; j < termCount; ++j) {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k) {
                      int payloadLength = (int) reader.Next();
                      payloadLen += payloadLength;
                      payloadIndex[i][posIdx+1] = payloadLen;
                      ++posIdx;
                    }
                  }
                }
                termIndex += termCount;
              }
              totalPayloadLength += payloadLen;
              // account for payloads of the fields after ours in the chunk
              for (long i = skip + numFields; i < totalFields; ++i) {
                  //hackmp - TODO - NEEDS REVIEW
                  //long > int
                int f = (int) flags.Get((int)i);
                int termCount = (int) numTerms.Get((int)i);
                if ((f & PAYLOADS) != 0) {
                  for (int j = 0; j < termCount; ++j) {
                    int freq = termFreqs[termIndex + j];
                    for (int k = 0; k < freq; ++k) {
                      totalPayloadLength += reader.Next();
                    }
                  }
                }
                termIndex += termCount;
              }
            }

            // decompress data
            BytesRef suffixBytes = new BytesRef();
            //hackmp - TODO - NEEDS REVIEW
            //long > int
            decompressor.Decompress(vectorsStream, (int)totalLen + (int)totalPayloadLength, (int)docOff + (int)payloadOff, (int)docLen + payloadLen, suffixBytes);
            // split the decompressed window: term bytes first, then payload bytes
            suffixBytes.length = (int)docLen;
            BytesRef payloadBytes = new BytesRef(suffixBytes.bytes, suffixBytes.offset + (int)docLen, payloadLen);

            int[] fieldFlags = new int[numFields];
            for (int i = 0; i < numFields; ++i) {
                //hackmp - TODO - NEEDS REVIEW
                //long > int
              fieldFlags[i] = (int) flags.Get((int)skip + i);
            }

            int[] fieldNumTerms = new int[numFields];
            for (int i = 0; i < numFields; ++i) {
                //hackmp - TODO - NEEDS REVIEW
              fieldNumTerms[i] = (int) numTerms.Get((int)skip + i);
            }

            int[][] fieldTermFreqs = new int[numFields][];
            {
              long termIdx = 0;
              for (int i = 0; i < skip; ++i) {
                termIdx += numTerms.Get(i);
              }
              for (int i = 0; i < numFields; ++i) {
                  //hackmp - TODO - NEEDS REVIEW
                  //long > int
                long termCount = (int) numTerms.Get((int)skip + i);
                fieldTermFreqs[i] = new int[termCount];
                for (int j = 0; j < termCount; ++j) {
                  fieldTermFreqs[i][j] = termFreqs[termIdx++];
                }
              }
            }

            return new TVFields(fieldNums, fieldFlags, fieldNumOffs, fieldNumTerms, fieldLengths,
                prefixLengths, suffixLengths, fieldTermFreqs,
                positionIndex, positions, startOffsets, lengths,
                payloadBytes, payloadIndex,
                suffixBytes);
        }

        // Returns an independently positioned copy of this reader.
        public override object Clone()
        {
            return new CompressingTermVectorsReader(this);
        }

        // NOTE(review): the class also declares a public Dispose() above, which
        // suggests the base class uses the Dispose(bool) pattern; this override is
        // still unimplemented in this commit.
        protected override void Dispose(bool disposing)
        {
            throw new NotImplementedException();
        }
    }
}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/d9ad1fea/src/core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/core/Lucene.Net.csproj b/src/core/Lucene.Net.csproj
index 85f9818..306396c 100644
--- a/src/core/Lucene.Net.csproj
+++ b/src/core/Lucene.Net.csproj
@@ -188,8 +188,11 @@
     <Compile Include="Codecs\CodecUtil.cs" />
     <Compile Include="Codecs\Compressing\CompressingStoredFieldsFormat.cs" />
     <Compile Include="Codecs\Compressing\CompressingStoredFieldsIndexReader.cs" />
+    <Compile Include="Codecs\Compressing\CompressingStoredFieldsIndexWriter.cs" />
     <Compile Include="Codecs\Compressing\CompressingStoredFieldsReader.cs" />
     <Compile Include="Codecs\Compressing\CompressingStoredFieldsWriter.cs" />
+    <Compile Include="Codecs\Compressing\CompressingTermVectorsFormat.cs" />
+    <Compile Include="Codecs\Compressing\CompressingTermVectorsReader.cs" />
     <Compile Include="Codecs\Compressing\CompressionMode.cs" />
     <Compile Include="Codecs\Compressing\Compressor.cs" />
     <Compile Include="Codecs\Compressing\Decompressor.cs" />


Mime
View raw message