lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [09/51] [abbrv] [partial] Cleaning up and getting ready to development towards v4.8
Date Sat, 06 Sep 2014 19:36:20 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs
new file mode 100644
index 0000000..1cbf8c3
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsReader.cs
@@ -0,0 +1,877 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support; // assumed home of the TreeMap port used below
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+    
+
+    /// <summary>
+    /// Handles a terms dict, but decouples all details of
+    /// doc/freqs/positions reading to an instance of
+    /// <see cref="PostingsReaderBase"/>.  This class is reusable for
+    /// codecs that use a different format for
+    /// docs/freqs/positions (though codecs are also free to
+    /// make their own terms dict impl).
+    ///
+    /// This class also interacts with an instance of
+    /// <see cref="TermsIndexReaderBase"/>, to abstract away the specific
+    /// implementation of the terms dict index.
+    ///
+    /// @lucene.experimental
+    /// </summary>
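+    /// <remarks>
+    /// Illustrative wiring only (not part of this port; variable names are
+    /// placeholders), mirroring the constructor signatures in these files:
+    /// <code>
+    /// TermsIndexReaderBase indexReader = new FixedGapTermsIndexReader(dir, fieldInfos,
+    ///     info.name, indexDivisor, BytesRef.getUTF8SortedAsUnicodeComparator(),
+    ///     segmentSuffix, context);
+    /// FieldsProducer fields = new BlockTermsReader(indexReader, dir, fieldInfos, info,
+    ///     postingsReader, context, segmentSuffix);
+    /// </code>
+    /// </remarks>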
+public class BlockTermsReader : FieldsProducer {
+  // Open input to the main terms dict file (_X.tib)
+  private readonly IndexInput input;
+
+  // Reads the terms dict entries, to gather state to
+  // produce DocsEnum on demand
+  private readonly PostingsReaderBase postingsReader;
+
+  private readonly TreeMap<String, FieldReader> fields = new TreeMap<String, FieldReader>();
+
+  // Reads the terms index
+  private TermsIndexReaderBase indexReader;
+
+  // keeps the dirStart offset
+  private long dirOffset;
+  
+  private readonly int version;
+
+  // Used as key for the terms cache
+  private class FieldAndTerm : DoubleBarrelLRUCache.CloneableKey {
+    String field;
+    BytesRef term;
+
+    public FieldAndTerm() {
+    }
+
+    public FieldAndTerm(FieldAndTerm other) {
+      field = other.field;
+      term = BytesRef.deepCopyOf(other.term);
+    }
+
+    @Override
+    public bool equals(Object _other) {
+      FieldAndTerm other = (FieldAndTerm) _other;
+      return other.field.equals(field) && term.bytesEquals(other.term);
+    }
+
+    @Override
+    public FieldAndTerm clone() {
+      return new FieldAndTerm(this);
+    }
+
+    @Override
+    public int hashCode() {
+      return field.hashCode() * 31 + term.hashCode();
+    }
+  }
+  
+  // private String segment;
+  
+  public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context,
+                          String segmentSuffix)
+     {
+    
+    this.postingsReader = postingsReader;
+
+    // this.segment = segment;
+    in = dir.openInput(IndexFileNames.segmentFileName(info.name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION),
+                       context);
+
+    bool success = false;
+    try {
+      version = readHeader(in);
+
+      // Have PostingsReader init itself
+      postingsReader.init(in);
+
+      // Read per-field details
+      seekDir(in, dirOffset);
+
+      final int numFields = in.readVInt();
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid number of fields: " + numFields + " (resource=" + in + ")");
+      }
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readVInt();
+        final long numTerms = in.readVLong();
+        Debug.Assert(numTerms >= 0);
+        final long termsStartPointer = in.readVLong();
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
+        final long sumDocFreq = in.readVLong();
+        final int docCount = in.readVInt();
+        final int longsSize = version >= BlockTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
+        if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
+          throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+        }
+        if (sumDocFreq < docCount) {  // #postings must be >= #docs with field
+          throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
+        }
+        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+          throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
+        }
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
+        }
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        in.close();
+      }
+    }
+
+    this.indexReader = indexReader;
+  }
+
+  private int readHeader(IndexInput input)  {
+    int version = CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME,
+                          BlockTermsWriter.VERSION_START,
+                          BlockTermsWriter.VERSION_CURRENT);
+    if (version < BlockTermsWriter.VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
+  }
+  
+  private void seekDir(IndexInput input, long dirOffset)  {
+    if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+      input.seek(input.length() - CodecUtil.footerLength() - 8);
+      dirOffset = input.readLong();
+    } else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
+      input.seek(input.length() - 8);
+      dirOffset = input.readLong();
+    }
+    input.seek(dirOffset);
+  }
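+
+  /* For reference, the .tib layout implied by readHeader/seekDir
+     (current version; pre-APPEND_ONLY versions stored dirOffset in the
+     header instead):
+
+       CodecHeader
+       PostingsReader header
+       per-field term blocks
+       directory (numFields, then per-field metadata)
+       dirOffset (long, 8 bytes before the footer since VERSION_CHECKSUM)
+       footer (checksum)
+  */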
+  
+  @Override
+  public void close()  {
+    try {
+      try {
+        if (indexReader != null) {
+          indexReader.close();
+        }
+      } finally {
+        // null so if an app hangs on to us (ie, we are not
+        // GCable, despite being closed) we still free most
+        // ram
+        indexReader = null;
+        if (in != null) {
+          in.close();
+        }
+      }
+    } finally {
+      if (postingsReader != null) {
+        postingsReader.close();
+      }
+    }
+  }
+
+  @Override
+  public Iterator<String> iterator() {
+    return Collections.unmodifiableSet(fields.keySet()).iterator();
+  }
+
+  @Override
+  public Terms terms(String field)  {
+    Debug.Assert(field != null);
+    return fields.get(field);
+  }
+
+  @Override
+  public int size() {
+    return fields.size();
+  }
+
+  private class FieldReader : Terms {
+    final long numTerms;
+    final FieldInfo fieldInfo;
+    final long termsStartPointer;
+    final long sumTotalTermFreq;
+    final long sumDocFreq;
+    final int docCount;
+    final int longsSize;
+
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+      Debug.Assert(numTerms > 0);
+      this.fieldInfo = fieldInfo;
+      this.numTerms = numTerms;
+      this.termsStartPointer = termsStartPointer;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+      this.longsSize = longsSize;
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse)  {
+      return new SegmentTermsEnum();
+    }
+
+    @Override
+    public bool hasFreqs() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    }
+
+    @Override
+    public bool hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public bool hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+    
+    @Override
+    public bool hasPayloads() {
+      return fieldInfo.hasPayloads();
+    }
+
+    @Override
+    public long size() {
+      return numTerms;
+    }
+
+    @Override
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
+    }
+
+    @Override
+    public long getSumDocFreq()  {
+      return sumDocFreq;
+    }
+
+    @Override
+    public int getDocCount()  {
+      return docCount;
+    }
+
+    // Iterates through terms in this field
+    private sealed class SegmentTermsEnum : TermsEnum {
+      private final IndexInput in;
+      private final BlockTermState state;
+      private final bool doOrd;
+      private final FieldAndTerm fieldTerm = new FieldAndTerm();
+      private final TermsIndexReaderBase.FieldIndexEnum indexEnum;
+      private final BytesRef term = new BytesRef();
+
+      /* This is true if indexEnum is "still" seek'd to the index term
+         for the current term. We set it to true on seeking, and then it
+         remains valid until next() is called enough times to load another
+         terms block: */
+      private bool indexIsCurrent;
+
+      /* True if we've already called .next() on the indexEnum, to "bracket"
+         the current block of terms: */
+      private bool didIndexNext;
+
+      /* Next index term, bracketing the current block of terms; this is
+         only valid if didIndexNext is true: */
+      private BytesRef nextIndexTerm;
+
+      /* True after seekExact(TermState), to defer seeking.  If the app then
+         calls next() (which is not "typical"), then we'll do the real seek */
+      private bool seekPending;
+
+      /* How many blocks we've read since last seek.  Once this
+         is >= indexEnum.getDivisor() we set indexIsCurrent to false (since
+         the index can no longer bracket seek-within-block). */
+      private int blocksSinceSeek;
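+      /* Example (assuming divisor == 2): after a seek lands us on an index
+         term, two nextBlock() calls push blocksSinceSeek to 2 and
+         indexIsCurrent drops to false -- the index no longer brackets our
+         position, so the next seekCeil() must do a real seek. */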
+
+      private byte[] termSuffixes;
+      private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput();
+
+      /* Common prefix used for all terms in this block. */
+      private int termBlockPrefix;
+
+      /* How many terms in current block */
+      private int blockTermCount;
+
+      private byte[] docFreqBytes;
+      private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
+      private int metaDataUpto;
+
+      private long[] longs;
+      private byte[] bytes;
+      private ByteArrayDataInput bytesReader;
+
+      public SegmentTermsEnum()  {
+        in = BlockTermsReader.this.in.clone();
+        in.seek(termsStartPointer);
+        indexEnum = indexReader.getFieldEnum(fieldInfo);
+        doOrd = indexReader.supportsOrd();
+        fieldTerm.field = fieldInfo.name;
+        state = postingsReader.newTermState();
+        state.totalTermFreq = -1;
+        state.ord = -1;
+
+        termSuffixes = new byte[128];
+        docFreqBytes = new byte[64];
+        //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
+        longs = new long[longsSize];
+      }
+
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return BytesRef.getUTF8SortedAsUnicodeComparator();
+      }
+
+      // TODO: we may want an alternate mode here which is
+      // "if you are about to return NOT_FOUND I won't use
+      // the terms data from that"; eg FuzzyTermsEnum will
+      // (usually) just immediately call seek again if we
+      // return NOT_FOUND so it's a waste for us to fill in
+      // the term that was actually NOT_FOUND
+      @Override
+      public SeekStatus seekCeil(final BytesRef target)  {
+
+        if (indexEnum == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+   
+        //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this="  + this);
+        if (didIndexNext) {
+          if (nextIndexTerm == null) {
+            //System.out.println("  nextIndexTerm=null");
+          } else {
+            //System.out.println("  nextIndexTerm=" + nextIndexTerm.utf8ToString());
+          }
+        }
+
+        bool doSeek = true;
+
+        // See if we can avoid seeking, because target term
+        // is after current term but before next index term:
+        if (indexIsCurrent) {
+
+          final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term, target);
+
+          if (cmp == 0) {
+            // Already at the requested term
+            return SeekStatus.FOUND;
+          } else if (cmp < 0) {
+
+            // Target term is after current term
+            if (!didIndexNext) {
+              if (indexEnum.next() == -1) {
+                nextIndexTerm = null;
+              } else {
+                nextIndexTerm = indexEnum.term();
+              }
+              //System.out.println("  now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
+              didIndexNext = true;
+            }
+
+            if (nextIndexTerm == null || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
+              // Optimization: requested term is within the
+              // same term block we are now in; skip seeking
+              // (but do scanning):
+              doSeek = false;
+              //System.out.println("  skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
+            }
+          }
+        }
+
+        if (doSeek) {
+          //System.out.println("  seek");
+
+          // Ask terms index to find biggest indexed term (=
+          // first term in a block) that's <= our text:
+          in.seek(indexEnum.seek(target));
+          bool result = nextBlock();
+
+          // Block must exist since, at least, the indexed term
+          // is in the block:
+          Debug.Assert(result);
+
+          indexIsCurrent = true;
+          didIndexNext = false;
+          blocksSinceSeek = 0;          
+
+          if (doOrd) {
+            state.ord = indexEnum.ord()-1;
+          }
+
+          term.copyBytes(indexEnum.term());
+          //System.out.println("  seek: term=" + term.utf8ToString());
+        } else {
+          //System.out.println("  skip seek");
+          if (state.termBlockOrd == blockTermCount && !nextBlock()) {
+            indexIsCurrent = false;
+            return SeekStatus.END;
+          }
+        }
+
+        seekPending = false;
+
+        int common = 0;
+
+        // Scan within block.  We could do this by calling
+        // _next() and testing the resulting term, but this
+        // is wasteful.  Instead, we first confirm the
+        // target matches the common prefix of this block,
+        // and then we scan the term bytes directly from the
+        // termSuffixesReader's byte[], saving a copy into
+        // the BytesRef term per term.  Only when we return
+        // do we then copy the bytes into the term.
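+        // For example, if this block's common prefix is "app"
+        // (termBlockPrefix == 3) and the stored suffixes are "le" and "ly",
+        // the block holds "apple" and "apply"; target bytes past offset 3
+        // are compared directly against the suffix bytes.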
+
+        while(true) {
+
+          // First, see if target term matches common prefix
+          // in this block:
+          if (common < termBlockPrefix) {
+            final int cmp = (term.bytes[common]&0xFF) - (target.bytes[target.offset + common]&0xFF);
+            if (cmp < 0) {
+
+              // TODO: maybe we should store common prefix
+              // in block header?  (instead of relying on
+              // last term of previous block)
+
+              // Target's prefix is after the common block
+              // prefix, so term cannot be in this block
+              // but it could be in next block.  We
+              // must scan to end-of-block to set common
+              // prefix for next block:
+              if (state.termBlockOrd < blockTermCount) {
+                while(state.termBlockOrd < blockTermCount-1) {
+                  state.termBlockOrd++;
+                  state.ord++;
+                  termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
+                }
+                final int suffix = termSuffixesReader.readVInt();
+                term.length = termBlockPrefix + suffix;
+                if (term.bytes.length < term.length) {
+                  term.grow(term.length);
+                }
+                termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              }
+              state.ord++;
+              
+              if (!nextBlock()) {
+                indexIsCurrent = false;
+                return SeekStatus.END;
+              }
+              common = 0;
+
+            } else if (cmp > 0) {
+              // Target's prefix is before the common prefix
+              // of this block, so we position to start of
+              // block and return NOT_FOUND:
+              Debug.Assert(state.termBlockOrd == 0);
+
+              final int suffix = termSuffixesReader.readVInt();
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              return SeekStatus.NOT_FOUND;
+            } else {
+              common++;
+            }
+
+            continue;
+          }
+
+          // Test every term in this block
+          while (true) {
+            state.termBlockOrd++;
+            state.ord++;
+
+            final int suffix = termSuffixesReader.readVInt();
+            
+            // We know the prefix matches, so just compare the new suffix:
+            final int termLen = termBlockPrefix + suffix;
+            int bytePos = termSuffixesReader.getPosition();
+
+            bool next = false;
+            final int limit = target.offset + (termLen < target.length ? termLen : target.length);
+            int targetPos = target.offset + termBlockPrefix;
+            while(targetPos < limit) {
+              final int cmp = (termSuffixes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+              if (cmp < 0) {
+                // Current term is still before the target;
+                // keep scanning
+                next = true;
+                break;
+              } else if (cmp > 0) {
+                // Done!  Current term is after target. Stop
+                // here, fill in real term, return NOT_FOUND.
+                term.length = termBlockPrefix + suffix;
+                if (term.bytes.length < term.length) {
+                  term.grow(term.length);
+                }
+                termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+                //System.out.println("  NOT_FOUND");
+                return SeekStatus.NOT_FOUND;
+              }
+            }
+
+            if (!next && target.length <= termLen) {
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+
+              if (target.length == termLen) {
+                // Done!  Exact match.  Stop here, fill in
+                // real term, return FOUND.
+                //System.out.println("  FOUND");
+                return SeekStatus.FOUND;
+              } else {
+                //System.out.println("  NOT_FOUND");
+                return SeekStatus.NOT_FOUND;
+              }
+            }
+
+            if (state.termBlockOrd == blockTermCount) {
+              // Must pre-fill term for next block's common prefix
+              term.length = termBlockPrefix + suffix;
+              if (term.bytes.length < term.length) {
+                term.grow(term.length);
+              }
+              termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+              break;
+            } else {
+              termSuffixesReader.skipBytes(suffix);
+            }
+          }
+
+          // The purpose of the terms dict index is to seek
+          // the enum to the closest index term before the
+          // term we are looking for.  So, we should never
+          // cross another index term (besides the first
+          // one) while we are scanning:
+
+          Debug.Assert(indexIsCurrent);
+
+          if (!nextBlock()) {
+            //System.out.println("  END");
+            indexIsCurrent = false;
+            return SeekStatus.END;
+          }
+          common = 0;
+        }
+      }
+
+      @Override
+      public BytesRef next()  {
+        //System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termBlockOrd);
+
+        // If seek was previously called and the term was cached,
+        // usually caller is just going to pull a D/&PEnum or get
+        // docFreq, etc.  But, if they then call next(),
+        // this method catches up all internal state so next()
+        // works properly:
+        if (seekPending) {
+          Debug.Assert(!indexIsCurrent);
+          in.seek(state.blockFilePointer);
+          final int pendingSeekCount = state.termBlockOrd;
+          bool result = nextBlock();
+
+          final long savOrd = state.ord;
+
+          // Block must exist since seek(TermState) was called w/ a
+          // TermState previously returned by this enum when positioned
+          // on a real term:
+          Debug.Assert(result);
+
+          while(state.termBlockOrd < pendingSeekCount) {
+            BytesRef nextResult = _next();
+            Debug.Assert(nextResult != null);
+          }
+          seekPending = false;
+          state.ord = savOrd;
+        }
+        return _next();
+      }
+
+      /* Decodes only the term bytes of the next term.  If caller then asks for
+         metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
+         decode all metadata up to the current term. */
+      private BytesRef _next()  {
+        //System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termBlockOrd + " (vs " + blockTermCount + ")");
+        if (state.termBlockOrd == blockTermCount && !nextBlock()) {
+          //System.out.println("  eof");
+          indexIsCurrent = false;
+          return null;
+        }
+
+        // TODO: cutover to something better for these ints!  simple64?
+        final int suffix = termSuffixesReader.readVInt();
+        //System.out.println("  suffix=" + suffix);
+
+        term.length = termBlockPrefix + suffix;
+        if (term.bytes.length < term.length) {
+          term.grow(term.length);
+        }
+        termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+        state.termBlockOrd++;
+
+        // NOTE: meaningless in the non-ord case
+        state.ord++;
+
+        //System.out.println("  return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term + " tbOrd=" + state.termBlockOrd);
+        return term;
+      }
+
+      @Override
+      public BytesRef term() {
+        return term;
+      }
+
+      @Override
+      public int docFreq()  {
+        //System.out.println("BTR.docFreq");
+        decodeMetaData();
+        //System.out.println("  return " + state.docFreq);
+        return state.docFreq;
+      }
+
+      @Override
+      public long totalTermFreq()  {
+        decodeMetaData();
+        return state.totalTermFreq;
+      }
+
+      @Override
+      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)  {
+        //System.out.println("BTR.docs this=" + this);
+        decodeMetaData();
+        //System.out.println("BTR.docs:  state.docFreq=" + state.docFreq);
+        return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
+      }
+
+      @Override
+      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)  {
+        if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          // Positions were not indexed:
+          return null;
+        }
+
+        decodeMetaData();
+        return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+      }
+
+      @Override
+      public void seekExact(BytesRef target, TermState otherState) {
+        //System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this);
+        Debug.Assert(otherState != null && otherState is BlockTermState);
+        Debug.Assert(!doOrd || ((BlockTermState) otherState).ord < numTerms);
+        state.copyFrom(otherState);
+        seekPending = true;
+        indexIsCurrent = false;
+        term.copyBytes(target);
+      }
+      
+      @Override
+      public TermState termState()  {
+        //System.out.println("BTR.termState this=" + this);
+        decodeMetaData();
+        TermState ts = state.clone();
+        //System.out.println("  return ts=" + ts);
+        return ts;
+      }
+
+      @Override
+      public void seekExact(long ord)  {
+        //System.out.println("BTR.seek by ord ord=" + ord);
+        if (indexEnum == null) {
+          throw new IllegalStateException("terms index was not loaded");
+        }
+
+        Debug.Assert(ord < numTerms);
+
+        // TODO: if ord is in same terms block and
+        // after current ord, we should avoid this seek just
+        // like we do in the seek(BytesRef) case
+        in.seek(indexEnum.seek(ord));
+        bool result = nextBlock();
+
+        // Block must exist since ord < numTerms:
+        Debug.Assert(result);
+
+        indexIsCurrent = true;
+        didIndexNext = false;
+        blocksSinceSeek = 0;
+        seekPending = false;
+
+        state.ord = indexEnum.ord()-1;
+        Debug.Assert(state.ord >= -1, "ord=" + state.ord);
+        term.copyBytes(indexEnum.term());
+
+        // Now, scan:
+        int left = (int) (ord - state.ord);
+        while(left > 0) {
+          final BytesRef term = _next();
+          Debug.Assert(term != null);
+          left--;
+          Debug.Assert(indexIsCurrent);
+        }
+      }
+
+      @Override
+      public long ord() {
+        if (!doOrd) {
+          throw new UnsupportedOperationException();
+        }
+        return state.ord;
+      }
+
+      /* Does initial decode of next block of terms; this
+         doesn't actually decode the docFreq, totalTermFreq,
+         postings details (frq/prx offset, etc.) metadata;
+         it just loads them as byte[] blobs which are then      
+         decoded on-demand if the metadata is ever requested
+         for any term in this block.  This enables terms-only
+         intensive consumes (eg certain MTQs, respelling) to
+         not pay the price of decoding metadata they won't
+         use. */
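+      /* Block wire format consumed below (mirroring
+         BlockTermsWriter.flushBlock):
+           VInt  blockTermCount        (0 marks end of field)
+           VInt  termBlockPrefix
+           VInt  length + byte[]       term suffixes
+           VInt  length + byte[]       docFreq/totalTermFreq deltas
+           VInt  length + byte[]       postings metadata */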
+      private bool nextBlock()  {
+
+        // TODO: we still lazy-decode the byte[] for each
+        // term (the suffix), but, if we decoded
+        // all N terms up front then seeking could do a fast
+        // bsearch w/in the block...
+
+        //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
+        state.blockFilePointer = in.getFilePointer();
+        blockTermCount = in.readVInt();
+        //System.out.println("  blockTermCount=" + blockTermCount);
+        if (blockTermCount == 0) {
+          return false;
+        }
+        termBlockPrefix = in.readVInt();
+
+        // term suffixes:
+        int len = in.readVInt();
+        if (termSuffixes.length < len) {
+          termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
+        }
+        //System.out.println("  termSuffixes len=" + len);
+        in.readBytes(termSuffixes, 0, len);
+        termSuffixesReader.reset(termSuffixes, 0, len);
+
+        // docFreq, totalTermFreq
+        len = in.readVInt();
+        if (docFreqBytes.length < len) {
+          docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
+        }
+        //System.out.println("  freq bytes len=" + len);
+        in.readBytes(docFreqBytes, 0, len);
+        freqReader.reset(docFreqBytes, 0, len);
+
+        // metadata
+        len = in.readVInt();
+        if (bytes == null) {
+          bytes = new byte[ArrayUtil.oversize(len, 1)];
+          bytesReader = new ByteArrayDataInput();
+        } else if (bytes.length < len) {
+          bytes = new byte[ArrayUtil.oversize(len, 1)];
+        }
+        in.readBytes(bytes, 0, len);
+        bytesReader.reset(bytes, 0, len);
+
+        metaDataUpto = 0;
+        state.termBlockOrd = 0;
+
+        blocksSinceSeek++;
+        indexIsCurrent = indexIsCurrent && (blocksSinceSeek < indexReader.getDivisor());
+        //System.out.println("  indexIsCurrent=" + indexIsCurrent);
+
+        return true;
+      }
+     
+      private void decodeMetaData()  {
+        //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termBlockOrd + " state=" + state);
+        if (!seekPending) {
+          // TODO: cutover to random-access API
+          // here.... really stupid that we have to decode N
+          // wasted term metadata just to get to the N+1th
+          // that we really need...
+
+          // lazily catch up on metadata decode:
+          final int limit = state.termBlockOrd;
+          bool absolute = metaDataUpto == 0;
+          // TODO: better API would be "jump straight to term=N"???
+          while (metaDataUpto < limit) {
+            //System.out.println("  decode mdUpto=" + metaDataUpto);
+            // TODO: we could make "tiers" of metadata, ie,
+            // decode docFreq/totalTF but don't decode postings
+            // metadata; this way caller could get
+            // docFreq/totalTF w/o paying decode cost for
+            // postings
+
+            // TODO: if docFreq were bulk decoded we could
+            // just skipN here:
+
+            // docFreq, totalTermFreq
+            state.docFreq = freqReader.readVInt();
+            //System.out.println("    dF=" + state.docFreq);
+            if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+              state.totalTermFreq = state.docFreq + freqReader.readVLong();
+              //System.out.println("    totTF=" + state.totalTermFreq);
+            }
+            // metadata
+            for (int i = 0; i < longs.length; i++) {
+              longs[i] = bytesReader.readVLong();
+            }
+            postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
+            metaDataUpto++;
+            absolute = false;
+          }
+        } else {
+          //System.out.println("  skip! seekPending");
+        }
+      }
+    }
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    long sizeInBytes = (postingsReader!=null) ? postingsReader.ramBytesUsed() : 0;
+    sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0;
+    return sizeInBytes;
+  }
+
+  @Override
+  public void checkIntegrity()  {   
+    // verify terms
+    if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+      CodecUtil.checksumEntireFile(in);
+    }
+    // verify postings
+    postingsReader.checkIntegrity();
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
new file mode 100644
index 0000000..3c20376
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/BlockTermsWriter.cs
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+
+// TODO: currently we encode all terms between two indexed
+// terms as a block; but, we could decouple the two, ie
+// allow several blocks in between two indexed terms
+
+/// <summary>
+/// Writes terms dict, block-encoding (column stride) each
+/// term's metadata for each set of terms between two
+/// index terms.
+///
+/// @lucene.experimental
+/// </summary>
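+/// <remarks>
+/// Illustrative wiring only (a sketch; FixedGapTermsIndexWriter's
+/// constructor is assumed to take the SegmentWriteState):
+/// <code>
+/// TermsIndexWriterBase indexWriter = new FixedGapTermsIndexWriter(state);
+/// FieldsConsumer consumer = new BlockTermsWriter(indexWriter, state, postingsWriter);
+/// </code>
+/// </remarks>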
+
+public class BlockTermsWriter : FieldsConsumer {
+
+  final static String CODEC_NAME = "BLOCK_TERMS_DICT";
+
+  // Initial format
+  public static final int VERSION_START = 0;
+  public static final int VERSION_APPEND_ONLY = 1;
+  public static final int VERSION_META_ARRAY = 2;
+  public static final int VERSION_CHECKSUM = 3;
+  public static final int VERSION_CURRENT = VERSION_CHECKSUM;
+
+  /// <summary>Extension of terms file</summary>
+  static final String TERMS_EXTENSION = "tib";
+
+  protected IndexOutput out;
+  final PostingsWriterBase postingsWriter;
+  final FieldInfos fieldInfos;
+  FieldInfo currentField;
+  private final TermsIndexWriterBase termsIndexWriter;
+
+  private class FieldMetaData {
+    public final FieldInfo fieldInfo;
+    public final long numTerms;
+    public final long termsStartPointer;
+    public final long sumTotalTermFreq;
+    public final long sumDocFreq;
+    public final int docCount;
+    public final int longsSize;
+
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+      Debug.Assert(numTerms > 0);
+      this.fieldInfo = fieldInfo;
+      this.termsStartPointer = termsStartPointer;
+      this.numTerms = numTerms;
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+      this.longsSize = longsSize;
+    }
+  }
+
+  private final List<FieldMetaData> fields = new ArrayList<>();
+
+  // private final String segment;
+
+  public BlockTermsWriter(TermsIndexWriterBase termsIndexWriter,
+      SegmentWriteState state, PostingsWriterBase postingsWriter)
+       {
+    final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
+    this.termsIndexWriter = termsIndexWriter;
+    out = state.directory.createOutput(termsFileName, state.context);
+    bool success = false;
+    try {
+      fieldInfos = state.fieldInfos;
+      writeHeader(out);
+      currentField = null;
+      this.postingsWriter = postingsWriter;
+      // segment = state.segmentName;
+      
+      //System.out.println("BTW.init seg=" + state.segmentName);
+      
+      postingsWriter.init(out); // have consumer write its format/header
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(out);
+      }
+    }
+  }
+  
+  private void writeHeader(IndexOutput out)  {
+    CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);     
+  }
+
+  @Override
+  public TermsConsumer addField(FieldInfo field)  {
+    //System.out.println("\nBTW.addField seg=" + segment + " field=" + field.name);
+    Debug.Assert(currentField == null || currentField.name.compareTo(field.name) < 0);
+    currentField = field;
+    TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
+    return new TermsWriter(fieldIndexWriter, field, postingsWriter);
+  }
+
+  @Override
+  public void close()  {
+    if (out != null) {
+      try {
+        final long dirStart = out.getFilePointer();
+        
+        out.writeVInt(fields.size());
+        for(FieldMetaData field : fields) {
+          out.writeVInt(field.fieldInfo.number);
+          out.writeVLong(field.numTerms);
+          out.writeVLong(field.termsStartPointer);
+          if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+            out.writeVLong(field.sumTotalTermFreq);
+          }
+          out.writeVLong(field.sumDocFreq);
+          out.writeVInt(field.docCount);
+          if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+            out.writeVInt(field.longsSize);
+          }
+        }
+        writeTrailer(dirStart);
+        CodecUtil.writeFooter(out);
+      } finally {
+        IOUtils.close(out, postingsWriter, termsIndexWriter);
+        out = null;
+      }
+    }
+  }
+
+  private void writeTrailer(long dirStart)  {
+    out.writeLong(dirStart);    
+  }
+  
+  private class TermEntry {
+    public final BytesRef term = new BytesRef();
+    public BlockTermState state;
+  }
+
+  class TermsWriter : TermsConsumer {
+    private final FieldInfo fieldInfo;
+    private final PostingsWriterBase postingsWriter;
+    private final long termsStartPointer;
+    private long numTerms;
+    private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
+    long sumTotalTermFreq;
+    long sumDocFreq;
+    int docCount;
+    int longsSize;
+
+    private TermEntry[] pendingTerms;
+
+    private int pendingCount;
+
+    TermsWriter(
+        TermsIndexWriterBase.FieldWriter fieldIndexWriter,
+        FieldInfo fieldInfo,
+        PostingsWriterBase postingsWriter) 
+    {
+      this.fieldInfo = fieldInfo;
+      this.fieldIndexWriter = fieldIndexWriter;
+      pendingTerms = new TermEntry[32];
+      for(int i=0;i<pendingTerms.length;i++) {
+        pendingTerms[i] = new TermEntry();
+      }
+      termsStartPointer = out.getFilePointer();
+      this.postingsWriter = postingsWriter;
+      this.longsSize = postingsWriter.setField(fieldInfo);
+    }
+    
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public PostingsConsumer startTerm(BytesRef text)  {
+      //System.out.println("BTW: startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
+      postingsWriter.startTerm();
+      return postingsWriter;
+    }
+
+    private final BytesRef lastPrevTerm = new BytesRef();
+
+    @Override
+    public void finishTerm(BytesRef text, TermStats stats)  {
+
+      Debug.Assert(stats.docFreq > 0);
+      //System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
+
+      final bool isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
+
+      if (isIndexTerm) {
+        if (pendingCount > 0) {
+          // Instead of writing each term, live, we gather terms
+          // in RAM in a pending buffer, and then write the
+          // entire block in between index terms:
+          flushBlock();
+        }
+        fieldIndexWriter.add(text, stats, out.getFilePointer());
+        //System.out.println("  index term!");
+      }
+
+      if (pendingTerms.length == pendingCount) {
+        final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+        System.arraycopy(pendingTerms, 0, newArray, 0, pendingCount);
+        for(int i=pendingCount;i<newArray.length;i++) {
+          newArray[i] = new TermEntry();
+        }
+        pendingTerms = newArray;
+      }
+      final TermEntry te = pendingTerms[pendingCount];
+      te.term.copyBytes(text);
+      te.state = postingsWriter.newTermState();
+      te.state.docFreq = stats.docFreq;
+      te.state.totalTermFreq = stats.totalTermFreq;
+      postingsWriter.finishTerm(te.state);
+
+      pendingCount++;
+      numTerms++;
+    }
+
+    // Finishes all terms in this field
+    @Override
+    public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)  {
+      if (pendingCount > 0) {
+        flushBlock();
+      }
+      // EOF marker:
+      out.writeVInt(0);
+
+      this.sumTotalTermFreq = sumTotalTermFreq;
+      this.sumDocFreq = sumDocFreq;
+      this.docCount = docCount;
+      fieldIndexWriter.finish(out.getFilePointer());
+      if (numTerms > 0) {
+        fields.add(new FieldMetaData(fieldInfo,
+                                     numTerms,
+                                     termsStartPointer,
+                                     sumTotalTermFreq,
+                                     sumDocFreq,
+                                     docCount,
+                                     longsSize));
+      }
+    }
+
+    private int sharedPrefix(BytesRef term1, BytesRef term2) {
+      Debug.Assert(term1.offset == 0);
+      Debug.Assert(term2.offset == 0);
+      int pos1 = 0;
+      int pos1End = pos1 + Math.min(term1.length, term2.length);
+      int pos2 = 0;
+      while(pos1 < pos1End) {
+        if (term1.bytes[pos1] != term2.bytes[pos2]) {
+          return pos1;
+        }
+        pos1++;
+        pos2++;
+      }
+      return pos1;
+    }
+
+    private final RAMOutputStream bytesWriter = new RAMOutputStream();
+    private final RAMOutputStream bufferWriter = new RAMOutputStream();
+
+    private void flushBlock()  {
+      //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
+
+      // First pass: compute common prefix for all terms
+      // in the block, against term before first term in
+      // this block:
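+      // For example, with lastPrevTerm == "car" and pending terms
+      // { "card", "care", "cart" }, sharedPrefix() returns 3 for every
+      // pair, so commonPrefix == 3 and only the one-byte suffixes
+      // d / e / t are written.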
+      int commonPrefix = sharedPrefix(lastPrevTerm, pendingTerms[0].term);
+      for(int termCount=1;termCount<pendingCount;termCount++) {
+        commonPrefix = Math.min(commonPrefix,
+                                sharedPrefix(lastPrevTerm,
+                                             pendingTerms[termCount].term));
+      }        
+
+      out.writeVInt(pendingCount);
+      out.writeVInt(commonPrefix);
+
+      // 2nd pass: write suffixes, as separate byte[] blob
+      for(int termCount=0;termCount<pendingCount;termCount++) {
+        final int suffix = pendingTerms[termCount].term.length - commonPrefix;
+        // TODO: cutover to better intblock codec, instead
+        // of interleaving here:
+        bytesWriter.writeVInt(suffix);
+        bytesWriter.writeBytes(pendingTerms[termCount].term.bytes, commonPrefix, suffix);
+      }
+      out.writeVInt((int) bytesWriter.getFilePointer());
+      bytesWriter.writeTo(out);
+      bytesWriter.reset();
+
+      // 3rd pass: write the freqs as byte[] blob
+      // TODO: cutover to better intblock codec.  simple64?
+      // write prefix, suffix first:
+      for(int termCount=0;termCount<pendingCount;termCount++) {
+        final BlockTermState state = pendingTerms[termCount].state;
+        Debug.Assert(state != null);
+        bytesWriter.writeVInt(state.docFreq);
+        if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+          bytesWriter.writeVLong(state.totalTermFreq-state.docFreq);
+        }
+      }
+      out.writeVInt((int) bytesWriter.getFilePointer());
+      bytesWriter.writeTo(out);
+      bytesWriter.reset();
+
+      // 4th pass: write the metadata 
+      long[] longs = new long[longsSize];
+      bool absolute = true;
+      for(int termCount=0;termCount<pendingCount;termCount++) {
+        final BlockTermState state = pendingTerms[termCount].state;
+        postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
+        for (int i = 0; i < longsSize; i++) {
+          bytesWriter.writeVLong(longs[i]);
+        }
+        bufferWriter.writeTo(bytesWriter);
+        bufferWriter.reset();
+        absolute = false;
+      }
+      out.writeVInt((int) bytesWriter.getFilePointer());
+      bytesWriter.writeTo(out);
+      bytesWriter.reset();
+
+      lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
+      pendingCount = 0;
+    }
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
new file mode 100644
index 0000000..effd850
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexReader.cs
@@ -0,0 +1,433 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support; // assumed home of the HashMap port used below
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+
+/// <summary>
+/// TermsIndexReader for simple every Nth terms indexes.
+///
+/// @lucene.experimental
+/// </summary>
+/// <seealso cref="FixedGapTermsIndexWriter"/>
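+/// <remarks>
+/// With the default interval of 128 (see the note on totalIndexInterval
+/// below), index term N covers term ords [N*128, (N+1)*128): seek(target)
+/// binary-searches the in-memory index terms, and BlockTermsReader then
+/// scans forward within the matched block.
+/// </remarks>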
+public class FixedGapTermsIndexReader : TermsIndexReaderBase {
+
+  // NOTE: long is overkill here, since this number is 128
+  // by default and only indexDivisor * 128 if you change
+  // the indexDivisor at search time.  But, we use this in a
+  // number of places to multiply out the actual ord, and we
+  // will overflow int during those multiplies.  So to avoid
+  // having to upgrade each multiple to long in multiple
+  // places (error prone), we use long here:
+  private long totalIndexInterval;
+
+  private int indexDivisor;
+  final private int indexInterval;
+
+  // Closed if indexLoaded is true:
+  private IndexInput in;
+  private volatile bool indexLoaded;
+
+  private final Comparator<BytesRef> termComp;
+
+  private final static int PAGED_BYTES_BITS = 15;
+
+  // all fields share this single logical byte[]
+  private final PagedBytes termBytes = new PagedBytes(PAGED_BYTES_BITS);
+  private PagedBytes.Reader termBytesReader;
+
+  final HashMap<FieldInfo,FieldIndexData> fields = new HashMap<>();
+  
+  // start of the field info data
+  private long dirOffset;
+  
+  private final int version;
+
+  public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
+     {
+
+    this.termComp = termComp;
+
+    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);
+
+    in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);
+    
+    bool success = false;
+
+    try {
+      
+      version = readHeader(in);
+      
+      if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+        CodecUtil.checksumEntireFile(in);
+      }
+      
+      indexInterval = in.readInt();
+      if (indexInterval < 1) {
+        throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
+      }
+      this.indexDivisor = indexDivisor;
+
+      if (indexDivisor < 0) {
+        totalIndexInterval = indexInterval;
+      } else {
+        // In case terms index gets loaded, later, on demand
+        totalIndexInterval = indexInterval * indexDivisor;
+      }
+      Debug.Assert(totalIndexInterval > 0);
+      
+      seekDir(in, dirOffset);
+
+      // Read directory
+      final int numFields = in.readVInt();     
+      if (numFields < 0) {
+        throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+      }
+      //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
+      for(int i=0;i<numFields;i++) {
+        final int field = in.readVInt();
+        final int numIndexTerms = in.readVInt();
+        if (numIndexTerms < 0) {
+          throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
+        }
+        final long termsStart = in.readVLong();
+        final long indexStart = in.readVLong();
+        final long packedIndexStart = in.readVLong();
+        final long packedOffsetsStart = in.readVLong();
+        if (packedIndexStart < indexStart) {
+          throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + "numIndexTerms: " + numIndexTerms + " (resource=" + in + ")");
+        }
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        FieldIndexData previous = fields.put(fieldInfo, new FieldIndexData(fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
+        if (previous != null) {
+          throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+        }
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(in);
+      }
+      if (indexDivisor > 0) {
+        in.close();
+        in = null;
+        if (success) {
+          indexLoaded = true;
+        }
+        termBytesReader = termBytes.freeze(true);
+      }
+    }
+  }
+  
+  @Override
+  public int getDivisor() {
+    return indexDivisor;
+  }
+
+  private int readHeader(IndexInput input)  {
+    int version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+      FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_CURRENT);
+    if (version < FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+      dirOffset = input.readLong();
+    }
+    return version;
+  }
+
+  private class IndexEnum : FieldIndexEnum {
+    private final FieldIndexData.CoreFieldIndex fieldIndex;
+    private final BytesRef term = new BytesRef();
+    private long ord;
+
+    public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
+      this.fieldIndex = fieldIndex;
+    }
+
+    @Override
+    public BytesRef term() {
+      return term;
+    }
+
+    @Override
+    public long seek(BytesRef target) {
+      int lo = 0;          // binary search
+      int hi = fieldIndex.numIndexTerms - 1;
+      Debug.Assert(totalIndexInterval > 0, "totalIndexInterval=" + totalIndexInterval);
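+      // Sketch: with numIndexTerms == 4 and totalIndexInterval == 128, a
+      // target falling between index terms 1 and 2 exits the loop with
+      // hi == 1, so we return ord 128 and the terms-dict file pointer of
+      // index term 1; the caller scans forward from there.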
+
+      while (hi >= lo) {
+        int mid = (int)((uint)(lo + hi) >> 1); // Java's >>> (unsigned shift)
+
+        final long offset = fieldIndex.termOffsets.get(mid);
+        final int length = (int) (fieldIndex.termOffsets.get(1+mid) - offset);
+        termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+        int delta = termComp.compare(target, term);
+        if (delta < 0) {
+          hi = mid - 1;
+        } else if (delta > 0) {
+          lo = mid + 1;
+        } else {
+          Debug.Assert(mid >= 0);
+          ord = mid*totalIndexInterval;
+          return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(mid);
+        }
+      }
+
+      if (hi < 0) {
+        Debug.Assert(hi == -1);
+        hi = 0;
+      }
+
+      final long offset = fieldIndex.termOffsets.get(hi);
+      final int length = (int) (fieldIndex.termOffsets.get(1+hi) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+
+      ord = hi*totalIndexInterval;
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(hi);
+    }
+
+    @Override
+    public long next() {
+      final int idx = 1 + (int) (ord / totalIndexInterval);
+      if (idx >= fieldIndex.numIndexTerms) {
+        return -1;
+      }
+      ord += totalIndexInterval;
+
+      final long offset = fieldIndex.termOffsets.get(idx);
+      final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+    }
+
+    @Override
+    public long ord() {
+      return ord;
+    }
+
+    @Override
+    public long seek(long ord) {
+      int idx = (int) (ord / totalIndexInterval);
+      // caller must ensure ord is in bounds
+      Debug.Assert(idx < fieldIndex.numIndexTerms);
+      final long offset = fieldIndex.termOffsets.get(idx);
+      final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
+      this.ord = idx * totalIndexInterval;
+      return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
+    }
+  }
+
+  @Override
+  public bool supportsOrd() {
+    return true;
+  }
+
+  private sealed class FieldIndexData {
+
+    volatile CoreFieldIndex coreIndex;
+
+    private final long indexStart;
+    private final long termsStart;
+    private final long packedIndexStart;
+    private final long packedOffsetsStart;
+
+    private final int numIndexTerms;
+
+    public FieldIndexData(FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart, long packedIndexStart,
+                          long packedOffsetsStart)  {
+
+      this.termsStart = termsStart;
+      this.indexStart = indexStart;
+      this.packedIndexStart = packedIndexStart;
+      this.packedOffsetsStart = packedOffsetsStart;
+      this.numIndexTerms = numIndexTerms;
+
+      if (indexDivisor > 0) {
+        loadTermsIndex();
+      }
+    }
+
+    private void loadTermsIndex()  {
+      if (coreIndex == null) {
+        coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
+      }
+    }
+
+    private sealed class CoreFieldIndex {
+
+      // where this field's terms begin in the packed byte[]
+      // data
+      final long termBytesStart;
+
+      // offset into index termBytes
+      final PackedInts.Reader termOffsets;
+
+      // index pointers into main terms dict
+      final PackedInts.Reader termsDictOffsets;
+
+      final int numIndexTerms;
+      final long termsStart;
+
+      public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart, int numIndexTerms)  {
+
+        this.termsStart = termsStart;
+        termBytesStart = termBytes.getPointer();
+
+        IndexInput clone = in.clone();
+        clone.seek(indexStart);
+
+        // -1 is passed to mean "don't load term index", but
+        // if the index is later loaded on demand this is
+        // overwritten with a real value
+        Debug.Assert(indexDivisor > 0);
+
+        this.numIndexTerms = 1+(numIndexTerms-1) / indexDivisor;
+
+        Debug.Assert(this.numIndexTerms > 0, "numIndexTerms=" + numIndexTerms + " indexDivisor=" + indexDivisor);
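+        // e.g. 100 index terms on disk with indexDivisor == 4 load
+        // 1 + (100-1)/4 == 25 subsampled terms into RAM.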
+
+        if (indexDivisor == 1) {
+          // Default (load all index terms) is fast -- slurp in the images from disk:
+
+          try {
+            long numTermBytes = packedIndexStart - indexStart;
+            termBytes.Copy(clone, numTermBytes);
+
+            // records offsets into main terms dict file
+            termsDictOffsets = PackedInts.GetReader(clone);
+            Debug.Assert(termsDictOffsets.Size() == numIndexTerms);
+
+            // records offsets into byte[] term data
+            termOffsets = PackedInts.GetReader(clone);
+            Debug.Assert(termOffsets.Size() == 1 + numIndexTerms);
+          } finally {
+            clone.Close();
+          }
+        } else {
+          // Get packed iterators
+          IndexInput clone1 = input.Clone();
+          IndexInput clone2 = input.Clone();
+
+          try {
+            // Subsample the index terms
+            clone1.Seek(packedIndexStart);
+            PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.GetReaderIterator(clone1, PackedInts.DEFAULT_BUFFER_SIZE);
+
+            clone2.Seek(packedOffsetsStart);
+            PackedInts.ReaderIterator termOffsetsIter = PackedInts.GetReaderIterator(clone2, PackedInts.DEFAULT_BUFFER_SIZE);
+
+            // TODO: often we can get by w/ fewer bits per
+            // value, below... but this'd be more complex:
+            // we'd have to try @ fewer bits and then grow
+            // if we overflowed it.
+
+            PackedInts.Mutable termsDictOffsetsM = PackedInts.GetMutable(this.numIndexTerms, termsDictOffsetsIter.GetBitsPerValue(), PackedInts.DEFAULT);
+            PackedInts.Mutable termOffsetsM = PackedInts.GetMutable(this.numIndexTerms + 1, termOffsetsIter.GetBitsPerValue(), PackedInts.DEFAULT);
+
+            termsDictOffsets = termsDictOffsetsM;
+            termOffsets = termOffsetsM;
+
+            int upto = 0;
+
+            long termOffsetUpto = 0;
+
+            while (upto < this.numIndexTerms) {
+              // main file offset copies straight over
+              termsDictOffsetsM.Set(upto, termsDictOffsetsIter.Next());
+
+              termOffsetsM.Set(upto, termOffsetUpto);
+
+              long termOffset = termOffsetsIter.Next();
+              long nextTermOffset = termOffsetsIter.Next();
+              int numTermBytes = (int) (nextTermOffset - termOffset);
+
+              clone.Seek(indexStart + termOffset);
+              Debug.Assert(indexStart + termOffset < clone.Length(), "indexStart=" + indexStart + " termOffset=" + termOffset + " len=" + clone.Length());
+              Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length());
+
+              termBytes.Copy(clone, numTermBytes);
+              termOffsetUpto += numTermBytes;
+
+              upto++;
+              if (upto == this.numIndexTerms) {
+                break;
+              }
+
+              // skip terms:
+              termsDictOffsetsIter.Next();
+              for (int i = 0; i < indexDivisor - 2; i++) {
+                termOffsetsIter.Next();
+                termsDictOffsetsIter.Next();
+              }
+            }
+            termOffsetsM.Set(upto, termOffsetUpto);
+
+          } finally {
+            clone1.Close();
+            clone2.Close();
+            clone.Close();
+          }
+        }
+      }
+      
+      /// <summary>Returns approximate RAM bytes used</summary>
+      public long RamBytesUsed() {
+        return ((termOffsets != null) ? termOffsets.RamBytesUsed() : 0) +
+            ((termsDictOffsets != null) ? termsDictOffsets.RamBytesUsed() : 0);
+      }
+    }
+  }
+
+  public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo) {
+    FieldIndexData fieldData = fields[fieldInfo];
+    if (fieldData.coreIndex == null) {
+      return null;
+    } else {
+      return new IndexEnum(fieldData.coreIndex);
+    }
+  }
+
+  public override void Dispose() {
+    if (input != null && !indexLoaded) {
+      input.Close();
+    }
+  }
+
+  private void SeekDir(IndexInput input, long dirOffset) {
+    if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+      input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
+      dirOffset = input.ReadLong();
+    } else if (version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+      input.Seek(input.Length() - 8);
+      dirOffset = input.ReadLong();
+    }
+    input.Seek(dirOffset);
+  }
+  
+  public override long RamBytesUsed() {
+    long sizeInBytes = ((termBytes != null) ? termBytes.RamBytesUsed() : 0) +
+        ((termBytesReader != null) ? termBytesReader.RamBytesUsed() : 0);
+    foreach (FieldIndexData entry in fields.Values) {
+      if (entry.coreIndex != null) { // index may not be loaded (indexDivisor == -1)
+        sizeInBytes += entry.coreIndex.RamBytesUsed();
+      }
+    }
+    return sizeInBytes;
+  }
+}
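
For illustration, the ord-based Seek above reduces to arithmetic over two packed arrays: an ord maps to indexed slot ord / totalIndexInterval, adjacent termOffsets entries bound that slot's term bytes, and termsStart + termsDictOffsets[idx] is the file pointer into the main terms dictionary. A minimal sketch of that mapping, with plain long[] arrays standing in for the PackedInts.Reader instances held by CoreFieldIndex (names otherwise follow the reader above):

    // Sketch only: long[] arrays stand in for the packed readers.
    static long SeekByOrd(long ord, int totalIndexInterval,
                          long[] termOffsets, long[] termsDictOffsets,
                          long termsStart, out int termLength)
    {
        int idx = (int)(ord / totalIndexInterval);         // indexed slot for this ord
        long offset = termOffsets[idx];                    // start of this term's bytes
        termLength = (int)(termOffsets[idx + 1] - offset); // adjacent entries bound the slice
        return termsStart + termsDictOffsets[idx];         // fp into the main terms dict file
    }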

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
new file mode 100644
index 0000000..ba0bbf9
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/FixedGapTermsIndexWriter.cs
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Lucene.Net.Index;
+    using Lucene.Net.Store;
+    using Lucene.Net.Util;
+    using Lucene.Net.Util.Packed;
+
+    /// <summary>
+    /// Selects every Nth term as an index term, and holds term
+    /// bytes (mostly) fully expanded in memory.  This terms index
+    /// supports seeking by ord.  See {@link
+    /// VariableGapTermsIndexWriter} for a more memory efficient
+    /// terms index that does not support seeking by ord.
+    ///
+    /// @lucene.experimental
+    /// </summary>
+    public class FixedGapTermsIndexWriter : TermsIndexWriterBase
+    {
+        protected IndexOutput output;
+
+        /// <summary>Extension of terms index file</summary>
+        internal static readonly String TERMS_INDEX_EXTENSION = "tii";
+        internal static readonly String CODEC_NAME = "SIMPLE_STANDARD_TERMS_INDEX";
+        internal static readonly int VERSION_START = 0;
+        internal static readonly int VERSION_APPEND_ONLY = 1;
+
+        internal static readonly int VERSION_CHECKSUM = 1000;
+
+        // 4.x "skipped" trunk's monotonic addressing: give any user a nice exception
+        private static readonly int VERSION_CURRENT = VERSION_CHECKSUM;
+        private readonly int termIndexInterval;
+        private readonly List<SimpleFieldWriter> fields = new List<SimpleFieldWriter>();
+        private readonly FieldInfos fieldInfos; // unused
+
+        public FixedGapTermsIndexWriter(SegmentWriteState state)
+        {
+            String indexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix,
+                TERMS_INDEX_EXTENSION);
+            termIndexInterval = state.TermIndexInterval;
+            output = state.Directory.CreateOutput(indexFileName, state.Context);
+            bool success = false;
+            try
+            {
+                fieldInfos = state.FieldInfos;
+                WriteHeader(output);
+                output.WriteInt(termIndexInterval);
+                success = true;
+            }
+            finally
+            {
+                if (!success)
+                {
+                    IOUtils.CloseWhileHandlingException(output);
+                }
+            }
+        }
+
+        private void WriteHeader(IndexOutput output)
+        {
+            CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);
+        }
+
+        public override FieldWriter AddField(FieldInfo field, long termsFilePointer)
+        {
+            //System.out.println("FGW: addField=" + field.name);
+            SimpleFieldWriter writer = new SimpleFieldWriter(this, field, termsFilePointer);
+            fields.Add(writer);
+            return writer;
+        }
+
+        /// <remarks>
+        /// NOTE: if your codec does not sort in unicode code
+        /// point order, you must override this method to simply
+        /// return indexedTerm.Length.
+        /// </remarks>
+        protected virtual int IndexedTermPrefixLength(BytesRef priorTerm, BytesRef indexedTerm)
+        {
+            // As long as codec sorts terms in unicode codepoint
+            // order, we can safely strip off the non-distinguishing
+            // suffix to save RAM in the loaded terms index.
+            int idxTermOffset = indexedTerm.Offset;
+            int priorTermOffset = priorTerm.Offset;
+            int limit = Math.Min(priorTerm.Length, indexedTerm.Length);
+            for (int byteIdx = 0; byteIdx < limit; byteIdx++)
+            {
+                if (priorTerm.Bytes[priorTermOffset + byteIdx] != indexedTerm.Bytes[idxTermOffset + byteIdx])
+                {
+                    return byteIdx + 1;
+                }
+            }
+            return Math.Min(1 + priorTerm.Length, indexedTerm.Length);
+        }
+
+        public override void Dispose()
+        {
+            if (output != null)
+            {
+                bool success = false;
+                try
+                {
+                    long dirStart = output.FilePointer;
+                    int fieldCount = fields.Count;
+
+                    int nonNullFieldCount = 0;
+                    for (int i = 0; i < fieldCount; i++)
+                    {
+                        SimpleFieldWriter field = fields[i];
+                        if (field.numIndexTerms > 0)
+                        {
+                            nonNullFieldCount++;
+                        }
+                    }
+
+                    output.WriteVInt(nonNullFieldCount);
+                    for (int i = 0; i < fieldCount; i++)
+                    {
+                        SimpleFieldWriter field = fields[i];
+                        if (field.numIndexTerms > 0)
+                        {
+                            output.WriteVInt(field.fieldInfo.Number);
+                            output.WriteVInt(field.numIndexTerms);
+                            output.WriteVLong(field.termsStart);
+                            output.WriteVLong(field.indexStart);
+                            output.WriteVLong(field.packedIndexStart);
+                            output.WriteVLong(field.packedOffsetsStart);
+                        }
+                    }
+                    WriteTrailer(dirStart);
+                    CodecUtil.WriteFooter(output);
+                    success = true;
+                }
+                finally
+                {
+                    if (success)
+                    {
+                        IOUtils.Close(output);
+                    }
+                    else
+                    {
+                        IOUtils.CloseWhileHandlingException(output);
+                    }
+                    output = null;
+                }
+            }
+        }
+
+        private void WriteTrailer(long dirStart)
+        {
+            output.WriteLong(dirStart);
+        }
+
+
+        private class SimpleFieldWriter : FieldWriter
+        {
+            private readonly FixedGapTermsIndexWriter outerInstance;
+
+            public readonly FieldInfo fieldInfo;
+            public int numIndexTerms;
+            public readonly long indexStart;
+            public readonly long termsStart;
+            public long packedIndexStart;
+            public long packedOffsetsStart;
+            private long numTerms;
+
+            // TODO: we could conceivably make a PackedInts wrapper
+            // that auto-grows... then we wouldn't force 6 bytes RAM
+            // per index term:
+            private short[] termLengths;
+            private int[] termsPointerDeltas;
+            private long lastTermsPointer;
+            private long totTermLength;
+
+            private readonly BytesRef lastTerm = new BytesRef();
+
+            public SimpleFieldWriter(FixedGapTermsIndexWriter outerInstance, FieldInfo fieldInfo, long termsFilePointer)
+            {
+                this.outerInstance = outerInstance;
+                this.fieldInfo = fieldInfo;
+                indexStart = outerInstance.output.FilePointer;
+                termsStart = lastTermsPointer = termsFilePointer;
+                termLengths = new short[0];
+                termsPointerDeltas = new int[0];
+            }
+
+            public override bool CheckIndexTerm(BytesRef text, TermStats stats)
+            {
+                // First term is first indexed term:
+                //System.out.println("FGW: checkIndexTerm text=" + text.utf8ToString());
+                if (0 == (numTerms++ % outerInstance.termIndexInterval))
+                {
+                    return true;
+                }
+                else
+                {
+                    if (0 == numTerms % outerInstance.termIndexInterval)
+                    {
+                        // save last term just before next index term so we
+                        // can compute wasted suffix
+                        lastTerm.CopyBytes(text);
+                    }
+                    return false;
+                }
+            }
+
+            public override void Add(BytesRef text, TermStats stats, long termsFilePointer)
+            {
+                int indexedTermLength = outerInstance.IndexedTermPrefixLength(lastTerm, text);
+                //System.out.println("FGW: add text=" + text.utf8ToString() + " " + text + " fp=" + termsFilePointer);
+
+                // write only the min prefix that shows the diff
+                // against prior term
+                outerInstance.output.WriteBytes(text.Bytes, text.Offset, indexedTermLength);
+
+                if (termLengths.Length == numIndexTerms)
+                {
+                    termLengths = ArrayUtil.Grow(termLengths);
+                }
+                if (termsPointerDeltas.Length == numIndexTerms)
+                {
+                    termsPointerDeltas = ArrayUtil.Grow(termsPointerDeltas);
+                }
+
+                // save delta terms pointer
+                termsPointerDeltas[numIndexTerms] = (int)(termsFilePointer - lastTermsPointer);
+                lastTermsPointer = termsFilePointer;
+
+                // save term length (in bytes)
+                Debug.Assert(indexedTermLength <= short.MaxValue);
+                termLengths[numIndexTerms] = (short)indexedTermLength;
+                totTermLength += indexedTermLength;
+
+                lastTerm.CopyBytes(text);
+                numIndexTerms++;
+            }
+
+            public override void Finish(long termsFilePointer)
+            {
+
+                // write primary terms dict offsets
+                packedIndexStart = outerInstance.output.FilePointer;
+
+                PackedInts.Writer w = PackedInts.GetWriter(outerInstance.output, numIndexTerms,
+                    PackedInts.BitsRequired(termsFilePointer),
+                    PackedInts.DEFAULT);
+
+                // relative to our indexStart
+                long upto = 0;
+                for (int i = 0; i < numIndexTerms; i++)
+                {
+                    upto += termsPointerDeltas[i];
+                    w.Add(upto);
+                }
+                w.Finish();
+
+                packedOffsetsStart = outerInstance.output.FilePointer;
+
+                // write offsets into the byte[] terms
+                w = PackedInts.GetWriter(outerInstance.output, 1 + numIndexTerms, PackedInts.BitsRequired(totTermLength),
+                    PackedInts.DEFAULT);
+                upto = 0;
+                for (int i = 0; i < numIndexTerms; i++)
+                {
+                    w.Add(upto);
+                    upto += termLengths[i];
+                }
+                w.Add(upto);
+                w.Finish();
+
+                // our referrer holds onto us, while other fields are
+                // being written, so don't tie up this RAM:
+                termLengths = null;
+                termsPointerDeltas = null;
+            }
+        }
+    }
+}
\ No newline at end of file
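
The prefix truncation in IndexedTermPrefixLength is where this writer saves its RAM: only the shortest byte prefix that still distinguishes an indexed term from the prior term is written. A self-contained illustration of the same byte loop (hypothetical PrefixDemo class; assumes terms sort in unicode code point order, per the remark on that method):

    using System;
    using System.Text;

    static class PrefixDemo
    {
        // Mirrors the comparison loop in IndexedTermPrefixLength (illustration only).
        static int PrefixLength(byte[] prior, byte[] indexed)
        {
            int limit = Math.Min(prior.Length, indexed.Length);
            for (int i = 0; i < limit; i++)
            {
                if (prior[i] != indexed[i])
                    return i + 1;  // keep through the first differing byte
            }
            return Math.Min(1 + prior.Length, indexed.Length);
        }

        static void Main()
        {
            byte[] prior = Encoding.UTF8.GetBytes("foobar");
            byte[] indexed = Encoding.UTF8.GetBytes("foozap");
            Console.WriteLine(PrefixLength(prior, indexed)); // 4 -> only "fooz" is stored
        }
    }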

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
new file mode 100644
index 0000000..aa15e4e
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexReaderBase.cs
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+
+
+    /// <summary>
+    /// TODO
+    ///   - allow for non-regular index intervals?  eg with a
+    ///     long string of rare terms, you don't need such
+    ///     frequent indexing
+    /// 
+    /// {@link BlockTermsReader} interacts with an instance of this class
+    /// to manage its terms index.  The writer must accept
+    /// indexed terms (many pairs of BytesRef text + long
+    /// fileOffset), and then this reader must be able to
+    /// retrieve the nearest index term to a provided term
+    /// text. 
+    ///
+    ///  @lucene.experimental
+    /// </summary>
+    public abstract class TermsIndexReaderBase : IDisposable
+    {
+
+        public abstract FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo);
+
+        public abstract void Dispose();
+
+        public abstract bool SupportsOrd();
+
+        public abstract int GetDivisor();
+
+        /// <summary>
+        /// Similar to TermsEnum, except, the only "metadata" it
+        /// reports for a given indexed term is the long fileOffset
+        /// into the main terms dictionary file.
+        /// </summary>
+        public abstract class FieldIndexEnum
+        {
+
+            /// <summary>
+            /// Seeks to the "largest" indexed term that's &lt;=
+            /// term; returns the file pointer (into the main
+            /// terms dictionary file) for that term
+            /// </summary>
+            public abstract long Seek(BytesRef term);
+
+            /// <summary>Returns -1 at end</summary>
+            public abstract long Next();
+
+            public abstract BytesRef Term();
+
+            /// <summary>Seeks to the given ord</summary>
+            /// <remarks>Only implemented if {@link TermsIndexReaderBase.SupportsOrd()}
+            /// returns true</remarks>
+            public abstract long Seek(long ord);
+
+            /// <summary>Returns the ord of the current term</summary>
+            /// <remarks>Only implemented if {@link TermsIndexReaderBase.SupportsOrd()}
+            /// returns true</remarks>
+            public abstract long Ord();
+        }
+
+        /// <summary>Returns approximate RAM bytes used</summary>
+        public abstract long RamBytesUsed();
+    }
+}
\ No newline at end of file
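
The contract is easiest to read from the consumer side: Seek(term) lands on the floor indexed term and returns its terms-dictionary file pointer, and Next() walks the remaining indexed terms until -1. A hypothetical scan over one field's index terms (indexReader and fieldInfo are assumed to be in hand; any TermsIndexReaderBase implementation would do):

    // Hypothetical usage sketch, not part of the API above.
    TermsIndexReaderBase.FieldIndexEnum fieldEnum = indexReader.GetFieldEnum(fieldInfo);
    long fp = fieldEnum.Seek(new BytesRef("m"));  // floor indexed term <= "m"
    while (fp != -1)
    {
        Console.WriteLine(fieldEnum.Term().Utf8ToString() + " @ " + fp);
        fp = fieldEnum.Next();                    // -1 once indexed terms are exhausted
    }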

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs
new file mode 100644
index 0000000..bd20e4e
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/TermsIndexWriterBase.cs
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+
+    /// <summary>
+    ///  Base class for terms index implementations to plug
+    /// into {@link BlockTermsWriter}.
+    /// 
+    /// @see TermsIndexReaderBase
+    /// @lucene.experimental 
+    /// </summary>
+    public abstract class TermsIndexWriterBase : IDisposable
+    {
+
+        public abstract FieldWriter AddField(FieldInfo fieldInfo, long termsFilePointer);
+
+        public virtual void Dispose()
+        {
+        }
+
+
+        /// <summary>Terms index API for a single field</summary>
+        public abstract class FieldWriter
+        {
+            public abstract bool CheckIndexTerm(BytesRef text, TermStats stats);
+            public abstract void Add(BytesRef text, TermStats stats, long termsFilePointer);
+            public abstract void Finish(long termsFilePointer);
+        }
+    }
+}
\ No newline at end of file
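
The intended call protocol, as a terms dictionary writer would drive it: AddField() once per field, CheckIndexTerm() for every term, Add() whenever it answers true, and Finish() after the last term. A hedged sketch of that loop (indexWriter, fieldInfo, termsOut, the sorted terms source, and WriteTermBlock are all assumed for illustration):

    // Hypothetical driver loop; 'indexWriter' is any TermsIndexWriterBase.
    TermsIndexWriterBase.FieldWriter fieldWriter = indexWriter.AddField(fieldInfo, termsOut.FilePointer);
    foreach (BytesRef term in terms)                 // terms assumed pre-sorted
    {
        TermStats stats = WriteTermBlock(term);      // hypothetical: writes postings, returns stats
        if (fieldWriter.CheckIndexTerm(term, stats)) // true for every Nth term in the fixed-gap impl
        {
            fieldWriter.Add(term, stats, termsOut.FilePointer);
        }
    }
    fieldWriter.Finish(termsOut.FilePointer);        // flushes the field's packed offsets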

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
new file mode 100644
index 0000000..0c97779
--- /dev/null
+++ b/src/Lucene.Net.Codecs/BlockTerms/VariableGapTermsIndexReader.cs
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Codecs.BlockTerms
+{
+
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics;
+    using Lucene.Net.Index;
+    using Lucene.Net.Store;
+    using Lucene.Net.Util;
+    using Lucene.Net.Util.Fst;
+
+    /// <summary>
+    /// See {@link VariableGapTermsIndexWriter}
+    ///
+    /// @lucene.experimental
+    /// </summary>
+
+    public class VariableGapTermsIndexReader : TermsIndexReaderBase
+    {
+
+        private readonly PositiveIntOutputs fstOutputs = PositiveIntOutputs.Singleton;
+        private readonly int indexDivisor;
+        private readonly IndexInput input;       // Closed if indexLoaded is true:
+        private volatile bool indexLoaded;
+
+        private readonly Dictionary<FieldInfo, FieldIndexData> fields = new Dictionary<FieldInfo, FieldIndexData>();
+
+        private long dirOffset;                 // start of the field info data
+        private readonly int version;
+        private readonly String segment;
+
+        public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
+            String segmentSuffix, IOContext context)
+        {
+            input = dir.OpenInput(
+                IndexFileNames.SegmentFileName(segment, segmentSuffix,
+                    VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true));
+            this.segment = segment;
+            bool success = false;
+
+            Debug.Assert(indexDivisor == -1 || indexDivisor > 0);
+
+            try
+            {
+
+                version = ReadHeader(input);
+                this.indexDivisor = indexDivisor;
+
+                if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
+                {
+                    CodecUtil.ChecksumEntireFile(input);
+                }
+
+                SeekDir(input, dirOffset);
+
+                // Read directory
+                int numFields = input.ReadVInt();
+                if (numFields < 0)
+                {
+                    throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")");
+                }
+
+                for (int i = 0; i < numFields; i++)
+                {
+                    int field = input.ReadVInt();
+                    long indexStart = input.ReadVLong();
+                    FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
+                    if (fields.ContainsKey(fieldInfo))
+                    {
+                        throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")");
+                    }
+                    fields[fieldInfo] = new FieldIndexData(this, fieldInfo, indexStart);
+                }
+                success = true;
+            }
+            finally
+            {
+                if (indexDivisor > 0)
+                {
+                    input.Close();
+                    input = null;
+                    if (success)
+                    {
+                        indexLoaded = true;
+                    }
+                }
+            }
+        }
+
+
+        private int ReadHeader(IndexInput input)
+        {
+            int version = CodecUtil.CheckHeader(input, VariableGapTermsIndexWriter.CODEC_NAME,
+                VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_CURRENT);
+            if (version < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY)
+            {
+                dirOffset = input.ReadLong();
+            }
+            return version;
+        }
+
+        public override void Dispose()
+        {
+            if (input != null && !indexLoaded)
+            {
+                input.Close();
+            }
+        }
+
+        public override bool SupportsOrd()
+        {
+            return false;
+        }
+
+        public override int GetDivisor()
+        {
+            return indexDivisor;
+        }
+
+        public override FieldIndexEnum GetFieldEnum(FieldInfo fieldInfo)
+        {
+            FieldIndexData fieldData = fields[fieldInfo];
+            if (fieldData.Fst == null)
+            {
+                return null;
+            }
+            else
+            {
+                return new IndexEnum(fieldData.Fst);
+            }
+        }
+
+        private void SeekDir(IndexInput input, long dirOffset)
+        {
+            if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
+            {
+                input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
+                dirOffset = input.ReadLong();
+            }
+            else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY)
+            {
+                input.Seek(input.Length() - 8);
+                dirOffset = input.ReadLong();
+            }
+            input.Seek(dirOffset);
+        }
+
+        public override long RamBytesUsed()
+        {
+            long sizeInBytes = 0;
+
+            foreach (var entry in fields.Values)
+            {
+                sizeInBytes += entry.RamBytesUsed();
+            }
+
+            return sizeInBytes;
+        }
+
+
+        internal class FieldIndexData
+        {
+            private readonly VariableGapTermsIndexReader outerInstance;
+
+            private readonly long indexStart;
+            // Set only if terms index is loaded:
+            public volatile FST<long> Fst;
+
+            public FieldIndexData(VariableGapTermsIndexReader outerInstance, FieldInfo fieldInfo, long indexStart)
+            {
+                this.outerInstance = outerInstance;
+                this.indexStart = indexStart;
+
+                if (outerInstance.indexDivisor > 0)
+                {
+                    LoadTermsIndex();
+                }
+            }
+
+            private void LoadTermsIndex()
+            {
+                if (Fst == null)
+                {
+                    IndexInput clone = outerInstance.input.Clone();
+                    clone.Seek(indexStart);
+                    Fst = new FST<long>(clone, outerInstance.fstOutputs);
+                    clone.Close();
+
+                    /*
+        final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+        Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+        Util.toDot(fst, w, false, false);
+        System.out.println("FST INDEX: SAVED to " + dotFileName);
+        w.close();
+        */
+
+                    if (outerInstance.indexDivisor > 1)
+                    {
+                        // subsample
+                        IntsRef scratchIntsRef = new IntsRef();
+                        PositiveIntOutputs outputs = PositiveIntOutputs.Singleton;
+                        Builder<long> builder = new Builder<long>(FST.INPUT_TYPE.BYTE1, outputs);
+                        BytesRefFSTEnum<long> fstEnum = new BytesRefFSTEnum<long>(Fst);
+                        BytesRefFSTEnum.InputOutput<long> result;
+                        int count = outerInstance.indexDivisor;
+                        while ((result = fstEnum.Next()) != null)
+                        {
+                            if (count == outerInstance.indexDivisor)
+                            {
+                                builder.Add(Util.ToIntsRef(result.Input, scratchIntsRef), result.Output);
+                                count = 0;
+                            }
+                            count++;
+                        }
+                        Fst = builder.Finish();
+                    }
+                }
+            }
+
+            /// <summary>Returns approximate RAM bytes used</summary>
+            public long RamBytesUsed()
+            {
+                return Fst == null ? 0 : Fst.SizeInBytes();
+            }
+        }
+
+        internal class IndexEnum : FieldIndexEnum
+        {
+            private readonly BytesRefFSTEnum<long> fstEnum;
+            private BytesRefFSTEnum.InputOutput<long> current;
+
+            public IndexEnum(FST<long> fst)
+            {
+                fstEnum = new BytesRefFSTEnum<long>(fst);
+            }
+
+            public override BytesRef Term()
+            {
+                if (current == null)
+                {
+                    return null;
+                }
+                else
+                {
+                    return current.Input;
+                }
+            }
+
+            public override long Seek(BytesRef target)
+            {
+                //System.out.println("VGR: seek field=" + fieldInfo.name + " target=" + target);
+                current = fstEnum.SeekFloor(target);
+                //System.out.println("  got input=" + current.input + " output=" + current.output);
+                return current.Output;
+            }
+
+            public override long Next()
+            {
+                //System.out.println("VGR: next field=" + fieldInfo.name);
+                current = fstEnum.Next();
+                if (current == null)
+                {
+                    //System.out.println("  eof");
+                    return -1;
+                }
+                else
+                {
+                    return current.Output;
+                }
+            }
+
+            public override long Ord()
+            {
+                throw new NotSupportedException();
+            }
+
+            public override long Seek(long ord)
+            {
+                throw new NotSupportedException();
+            }
+        }
+
+    }
+}
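
The indexDivisor subsampling in LoadTermsIndex keeps only every indexDivisor-th indexed term when rebuilding the in-memory FST, trading seek granularity for RAM. The same counting pattern, reduced to a plain list so it can run standalone (hypothetical Subsample helper; the real code streams BytesRefFSTEnum entries into an FST Builder instead):

    using System.Collections.Generic;

    static class SubsampleDemo
    {
        // Keeps entries 0, d, 2d, ... -- the count == indexDivisor test
        // mirrors the loop in LoadTermsIndex above (illustration only).
        static List<T> Subsample<T>(IEnumerable<T> entries, int indexDivisor)
        {
            var kept = new List<T>();
            int count = indexDivisor;      // primed so the first entry is always kept
            foreach (T entry in entries)
            {
                if (count == indexDivisor)
                {
                    kept.Add(entry);
                    count = 0;
                }
                count++;
            }
            return kept;
        }
    }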

