lucenenet-commits mailing list archives

From synhers...@apache.org
Subject [02/51] [abbrv] [partial] Cleaning up and getting ready to development towards v4.8
Date Sat, 06 Sep 2014 19:36:13 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Sep/SepPostingsWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Sep/SepPostingsWriter.cs b/src/Lucene.Net.Codecs/Sep/SepPostingsWriter.cs
new file mode 100644
index 0000000..ced150e
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Sep/SepPostingsWriter.cs
@@ -0,0 +1,371 @@
+package org.apache.lucene.codecs.sep;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
+ *  to .pyl, skip data to .skp
+ *
+ * @lucene.experimental */
+public final class SepPostingsWriter extends PostingsWriterBase {
+  final static String CODEC = "SepPostingsWriter";
+
+  final static String DOC_EXTENSION = "doc";
+  final static String SKIP_EXTENSION = "skp";
+  final static String FREQ_EXTENSION = "frq";
+  final static String POS_EXTENSION = "pos";
+  final static String PAYLOAD_EXTENSION = "pyl";
+
+  // Increment version to change it:
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+
+  IntIndexOutput freqOut;
+  IntIndexOutput.Index freqIndex;
+
+  IntIndexOutput posOut;
+  IntIndexOutput.Index posIndex;
+
+  IntIndexOutput docOut;
+  IntIndexOutput.Index docIndex;
+
+  IndexOutput payloadOut;
+
+  IndexOutput skipOut;
+
+  final SepSkipListWriter skipListWriter;
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  final int skipInterval;
+  static final int DEFAULT_SKIP_INTERVAL = 16;
+  
+  /**
+   * Expert: minimum docFreq to write any skip data at all
+   */
+  final int skipMinimum;
+
+  /** Expert: The maximum number of skip levels. Smaller values result in 
+   * slightly smaller indexes, but slower skipping in big posting lists.
+   */
+  final int maxSkipLevels = 10;
+
+  final int totalNumDocs;
+
+  bool storePayloads;
+  IndexOptions indexOptions;
+
+  FieldInfo fieldInfo;
+
+  int lastPayloadLength;
+  int lastPosition;
+  long payloadStart;
+  int lastDocID;
+  int df;
+
+  SepTermState lastState;
+  long lastPayloadFP;
+  long lastSkipFP;
+
+  public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory)  {
+    this(state, factory, DEFAULT_SKIP_INTERVAL);
+  }
+
+  public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory, int skipInterval)  {
+    freqOut = null;
+    freqIndex = null;
+    posOut = null;
+    posIndex = null;
+    payloadOut = null;
+    bool success = false;
+    try {
+      this.skipInterval = skipInterval;
+      this.skipMinimum = skipInterval; /* set to the same for now */
+      final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION);
+
+      docOut = factory.createOutput(state.directory, docFileName, state.context);
+      docIndex = docOut.index();
+
+      if (state.fieldInfos.hasFreq()) {
+        final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
+        freqOut = factory.createOutput(state.directory, frqFileName, state.context);
+        freqIndex = freqOut.index();
+      }
+
+      if (state.fieldInfos.hasProx()) {      
+        final String posFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, POS_EXTENSION);
+        posOut = factory.createOutput(state.directory, posFileName, state.context);
+        posIndex = posOut.index();
+        
+        // TODO: -- only if at least one field stores payloads?
+        final String payloadFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, PAYLOAD_EXTENSION);
+        payloadOut = state.directory.createOutput(payloadFileName, state.context);
+      }
+
+      final String skipFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SKIP_EXTENSION);
+      skipOut = state.directory.createOutput(skipFileName, state.context);
+      
+      totalNumDocs = state.segmentInfo.getDocCount();
+      
+      skipListWriter = new SepSkipListWriter(skipInterval,
+          maxSkipLevels,
+          totalNumDocs,
+          freqOut, docOut,
+          posOut, payloadOut);
+      
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(docOut, skipOut, freqOut, posOut, payloadOut);
+      }
+    }
+  }
+
+  @Override
+  public void init(IndexOutput termsOut)  {
+    CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+    // TODO: -- just ask skipper to "start" here
+    termsOut.writeInt(skipInterval);                // write skipInterval
+    termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    termsOut.writeInt(skipMinimum);                 // write skipMinimum
+  }
+
+  @Override
+  public BlockTermState newTermState() {
+    return new SepTermState();
+  }
+
+  @Override
+  public void startTerm()  {
+    docIndex.mark();
+    //System.out.println("SEPW: startTerm docIndex=" + docIndex);
+
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      freqIndex.mark();
+    }
+    
+    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      posIndex.mark();
+      payloadStart = payloadOut.getFilePointer();
+      lastPayloadLength = -1;
+    }
+
+    skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
+  }
+
+  // Currently, this instance is re-used across fields, so
+  // our parent calls setField whenever the field changes
+  @Override
+  public int setField(FieldInfo fieldInfo) {
+    this.fieldInfo = fieldInfo;
+    this.indexOptions = fieldInfo.getIndexOptions();
+    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
+      throw new UnsupportedOperationException("this codec cannot index offsets");
+    }
+    skipListWriter.setIndexOptions(indexOptions);
+    storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.hasPayloads();
+    lastPayloadFP = 0;
+    lastSkipFP = 0;
+    lastState = setEmptyState();
+    return 0;
+  }
+
+  private SepTermState setEmptyState() {
+    SepTermState emptyState = new SepTermState();
+    emptyState.docIndex = docOut.index();
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      emptyState.freqIndex = freqOut.index();
+      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        emptyState.posIndex = posOut.index();
+      }
+    }
+    emptyState.payloadFP = 0;
+    emptyState.skipFP = 0;
+    return emptyState;
+  }
+
+  /** Adds a new doc in this term. */
+  @Override
+  public void startDoc(int docID, int termDocFreq)  {
+
+    final int delta = docID - lastDocID;
+    //System.out.println("SEPW: startDoc: write doc=" + docID + " delta=" + delta + " out.fp=" + docOut);
+
+    if (docID < 0 || (df > 0 && delta <= 0)) {
+      throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
+    }
+
+    if ((++df % skipInterval) == 0) {
+      // TODO: -- awkward we have to make these two
+      // separate calls to skipper
+      //System.out.println("    buffer skip lastDocID=" + lastDocID);
+      skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
+      skipListWriter.bufferSkip(df);
+    }
+
+    lastDocID = docID;
+    docOut.write(delta);
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      //System.out.println("    sepw startDoc: write freq=" + termDocFreq);
+      freqOut.write(termDocFreq);
+    }
+  }
+
+  /** Add a new position & payload */
+  @Override
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset)  {
+    Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+
+    final int delta = position - lastPosition;
+    Debug.Assert(delta >= 0, "position=" + position + " lastPosition=" + lastPosition);            // not quite right (if pos=0 is repeated twice we don't catch it)
+    lastPosition = position;
+
+    if (storePayloads) {
+      final int payloadLength = payload == null ? 0 : payload.length;
+      if (payloadLength != lastPayloadLength) {
+        lastPayloadLength = payloadLength;
+        // TODO: explore whether we get better compression
+        // by not storing payloadLength into prox stream?
+        posOut.write((delta<<1)|1);
+        posOut.write(payloadLength);
+      } else {
+        posOut.write(delta << 1);
+      }
+
+      if (payloadLength > 0) {
+        payloadOut.writeBytes(payload.bytes, payload.offset, payloadLength);
+      }
+    } else {
+      posOut.write(delta);
+    }
+
+    lastPosition = position;
+  }
+
+  /** Called when we are done adding positions & payloads */
+  @Override
+  public void finishDoc() {       
+    lastPosition = 0;
+  }
+
+  private static class SepTermState extends BlockTermState {
+    public IntIndexOutput.Index docIndex;
+    public IntIndexOutput.Index freqIndex;
+    public IntIndexOutput.Index posIndex;
+    public long payloadFP;
+    public long skipFP;
+  }
+
+  /** Called when we are done adding docs to this term */
+  @Override
+  public void finishTerm(BlockTermState _state)  {
+    SepTermState state = (SepTermState)_state;
+    // TODO: -- wasteful we are counting this in two places?
+    Debug.Assert(state.docFreq > 0);
+    Debug.Assert(state.docFreq == df);
+
+    state.docIndex = docOut.index();
+    state.docIndex.copyFrom(docIndex, false);
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      state.freqIndex = freqOut.index();
+      state.freqIndex.copyFrom(freqIndex, false);
+      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        state.posIndex = posOut.index();
+        state.posIndex.copyFrom(posIndex, false);
+      } else {
+        state.posIndex = null;
+      }
+    } else {
+      state.freqIndex = null;
+      state.posIndex = null;
+    }
+
+    if (df >= skipMinimum) {
+      state.skipFP = skipOut.getFilePointer();
+      //System.out.println("  skipFP=" + skipFP);
+      skipListWriter.writeSkip(skipOut);
+      //System.out.println("    numBytes=" + (skipOut.getFilePointer()-skipFP));
+    } else {
+      state.skipFP = -1;
+    }
+    state.payloadFP = payloadStart;
+
+    lastDocID = 0;
+    df = 0;
+  }
+
+  @Override
+  public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, bool absolute)  {
+    SepTermState state = (SepTermState)_state;
+    if (absolute) {
+      lastSkipFP = 0;
+      lastPayloadFP = 0;
+      lastState = state;
+    }
+    lastState.docIndex.copyFrom(state.docIndex, false);
+    lastState.docIndex.write(out, absolute);
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      lastState.freqIndex.copyFrom(state.freqIndex, false);
+      lastState.freqIndex.write(out, absolute);
+      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        lastState.posIndex.copyFrom(state.posIndex, false);
+        lastState.posIndex.write(out, absolute);
+        if (storePayloads) {
+          if (absolute) {
+            out.writeVLong(state.payloadFP);
+          } else {
+            out.writeVLong(state.payloadFP - lastPayloadFP);
+          }
+          lastPayloadFP = state.payloadFP;
+        }
+      }
+    }
+    if (state.skipFP != -1) {
+      if (absolute) {
+        out.writeVLong(state.skipFP);
+      } else {
+        out.writeVLong(state.skipFP - lastSkipFP);
+      }
+      lastSkipFP = state.skipFP;
+    }
+  }
+
+  @Override
+  public void close()  {
+    IOUtils.close(docOut, skipOut, freqOut, posOut, payloadOut);
+  }
+}
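
A note on the position encoding above: addPosition shifts each position
delta left by one bit and uses the low bit to flag whether a new payload
length follows, so runs of identical lengths cost nothing extra. A minimal
standalone C# sketch of that scheme (illustrative names only, not the
Lucene.Net types):

    using System;
    using System.Collections.Generic;

    static class SepPositionEncodingSketch
    {
        // Encodes (positionDelta, payloadLength) pairs the way addPosition
        // does: the payload length is written only when it changes.
        public static List<int> Encode((int Delta, int PayloadLength)[] positions)
        {
            var output = new List<int>();
            int lastPayloadLength = -1; // forces the first length to be written
            foreach (var (delta, payloadLength) in positions)
            {
                if (payloadLength != lastPayloadLength)
                {
                    output.Add((delta << 1) | 1); // low bit set: length follows
                    output.Add(payloadLength);
                    lastPayloadLength = payloadLength;
                }
                else
                {
                    output.Add(delta << 1);       // low bit clear: reuse length
                }
            }
            return output;
        }

        static void Main()
        {
            // Three positions; the second reuses the first payload length.
            var ints = Encode(new[] { (5, 3), (2, 3), (4, 0) });
            Console.WriteLine(string.Join(" ", ints)); // prints: 11 3 4 9 0
        }
    }

The skip list reader and writer below reuse the same flag-bit convention
for their per-level doc deltas, which keeps the two encodings symmetrical.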

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Sep/SepSkipListReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Sep/SepSkipListReader.cs b/src/Lucene.Net.Codecs/Sep/SepSkipListReader.cs
new file mode 100644
index 0000000..df6dda1
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Sep/SepSkipListReader.cs
@@ -0,0 +1,209 @@
+package org.apache.lucene.codecs.sep;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.codecs.MultiLevelSkipListReader;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+
+/**
+ * Implements the skip list reader for the default posting list format
+ * that stores positions and payloads.
+ *
+ * @lucene.experimental
+ */
+
+// TODO: rewrite this as recursive classes?
+class SepSkipListReader extends MultiLevelSkipListReader {
+  private bool currentFieldStoresPayloads;
+  private IntIndexInput.Index freqIndex[];
+  private IntIndexInput.Index docIndex[];
+  private IntIndexInput.Index posIndex[];
+  private long payloadPointer[];
+  private int payloadLength[];
+
+  private final IntIndexInput.Index lastFreqIndex;
+  private final IntIndexInput.Index lastDocIndex;
+  // TODO: -- make private again
+  final IntIndexInput.Index lastPosIndex;
+  
+  private long lastPayloadPointer;
+  private int lastPayloadLength;
+                           
+  SepSkipListReader(IndexInput skipStream,
+                    IntIndexInput freqIn,
+                    IntIndexInput docIn,
+                    IntIndexInput posIn,
+                    int maxSkipLevels,
+                    int skipInterval)
+     {
+    super(skipStream, maxSkipLevels, skipInterval);
+    if (freqIn != null) {
+      freqIndex = new IntIndexInput.Index[maxSkipLevels];
+    }
+    docIndex = new IntIndexInput.Index[maxSkipLevels];
+    if (posIn != null) {
+      posIndex = new IntIndexInput.Index[maxNumberOfSkipLevels];
+    }
+    for(int i=0;i<maxSkipLevels;i++) {
+      if (freqIn != null) {
+        freqIndex[i] = freqIn.index();
+      }
+      docIndex[i] = docIn.index();
+      if (posIn != null) {
+        posIndex[i] = posIn.index();
+      }
+    }
+    payloadPointer = new long[maxSkipLevels];
+    payloadLength = new int[maxSkipLevels];
+
+    if (freqIn != null) {
+      lastFreqIndex = freqIn.index();
+    } else {
+      lastFreqIndex = null;
+    }
+    lastDocIndex = docIn.index();
+    if (posIn != null) {
+      lastPosIndex = posIn.index();
+    } else {
+      lastPosIndex = null;
+    }
+  }
+  
+  IndexOptions indexOptions;
+
+  void setIndexOptions(IndexOptions v) {
+    indexOptions = v;
+  }
+
+  void init(long skipPointer,
+            IntIndexInput.Index docBaseIndex,
+            IntIndexInput.Index freqBaseIndex,
+            IntIndexInput.Index posBaseIndex,
+            long payloadBasePointer,
+            int df,
+            bool storesPayloads) {
+
+    super.init(skipPointer, df);
+    this.currentFieldStoresPayloads = storesPayloads;
+
+    lastPayloadPointer = payloadBasePointer;
+
+    for(int i=0;i<maxNumberOfSkipLevels;i++) {
+      docIndex[i].copyFrom(docBaseIndex);
+      if (freqIndex != null) {
+        freqIndex[i].copyFrom(freqBaseIndex);
+      }
+      if (posBaseIndex != null) {
+        posIndex[i].copyFrom(posBaseIndex);
+      }
+    }
+    Arrays.fill(payloadPointer, payloadBasePointer);
+    Arrays.fill(payloadLength, 0);
+  }
+
+  long getPayloadPointer() {
+    return lastPayloadPointer;
+  }
+  
+  /** Returns the payload length of the payload stored just before 
+   * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} 
+   * has skipped.  */
+  int getPayloadLength() {
+    return lastPayloadLength;
+  }
+  
+  @Override
+  protected void seekChild(int level)  {
+    super.seekChild(level);
+    payloadPointer[level] = lastPayloadPointer;
+    payloadLength[level] = lastPayloadLength;
+  }
+  
+  @Override
+  protected void setLastSkipData(int level) {
+    super.setLastSkipData(level);
+
+    lastPayloadPointer = payloadPointer[level];
+    lastPayloadLength = payloadLength[level];
+    if (freqIndex != null) {
+      lastFreqIndex.copyFrom(freqIndex[level]);
+    }
+    lastDocIndex.copyFrom(docIndex[level]);
+    if (lastPosIndex != null) {
+      lastPosIndex.copyFrom(posIndex[level]);
+    }
+
+    if (level > 0) {
+      if (freqIndex != null) {
+        freqIndex[level-1].copyFrom(freqIndex[level]);
+      }
+      docIndex[level-1].copyFrom(docIndex[level]);
+      if (posIndex != null) {
+        posIndex[level-1].copyFrom(posIndex[level]);
+      }
+    }
+  }
+
+  IntIndexInput.Index getFreqIndex() {
+    return lastFreqIndex;
+  }
+
+  IntIndexInput.Index getPosIndex() {
+    return lastPosIndex;
+  }
+
+  IntIndexInput.Index getDocIndex() {
+    return lastDocIndex;
+  }
+
+  @Override
+  protected int readSkipData(int level, IndexInput skipStream)  {
+    int delta;
+    Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads);
+    if (currentFieldStoresPayloads) {
+      // the current field stores payloads.
+      // if the doc delta is odd then we have
+      // to read the current payload length
+      // because it differs from the length of the
+      // previous payload
+      delta = skipStream.readVInt();
+      if ((delta & 1) != 0) {
+        payloadLength[level] = skipStream.readVInt();
+      }
+      delta >>>= 1;
+    } else {
+      delta = skipStream.readVInt();
+    }
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      freqIndex[level].read(skipStream, false);
+    }
+    docIndex[level].read(skipStream, false);
+    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      posIndex[level].read(skipStream, false);
+      if (currentFieldStoresPayloads) {
+        payloadPointer[level] += skipStream.readVInt();
+      }
+    }
+    
+    return delta;
+  }
+}
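
readSkipData above is the decode-side mirror of that convention: when the
field stores payloads, the VInt doc delta carries the flag in its low bit.
A self-contained sketch, assuming Lucene's usual VInt wire format (7 data
bits per byte, high bit meaning "more bytes follow"):

    using System;
    using System.IO;

    static class SepSkipDecodeSketch
    {
        // Lucene-style VInt: 7 data bits per byte, high bit = continuation.
        static int ReadVInt(Stream s)
        {
            int b = s.ReadByte();
            int value = b & 0x7F;
            for (int shift = 7; (b & 0x80) != 0; shift += 7)
            {
                b = s.ReadByte();
                value |= (b & 0x7F) << shift;
            }
            return value;
        }

        // Mirrors the payload-aware branch of readSkipData.
        static int ReadDocSkip(Stream skip, bool storesPayloads, ref int payloadLength)
        {
            int delta = ReadVInt(skip);
            if (storesPayloads)
            {
                if ((delta & 1) != 0)
                    payloadLength = ReadVInt(skip); // odd: new length follows
                delta >>= 1;                        // strip the flag bit
            }
            return delta;
        }

        static void Main()
        {
            // Skip datum for delta=16 with a changed payload length of 3:
            // (16 << 1) | 1 = 33, then 3; each fits in a single VInt byte.
            var skip = new MemoryStream(new byte[] { 33, 3 });
            int payloadLength = 0;
            int delta = ReadDocSkip(skip, storesPayloads: true, ref payloadLength);
            Console.WriteLine($"delta={delta} payloadLength={payloadLength}");
            // prints: delta=16 payloadLength=3
        }
    }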

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs b/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs
new file mode 100644
index 0000000..7ace983
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Sep/SepSkipListWriter.cs
@@ -0,0 +1,200 @@
+package org.apache.lucene.codecs.sep;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+
+// TODO: -- skip data should somehow be more local to the
+// particular stream (doc, freq, pos, payload)
+
+/**
+ * Implements the skip list writer for the default posting list format
+ * that stores positions and payloads.
+ *
+ * @lucene.experimental
+ */
+class SepSkipListWriter extends MultiLevelSkipListWriter {
+  private int[] lastSkipDoc;
+  private int[] lastSkipPayloadLength;
+  private long[] lastSkipPayloadPointer;
+
+  private IntIndexOutput.Index[] docIndex;
+  private IntIndexOutput.Index[] freqIndex;
+  private IntIndexOutput.Index[] posIndex;
+  
+  private IntIndexOutput freqOutput;
+  // TODO: -- private again
+  IntIndexOutput posOutput;
+  // TODO: -- private again
+  IndexOutput payloadOutput;
+
+  private int curDoc;
+  private bool curStorePayloads;
+  private int curPayloadLength;
+  private long curPayloadPointer;
+  
+  SepSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount,
+                    IntIndexOutput freqOutput,
+                    IntIndexOutput docOutput,
+                    IntIndexOutput posOutput,
+                    IndexOutput payloadOutput)
+     {
+    super(skipInterval, numberOfSkipLevels, docCount);
+
+    this.freqOutput = freqOutput;
+    this.posOutput = posOutput;
+    this.payloadOutput = payloadOutput;
+    
+    lastSkipDoc = new int[numberOfSkipLevels];
+    lastSkipPayloadLength = new int[numberOfSkipLevels];
+    // TODO: -- also cutover normal IndexOutput to use getIndex()?
+    lastSkipPayloadPointer = new long[numberOfSkipLevels];
+
+    freqIndex = new IntIndexOutput.Index[numberOfSkipLevels];
+    docIndex = new IntIndexOutput.Index[numberOfSkipLevels];
+    posIndex = new IntIndexOutput.Index[numberOfSkipLevels];
+
+    for(int i=0;i<numberOfSkipLevels;i++) {
+      if (freqOutput != null) {
+        freqIndex[i] = freqOutput.index();
+      }
+      docIndex[i] = docOutput.index();
+      if (posOutput != null) {
+        posIndex[i] = posOutput.index();
+      }
+    }
+  }
+
+  IndexOptions indexOptions;
+
+  void setIndexOptions(IndexOptions v) {
+    indexOptions = v;
+  }
+
+  void setPosOutput(IntIndexOutput posOutput)  {
+    this.posOutput = posOutput;
+    for(int i=0;i<numberOfSkipLevels;i++) {
+      posIndex[i] = posOutput.index();
+    }
+  }
+
+  void setPayloadOutput(IndexOutput payloadOutput) {
+    this.payloadOutput = payloadOutput;
+  }
+
+  /**
+   * Sets the values for the current skip data. 
+   */
+  // Called @ every index interval (every skipInterval'th doc;
+  // 16 by default for this codec)
+  void setSkipData(int doc, bool storePayloads, int payloadLength) {
+    this.curDoc = doc;
+    this.curStorePayloads = storePayloads;
+    this.curPayloadLength = payloadLength;
+    if (payloadOutput != null) {
+      this.curPayloadPointer = payloadOutput.getFilePointer();
+    }
+  }
+
+  // Called @ start of new term
+  protected void resetSkip(IntIndexOutput.Index topDocIndex, IntIndexOutput.Index topFreqIndex, IntIndexOutput.Index topPosIndex)
+     {
+    super.resetSkip();
+
+    Arrays.fill(lastSkipDoc, 0);
+    Arrays.fill(lastSkipPayloadLength, -1);  // we don't have to write the first length in the skip list
+    for(int i=0;i<numberOfSkipLevels;i++) {
+      docIndex[i].copyFrom(topDocIndex, true);
+      if (freqOutput != null) {
+        freqIndex[i].copyFrom(topFreqIndex, true);
+      }
+      if (posOutput != null) {
+        posIndex[i].copyFrom(topPosIndex, true);
+      }
+    }
+    if (payloadOutput != null) {
+      Arrays.fill(lastSkipPayloadPointer, payloadOutput.getFilePointer());
+    }
+  }
+  
+  @Override
+  protected void writeSkipData(int level, IndexOutput skipBuffer)  {
+    // To efficiently store payloads in the posting lists we do not store the length of
+    // every payload. Instead we omit the length for a payload if the previous payload had
+    // the same length.
+    // However, in order to support skipping the payload length at every skip point must be known.
+    // So we use the same length encoding that we use for the posting lists for the skip data as well:
+    // Case 1: current field does not store payloads
+    //           SkipDatum                 --> DocSkip, FreqSkip, ProxSkip
+    //           DocSkip,FreqSkip,ProxSkip --> VInt
+    //           DocSkip records the document number before every SkipInterval th  document in TermFreqs. 
+    //           Document numbers are represented as differences from the previous value in the sequence.
+    // Case 2: current field stores payloads
+    //           SkipDatum                 --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
+    //           DocSkip,FreqSkip,ProxSkip --> VInt
+    //           PayloadLength             --> VInt    
+    //         In this case DocSkip/2 is the difference between
+    //         the current and the previous value. If DocSkip
+    //         is odd, then a PayloadLength encoded as VInt follows,
+    //         if DocSkip is even, then it is assumed that the
+    //         current payload length equals the length at the previous
+    //         skip point
+
+    Debug.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads);
+
+    if (curStorePayloads) {
+      int delta = curDoc - lastSkipDoc[level];
+      if (curPayloadLength == lastSkipPayloadLength[level]) {
+        // the current payload length equals the length at the previous skip point,
+        // so we don't store the length again
+        skipBuffer.writeVInt(delta << 1);
+      } else {
+        // the payload length is different from the previous one. We shift the DocSkip, 
+        // set the lowest bit and store the current payload length as VInt.
+        skipBuffer.writeVInt(delta << 1 | 1);
+        skipBuffer.writeVInt(curPayloadLength);
+        lastSkipPayloadLength[level] = curPayloadLength;
+      }
+    } else {
+      // current field does not store payloads
+      skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
+    }
+
+    if (indexOptions != IndexOptions.DOCS_ONLY) {
+      freqIndex[level].mark();
+      freqIndex[level].write(skipBuffer, false);
+    }
+    docIndex[level].mark();
+    docIndex[level].write(skipBuffer, false);
+    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      posIndex[level].mark();
+      posIndex[level].write(skipBuffer, false);
+      if (curStorePayloads) {
+        skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level]));
+      }
+    }
+
+    lastSkipDoc[level] = curDoc;
+    lastSkipPayloadPointer[level] = curPayloadPointer;
+  }
+}
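
For scale: bufferSkip(df) in SepPostingsWriter fires once every
skipInterval docs, and the MultiLevelSkipListWriter base class promotes
every skipInterval'th entry of one level to the level above. Under that
scheme (the helper below is illustrative, not a Lucene.Net API), a doc
ordinal is recorded at level L when it is a multiple of skipInterval^(L+1):

    using System;

    static class SkipLevelSketch
    {
        // How many skip levels record the doc at this 1-based ordinal:
        // level 0 holds every skipInterval'th doc, level 1 every
        // skipInterval^2'th, and so on, capped at maxLevels.
        static int LevelsFor(long docOrdinal, int skipInterval, int maxLevels)
        {
            int levels = 0;
            long span = skipInterval;
            while (levels < maxLevels && docOrdinal % span == 0)
            {
                levels++;
                span *= skipInterval;
            }
            return levels;
        }

        static void Main()
        {
            const int skipInterval = 16; // DEFAULT_SKIP_INTERVAL above
            const int maxLevels = 10;    // maxSkipLevels above
            foreach (long ord in new long[] { 16, 48, 256, 4096 })
                Console.WriteLine($"doc #{ord}: {LevelsFor(ord, skipInterval, maxLevels)} level(s)");
            // doc #16: 1, doc #48: 1, doc #256: 2, doc #4096: 3
        }
    }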

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
new file mode 100644
index 0000000..a26ed0e
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextCodec.cs
@@ -0,0 +1,89 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+
+/**
+ * plain text index format.
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public final class SimpleTextCodec extends Codec {
+  private final PostingsFormat postings = new SimpleTextPostingsFormat();
+  private final StoredFieldsFormat storedFields = new SimpleTextStoredFieldsFormat();
+  private final SegmentInfoFormat segmentInfos = new SimpleTextSegmentInfoFormat();
+  private final FieldInfosFormat fieldInfosFormat = new SimpleTextFieldInfosFormat();
+  private final TermVectorsFormat vectorsFormat = new SimpleTextTermVectorsFormat();
+  private final NormsFormat normsFormat = new SimpleTextNormsFormat();
+  private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();
+  private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
+  
+  public SimpleTextCodec() {
+    super("SimpleText");
+  }
+  
+  @Override
+  public PostingsFormat postingsFormat() {
+    return postings;
+  }
+
+  @Override
+  public StoredFieldsFormat storedFieldsFormat() {
+    return storedFields;
+  }
+  
+  @Override
+  public TermVectorsFormat termVectorsFormat() {
+    return vectorsFormat;
+  }
+  
+  @Override
+  public FieldInfosFormat fieldInfosFormat() {
+    return fieldInfosFormat;
+  }
+
+  @Override
+  public SegmentInfoFormat segmentInfoFormat() {
+    return segmentInfos;
+  }
+
+  @Override
+  public NormsFormat normsFormat() {
+    return normsFormat;
+  }
+  
+  @Override
+  public LiveDocsFormat liveDocsFormat() {
+    return liveDocs;
+  }
+
+  @Override
+  public DocValuesFormat docValuesFormat() {
+    return dvFormat;
+  }
+}
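
SimpleTextCodec is pure composition: every accessor hands back one of the
pre-built SimpleText* formats. Once the port compiles, installing it should
look roughly like the Java API does; a hedged sketch, assuming the 4.8 port
exposes Java's IndexWriterConfig.setCodec as a settable Codec property
(exact names may differ while the port is in flux):

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Codecs.SimpleText;
    using Lucene.Net.Index;
    using Lucene.Net.Store;
    using Lucene.Net.Util;

    class SimpleTextUsageSketch
    {
        static void Main()
        {
            var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
            var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
            {
                Codec = new SimpleTextCodec() // all segment files become plain text
            };
            using (var dir = FSDirectory.Open("simpletext-index"))
            using (var writer = new IndexWriter(dir, config))
            {
                // add documents as usual, then open the index files in a text
                // editor -- handy for debugging, hence "recreational use only"
            }
        }
    }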

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesFormat.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesFormat.cs
new file mode 100644
index 0000000..83dd776
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesFormat.cs
@@ -0,0 +1,137 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+/**
+ * plain text doc values format.
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * <p>
+ * the .dat file contains the data.
+ *  for numbers this is a "fixed-width" file, for example a single byte range:
+ *  <pre>
+ *  field myField
+ *    type NUMERIC
+ *    minvalue 0
+ *    pattern 000
+ *  005
+ *  T
+ *  234
+ *  T
+ *  123
+ *  T
+ *  ...
+ *  </pre>
+ *  so a document's value (delta encoded from minvalue) can be retrieved by 
+ *  seeking to startOffset + (1+pattern.length()+2)*docid. The extra 1 is the newline. 
+ *  The extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
+ *  
+ *  for bytes this is also a "fixed-width" file, for example:
+ *  <pre>
+ *  field myField
+ *    type BINARY
+ *    maxlength 6
+ *    pattern 0
+ *  length 6
+ *  foobar[space][space]
+ *  T
+ *  length 3
+ *  baz[space][space][space][space][space]
+ *  T
+ *  ...
+ *  </pre>
+ *  so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength+2)*doc
+ *  the extra 9 is 2 newlines, plus "length " itself.
+ *  the extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
+ *  
+ *  for sorted bytes this is a fixed-width file, for example:
+ *  <pre>
+ *  field myField
+ *    type SORTED
+ *    numvalues 10
+ *    maxLength 8
+ *    pattern 0
+ *    ordpattern 00
+ *  length 6
+ *  foobar[space][space]
+ *  length 3
+ *  baz[space][space][space][space][space]
+ *  ...
+ *  03
+ *  06
+ *  01
+ *  10
+ *  ...
+ *  </pre>
+ *  so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
+ *  a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
+ *  an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
+ *  
+ *  for sorted set this is a fixed-width file very similar to the SORTED case, for example:
+ *  <pre>
+ *  field myField
+ *    type SORTED_SET
+ *    numvalues 10
+ *    maxLength 8
+ *    pattern 0
+ *    ordpattern XXXXX
+ *  length 6
+ *  foobar[space][space]
+ *  length 3
+ *  baz[space][space][space][space][space]
+ *  ...
+ *  0,3,5   
+ *  1,2
+ *  
+ *  10
+ *  ...
+ *  </pre>
+ *  so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
+ *  a document's ord list can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
+ *  this is a comma-separated list, and it's padded with spaces to be fixed width; so trim() and split() it,
+ *  and beware the empty string!
+ *  an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
+ *   
+ *  the reader can just scan this file when it opens, skipping over the data blocks
+ *  and saving the offset/etc for each field. 
+ *  @lucene.experimental
+ */
+public class SimpleTextDocValuesFormat extends DocValuesFormat {
+  
+  public SimpleTextDocValuesFormat() {
+    super("SimpleText");
+  }
+
+  @Override
+  public DocValuesConsumer fieldsConsumer(SegmentWriteState state)  {
+    return new SimpleTextDocValuesWriter(state, "dat");
+  }
+
+  @Override
+  public DocValuesProducer fieldsProducer(SegmentReadState state)  {
+    return new SimpleTextDocValuesReader(state, "dat");
+  }
+}
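
The fixed-width layout described in the javadoc is the whole trick: the
reader needs only one start offset per field, and every lookup is pure
arithmetic plus a seek. A worked sketch of the NUMERIC case,
startOffset + (1 + pattern.length() + 2) * docid:

    using System;

    static class SimpleTextNumericOffsetSketch
    {
        // Each per-doc NUMERIC record is:
        //   value line: pattern.Length digits + newline  -> pattern.Length + 1
        //   T/F line:   one flag char + newline          -> 2
        static long RecordOffset(long dataStart, string pattern, int docId)
            => dataStart + (1 + pattern.Length + 2) * (long)docId;

        static void Main()
        {
            // From the javadoc example: pattern "000", so each record is 6 bytes.
            Console.WriteLine(RecordOffset(dataStart: 100, pattern: "000", docId: 0)); // 100
            Console.WriteLine(RecordOffset(dataStart: 100, pattern: "000", docId: 2)); // 112
        }
    }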

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
new file mode 100644
index 0000000..63c3cf8
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesReader.cs
@@ -0,0 +1,489 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.ParseException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+class SimpleTextDocValuesReader extends DocValuesProducer {
+
+  static class OneField {
+    long dataStartFilePointer;
+    String pattern;
+    String ordPattern;
+    int maxLength;
+    bool fixedLength;
+    long minValue;
+    long numValues;
+  }
+
+  final int maxDoc;
+  final IndexInput data;
+  final BytesRef scratch = new BytesRef();
+  final Map<String,OneField> fields = new HashMap<>();
+  
+  public SimpleTextDocValuesReader(SegmentReadState state, String ext)  {
+    // System.out.println("dir=" + state.directory + " seg=" + state.segmentInfo.name + " file=" + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext));
+    data = state.directory.openInput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
+    maxDoc = state.segmentInfo.getDocCount();
+    while(true) {
+      readLine();
+      //System.out.println("READ field=" + scratch.utf8ToString());
+      if (scratch.equals(END)) {
+        break;
+      }
+      Debug.Assert(startsWith(FIELD), scratch.utf8ToString());
+      String fieldName = stripPrefix(FIELD);
+      //System.out.println("  field=" + fieldName);
+
+      OneField field = new OneField();
+      fields.put(fieldName, field);
+
+      readLine();
+      Debug.Assert(startsWith(TYPE), scratch.utf8ToString());
+
+      DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE));
+      Debug.Assert(dvType != null);
+      if (dvType == DocValuesType.NUMERIC) {
+        readLine();
+        Debug.Assert(startsWith(MINVALUE), "got " + scratch.utf8ToString() + " field=" + fieldName + " ext=" + ext);
+        field.minValue = Long.parseLong(stripPrefix(MINVALUE));
+        readLine();
+        Debug.Assert(startsWith(PATTERN));
+        field.pattern = stripPrefix(PATTERN);
+        field.dataStartFilePointer = data.getFilePointer();
+        data.seek(data.getFilePointer() + (1+field.pattern.length()+2) * maxDoc);
+      } else if (dvType == DocValuesType.BINARY) {
+        readLine();
+        Debug.Assert(startsWith(MAXLENGTH));
+        field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
+        readLine();
+        Debug.Assert(startsWith(PATTERN));
+        field.pattern = stripPrefix(PATTERN);
+        field.dataStartFilePointer = data.getFilePointer();
+        data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength+2) * maxDoc);
+      } else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET) {
+        readLine();
+        Debug.Assert(startsWith(NUMVALUES));
+        field.numValues = Long.parseLong(stripPrefix(NUMVALUES));
+        readLine();
+        Debug.Assert(startsWith(MAXLENGTH));
+        field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
+        readLine();
+        Debug.Assert(startsWith(PATTERN));
+        field.pattern = stripPrefix(PATTERN);
+        readLine();
+        Debug.Assert(startsWith(ORDPATTERN));
+        field.ordPattern = stripPrefix(ORDPATTERN);
+        field.dataStartFilePointer = data.getFilePointer();
+        data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * field.numValues + (1+field.ordPattern.length())*maxDoc);
+      } else {
+        throw new AssertionError();
+      }
+    }
+
+    // We should only be called from above if at least one
+    // field has DVs:
+    Debug.Assert(!fields.isEmpty());
+  }
+
+  @Override
+  public NumericDocValues getNumeric(FieldInfo fieldInfo)  {
+    final OneField field = fields.get(fieldInfo.name);
+
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    Debug.Assert(field != null, "field=" + fieldInfo.name + " fields=" + fields);
+
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    decoder.setParseBigDecimal(true);
+
+    return new NumericDocValues() {
+      @Override
+      public long get(int docID) {
+        try {
+          //System.out.println(Thread.currentThread().getName() + ": get docID=" + docID + " in=" + in);
+          if (docID < 0 || docID >= maxDoc) {
+            throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+          }
+          in.seek(field.dataStartFilePointer + (1+field.pattern.length()+2)*docID);
+          SimpleTextUtil.readLine(in, scratch);
+          //System.out.println("parsing delta: " + scratch.utf8ToString());
+          BigDecimal bd;
+          try {
+            bd = (BigDecimal) decoder.parse(scratch.utf8ToString());
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse BigDecimal value (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+          SimpleTextUtil.readLine(in, scratch); // read the line telling us if its real or not
+          return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+    };
+  }
+  
+  private Bits getNumericDocsWithField(FieldInfo fieldInfo)  {
+    final OneField field = fields.get(fieldInfo.name);
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    return new Bits() {
+      @Override
+      public bool get(int index) {
+        try {
+          in.seek(field.dataStartFilePointer + (1+field.pattern.length()+2)*index);
+          SimpleTextUtil.readLine(in, scratch); // data
+          SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
+          return scratch.bytes[scratch.offset] == (byte) 'T';
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+
+      @Override
+      public int length() {
+        return maxDoc;
+      }
+    };
+  }
+
+  @Override
+  public BinaryDocValues getBinary(FieldInfo fieldInfo)  {
+    final OneField field = fields.get(fieldInfo.name);
+
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    Debug.Assert(field != null);
+
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    return new BinaryDocValues() {
+      @Override
+      public void get(int docID, BytesRef result) {
+        try {
+          if (docID < 0 || docID >= maxDoc) {
+            throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+          }
+          in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength+2)*docID);
+          SimpleTextUtil.readLine(in, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, LENGTH));
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+          result.bytes = new byte[len];
+          result.offset = 0;
+          result.length = len;
+          in.readBytes(result.bytes, 0, len);
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+    };
+  }
+  
+  private Bits getBinaryDocsWithField(FieldInfo fieldInfo)  {
+    final OneField field = fields.get(fieldInfo.name);
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    return new Bits() {
+      @Override
+      public bool get(int index) {
+        try {
+          in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength+2)*index);
+          SimpleTextUtil.readLine(in, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, LENGTH));
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+          // skip past bytes
+          byte bytes[] = new byte[len];
+          in.readBytes(bytes, 0, len);
+          SimpleTextUtil.readLine(in, scratch); // newline
+          SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
+          return scratch.bytes[scratch.offset] == (byte) 'T';
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public int length() {
+        return maxDoc;
+      }
+    };
+  }
+
+  @Override
+  public SortedDocValues getSorted(FieldInfo fieldInfo)  {
+    final OneField field = fields.get(fieldInfo.name);
+
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    Debug.Assert(field != null);
+
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+    final DecimalFormat ordDecoder = new DecimalFormat(field.ordPattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    return new SortedDocValues() {
+      @Override
+      public int getOrd(int docID) {
+        if (docID < 0 || docID >= maxDoc) {
+          throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+        }
+        try {
+          in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + docID * (1 + field.ordPattern.length()));
+          SimpleTextUtil.readLine(in, scratch);
+          try {
+            return (int) ordDecoder.parse(scratch.utf8ToString()).longValue()-1;
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse ord (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public void lookupOrd(int ord, BytesRef result) {
+        try {
+          if (ord < 0 || ord >= field.numValues) {
+            throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues-1) + "; got " + ord);
+          }
+          in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
+          SimpleTextUtil.readLine(in, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, LENGTH), "got " + scratch.utf8ToString() + " in=" + in);
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+          result.bytes = new byte[len];
+          result.offset = 0;
+          result.length = len;
+          in.readBytes(result.bytes, 0, len);
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public int getValueCount() {
+        return (int)field.numValues;
+      }
+    };
+  }
+
+  @Override
+  public SortedSetDocValues getSortedSet(FieldInfo fieldInfo)  {
+    final OneField field = fields.get(fieldInfo.name);
+
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    Debug.Assert(field != null);
+
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+    
+    return new SortedSetDocValues() {
+      String[] currentOrds = new String[0];
+      int currentIndex = 0;
+      
+      @Override
+      public long nextOrd() {
+        if (currentIndex == currentOrds.length) {
+          return NO_MORE_ORDS;
+        } else {
+          return Long.parseLong(currentOrds[currentIndex++]);
+        }
+      }
+
+      @Override
+      public void setDocument(int docID) {
+        if (docID < 0 || docID >= maxDoc) {
+          throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+        }
+        try {
+          in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + docID * (1 + field.ordPattern.length()));
+          SimpleTextUtil.readLine(in, scratch);
+          String ordList = scratch.utf8ToString().trim();
+          if (ordList.isEmpty()) {
+            currentOrds = new String[0];
+          } else {
+            currentOrds = ordList.split(",");
+          }
+          currentIndex = 0;
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public void lookupOrd(long ord, BytesRef result) {
+        try {
+          if (ord < 0 || ord >= field.numValues) {
+            throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues-1) + "; got " + ord);
+          }
+          in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
+          SimpleTextUtil.readLine(in, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, LENGTH), "got " + scratch.utf8ToString() + " in=" + in);
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, StandardCharsets.UTF_8)).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length (resource=" + in + ")");
+            e.initCause(pe);
+            throw e;
+          }
+          result.bytes = new byte[len];
+          result.offset = 0;
+          result.length = len;
+          in.readBytes(result.bytes, 0, len);
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public long getValueCount() {
+        return field.numValues;
+      }
+    };
+  }
+  
+  @Override
+  public Bits getDocsWithField(FieldInfo field)  {
+    switch (field.getDocValuesType()) {
+      case SORTED_SET:
+        return DocValues.docsWithValue(getSortedSet(field), maxDoc);
+      case SORTED:
+        return DocValues.docsWithValue(getSorted(field), maxDoc);
+      case BINARY:
+        return getBinaryDocsWithField(field);
+      case NUMERIC:
+        return getNumericDocsWithField(field);
+      default:
+        throw new AssertionError();
+    }
+  }
+
+  @Override
+  public void close()  {
+    data.close();
+  }
+
+  /** Used only in ctor: */
+  private void readLine()  {
+    SimpleTextUtil.readLine(data, scratch);
+    //System.out.println("line: " + scratch.utf8ToString());
+  }
+
+  /** Used only in ctor: */
+  private bool startsWith(BytesRef prefix) {
+    return StringHelper.startsWith(scratch, prefix);
+  }
+
+  /** Used only in ctor: */
+  private String stripPrefix(BytesRef prefix)  {
+    return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, StandardCharsets.UTF_8);
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return 0;
+  }
+
+  @Override
+  public void checkIntegrity()  {
+    BytesRef scratch = new BytesRef();
+    IndexInput clone = data.clone();
+    clone.seek(0);
+    ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
+    while(true) {
+      SimpleTextUtil.readLine(input, scratch);
+      if (scratch.equals(END)) {
+        SimpleTextUtil.checkFooter(input);
+        break;
+      }
+    }
+  }
+}
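
The SORTED_SET path in setDocument above follows the javadoc's advice to
the letter: the per-document ord line is comma-separated, space-padded to
a fixed width, and may be entirely blank when a doc has no values. A
standalone sketch of just that parse step:

    using System;

    static class SortedSetOrdParseSketch
    {
        // Parses one fixed-width ord line; an all-blank line means
        // "no values for this doc" -- beware the empty string!
        static long[] ParseOrdLine(string fixedWidthLine)
        {
            string trimmed = fixedWidthLine.Trim();
            if (trimmed.Length == 0)
                return Array.Empty<long>();
            string[] parts = trimmed.Split(',');
            var ords = new long[parts.Length];
            for (int i = 0; i < parts.Length; i++)
                ords[i] = long.Parse(parts[i]);
            return ords;
        }

        static void Main()
        {
            Console.WriteLine(string.Join("|", ParseOrdLine("0,3,5   "))); // 0|3|5
            Console.WriteLine(ParseOrdLine("        ").Length);            // 0
        }
    }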

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
new file mode 100644
index 0000000..6f89c10
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesWriter.cs
@@ -0,0 +1,411 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+class SimpleTextDocValuesWriter extends DocValuesConsumer {
+  final static BytesRef END     = new BytesRef("END");
+  final static BytesRef FIELD   = new BytesRef("field ");
+  final static BytesRef TYPE    = new BytesRef("  type ");
+  // used for numerics
+  final static BytesRef MINVALUE = new BytesRef("  minvalue ");
+  final static BytesRef PATTERN  = new BytesRef("  pattern ");
+  // used for bytes
+  final static BytesRef LENGTH = new BytesRef("length ");
+  final static BytesRef MAXLENGTH = new BytesRef("  maxlength ");
+  // used for sorted bytes
+  final static BytesRef NUMVALUES = new BytesRef("  numvalues ");
+  final static BytesRef ORDPATTERN = new BytesRef("  ordpattern ");
+  
+  IndexOutput data;
+  final BytesRef scratch = new BytesRef();
+  final int numDocs;
+  private final Set<String> fieldsSeen = new HashSet<>(); // for asserting
+  
+  public SimpleTextDocValuesWriter(SegmentWriteState state, String ext)  {
+    // System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
+    data = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
+    numDocs = state.segmentInfo.getDocCount();
+  }
+
+  // for asserting
+  private bool fieldSeen(String field) {
+    Debug.Assert(!fieldsSeen.contains(field), "field \"" + field + "\" was added more than once during flush");
+    fieldsSeen.add(field);
+    return true;
+  }
+
+  @Override
+  public void addNumericField(FieldInfo field, Iterable<Number> values)  {
+    Debug.Assert(fieldSeen(field.name));
+    Debug.Assert(field.getDocValuesType() == FieldInfo.DocValuesType.NUMERIC ||
+            field.getNormType() == FieldInfo.DocValuesType.NUMERIC);
+    writeFieldEntry(field, FieldInfo.DocValuesType.NUMERIC);
+
+    // first pass to find min/max
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    for(Number n : values) {
+      long v = n == null ? 0 : n.longValue();
+      minValue = Math.min(minValue, v);
+      maxValue = Math.max(maxValue, v);
+    }
+    
+    // write our minimum value to the .dat, all entries are deltas from that
+    SimpleTextUtil.write(data, MINVALUE);
+    SimpleTextUtil.write(data, Long.toString(minValue), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    // build up our fixed-width "simple text packed ints"
+    // format
+    BigInteger maxBig = BigInteger.valueOf(maxValue);
+    BigInteger minBig = BigInteger.valueOf(minValue);
+    BigInteger diffBig = maxBig.subtract(minBig);
+    int maxBytesPerValue = diffBig.toString().length();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < maxBytesPerValue; i++) {
+      sb.append('0');
+    }
+    
+    // write our pattern to the .dat
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+
+    final String patternString = sb.toString();
+    
+    final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));
+    
+    int numDocsWritten = 0;
+
+    // second pass to write the values
+    for(Number n : values) {
+      long value = n == null ? 0 : n.longValue();
+      Debug.Assert(value >= minValue);
+      Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
+      String s = encoder.format(delta);
+      Debug.Assert(s.length() == patternString.length());
+      SimpleTextUtil.write(data, s, scratch);
+      SimpleTextUtil.writeNewline(data);
+      if (n == null) {
+        SimpleTextUtil.write(data, "F", scratch);
+      } else {
+        SimpleTextUtil.write(data, "T", scratch);
+      }
+      SimpleTextUtil.writeNewline(data);
+      numDocsWritten++;
+      Debug.Assert(numDocsWritten <= numDocs);
+    }
+
+    Debug.Assert(numDocs == numDocsWritten, "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten);
+  }
+
+  @Override
+  public void addBinaryField(FieldInfo field, Iterable<BytesRef> values)  {
+    Debug.Assert(fieldSeen(field.name));
+    Debug.Assert(field.getDocValuesType() == DocValuesType.BINARY);
+    int maxLength = 0;
+    for(BytesRef value : values) {
+      final int length = value == null ? 0 : value.length;
+      maxLength = Math.max(maxLength, length);
+    }
+    writeFieldEntry(field, FieldInfo.DocValuesType.BINARY);
+
+    // write maxLength
+    SimpleTextUtil.write(data, MAXLENGTH);
+    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    int maxBytesLength = Long.toString(maxLength).length();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < maxBytesLength; i++) {
+      sb.append('0');
+    }
+    // write our pattern for encoding lengths
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
+
+    int numDocsWritten = 0;
+    for(BytesRef value : values) {
+      // write length
+      final int length = value == null ? 0 : value.length;
+      SimpleTextUtil.write(data, LENGTH);
+      SimpleTextUtil.write(data, encoder.format(length), scratch);
+      SimpleTextUtil.writeNewline(data);
+        
+      // write bytes -- don't use SimpleText.write
+      // because it escapes:
+      if (value != null) {
+        data.writeBytes(value.bytes, value.offset, value.length);
+      }
+
+      // pad to fit
+      for (int i = length; i < maxLength; i++) {
+        data.writeByte((byte)' ');
+      }
+      SimpleTextUtil.writeNewline(data);
+      if (value == null) {
+        SimpleTextUtil.write(data, "F", scratch);
+      } else {
+        SimpleTextUtil.write(data, "T", scratch);
+      }
+      SimpleTextUtil.writeNewline(data);
+      numDocsWritten++;
+    }
+
+    Debug.Assert(numDocs == numDocsWritten);
+  }
+  
+  @Override
+  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd)  {
+    Debug.Assert(fieldSeen(field.name));
+    Debug.Assert(field.getDocValuesType() == DocValuesType.SORTED);
+    writeFieldEntry(field, FieldInfo.DocValuesType.SORTED);
+
+    int valueCount = 0;
+    int maxLength = -1;
+    for(BytesRef value : values) {
+      maxLength = Math.max(maxLength, value.length);
+      valueCount++;
+    }
+
+    // write numValues
+    SimpleTextUtil.write(data, NUMVALUES);
+    SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    // write maxLength
+    SimpleTextUtil.write(data, MAXLENGTH);
+    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    int maxBytesLength = Integer.toString(maxLength).length();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < maxBytesLength; i++) {
+      sb.append('0');
+    }
+    
+    // write our pattern for encoding lengths
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
+    
+    int maxOrdBytes = Long.toString(valueCount+1L).length();
+    sb.setLength(0);
+    for (int i = 0; i < maxOrdBytes; i++) {
+      sb.append('0');
+    }
+    
+    // write our pattern for ords
+    SimpleTextUtil.write(data, ORDPATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
+
+    // for asserts:
+    int valuesSeen = 0;
+
+    for(BytesRef value : values) {
+      // write length
+      SimpleTextUtil.write(data, LENGTH);
+      SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+      SimpleTextUtil.writeNewline(data);
+        
+      // write bytes -- don't use SimpleText.write
+      // because it escapes:
+      data.writeBytes(value.bytes, value.offset, value.length);
+
+      // pad to fit
+      for (int i = value.length; i < maxLength; i++) {
+        data.writeByte((byte)' ');
+      }
+      SimpleTextUtil.writeNewline(data);
+      valuesSeen++;
+      Debug.Assert(valuesSeen <= valueCount);
+    }
+
+    Debug.Assert(valuesSeen == valueCount);
+
+    for(Number ord : docToOrd) {
+      SimpleTextUtil.write(data, ordEncoder.format(ord.longValue()+1), scratch);
+      SimpleTextUtil.writeNewline(data);
+    }
+  }
+
+  @Override
+  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords)  {
+    Debug.Assert(fieldSeen(field.name));
+    Debug.Assert(field.getDocValuesType() == DocValuesType.SORTED_SET);
+    writeFieldEntry(field, FieldInfo.DocValuesType.SORTED_SET);
+
+    long valueCount = 0;
+    int maxLength = 0;
+    for(BytesRef value : values) {
+      maxLength = Math.max(maxLength, value.length);
+      valueCount++;
+    }
+
+    // write numValues
+    SimpleTextUtil.write(data, NUMVALUES);
+    SimpleTextUtil.write(data, Long.toString(valueCount), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    // write maxLength
+    SimpleTextUtil.write(data, MAXLENGTH);
+    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    int maxBytesLength = Integer.toString(maxLength).length();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < maxBytesLength; i++) {
+      sb.append('0');
+    }
+    
+    // write our pattern for encoding lengths
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
+    
+    // compute ord pattern: this is funny, we encode all values for all docs to find the maximum length
+    int maxOrdListLength = 0;
+    StringBuilder sb2 = new StringBuilder();
+    Iterator<Number> ordStream = ords.iterator();
+    for (Number n : docToOrdCount) {
+      sb2.setLength(0);
+      int count = n.intValue();
+      for (int i = 0; i < count; i++) {
+        long ord = ordStream.next().longValue();
+        if (sb2.length() > 0) {
+          sb2.append(",");
+        }
+        sb2.append(Long.toString(ord));
+      }
+      maxOrdListLength = Math.max(maxOrdListLength, sb2.length());
+    }
+     
+    sb2.setLength(0);
+    for (int i = 0; i < maxOrdListLength; i++) {
+      sb2.append('X');
+    }
+    
+    // write our pattern for ord lists
+    SimpleTextUtil.write(data, ORDPATTERN);
+    SimpleTextUtil.write(data, sb2.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    // for asserts:
+    long valuesSeen = 0;
+
+    for(BytesRef value : values) {
+      // write length
+      SimpleTextUtil.write(data, LENGTH);
+      SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+      SimpleTextUtil.writeNewline(data);
+        
+      // write bytes -- don't use SimpleText.write
+      // because it escapes:
+      data.writeBytes(value.bytes, value.offset, value.length);
+
+      // pad to fit
+      for (int i = value.length; i < maxLength; i++) {
+        data.writeByte((byte)' ');
+      }
+      SimpleTextUtil.writeNewline(data);
+      valuesSeen++;
+      Debug.Assert(valuesSeen <= valueCount);
+    }
+
+    Debug.Assert(valuesSeen == valueCount);
+
+    ordStream = ords.iterator();
+    
+    // write the ords for each doc comma-separated
+    for(Number n : docToOrdCount) {
+      sb2.setLength(0);
+      int count = n.intValue();
+      for (int i = 0; i < count; i++) {
+        long ord = ordStream.next().longValue();
+        if (sb2.length() > 0) {
+          sb2.append(",");
+        }
+        sb2.append(Long.toString(ord));
+      }
+      // now pad to fit: these are numbers so spaces work well. reader calls trim()
+      int numPadding = maxOrdListLength - sb2.length();
+      for (int i = 0; i < numPadding; i++) {
+        sb2.append(' ');
+      }
+      SimpleTextUtil.write(data, sb2.toString(), scratch);
+      SimpleTextUtil.writeNewline(data);
+    }
+  }
+
+  /** write the header for this field */
+  private void writeFieldEntry(FieldInfo field, FieldInfo.DocValuesType type)  {
+    SimpleTextUtil.write(data, FIELD);
+    SimpleTextUtil.write(data, field.name, scratch);
+    SimpleTextUtil.writeNewline(data);
+    
+    SimpleTextUtil.write(data, TYPE);
+    SimpleTextUtil.write(data, type.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+  }
+  
+  @Override
+  public void close()  {
+    if (data != null) {
+      bool success = false;
+      try {
+        Debug.Assert(!fieldsSeen.isEmpty());
+        // TODO: sheisty to do this here?
+        SimpleTextUtil.write(data, END);
+        SimpleTextUtil.writeNewline(data);
+        SimpleTextUtil.writeChecksum(data, scratch);
+        success = true;
+      } finally {
+        if (success) {
+          IOUtils.close(data);
+        } else {
+          IOUtils.closeWhileHandlingException(data);
+        }
+        data = null;
+      }
+    }
+  }
+}
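
To make the plaintext layout concrete, here is a hand-written sample of what
addNumericField() would emit for a hypothetical field "price" with per-document
values 3, 5, and null (values are written as deltas from the minimum, with a
trailing T/F line marking whether the document actually had a value); this is
illustrative, not actual tool output:

field price
  type NUMERIC
  minvalue 0
  pattern 0
3
T
5
T
0
F

On close() the writer appends an END line plus a checksum before the stream is
closed.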

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosFormat.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosFormat.cs
new file mode 100644
index 0000000..9c6b0e3
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosFormat.cs
@@ -0,0 +1,45 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FieldInfosReader;
+import org.apache.lucene.codecs.FieldInfosWriter;
+
+/**
+ * plaintext field infos format
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
+  private final FieldInfosReader reader = new SimpleTextFieldInfosReader();
+  private final FieldInfosWriter writer = new SimpleTextFieldInfosWriter();
+
+  @Override
+  public FieldInfosReader getFieldInfosReader()  {
+    return reader;
+  }
+
+  @Override
+  public FieldInfosWriter getFieldInfosWriter()  {
+    return writer;
+  }
+}
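
A minimal usage sketch for this format class; dir (a Directory) and fieldInfos
are assumed to be in scope, and the segment name "_0" with an empty suffix is
illustrative:

  FieldInfosFormat format = new SimpleTextFieldInfosFormat();
  format.getFieldInfosWriter().write(dir, "_0", "", fieldInfos, IOContext.DEFAULT);
  FieldInfos roundTripped = format.getFieldInfosReader().read(dir, "_0", "", IOContext.DEFAULT);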

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosReader.cs
new file mode 100644
index 0000000..ae01b58
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosReader.cs
@@ -0,0 +1,157 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.codecs.FieldInfosReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldInfosWriter.*;
+
+/**
+ * reads plaintext field infos files
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextFieldInfosReader extends FieldInfosReader {
+
+  @Override
+  public FieldInfos read(Directory directory, String segmentName, String segmentSuffix, IOContext iocontext)  {
+    final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
+    ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
+    BytesRef scratch = new BytesRef();
+    
+    bool success = false;
+    try {
+      
+      SimpleTextUtil.readLine(input, scratch);
+      Debug.Assert(StringHelper.startsWith(scratch, NUMFIELDS));
+      final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
+      FieldInfo infos[] = new FieldInfo[size];
+
+      for (int i = 0; i < size; i++) {
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, NAME));
+        String name = readString(NAME.length, scratch);
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, NUMBER));
+        int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
+
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, ISINDEXED));
+        bool isIndexed = Boolean.parseBoolean(readString(ISINDEXED.length, scratch));
+        
+        final IndexOptions indexOptions;
+        if (isIndexed) {
+          SimpleTextUtil.readLine(input, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, INDEXOPTIONS));
+          indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));          
+        } else {
+          indexOptions = null;
+        }
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, STORETV));
+        bool storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, PAYLOADS));
+        bool storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, NORMS));
+        bool omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, NORMS_TYPE));
+        String nrmType = readString(NORMS_TYPE.length, scratch);
+        final DocValuesType normsType = docValuesType(nrmType);
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, DOCVALUES));
+        String dvType = readString(DOCVALUES.length, scratch);
+        final DocValuesType docValuesType = docValuesType(dvType);
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, DOCVALUES_GEN));
+        final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
+        
+        SimpleTextUtil.readLine(input, scratch);
+        Debug.Assert(StringHelper.startsWith(scratch, NUM_ATTS));
+        int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
+        Map<String,String> atts = new HashMap<>();
+
+        for (int j = 0; j < numAtts; j++) {
+          SimpleTextUtil.readLine(input, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, ATT_KEY));
+          String key = readString(ATT_KEY.length, scratch);
+        
+          SimpleTextUtil.readLine(input, scratch);
+          Debug.Assert(StringHelper.startsWith(scratch, ATT_VALUE));
+          String value = readString(ATT_VALUE.length, scratch);
+          atts.put(key, value);
+        }
+
+        infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, 
+          omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(atts));
+        infos[i].setDocValuesGen(dvGen);
+      }
+
+      SimpleTextUtil.checkFooter(input);
+      
+      FieldInfos fieldInfos = new FieldInfos(infos);
+      success = true;
+      return fieldInfos;
+    } finally {
+      if (success) {
+        input.close();
+      } else {
+        IOUtils.closeWhileHandlingException(input);
+      }
+    }
+  }
+
+  public DocValuesType docValuesType(String dvType) {
+    if ("false".equals(dvType)) {
+      return null;
+    } else {
+      return DocValuesType.valueOf(dvType);
+    }
+  }
+  
+  private String readString(int offset, BytesRef scratch) {
+    return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, StandardCharsets.UTF_8);
+  }
+}
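
One subtlety worth an example: the companion writer encodes a missing type as
the literal string "false", so docValuesType() above maps that token back to
null instead of calling DocValuesType.valueOf("false"), which would throw. An
illustrative check:

  SimpleTextFieldInfosReader r = new SimpleTextFieldInfosReader();
  Debug.Assert(r.docValuesType("false") == null);                    // no doc values
  Debug.Assert(r.docValuesType("NUMERIC") == DocValuesType.NUMERIC); // real type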

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosWriter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosWriter.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosWriter.cs
new file mode 100644
index 0000000..b3bdbed
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldInfosWriter.cs
@@ -0,0 +1,149 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * writes plaintext field infos files
+ * <p>
+ * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
+ * @lucene.experimental
+ */
+public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
+  
+  /** Extension of field infos */
+  static final String FIELD_INFOS_EXTENSION = "inf";
+  
+  static final BytesRef NUMFIELDS       =  new BytesRef("number of fields ");
+  static final BytesRef NAME            =  new BytesRef("  name ");
+  static final BytesRef NUMBER          =  new BytesRef("  number ");
+  static final BytesRef ISINDEXED       =  new BytesRef("  indexed ");
+  static final BytesRef STORETV         =  new BytesRef("  term vectors ");
+  static final BytesRef STORETVPOS      =  new BytesRef("  term vector positions ");
+  static final BytesRef STORETVOFF      =  new BytesRef("  term vector offsets ");
+  static final BytesRef PAYLOADS        =  new BytesRef("  payloads ");
+  static final BytesRef NORMS           =  new BytesRef("  norms ");
+  static final BytesRef NORMS_TYPE      =  new BytesRef("  norms type ");
+  static final BytesRef DOCVALUES       =  new BytesRef("  doc values ");
+  static final BytesRef DOCVALUES_GEN   =  new BytesRef("  doc values gen ");
+  static final BytesRef INDEXOPTIONS    =  new BytesRef("  index options ");
+  static final BytesRef NUM_ATTS        =  new BytesRef("  attributes ");
+  final static BytesRef ATT_KEY         =  new BytesRef("    key ");
+  final static BytesRef ATT_VALUE       =  new BytesRef("    value ");
+  
+  @Override
+  public void write(Directory directory, String segmentName, String segmentSuffix, FieldInfos infos, IOContext context)  {
+    final String fileName = IndexFileNames.segmentFileName(segmentName, segmentSuffix, FIELD_INFOS_EXTENSION);
+    IndexOutput out = directory.createOutput(fileName, context);
+    BytesRef scratch = new BytesRef();
+    bool success = false;
+    try {
+      SimpleTextUtil.write(out, NUMFIELDS);
+      SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
+      SimpleTextUtil.writeNewline(out);
+      
+      for (FieldInfo fi : infos) {
+        SimpleTextUtil.write(out, NAME);
+        SimpleTextUtil.write(out, fi.name, scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, NUMBER);
+        SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, ISINDEXED);
+        SimpleTextUtil.write(out, Boolean.toString(fi.isIndexed()), scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        if (fi.isIndexed()) {
+          Debug.Assert(fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads());
+          SimpleTextUtil.write(out, INDEXOPTIONS);
+          SimpleTextUtil.write(out, fi.getIndexOptions().toString(), scratch);
+          SimpleTextUtil.writeNewline(out);
+        }
+        
+        SimpleTextUtil.write(out, STORETV);
+        SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, PAYLOADS);
+        SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
+        SimpleTextUtil.writeNewline(out);
+               
+        SimpleTextUtil.write(out, NORMS);
+        SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, NORMS_TYPE);
+        SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, DOCVALUES);
+        SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
+        SimpleTextUtil.writeNewline(out);
+        
+        SimpleTextUtil.write(out, DOCVALUES_GEN);
+        SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
+        SimpleTextUtil.writeNewline(out);
+               
+        Map<String,String> atts = fi.attributes();
+        int numAtts = atts == null ? 0 : atts.size();
+        SimpleTextUtil.write(out, NUM_ATTS);
+        SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
+        SimpleTextUtil.writeNewline(out);
+      
+        if (numAtts > 0) {
+          for (Map.Entry<String,String> entry : atts.entrySet()) {
+            SimpleTextUtil.write(out, ATT_KEY);
+            SimpleTextUtil.write(out, entry.getKey(), scratch);
+            SimpleTextUtil.writeNewline(out);
+          
+            SimpleTextUtil.write(out, ATT_VALUE);
+            SimpleTextUtil.write(out, entry.getValue(), scratch);
+            SimpleTextUtil.writeNewline(out);
+          }
+        }
+      }
+      SimpleTextUtil.writeChecksum(out, scratch);
+      success = true;
+    } finally {
+      if (success) {
+        out.close();
+      } else {
+        IOUtils.closeWhileHandlingException(out);
+      }
+    }
+  }
+  
+  private static String getDocValuesType(DocValuesType type) {
+    return type == null ? "false" : type.toString();
+  }
+}
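
For reference, the per-field records produced by this writer read like the
following hand-written sample (one hypothetical indexed text field with norms
and no attributes; not actual tool output):

number of fields 1
  name body
  number 0
  indexed true
  index options DOCS_AND_FREQS_AND_POSITIONS
  term vectors false
  payloads false
  norms true
  norms type NUMERIC
  doc values false
  doc values gen -1
  attributes 0

A checksum line follows, and the companion reader asserts each key prefix in
exactly this order before parsing the value that comes after it.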

