lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [01/51] [abbrv] [partial] Cleaning up and getting ready to development towards v4.8
Date Sat, 06 Sep 2014 19:36:12 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/master fb5123036 -> 1da1cb5b5


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
new file mode 100644
index 0000000..63e999a
--- /dev/null
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextFieldsReader.cs
@@ -0,0 +1,690 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.BufferedChecksumIndexInput;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PairOutputs;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
+
+class SimpleTextFieldsReader extends FieldsProducer {
+  private final TreeMap<String,Long> fields;
+  private final IndexInput in;
+  private final FieldInfos fieldInfos;
+  private final int maxDoc;
+
+  public SimpleTextFieldsReader(SegmentReadState state)  {
+    this.maxDoc = state.segmentInfo.getDocCount();
+    fieldInfos = state.fieldInfos;
+    in = state.directory.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name,
state.segmentSuffix), state.context);
+    bool success = false;
+    try {
+      fields = readFields(in.clone());
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+  
+  private TreeMap<String,Long> readFields(IndexInput in)  {
+    ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
+    BytesRef scratch = new BytesRef(10);
+    TreeMap<String,Long> fields = new TreeMap<>();
+    
+    while (true) {
+      SimpleTextUtil.readLine(input, scratch);
+      if (scratch.equals(END)) {
+        SimpleTextUtil.checkFooter(input);
+        return fields;
+      } else if (StringHelper.startsWith(scratch, FIELD)) {
+        String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length
- FIELD.length, StandardCharsets.UTF_8);
+        fields.put(fieldName, input.getFilePointer());
+      }
+    }
+  }
+
+  private class SimpleTextTermsEnum extends TermsEnum {
+    private final IndexOptions indexOptions;
+    private int docFreq;
+    private long totalTermFreq;
+    private long docsStart;
+    private bool ended;
+    private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>
fstEnum;
+
+    public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>
fst, IndexOptions indexOptions) {
+      this.indexOptions = indexOptions;
+      fstEnum = new BytesRefFSTEnum<>(fst);
+    }
+
+    @Override
+    public bool seekExact(BytesRef text)  {
+
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>
result = fstEnum.seekExact(text);
+      if (result != null) {
+        PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
+        PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
+        docsStart = pair1.output1;
+        docFreq = pair2.output1.intValue();
+        totalTermFreq = pair2.output2;
+        return true;
+      } else {
+        return false;
+      }
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text)  {
+
+      //System.out.println("seek to text=" + text.utf8ToString());
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>
result = fstEnum.seekCeil(text);
+      if (result == null) {
+        //System.out.println("  end");
+        return SeekStatus.END;
+      } else {
+        //System.out.println("  got text=" + term.utf8ToString());
+        PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
+        PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
+        docsStart = pair1.output1;
+        docFreq = pair2.output1.intValue();
+        totalTermFreq = pair2.output2;
+
+        if (result.input.equals(text)) {
+          //System.out.println("  match docsStart=" + docsStart);
+          return SeekStatus.FOUND;
+        } else {
+          //System.out.println("  not match docsStart=" + docsStart);
+          return SeekStatus.NOT_FOUND;
+        }
+      }
+    }
+
+    @Override
+    public BytesRef next()  {
+      Debug.Assert( !ended;
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>
result = fstEnum.next();
+      if (result != null) {
+        PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
+        PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
+        docsStart = pair1.output1;
+        docFreq = pair2.output1.intValue();
+        totalTermFreq = pair2.output2;
+        return result.input;
+      } else {
+        return null;
+      }
+    }
+
+    @Override
+    public BytesRef term() {
+      return fstEnum.current().input;
+    }
+
+    @Override
+    public long ord()  {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void seekExact(long ord) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int docFreq() {
+      return docFreq;
+    }
+
+    @Override
+    public long totalTermFreq() {
+      return indexOptions == IndexOptions.DOCS_ONLY ? -1 : totalTermFreq;
+    }
+ 
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)  {
+      SimpleTextDocsEnum docsEnum;
+      if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum)
reuse).canReuse(SimpleTextFieldsReader.this.in)) {
+        docsEnum = (SimpleTextDocsEnum) reuse;
+      } else {
+        docsEnum = new SimpleTextDocsEnum();
+      }
+      return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY,
docFreq);
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse,
int flags)  {
+
+      if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+        // Positions were not indexed
+        return null;
+      }
+
+      SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
+      if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum &&
((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
+        docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
+      } else {
+        docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
+      } 
+      return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
+    }
+    
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+  }
+
+  private class SimpleTextDocsEnum extends DocsEnum {
+    private final IndexInput inStart;
+    private final IndexInput in;
+    private bool omitTF;
+    private int docID = -1;
+    private int tf;
+    private Bits liveDocs;
+    private final BytesRef scratch = new BytesRef(10);
+    private final CharsRef scratchUTF16 = new CharsRef(10);
+    private int cost;
+    
+    public SimpleTextDocsEnum() {
+      this.inStart = SimpleTextFieldsReader.this.in;
+      this.in = this.inStart.clone();
+    }
+
+    public bool canReuse(IndexInput in) {
+      return in == inStart;
+    }
+
+    public SimpleTextDocsEnum reset(long fp, Bits liveDocs, bool omitTF, int docFreq)  {
+      this.liveDocs = liveDocs;
+      in.seek(fp);
+      this.omitTF = omitTF;
+      docID = -1;
+      tf = 1;
+      cost = docFreq;
+      return this;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int freq()  {
+      return tf;
+    }
+
+    @Override
+    public int nextDoc()  {
+      if (docID == NO_MORE_DOCS) {
+        return docID;
+      }
+      bool first = true;
+      int termFreq = 0;
+      while(true) {
+        final long lineStart = in.getFilePointer();
+        SimpleTextUtil.readLine(in, scratch);
+        if (StringHelper.startsWith(scratch, DOC)) {
+          if (!first && (liveDocs == null || liveDocs.get(docID))) {
+            in.seek(lineStart);
+            if (!omitTF) {
+              tf = termFreq;
+            }
+            return docID;
+          }
+          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length,
scratchUTF16);
+          docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+          termFreq = 0;
+          first = false;
+        } else if (StringHelper.startsWith(scratch, FREQ)) {
+          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length,
scratchUTF16);
+          termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+        } else if (StringHelper.startsWith(scratch, POS)) {
+          // skip termFreq++;
+        } else if (StringHelper.startsWith(scratch, START_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, END_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, PAYLOAD)) {
+          // skip
+        } else {
+          Debug.Assert( StringHelper.startsWith(scratch, TERM) || StringHelper.startsWith(scratch,
FIELD) || StringHelper.startsWith(scratch, END): "scratch=" + scratch.utf8ToString();
+          if (!first && (liveDocs == null || liveDocs.get(docID))) {
+            in.seek(lineStart);
+            if (!omitTF) {
+              tf = termFreq;
+            }
+            return docID;
+          }
+          return docID = NO_MORE_DOCS;
+        }
+      }
+    }
+
+    @Override
+    public int advance(int target)  {
+      // Naive -- better to index skip data
+      return slowAdvance(target);
+    }
+    
+    @Override
+    public long cost() {
+      return cost;
+    }
+  }
+
+  private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum {
+    private final IndexInput inStart;
+    private final IndexInput in;
+    private int docID = -1;
+    private int tf;
+    private Bits liveDocs;
+    private final BytesRef scratch = new BytesRef(10);
+    private final BytesRef scratch2 = new BytesRef(10);
+    private final CharsRef scratchUTF16 = new CharsRef(10);
+    private final CharsRef scratchUTF16_2 = new CharsRef(10);
+    private BytesRef payload;
+    private long nextDocStart;
+    private bool readOffsets;
+    private bool readPositions;
+    private int startOffset;
+    private int endOffset;
+    private int cost;
+
+    public SimpleTextDocsAndPositionsEnum() {
+      this.inStart = SimpleTextFieldsReader.this.in;
+      this.in = inStart.clone();
+    }
+
+    public bool canReuse(IndexInput in) {
+      return in == inStart;
+    }
+
+    public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions,
int docFreq) {
+      this.liveDocs = liveDocs;
+      nextDocStart = fp;
+      docID = -1;
+      readPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >=
0;
+      readOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
+      if (!readOffsets) {
+        startOffset = -1;
+        endOffset = -1;
+      }
+      cost = docFreq;
+      return this;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int freq()  {
+      return tf;
+    }
+
+    @Override
+    public int nextDoc()  {
+      bool first = true;
+      in.seek(nextDocStart);
+      long posStart = 0;
+      while(true) {
+        final long lineStart = in.getFilePointer();
+        SimpleTextUtil.readLine(in, scratch);
+        //System.out.println("NEXT DOC: " + scratch.utf8ToString());
+        if (StringHelper.startsWith(scratch, DOC)) {
+          if (!first && (liveDocs == null || liveDocs.get(docID))) {
+            nextDocStart = lineStart;
+            in.seek(posStart);
+            return docID;
+          }
+          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length,
scratchUTF16);
+          docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+          tf = 0;
+          first = false;
+        } else if (StringHelper.startsWith(scratch, FREQ)) {
+          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length,
scratchUTF16);
+          tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+          posStart = in.getFilePointer();
+        } else if (StringHelper.startsWith(scratch, POS)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, START_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, END_OFFSET)) {
+          // skip
+        } else if (StringHelper.startsWith(scratch, PAYLOAD)) {
+          // skip
+        } else {
+          Debug.Assert( StringHelper.startsWith(scratch, TERM) || StringHelper.startsWith(scratch,
FIELD) || StringHelper.startsWith(scratch, END);
+          if (!first && (liveDocs == null || liveDocs.get(docID))) {
+            nextDocStart = lineStart;
+            in.seek(posStart);
+            return docID;
+          }
+          return docID = NO_MORE_DOCS;
+        }
+      }
+    }
+
+    @Override
+    public int advance(int target)  {
+      // Naive -- better to index skip data
+      return slowAdvance(target);
+    }
+
+    @Override
+    public int nextPosition()  {
+      final int pos;
+      if (readPositions) {
+        SimpleTextUtil.readLine(in, scratch);
+        Debug.Assert( StringHelper.startsWith(scratch, POS): "got line=" + scratch.utf8ToString();
+        UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length,
scratchUTF16_2);
+        pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+      } else {
+        pos = -1;
+      }
+
+      if (readOffsets) {
+        SimpleTextUtil.readLine(in, scratch);
+        Debug.Assert( StringHelper.startsWith(scratch, START_OFFSET): "got line=" + scratch.utf8ToString();
+        UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+START_OFFSET.length, scratch.length-START_OFFSET.length,
scratchUTF16_2);
+        startOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+        SimpleTextUtil.readLine(in, scratch);
+        Debug.Assert( StringHelper.startsWith(scratch, END_OFFSET): "got line=" + scratch.utf8ToString();
+        UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+END_OFFSET.length, scratch.length-END_OFFSET.length,
scratchUTF16_2);
+        endOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
+      }
+
+      final long fp = in.getFilePointer();
+      SimpleTextUtil.readLine(in, scratch);
+      if (StringHelper.startsWith(scratch, PAYLOAD)) {
+        final int len = scratch.length - PAYLOAD.length;
+        if (scratch2.bytes.length < len) {
+          scratch2.grow(len);
+        }
+        System.arraycopy(scratch.bytes, PAYLOAD.length, scratch2.bytes, 0, len);
+        scratch2.length = len;
+        payload = scratch2;
+      } else {
+        payload = null;
+        in.seek(fp);
+      }
+      return pos;
+    }
+
+    @Override
+    public int startOffset()  {
+      return startOffset;
+    }
+
+    @Override
+    public int endOffset()  {
+      return endOffset;
+    }
+
+    @Override
+    public BytesRef getPayload() {
+      return payload;
+    }
+    
+    @Override
+    public long cost() {
+      return cost;
+    }
+  }
+
+  static class TermData {
+    public long docsStart;
+    public int docFreq;
+
+    public TermData(long docsStart, int docFreq) {
+      this.docsStart = docsStart;
+      this.docFreq = docFreq;
+    }
+  }
+
+  private class SimpleTextTerms extends Terms {
+    private final long termsStart;
+    private final FieldInfo fieldInfo;
+    private final int maxDoc;
+    private long sumTotalTermFreq;
+    private long sumDocFreq;
+    private int docCount;
+    private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
+    private int termCount;
+    private final BytesRef scratch = new BytesRef(10);
+    private final CharsRef scratchUTF16 = new CharsRef(10);
+
+    public SimpleTextTerms(String field, long termsStart, int maxDoc)  {
+      this.maxDoc = maxDoc;
+      this.termsStart = termsStart;
+      fieldInfo = fieldInfos.fieldInfo(field);
+      loadTerms();
+    }
+
+    private void loadTerms()  {
+      PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
+      final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>
b;
+      final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs,
posIntOutputs);
+      final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
+                                                                                        
                             outputsInner);
+      b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
+      IndexInput in = SimpleTextFieldsReader.this.in.clone();
+      in.seek(termsStart);
+      final BytesRef lastTerm = new BytesRef(10);
+      long lastDocsStart = -1;
+      int docFreq = 0;
+      long totalTermFreq = 0;
+      FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
+      final IntsRef scratchIntsRef = new IntsRef();
+      while(true) {
+        SimpleTextUtil.readLine(in, scratch);
+        if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
+          if (lastDocsStart != -1) {
+            b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
+                  outputs.newPair(lastDocsStart,
+                                  outputsInner.newPair((long) docFreq, totalTermFreq)));
+            sumTotalTermFreq += totalTermFreq;
+          }
+          break;
+        } else if (StringHelper.startsWith(scratch, DOC)) {
+          docFreq++;
+          sumDocFreq++;
+          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length,
scratchUTF16);
+          int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+          visitedDocs.set(docID);
+        } else if (StringHelper.startsWith(scratch, FREQ)) {
+          UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length,
scratchUTF16);
+          totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
+        } else if (StringHelper.startsWith(scratch, TERM)) {
+          if (lastDocsStart != -1) {
+            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
+                                                                            outputsInner.newPair((long)
docFreq, totalTermFreq)));
+          }
+          lastDocsStart = in.getFilePointer();
+          final int len = scratch.length - TERM.length;
+          if (len > lastTerm.length) {
+            lastTerm.grow(len);
+          }
+          System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
+          lastTerm.length = len;
+          docFreq = 0;
+          sumTotalTermFreq += totalTermFreq;
+          totalTermFreq = 0;
+          termCount++;
+        }
+      }
+      docCount = visitedDocs.cardinality();
+      fst = b.finish();
+      /*
+      PrintStream ps = new PrintStream("out.dot");
+      fst.toDot(ps);
+      ps.close();
+      System.out.println("SAVED out.dot");
+      */
+      //System.out.println("FST " + fst.sizeInBytes());
+    }
+    
+    /** Returns approximate RAM bytes used */
+    public long ramBytesUsed() {
+      return (fst!=null) ? fst.sizeInBytes() : 0;
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse)  {
+      if (fst != null) {
+        return new SimpleTextTermsEnum(fst, fieldInfo.getIndexOptions());
+      } else {
+        return TermsEnum.EMPTY;
+      }
+    }
+
+    @Override
+    public Comparator<BytesRef> getComparator() {
+      return BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    @Override
+    public long size() {
+      return (long) termCount;
+    }
+
+    @Override
+    public long getSumTotalTermFreq() {
+      return fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
+    }
+
+    @Override
+    public long getSumDocFreq()  {
+      return sumDocFreq;
+    }
+
+    @Override
+    public int getDocCount()  {
+      return docCount;
+    }
+
+    @Override
+    public bool hasFreqs() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    }
+
+    @Override
+    public bool hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
>= 0;
+    }
+
+    @Override
+    public bool hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
>= 0;
+    }
+    
+    @Override
+    public bool hasPayloads() {
+      return fieldInfo.hasPayloads();
+    }
+  }
+
+  @Override
+  public Iterator<String> iterator() {
+    return Collections.unmodifiableSet(fields.keySet()).iterator();
+  }
+
+  private final Map<String,SimpleTextTerms> termsCache = new HashMap<>();
+
+  @Override
+  synchronized public Terms terms(String field)  {
+    Terms terms = termsCache.get(field);
+    if (terms == null) {
+      Long fp = fields.get(field);
+      if (fp == null) {
+        return null;
+      } else {
+        terms = new SimpleTextTerms(field, fp, maxDoc);
+        termsCache.put(field, (SimpleTextTerms) terms);
+      }
+    }
+    return terms;
+  }
+
+  @Override
+  public int size() {
+    return -1;
+  }
+
+  @Override
+  public void close()  {
+    in.close();
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    long sizeInBytes = 0;
+    for(SimpleTextTerms simpleTextTerms : termsCache.values()) {
+      sizeInBytes += (simpleTextTerms!=null) ? simpleTextTerms.ramBytesUsed() : 0;
+    }
+    return sizeInBytes;
+  }
+
+  @Override
+  public void checkIntegrity()  {}
+}


Mime
View raw message