lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [07/51] [abbrv] [partial] Cleaning up and getting ready to development towards v4.8
Date Sat, 06 Sep 2014 19:36:18 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
new file mode 100644
index 0000000..85c711a
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
@@ -0,0 +1,198 @@
+package org.apache.lucene.codecs.intblock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Naive int block API that writes vInts.  This is
+ *  expected to give poor performance; it's really only for
+ *  testing the pluggability.  One should typically use pfor instead. */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.sep.IntIndexInput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexInput;
+
+// TODO: much of this can be shared code w/ the fixed case
+
+/** Abstract base class that reads variable-size blocks of ints
+ *  from an IndexInput.  While this is a simple approach, a
+ *  more performant approach would directly create an impl
+ *  of IntIndexInput inside Directory.  Wrapping a generic
+ *  IndexInput will likely cost performance.
+ *
+ * @lucene.experimental
+ */
+public abstract class VariableIntBlockIndexInput extends IntIndexInput {
+
+  protected final IndexInput in;
+  protected final int maxBlockSize;
+
+  protected VariableIntBlockIndexInput(final IndexInput in)  {
+    this.in = in;
+    maxBlockSize = in.readInt();
+  }
+
+  @Override
+  public IntIndexInput.Reader reader()  {
+    final int[] buffer = new int[maxBlockSize];
+    final IndexInput clone = in.clone();
+    // TODO: can this be simplified?
+    return new Reader(clone, buffer, this.getBlockReader(clone, buffer));
+  }
+
+  @Override
+  public void close()  {
+    in.close();
+  }
+
+  @Override
+  public IntIndexInput.Index index() {
+    return new Index();
+  }
+
+  protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer) ;
+
+  /**
+   * Interface for variable-size block decoders.
+   * <p>
+   * Implementations should decode into the buffer in {@link #readBlock}.
+   */
+  public interface BlockReader {
+    public int readBlock() ;
+    public void seek(long pos) ;
+  }
+
+  private static class Reader extends IntIndexInput.Reader {
+    private final IndexInput in;
+
+    public final int[] pending;
+    int upto;
+
+    private bool seekPending;
+    private long pendingFP;
+    private int pendingUpto;
+    private long lastBlockFP;
+    private int blockSize;
+    private final BlockReader blockReader;
+
+    public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) {
+      this.in = in;
+      this.pending = pending;
+      this.blockReader = blockReader;
+    }
+
+    void seek(final long fp, final int upto) {
+      // TODO: should we do this in real-time, not lazy?
+      pendingFP = fp;
+      pendingUpto = upto;
+      Debug.Assert( pendingUpto >= 0: "pendingUpto=" + pendingUpto;
+      seekPending = true;
+    }
+
+    private final void maybeSeek()  { // applies the position recorded by seek(fp, upto), if one is pending
+      if (seekPending) {
+        if (pendingFP != lastBlockFP) {
+          // need new block: reposition the input and the block reader, then decode it
+          in.seek(pendingFP);
+          blockReader.seek(pendingFP);
+          lastBlockFP = pendingFP;
+          blockSize = blockReader.readBlock(); // the value returned by readBlock() becomes the current block size
+        }
+        upto = pendingUpto; // offset requested by the seek; may still be >= blockSize (handled below)
+
+        // TODO: if we were more clever when writing the
+        // index, such that a seek point wouldn't be written
+        // until the int encoder "committed", we could avoid
+        // this (likely minor) inefficiency:
+
+        // This is necessary for int encoders that are
+        // non-causal, i.e. must see future int values to
+        // encode the current ones.
+        while(upto >= blockSize) { // skip whole blocks until upto falls inside the current one
+          upto -= blockSize;
+          lastBlockFP = in.getFilePointer(); // file pointer before decoding the next block
+          blockSize = blockReader.readBlock();
+        }
+        seekPending = false;
+      }
+    }
+
+    @Override
+    public int next()  {
+      this.maybeSeek();
+      if (upto == blockSize) {
+        lastBlockFP = in.getFilePointer();
+        blockSize = blockReader.readBlock();
+        upto = 0;
+      }
+
+      return pending[upto++];
+    }
+  }
+
+  private class Index extends IntIndexInput.Index {
+    private long fp;
+    private int upto;
+
+    @Override
+    public void read(final DataInput indexIn, final bool absolute)  { // loads fp/upto from indexIn, absolute or as a delta against the current fp/upto
+      if (absolute) {
+        upto = indexIn.readVInt();
+        fp = indexIn.readVLong();
+      } else {
+        final int uptoDelta = indexIn.readVInt(); // low bit is the same-block flag, remaining bits are the payload
+        if ((uptoDelta & 1) == 1) {
+          // same block: fp is unchanged and the payload is added to upto
+          upto += uptoDelta >>> 1;
+        } else {
+          // new block: the payload replaces upto and a VLong delta advances fp
+          upto = uptoDelta >>> 1;
+          fp += indexIn.readVLong();
+        }
+      }
+      // TODO: we can't do this assert because non-causal
+      // int encoders can have upto over the buffer size
+      //Debug.Assert( upto < maxBlockSize: "upto=" + upto + " max=" + maxBlockSize;
+    }
+
+    @Override
+    public String toString() {
+      return "VarIntBlock.Index fp=" + fp + " upto=" + upto + " maxBlock=" + maxBlockSize;
+    }
+
+    @Override
+    public void seek(final IntIndexInput.Reader other)  {
+      ((Reader) other).seek(fp, upto);
+    }
+
+    @Override
+    public void copyFrom(final IntIndexInput.Index other) {
+      final Index idx = (Index) other;
+      fp = idx.fp;
+      upto = idx.upto;
+    }
+
+    @Override
+    public Index clone() {
+      Index other = new Index();
+      other.fp = fp;
+      other.upto = upto;
+      return other;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
new file mode 100644
index 0000000..574b7f4
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
@@ -0,0 +1,136 @@
+package org.apache.lucene.codecs.intblock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Naive int block API that writes vInts.  This is
+ *  expected to give poor performance; it's really only for
+ *  testing the pluggability.  One should typically use pfor instead. */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+
+// TODO: much of this can be shared code w/ the fixed case
+
+/** Abstract base class that writes variable-size blocks of ints
+ *  to an IndexOutput.  While this is a simple approach, a
+ *  more performant approach would directly create an impl
+ *  of IntIndexOutput inside Directory.  Wrapping a generic
+ *  IndexOutput will likely cost performance.
+ *
+ * @lucene.experimental
+ */
+public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
+
+  protected final IndexOutput out;
+
+  private int upto;
+  private bool hitExcDuringWrite;
+
+  // TODO what Var-Var codecs exist in practice... and what are their block sizes like?
+  // if it's less than 128 we should set that as max and use byte?
+
+  /** NOTE: maxBlockSize must be the maximum block size 
+   *  plus the max non-causal lookahead of your codec.  EG Simple9
+   *  requires lookahead=1 because on seeing the Nth value
+   *  it knows it must now encode the N-1 values before it. */
+  protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize)  {
+    this.out = out;
+    out.writeInt(maxBlockSize);
+  }
+
+  /** Called one value at a time.  Return the number of
+   *  buffered input values that have been written to out. */
+  protected abstract int add(int value) ;
+
+  @Override
+  public IntIndexOutput.Index index() {
+    return new Index();
+  }
+
+  private class Index extends IntIndexOutput.Index {
+    long fp;
+    int upto;
+    long lastFP;
+    int lastUpto;
+
+    @Override
+    public void mark()  {
+      fp = out.getFilePointer();
+      upto = VariableIntBlockIndexOutput.this.upto;
+    }
+
+    @Override
+    public void copyFrom(IntIndexOutput.Index other, bool copyLast)  {
+      Index idx = (Index) other;
+      fp = idx.fp;
+      upto = idx.upto;
+      if (copyLast) {
+        lastFP = fp;
+        lastUpto = upto;
+      }
+    }
+
+    @Override
+    public void write(DataOutput indexOut, bool absolute)  { // writes fp/upto to indexOut, absolute or relative to the last written fp/upto
+      Debug.Assert( upto >= 0;
+      if (absolute) {
+        indexOut.writeVInt(upto);
+        indexOut.writeVLong(fp);
+      } else if (fp == lastFP) {
+        // same block: only the upto delta is written, with the low bit set as a flag
+        Debug.Assert( upto >= lastUpto;
+        int uptoDelta = upto - lastUpto;
+        indexOut.writeVInt(uptoDelta << 1 | 1);
+      } else {      
+        // new block: upto is written in full with the low bit clear, followed by the fp delta
+        indexOut.writeVInt(upto << 1);
+        indexOut.writeVLong(fp - lastFP);
+      }
+      lastUpto = upto; // remembered so the next non-absolute write can be expressed as a delta
+      lastFP = fp;
+    }
+  }
+
+  @Override
+  public void write(int v)  { // hands one value to add(v) and updates the running upto counter
+    hitExcDuringWrite = true; // stays true if add(v) throws, which makes close() skip its flush loop
+    upto -= add(v)-1; // net change is 1 - add(v): one value taken in, add(v) values reported as written
+    hitExcDuringWrite = false;
+    Debug.Assert( upto >= 0;
+  }
+
+  @Override
+  public void close()  { // pads with zeros until buffered values are flushed, then closes out in all cases
+    try {
+      if (!hitExcDuringWrite) { // skipped when a previous write(int) threw inside add(v)
+        // stuff 0s in until the "real" data is flushed:
+        int stuffed = 0; // number of padding zeros added so far
+        while(upto > stuffed) {
+          upto -= add(0)-1; // same bookkeeping as write(int): 1 - add(0)
+          Debug.Assert( upto >= 0;
+          stuffed += 1;
+        }
+      }
+    } finally {
+      out.close(); // runs even if the padding loop throws
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
new file mode 100644
index 0000000..3f014ce
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <ProjectGuid>{3F79B6D4-4359-4F83-B64F-07F4F6262425}</ProjectGuid>
+    <OutputType>Library</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <RootNamespace>Lucene.Net.Codecs</RootNamespace>
+    <AssemblyName>Lucene.Net.Codecs</AssemblyName>
+    <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
+    <FileAlignment>512</FileAlignment>
+    <TargetFrameworkProfile />
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+    <DebugSymbols>true</DebugSymbols>
+    <DebugType>full</DebugType>
+    <Optimize>false</Optimize>
+    <OutputPath>bin\Debug\</OutputPath>
+    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+    <DebugType>pdbonly</DebugType>
+    <Optimize>true</Optimize>
+    <OutputPath>bin\Release\</OutputPath>
+    <DefineConstants>TRACE</DefineConstants>
+    <ErrorReport>prompt</ErrorReport>
+    <WarningLevel>4</WarningLevel>
+  </PropertyGroup>
+  <ItemGroup>
+    <Reference Include="System" />
+    <Reference Include="System.Core" />
+    <Reference Include="System.Xml.Linq" />
+    <Reference Include="System.Data.DataSetExtensions" />
+    <Reference Include="Microsoft.CSharp" />
+    <Reference Include="System.Data" />
+    <Reference Include="System.Xml" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Appending\AppendingCodec.cs" />
+    <Compile Include="Appending\AppendingPostingsFormat.cs" />
+    <Compile Include="Appending\AppendingTermsReader.cs" />
+    <Compile Include="BlockTerms\BlockTermsFieldAndTerm.cs" />
+    <Compile Include="BlockTerms\BlockTermsReader.cs" />
+    <Compile Include="BlockTerms\BlockTermsWriter.cs" />
+    <Compile Include="BlockTerms\FixedGapTermsIndexReader.cs" />
+    <Compile Include="BlockTerms\FixedGapTermsIndexWriter.cs" />
+    <Compile Include="BlockTerms\TermsIndexReaderBase.cs" />
+    <Compile Include="BlockTerms\TermsIndexWriterBase.cs" />
+    <Compile Include="BlockTerms\VariableGapTermsIndexReader.cs" />
+    <Compile Include="BlockTerms\VariableGapTermsIndexWriter.cs" />
+    <Compile Include="Bloom\BloomFilterFactory.cs" />
+    <Compile Include="Bloom\BloomFilteringPostingsFormat.cs" />
+    <Compile Include="Bloom\DefaultBloomFilterFactory.cs" />
+    <Compile Include="Bloom\FuzzySet.cs" />
+    <Compile Include="Bloom\HashFunction.cs" />
+    <Compile Include="Bloom\MurmurHash2.cs" />
+    <Compile Include="DiskDV\DiskDocValuesFormat.cs" />
+    <Compile Include="DiskDV\DiskDocValuesProducer.cs" />
+    <Compile Include="DiskDV\DiskNormsFormat.cs" />
+    <Compile Include="Intblock\FixedIntBlockIndexInput.cs" />
+    <Compile Include="Intblock\FixedIntBlockIndexOutput.cs" />
+    <Compile Include="Intblock\IBlockReader.cs" />
+    <Compile Include="Intblock\Index.cs" />
+    <Compile Include="Intblock\Reader.cs" />
+    <Compile Include="Intblock\VariableIntBlockIndexInput.cs" />
+    <Compile Include="Intblock\VariableIntBlockIndexOutput.cs" />
+    <Compile Include="Memory\DirectDocValuesConsumer.cs" />
+    <Compile Include="Memory\DirectDocValuesFormat.cs" />
+    <Compile Include="Memory\DirectDocValuesProducer.cs" />
+    <Compile Include="Memory\DirectPostingsFormat.cs" />
+    <Compile Include="Memory\FSTOrdPostingsFormat.cs" />
+    <Compile Include="Memory\FSTOrdPulsing41PostingsFormat.cs" />
+    <Compile Include="Memory\FSTOrdTermsReader.cs" />
+    <Compile Include="Memory\FSTOrdTermsWriter.cs" />
+    <Compile Include="Memory\FSTPostingsFormat.cs" />
+    <Compile Include="Memory\FSTPulsing41PostingsFormat.cs" />
+    <Compile Include="Memory\FSTTermOutputs.cs" />
+    <Compile Include="Memory\FSTTermsReader.cs" />
+    <Compile Include="Memory\FSTTermsWriter.cs" />
+    <Compile Include="Memory\MemoryDocValuesConsumer.cs" />
+    <Compile Include="Memory\MemoryDocValuesFormat.cs" />
+    <Compile Include="Memory\MemoryDocValuesProducer.cs" />
+    <Compile Include="Memory\MemoryPostingsFormat.cs" />
+    <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Pulsing\Pulsing41PostingsFormat.cs" />
+    <Compile Include="Pulsing\PulsingPostingsFormat.cs" />
+    <Compile Include="Pulsing\PulsingPostingsReader.cs" />
+    <Compile Include="Pulsing\PulsingPostingsWriter.cs" />
+    <Compile Include="Sep\IntIndexInput.cs" />
+    <Compile Include="Sep\IntIndexOutput.cs" />
+    <Compile Include="Sep\IntStreamFactory.cs" />
+    <Compile Include="Sep\SepPostingsReader.cs" />
+    <Compile Include="Sep\SepPostingsWriter.cs" />
+    <Compile Include="Sep\SepSkipListReader.cs" />
+    <Compile Include="Sep\SepSkipListWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextCodec.cs" />
+    <Compile Include="SimpleText\SimpleTextDocValuesFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextDocValuesReader.cs" />
+    <Compile Include="SimpleText\SimpleTextDocValuesWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextFieldInfosFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextFieldInfosReader.cs" />
+    <Compile Include="SimpleText\SimpleTextFieldInfosWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextFieldsReader.cs" />
+    <Compile Include="SimpleText\SimpleTextFieldsWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextLiveDocsFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextNormsFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextPostingsFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextSegmentInfoFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextSegmentInfoReader.cs" />
+    <Compile Include="SimpleText\SimpleTextSegmentInfoWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextStoredFieldsFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextStoredFieldsReader.cs" />
+    <Compile Include="SimpleText\SimpleTextStoredFieldsWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextTermVectorsFormat.cs" />
+    <Compile Include="SimpleText\SimpleTextTermVectorsReader.cs" />
+    <Compile Include="SimpleText\SimpleTextTermVectorsWriter.cs" />
+    <Compile Include="SimpleText\SimpleTextUtil.cs" />
+  </ItemGroup>
+  <ItemGroup />
+  <ItemGroup>
+    <ProjectReference Include="..\core\Lucene.Net.csproj">
+      <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+      <Name>Lucene.Net</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+  <!-- To modify your build process, add your task inside one of the targets below and uncomment it. 
+       Other similar extension points exist, see Microsoft.Common.targets.
+  <Target Name="BeforeBuild">
+  </Target>
+  <Target Name="AfterBuild">
+  </Target>
+  -->
+</Project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln
new file mode 100644
index 0000000..3cf5780
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln
@@ -0,0 +1,26 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.30110.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs", "Lucene.Net.Codecs.csproj", "{3F79B6D4-4359-4F83-B64F-07F4F6262425}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net", "..\Lucene.Net.Core\Lucene.Net.csproj", "{5D4AD9BE-1FFB-41AB-9943-25737971BF57}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Any CPU = Debug|Any CPU
+		Release|Any CPU = Release|Any CPU
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{3F79B6D4-4359-4F83-B64F-07F4F6262425}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{3F79B6D4-4359-4F83-B64F-07F4F6262425}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{3F79B6D4-4359-4F83-B64F-07F4F6262425}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{3F79B6D4-4359-4F83-B64F-07F4F6262425}.Release|Any CPU.Build.0 = Release|Any CPU
+		{5D4AD9BE-1FFB-41AB-9943-25737971BF57}.Debug|Any CPU.ActiveCfg = Debug|x86
+		{5D4AD9BE-1FFB-41AB-9943-25737971BF57}.Release|Any CPU.ActiveCfg = Release|x86
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
new file mode 100644
index 0000000..2e7e013
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
@@ -0,0 +1,304 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.VERSION_CURRENT;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.BYTES;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
+
+/**
+ * Writer for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesConsumer extends DocValuesConsumer {
+  IndexOutput data, meta;
+  final int maxDoc;
+
+  DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension)  {
+    maxDoc = state.segmentInfo.getDocCount();
+    bool success = false;
+    try {
+      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+      data = state.directory.createOutput(dataName, state.context);
+      CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
+      String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+      meta = state.directory.createOutput(metaName, state.context);
+      CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this);
+      }
+    }
+  }
+
+  @Override
+  public void addNumericField(FieldInfo field, Iterable<Number> values)  {
+    meta.writeVInt(field.number);
+    meta.writeByte(NUMBER);
+    addNumericFieldValues(field, values);
+  }
+
+  private void addNumericFieldValues(FieldInfo field, Iterable<Number> values)  { // iterates values twice: once for stats, once to write
+    meta.writeLong(data.getFilePointer()); // current data file pointer, recorded before any values are written
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    bool missing = false; // becomes true if any value is null
+
+    long count = 0;
+    for (Number nv : values) { // first pass: min/max, null detection, count
+      if (nv != null) {
+        long v = nv.longValue();
+        minValue = Math.min(minValue, v);
+        maxValue = Math.max(maxValue, v);
+      } else {
+        missing = true;
+      }
+      count++;
+      if (count >= DirectDocValuesFormat.MAX_SORTED_SET_ORDS) { // NOTE(review): throws at count == MAX although the message says "<=" - confirm intended bound
+        throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + DirectDocValuesFormat.MAX_SORTED_SET_ORDS + " values/total ords");
+      }
+    }
+    meta.writeInt((int) count);
+    
+    if (missing) { // bitset written to data; its start and length go to meta
+      long start = data.getFilePointer();
+      writeMissingBitset(values);
+      meta.writeLong(start);
+      meta.writeLong(data.getFilePointer() - start);
+    } else {
+      meta.writeLong(-1L); // -1 written in place of a bitset start when no value is null
+    }
+
+    byte byteWidth; // narrowest of 1/2/4/8 bytes whose signed range covers [minValue, maxValue]
+    if (minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
+      byteWidth = 1;
+    } else if (minValue >= Short.MIN_VALUE && maxValue <= Short.MAX_VALUE) {
+      byteWidth = 2;
+    } else if (minValue >= Integer.MIN_VALUE && maxValue <= Integer.MAX_VALUE) {
+      byteWidth = 4;
+    } else {
+      byteWidth = 8;
+    }
+    meta.writeByte(byteWidth);
+
+    for (Number nv : values) { // second pass: write each value at the chosen width
+      long v;
+      if (nv != null) {
+        v = nv.longValue();
+      } else {
+        v = 0; // null values are written as 0
+      }
+
+      switch(byteWidth) {
+      case 1:
+        data.writeByte((byte) v);
+        break;
+      case 2:
+        data.writeShort((short) v);
+        break;
+      case 4:
+        data.writeInt((int) v);
+        break;
+      case 8:
+        data.writeLong(v);
+        break;
+      }
+    }
+  }
+  
+  @Override
+  public void close()  { // finishes meta and data with their trailers, then closes both outputs
+    bool success = false;
+    try {
+      if (meta != null) {
+        meta.writeVInt(-1); // write EOF marker
+        CodecUtil.writeFooter(meta); // write checksum
+      }
+      if (data != null) {
+        CodecUtil.writeFooter(data);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(data, meta);
+      } else {
+        IOUtils.closeWhileHandlingException(data, meta); // failure path: uses the closeWhileHandlingException variant instead of close
+      }
+      data = meta = null; // a later close() then skips the trailer writes via the null checks above
+    }
+  }
+
+  @Override
+  public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values)  {
+    meta.writeVInt(field.number);
+    meta.writeByte(BYTES);
+    addBinaryFieldValues(field, values);
+  }
+
+  private void addBinaryFieldValues(FieldInfo field, final Iterable<BytesRef> values)  { // iterates values twice: bytes first, then addresses
+    // write the byte[] data, back to back, skipping null values
+    final long startFP = data.getFilePointer();
+    bool missing = false; // becomes true if any value is null
+    long totalBytes = 0;
+    int count = 0;
+    for(BytesRef v : values) {
+      if (v != null) {
+        data.writeBytes(v.bytes, v.offset, v.length);
+        totalBytes += v.length;
+        if (totalBytes > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH) { // checked after the bytes of this value were already written
+          throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" + DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
+        }
+      } else {
+        missing = true;
+      }
+      count++; // counts null values too
+    }
+
+    meta.writeLong(startFP);
+    meta.writeInt((int) totalBytes);
+    meta.writeInt(count);
+    if (missing) { // bitset written to data; its start and length go to meta
+      long start = data.getFilePointer();
+      writeMissingBitset(values);
+      meta.writeLong(start);
+      meta.writeLong(data.getFilePointer() - start);
+    } else {
+      meta.writeLong(-1L); // -1 written in place of a bitset start when no value is null
+    }
+    
+    int addr = 0; // running sum of the lengths of the non-null values seen so far
+    for (BytesRef v : values) {
+      data.writeInt(addr); // address written before adding this value's length
+      if (v != null) {
+        addr += v.length;
+      }
+    }
+    data.writeInt(addr); // one extra trailing entry: the total length
+  }
+  
+  // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
+  // but this is very simple, and algorithms only check this for values of 0 anyway (doesn't slow down normal decode)
+  void writeMissingBitset(Iterable<?> values)  { // writes one bit per value to data, packed 64 per long
+    long bits = 0; // the long currently being filled
+    int count = 0; // number of values accumulated into bits so far
+    for (Object v : values) {
+      if (count == 64) { // current long is full: write it and start a new one
+        data.writeLong(bits);
+        count = 0;
+        bits = 0;
+      }
+      if (v != null) {
+        bits |= 1L << (count & 0x3f); // a set bit marks a non-null value
+      }
+      count++;
+    }
+    if (count > 0) { // write the last, possibly partial, long; nothing is written for an empty iterable
+      data.writeLong(bits);
+    }
+  }
+
+  @Override
+  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd)  {
+    meta.writeVInt(field.number);
+    meta.writeByte(SORTED);
+
+    // write the ordinals as numerics
+    addNumericFieldValues(field, docToOrd);
+    
+    // write the values as binary
+    addBinaryFieldValues(field, values);
+  }
+
+  // note: this might not be the most efficient... but it's fairly simple
+  @Override
+  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords)  {
+    meta.writeVInt(field.number);
+    meta.writeByte(SORTED_SET);
+
+    // First write docToOrdCounts, except we "aggregate" the
+    // counts so they turn into addresses, and add a final
+    // value = the total aggregate:
+    addNumericFieldValues(field, new Iterable<Number>() {
+
+        // Just aggregates the count values so they become
+        // "addresses", and adds one more value in the end
+        // (the final sum):
+
+        @Override
+        public Iterator<Number> iterator() {
+          final Iterator<Number> iter = docToOrdCount.iterator();
+
+          return new Iterator<Number>() {
+
+            long sum;
+            bool ended;
+
+            @Override
+            public bool hasNext() {
+              return iter.hasNext() || !ended;
+            }
+
+            @Override
+            public Number next() {
+              long toReturn = sum;
+
+              if (iter.hasNext()) {
+                Number n = iter.next();
+                if (n != null) {
+                  sum += n.longValue();
+                }
+              } else if (!ended) {
+                ended = true;
+              } else {
+                Debug.Assert( false;
+              }
+
+              return toReturn;
+            }
+
+            @Override
+            public void remove() {
+              throw new UnsupportedOperationException();
+            }
+          };
+        }
+      });
+
+    // Write ordinals for all docs, appended into one big
+    // numerics:
+    addNumericFieldValues(field, ords);
+      
+    // write the values as binary
+    addBinaryFieldValues(field, values);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs
new file mode 100644
index 0000000..1f89e43
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs
@@ -0,0 +1,83 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * Doc values format that applies no (or very little) compression and
+ * serves everything from memory.  Indexed values are written to disk,
+ * but at search time all of them are loaded into plain java arrays.
+ * Numeric values are held in a byte[], short[], int[] or long[],
+ * whichever is needed to fit the range of the values.  Binary values
+ * carry an overhead of one int (4 bytes) per value.
+ *
+ * <p>Limitations:
+ * <ul>
+ *   <li>For binary and sorted fields, the total space required for all
+ *       binary values cannot exceed about 2.1 GB
+ *       (see {@link #MAX_TOTAL_BYTES_LENGTH}).</li>
+ *
+ *   <li>For sorted set fields, the sum of the sizes of every document's
+ *       set of values cannot exceed about 2.1 B values
+ *       (see {@link #MAX_SORTED_SET_ORDS}).  For example, if every
+ *       document has 10 values (10 instances of
+ *       {@link SortedSetDocValuesField}) added, then no more than
+ *       ~210 M documents can be added to one segment.</li>
+ * </ul>
+ */
+
+public class DirectDocValuesFormat extends DocValuesFormat {
+
+  // Codec names and file extensions handed to the consumer and producer.
+  static final String DATA_CODEC = "DirectDocValuesData";
+  static final String DATA_EXTENSION = "dvdd";
+  static final String METADATA_CODEC = "DirectDocValuesMetadata";
+  static final String METADATA_EXTENSION = "dvdm";
+
+  /**
+   * Upper bound on the sum of all byte lengths for a binary field, or
+   * for the unique values of a sorted or sorted set field.
+   */
+  public static final int MAX_TOTAL_BYTES_LENGTH = ArrayUtil.MAX_ARRAY_LENGTH;
+
+  /**
+   * Upper bound on the number of values, summed across all documents,
+   * in a sorted set field.
+   */
+  public static final int MAX_SORTED_SET_ORDS = ArrayUtil.MAX_ARRAY_LENGTH;
+
+  /** Sole constructor; registers this format under the name "Direct". */
+  public DirectDocValuesFormat() {
+    super("Direct");
+  }
+
+  /** Creates the consumer that writes the data and metadata files for {@code state}. */
+  @Override
+  public DocValuesConsumer fieldsConsumer(SegmentWriteState state)  {
+    return new DirectDocValuesConsumer(
+        state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+  }
+
+  /** Creates the producer that reads the data and metadata files for {@code state}. */
+  @Override
+  public DocValuesProducer fieldsProducer(SegmentReadState state)  {
+    return new DirectDocValuesProducer(
+        state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs
new file mode 100644
index 0000000..a95f384
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs
@@ -0,0 +1,511 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomAccessOrds;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Reader for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesProducer extends DocValuesProducer {
+  // metadata maps (just file pointers and minimal stuff)
+  // Each map is keyed by field number and is filled by readFields().
+  private final Map<Integer,NumericEntry> numerics = new HashMap<>();
+  private final Map<Integer,BinaryEntry> binaries = new HashMap<>();
+  private final Map<Integer,SortedEntry> sorteds = new HashMap<>();
+  private final Map<Integer,SortedSetEntry> sortedSets = new HashMap<>();
+  // Data file; the load* methods seek into it and read values from it.
+  private final IndexInput data;
+  
+  // ram instances we have already loaded
+  // Keyed by field number; filled lazily by the get* methods.
+  private final Map<Integer,NumericDocValues> numericInstances = 
+      new HashMap<>();
+  private final Map<Integer,BinaryDocValues> binaryInstances =
+      new HashMap<>();
+  private final Map<Integer,SortedDocValues> sortedInstances =
+      new HashMap<>();
+  private final Map<Integer,SortedSetRawValues> sortedSetInstances =
+      new HashMap<>();
+  // Bitsets built by getMissingBits(), keyed by field number.
+  private final Map<Integer,Bits> docsWithFieldInstances = new HashMap<>();
+  
+  // Document count of the segment, taken from the SegmentReadState in the constructor.
+  private final int maxDoc;
+  // Running total reported by ramBytesUsed(); grows as arrays are loaded.
+  private final AtomicLong ramBytesUsed;
+  // Version returned by the metadata file's header check.
+  private final int version;
+  
+  // Entry type tags compared against the type byte read in readFields().
+  static final byte NUMBER = 0;
+  static final byte BYTES = 1;
+  static final byte SORTED = 2;
+  static final byte SORTED_SET = 3;
+
+  // Accepted header version range; from VERSION_CHECKSUM on, the
+  // metadata footer is checked and checkIntegrity() checksums the data file.
+  static final int VERSION_START = 0;
+  static final int VERSION_CHECKSUM = 1;
+  static final int VERSION_CURRENT = VERSION_CHECKSUM;
+    
+  /**
+   * Opens the metadata and data files of the segment described by
+   * {@code state}.  The metadata file is read completely (and closed)
+   * here; the data file is only header-checked and then kept open so
+   * that values can be loaded lazily later.
+   *
+   * @param state         segment being opened
+   * @param dataCodec     codec name expected in the data file header
+   * @param dataExtension file extension of the data file
+   * @param metaCodec     codec name expected in the metadata file header
+   * @param metaExtension file extension of the metadata file
+   */
+  DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension)  {
+    maxDoc = state.segmentInfo.getDocCount();
+    String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+    // read in the entries from the metadata file.
+    ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
+    ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
+    // 'success' selects between a normal close and a close that must not
+    // mask the exception that is already propagating.
+    boolean success = false;
+    try {
+      version = CodecUtil.checkHeader(in, metaCodec, 
+                                      VERSION_START,
+                                      VERSION_CURRENT);
+      readFields(in);
+
+      // Files written with checksums end in a footer; older ones must
+      // simply end right after the last entry.
+      if (version >= VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(in);
+      } else {
+        CodecUtil.checkEOF(in);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(in);
+      } else {
+        IOUtils.closeWhileHandlingException(in);
+      }
+    }
+
+    success = false;
+    try {
+      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+      data = state.directory.openInput(dataName, state.context);
+      final int version2 = CodecUtil.checkHeader(data, dataCodec, 
+                                                 VERSION_START,
+                                                 VERSION_CURRENT);
+      // Metadata and data must have been written by the same format version.
+      if (version != version2) {
+        throw new CorruptIndexException("Format versions mismatch");
+      }
+
+      success = true;
+    } finally {
+      // On failure the data input must not stay open.
+      if (!success) {
+        IOUtils.closeWhileHandlingException(this.data);
+      }
+    }
+  }
+
+  private NumericEntry readNumericEntry(IndexInput meta)  {
+    NumericEntry entry = new NumericEntry();
+    entry.offset = meta.readLong();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+    entry.byteWidth = meta.readByte();
+
+    return entry;
+  }
+
+  private BinaryEntry readBinaryEntry(IndexInput meta)  {
+    BinaryEntry entry = new BinaryEntry();
+    entry.offset = meta.readLong();
+    entry.numBytes = meta.readInt();
+    entry.count = meta.readInt();
+    entry.missingOffset = meta.readLong();
+    if (entry.missingOffset != -1) {
+      entry.missingBytes = meta.readLong();
+    } else {
+      entry.missingBytes = 0;
+    }
+
+    return entry;
+  }
+
+  private SortedEntry readSortedEntry(IndexInput meta)  {
+    SortedEntry entry = new SortedEntry();
+    entry.docToOrd = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private SortedSetEntry readSortedSetEntry(IndexInput meta)  {
+    SortedSetEntry entry = new SortedSetEntry();
+    entry.docToOrdAddress = readNumericEntry(meta);
+    entry.ords = readNumericEntry(meta);
+    entry.values = readBinaryEntry(meta);
+    return entry;
+  }
+
+  private void readFields(IndexInput meta)  {
+    int fieldNumber = meta.readVInt();
+    while (fieldNumber != -1) {
+      int fieldType = meta.readByte();
+      if (fieldType == NUMBER) {
+        numerics.put(fieldNumber, readNumericEntry(meta));
+      } else if (fieldType == BYTES) {
+        binaries.put(fieldNumber, readBinaryEntry(meta));
+      } else if (fieldType == SORTED) {
+        sorteds.put(fieldNumber, readSortedEntry(meta));
+      } else if (fieldType == SORTED_SET) {
+        sortedSets.put(fieldNumber, readSortedSetEntry(meta));
+      } else {
+        throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
+      }
+      fieldNumber = meta.readVInt();
+    }
+  }
+
+  /**
+   * Returns the current value of the RAM counter, which starts at the
+   * shallow instance size and is increased whenever values are loaded.
+   */
+  @Override
+  public long ramBytesUsed() {
+    return ramBytesUsed.get();
+  }
+  
+  /**
+   * Asks {@link CodecUtil} to checksum the entire data file.  Does
+   * nothing for files older than {@code VERSION_CHECKSUM}.
+   */
+  @Override
+  public void checkIntegrity()  {
+    if (version < VERSION_CHECKSUM) {
+      return;
+    }
+    CodecUtil.checksumEntireFile(data);
+  }
+
+  /**
+   * Returns the numeric doc values for {@code field}, loading them from
+   * the data file on first use and caching the result by field number.
+   */
+  @Override
+  public synchronized NumericDocValues getNumeric(FieldInfo field)  {
+    final NumericDocValues cached = numericInstances.get(field.number);
+    if (cached != null) {
+      return cached;
+    }
+    // First request for this field: load and remember.
+    final NumericDocValues loaded = loadNumeric(numerics.get(field.number));
+    numericInstances.put(field.number, loaded);
+    return loaded;
+  }
+  
+  /**
+   * Loads the values described by {@code entry} from the data file into
+   * a primitive array whose element type matches {@code entry.byteWidth}
+   * (1 = byte, 2 = short, 4 = int, 8 = long), adds the array size to the
+   * RAM counter, and returns a view that reads from that array.
+   *
+   * @throws AssertionError if {@code entry.byteWidth} is not 1, 2, 4 or 8
+   */
+  private NumericDocValues loadNumeric(NumericEntry entry)  {
+    // Values start missingBytes past the entry offset.
+    data.seek(entry.offset + entry.missingBytes);
+    switch (entry.byteWidth) {
+    case 1:
+      {
+        final byte[] values = new byte[entry.count];
+        data.readBytes(values, 0, entry.count);
+        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+        return new NumericDocValues() {
+          @Override
+          public long get(int idx) {
+            return values[idx];
+          }
+        };
+      }
+
+    case 2:
+      {
+        final short[] values = new short[entry.count];
+        for(int i=0;i<entry.count;i++) {
+          values[i] = data.readShort();
+        }
+        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+        return new NumericDocValues() {
+          @Override
+          public long get(int idx) {
+            return values[idx];
+          }
+        };
+      }
+
+    case 4:
+      {
+        final int[] values = new int[entry.count];
+        for(int i=0;i<entry.count;i++) {
+          values[i] = data.readInt();
+        }
+        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+        return new NumericDocValues() {
+          @Override
+          public long get(int idx) {
+            return values[idx];
+          }
+        };
+      }
+
+    case 8:
+      {
+        final long[] values = new long[entry.count];
+        for(int i=0;i<entry.count;i++) {
+          values[i] = data.readLong();
+        }
+        ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+        return new NumericDocValues() {
+          @Override
+          public long get(int idx) {
+            return values[idx];
+          }
+        };
+      }
+    
+    default:
+      // No other byte width is handled by this reader.
+      throw new AssertionError();
+    }
+  }
+
+  /**
+   * Returns the binary doc values for {@code field}, loading them from
+   * the data file on first use and caching the result by field number.
+   */
+  @Override
+  public synchronized BinaryDocValues getBinary(FieldInfo field)  {
+    final BinaryDocValues cached = binaryInstances.get(field.number);
+    if (cached != null) {
+      return cached;
+    }
+    // First request for this field: load and remember.
+    final BinaryDocValues loaded = loadBinary(binaries.get(field.number));
+    binaryInstances.put(field.number, loaded);
+    return loaded;
+  }
+  
+  /**
+   * Loads the binary values described by {@code entry}: one byte array
+   * holding all value bytes, plus {@code entry.count + 1} int addresses
+   * into it.  Both array sizes are added to the RAM counter.  The
+   * returned view points each result at the shared byte array (no copy).
+   */
+  private BinaryDocValues loadBinary(BinaryEntry entry)  {
+    data.seek(entry.offset);
+    final byte[] bytes = new byte[entry.numBytes];
+    data.readBytes(bytes, 0, entry.numBytes);
+
+    // Addresses follow the value bytes, after missingBytes more bytes.
+    data.seek(entry.offset + entry.numBytes + entry.missingBytes);
+    final int[] address = new int[entry.count+1];
+    for (int slot = 0; slot < address.length; slot++) {
+      address[slot] = data.readInt();
+    }
+
+    final long bytesSize = RamUsageEstimator.sizeOf(bytes);
+    final long addressSize = RamUsageEstimator.sizeOf(address);
+    ramBytesUsed.addAndGet(bytesSize + addressSize);
+
+    return new BinaryDocValues() {
+      @Override
+      public void get(int docID, BytesRef result) {
+        result.bytes = bytes;
+        final int start = address[docID];
+        result.offset = start;
+        // A value spans from its own address up to the next one.
+        result.length = address[docID+1] - start;
+      }
+    };
+  }
+  
+  /**
+   * Returns the sorted doc values for {@code field}, loading them from
+   * the data file on first use and caching the result by field number.
+   */
+  @Override
+  public synchronized SortedDocValues getSorted(FieldInfo field)  {
+    final SortedDocValues cached = sortedInstances.get(field.number);
+    if (cached != null) {
+      return cached;
+    }
+    // First request for this field: load and remember.
+    final SortedDocValues loaded = loadSorted(field);
+    sortedInstances.put(field.number, loaded);
+    return loaded;
+  }
+
+  /**
+   * Builds the sorted doc values for {@code field} from its numeric
+   * doc-to-ord part and its binary values part.
+   */
+  private SortedDocValues loadSorted(FieldInfo field)  {
+    final SortedEntry sortedEntry = sorteds.get(field.number);
+    final NumericDocValues ordByDoc = loadNumeric(sortedEntry.docToOrd);
+    final BinaryDocValues valueByOrd = loadBinary(sortedEntry.values);
+
+    // lookupTerm and termsEnum are deliberately left to the superclass
+    // (lookupTerm uses super's binary search).
+    return new SortedDocValues() {
+
+      @Override
+      public int getOrd(int docID) {
+        final long ord = ordByDoc.get(docID);
+        return (int) ord;
+      }
+
+      @Override
+      public void lookupOrd(int ord, BytesRef result) {
+        valueByOrd.get(ord, result);
+      }
+
+      @Override
+      public int getValueCount() {
+        return sortedEntry.values.count;
+      }
+    };
+  }
+
+  /**
+   * Returns sorted set doc values for {@code field}.  The raw arrays are
+   * loaded once and cached by field number, but a fresh wrapper is
+   * returned on every call because the wrapper carries iteration state.
+   */
+  @Override
+  public synchronized SortedSetDocValues getSortedSet(FieldInfo field)  {
+    final SortedSetEntry entry = sortedSets.get(field.number);
+    SortedSetRawValues raw = sortedSetInstances.get(field.number);
+    if (raw == null) {
+      // First request for this field: load and remember.
+      raw = loadSortedSet(entry);
+      sortedSetInstances.put(field.number, raw);
+    }
+
+    final NumericDocValues addressByDoc = raw.docToOrdAddress;
+    final NumericDocValues allOrds = raw.ords;
+    final BinaryDocValues valueByOrd = raw.values;
+
+    // lookupTerm and termsEnum are deliberately left to the superclass
+    // (lookupTerm uses super's binary search).
+    return new RandomAccessOrds() {
+      // Window [ordStart, ordLimit) into allOrds for the current
+      // document; ordUpto is the iteration cursor inside that window.
+      int ordStart;
+      int ordUpto;
+      int ordLimit;
+
+      @Override
+      public long nextOrd() {
+        if (ordUpto == ordLimit) {
+          return NO_MORE_ORDS;
+        }
+        return allOrds.get(ordUpto++);
+      }
+
+      @Override
+      public void setDocument(int docID) {
+        final int start = (int) addressByDoc.get(docID);
+        ordStart = start;
+        ordUpto = start;
+        // The next document's address marks the end of this one's ords.
+        ordLimit = (int) addressByDoc.get(docID+1);
+      }
+
+      @Override
+      public void lookupOrd(long ord, BytesRef result) {
+        valueByOrd.get((int) ord, result);
+      }
+
+      @Override
+      public long getValueCount() {
+        return entry.values.count;
+      }
+
+      @Override
+      public long ordAt(int index) {
+        return allOrds.get(ordStart + index);
+      }
+
+      @Override
+      public int cardinality() {
+        return ordLimit - ordStart;
+      }
+    };
+  }
+  
+  private SortedSetRawValues loadSortedSet(SortedSetEntry entry)  {
+    SortedSetRawValues instance = new SortedSetRawValues();
+    instance.docToOrdAddress = loadNumeric(entry.docToOrdAddress);
+    instance.ords = loadNumeric(entry.ords);
+    instance.values = loadBinary(entry.values);
+    return instance;
+  }
+
+  /**
+   * Returns the bitset stored for a field at {@code offset} in the data
+   * file, or a match-all bitset when no bitset was stored.
+   *
+   * @param fieldNumber key under which the loaded bitset is cached
+   * @param offset      position of the bitset in the data file, or -1 if
+   *                    none was stored
+   * @param length      size of the stored bitset in bytes; must be a
+   *                    multiple of 8 because it is read as longs
+   */
+  private Bits getMissingBits(int fieldNumber, final long offset, final long length)  {
+    if (offset == -1) {
+      return new Bits.MatchAllBits(maxDoc);
+    } else {
+      Bits instance;
+      synchronized(this) {
+        instance = docsWithFieldInstances.get(fieldNumber);
+        if (instance == null) {
+          // Read through a clone so the shared input's position is left alone.
+          IndexInput data = this.data.clone();
+          data.seek(offset);
+          assert length % 8 == 0;
+          // Shift before narrowing so the long count is computed from the
+          // full 64-bit length rather than from a truncated int.
+          long bits[] = new long[(int) (length >> 3)];
+          for (int i = 0; i < bits.length; i++) {
+            bits[i] = data.readLong();
+          }
+          instance = new FixedBitSet(bits, maxDoc);
+          docsWithFieldInstances.put(fieldNumber, instance);
+        }
+      }
+      return instance;
+    }
+  }
+  
+  /**
+   * Returns the bits for {@code field}, chosen by its doc values type.
+   * Sorted and sorted set fields delegate to
+   * {@code DocValues.docsWithValue}; binary and numeric fields use the
+   * bitset described by their metadata entry.
+   *
+   * @throws AssertionError if the field has any other doc values type
+   */
+  @Override
+  public Bits getDocsWithField(FieldInfo field)  {
+    switch(field.getDocValuesType()) {
+      case SORTED_SET:
+        return DocValues.docsWithValue(getSortedSet(field), maxDoc);
+      case SORTED:
+        return DocValues.docsWithValue(getSorted(field), maxDoc);
+      case BINARY:
+        BinaryEntry be = binaries.get(field.number);
+        return getMissingBits(field.number, be.missingOffset, be.missingBytes);
+      case NUMERIC:
+        NumericEntry ne = numerics.get(field.number);
+        return getMissingBits(field.number, ne.missingOffset, ne.missingBytes);
+      default: 
+        // No other doc values type is handled by this reader.
+        throw new AssertionError();
+    }
+  }
+
+  /** Closes the data file input that was opened in the constructor. */
+  @Override
+  public void close()  {
+    data.close();
+  }
+  
+  /**
+   * Holder for the three loaded parts of a sorted set field; filled by
+   * loadSortedSet() and cached so getSortedSet() can wrap it repeatedly.
+   */
+  static class SortedSetRawValues {
+    // For doc d, its ords occupy [get(d), get(d+1)) in 'ords'.
+    NumericDocValues docToOrdAddress;
+    // Ordinals of all documents, addressed through docToOrdAddress.
+    NumericDocValues ords;
+    // Value bytes, looked up by ordinal.
+    BinaryDocValues values;
+  }
+
+  /** Metadata for one numeric block, as read by readNumericEntry(). */
+  static class NumericEntry {
+    // Start of the block in the data file; values begin missingBytes later.
+    long offset;
+    // Number of values to read.
+    int count;
+    // Position of the stored bitset in the data file, or -1 if there is none.
+    long missingOffset;
+    // Byte length of the stored bitset; 0 when missingOffset is -1.
+    long missingBytes;
+    // Width of each stored value in bytes; loadNumeric() handles 1, 2, 4 and 8.
+    byte byteWidth;
+    // Not read or written by any code in this class.
+    int packedIntsVersion;
+  }
+
+  /** Metadata for one binary block, as read by readBinaryEntry(). */
+  static class BinaryEntry {
+    // Start of the value bytes in the data file.
+    long offset;
+    // Position of the stored bitset in the data file, or -1 if there is none.
+    long missingOffset;
+    // Byte length of the stored bitset; 0 when missingOffset is -1.
+    long missingBytes;
+    // Number of values; count + 1 addresses are read for them.
+    int count;
+    // Total length of all value bytes.
+    int numBytes;
+    // The remaining fields are not read or written by any code in this class.
+    int minLength;
+    int maxLength;
+    int packedIntsVersion;
+    int blockSize;
+  }
+  
+  /** Metadata for a sorted field, as read by readSortedEntry(). */
+  static class SortedEntry {
+    // Numeric block mapping each document to an ordinal.
+    NumericEntry docToOrd;
+    // Binary block holding the values, looked up by ordinal.
+    BinaryEntry values;
+  }
+
+  /** Metadata for a sorted set field, as read by readSortedSetEntry(). */
+  static class SortedSetEntry {
+    // Numeric block of per-document start addresses into 'ords'.
+    NumericEntry docToOrdAddress;
+    // Numeric block holding the ordinals of all documents.
+    NumericEntry ords;
+    // Binary block holding the values, looked up by ordinal.
+    BinaryEntry values;
+  }
+  
+  // NOTE(review): not referenced by any code in this class; possibly a
+  // leftover from another doc values producer. Confirm before removing.
+  static class FSTEntry {
+    long offset;
+    long numOrds;
+  }
+}


Mime
View raw message