cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jbel...@apache.org
Subject svn commit: r1006225 - in /cassandra/trunk: ./ src/java/org/apache/cassandra/cli/ src/java/org/apache/cassandra/io/sstable/ src/java/org/apache/cassandra/utils/
Date Sat, 09 Oct 2010 21:00:20 GMT
Author: jbellis
Date: Sat Oct  9 21:00:20 2010
New Revision: 1006225

URL: http://svn.apache.org/viewvc?rev=1006225&view=rev
Log:
reduce index sample time by 75%.  patch by Stu Hood; reviewed by jbellis for CASSANDRA-1572

Modified:
    cassandra/trunk/CHANGES.txt
    cassandra/trunk/src/java/org/apache/cassandra/cli/CliClient.java
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
    cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
    cassandra/trunk/src/java/org/apache/cassandra/utils/FBUtilities.java

Modified: cassandra/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/trunk/CHANGES.txt?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/CHANGES.txt (original)
+++ cassandra/trunk/CHANGES.txt Sat Oct  9 21:00:20 2010
@@ -23,6 +23,7 @@ dev
  * Allow dynamic secondary index creation and destruction (CASSANDRA-1532)
  * log auto-guessed memtable thresholds (CASSANDRA-1595)
  * add ColumnDef support to cli (CASSANDRA-1583)
+ * reduce index sample time by 75% (CASSANDRA-1572)
 
 
 0.7-beta2

Modified: cassandra/trunk/src/java/org/apache/cassandra/cli/CliClient.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/cli/CliClient.java?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/cli/CliClient.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/cli/CliClient.java Sat Oct  9 21:00:20 2010
@@ -545,24 +545,11 @@ public class CliClient 
         CommonTree columnFamilySpec = (CommonTree)ast.getChild(0);
         assert(columnFamilySpec.getType() == CliParser.NODE_COLUMN_ACCESS);
 
-        String key = CliCompiler.getKey(columnFamilySpec);
-        String columnFamily = CliCompiler.getColumnFamily(columnFamilySpec);
-        int columnSpecCnt = CliCompiler.numColumnSpecifiers(columnFamilySpec);
-
-        List<String> cfnames = new ArrayList<String>();
-        for (CfDef cfd : keyspacesMap.get(keySpace).cf_defs)
-        {
-            cfnames.add(cfd.name);
-        }
- 
-        int idx = cfnames.indexOf(columnFamily);
-        if (idx == -1)
-        {
-            css_.out.println("No such column family: " + columnFamily);
-            return;
-        }
-
-        boolean isSuper = keyspacesMap.get(keySpace).cf_defs.get(idx).column_type.equals("Super");
+        final String key = CliCompiler.getKey(columnFamilySpec);
+        final String columnFamily = CliCompiler.getColumnFamily(columnFamilySpec);
+        final int columnSpecCnt = CliCompiler.numColumnSpecifiers(columnFamilySpec);
+        final CfDef columnFamilyDef = getCfDef(columnFamily); 
+        final boolean isSuper = columnFamilyDef.comparator_type.equals("Super");
         
         byte[] superColumnName = null;
         String columnName;
@@ -601,12 +588,19 @@ public class CliClient 
             return;
         }
 
+        final byte[] columnNameInBytes = columnNameAsByteArray(columnName, columnFamily);
+        final AbstractType validator = getValidatorForValue(columnFamilyDef, columnNameInBytes);
+        
         // Perform a get()
-        ColumnPath path = new ColumnPath(columnFamily).setSuper_column(superColumnName).setColumn(columnNameAsByteArray(columnName,
columnFamily));
+        ColumnPath path = new ColumnPath(columnFamily).setSuper_column(superColumnName).setColumn(columnNameInBytes);
         Column column = thriftClient_.get(key.getBytes(), path, ConsistencyLevel.ONE).column;
+
+        final byte[] columnValue = column.getValue();
+        final String valueAsString = (validator == null) ? new String(columnValue, "UTF-8")
: validator.getString(columnValue);
+
         // print results
         css_.out.printf("=> (column=%s, value=%s, timestamp=%d)\n",
-                        formatColumnName(keySpace, columnFamily, column), new String(column.value,
"UTF-8"), column.timestamp);
+                        formatColumnName(keySpace, columnFamily, column), valueAsString,
column.timestamp);
     }
 
     // Execute SET statement
@@ -651,9 +645,13 @@ public class CliClient 
             columnName = CliCompiler.getColumn(columnFamilySpec, 1);
         }
 
+
+        final byte[] columnNameInBytes  = columnNameAsByteArray(columnName, columnFamily);
+        final byte[] columnValueInBytes = columnValueAsByteArray(columnNameInBytes, columnFamily,
value);
+        
         // do the insert
         thriftClient_.insert(key.getBytes(), new ColumnParent(columnFamily).setSuper_column(superColumnName),
-                             new Column(columnNameAsByteArray(columnName, columnFamily),
value.getBytes(), FBUtilities.timestampMicros()), ConsistencyLevel.ONE);
+                             new Column(columnNameInBytes, columnValueInBytes, FBUtilities.timestampMicros()),
ConsistencyLevel.ONE);
         
         css_.out.println("Value inserted.");
     }
@@ -1103,25 +1101,46 @@ public class CliClient 
         CliMain.connect(css_.hostName, css_.thriftPort);
     }
 
-    private CfDef getCfDef(String ksname, String cfname)
+    /**
+     * To get Column Family Definition object from specified keyspace
+     * @param keySpaceName key space name to search for specific column family
+     * @param columnFamilyName column family name 
+     * @return CfDef - Column family definition object
+     */
+    private CfDef getCfDef(final String keySpaceName, final String columnFamilyName)
     {
-        List<String> cfnames = new ArrayList<String>();
-        KsDef ksd = keyspacesMap.get(ksname);
-        for (CfDef cfd : ksd.cf_defs) {
-            cfnames.add(cfd.name);
+        final KsDef keySpaceDefinition = keyspacesMap.get(keySpaceName);
+        
+        for (CfDef columnFamilyDef : keySpaceDefinition.cf_defs)
+        {
+            if (columnFamilyDef.name.equals(columnFamilyName))
+            {
+                return columnFamilyDef;
+            }
         }
-        int idx = cfnames.indexOf(cfname);
-        return ksd.cf_defs.get(idx);
+
+        throw new RuntimeException("No such column family: " + columnFamilyName);
     }
 
     /**
+     * Uses getCfDef(keySpaceName, columnFamilyName) with current keyspace
+     * @param columnFamilyName column family name to find in specified keyspace
+     * @return CfDef - Column family definition object
+     */
+    private CfDef getCfDef(final String columnFamilyName)
+    {
+        return getCfDef(this.keySpace, columnFamilyName);
+    }
+    
+    /**
      * Used to parse meta tree and compile meta attributes into List<ColumnDef>
      * @param meta (Tree representing Array of the hashes with metadata attributes)
      * @return List<ColumnDef> List of the ColumnDef's
      * 
      * meta is in following format - ^(ARRAY ^(HASH ^(PAIR .. ..) ^(PAIR .. ..)) ^(HASH ...))
      */
-    private List<ColumnDef> getCFColumnMetaFromTree(final Tree meta) {
+    private List<ColumnDef> getCFColumnMetaFromTree(final Tree meta)
+    {
         // this list will be returned
         final List<ColumnDef> columnDefinitions = new ArrayList<ColumnDef>();
         
@@ -1183,7 +1202,13 @@ public class CliClient 
         return columnDefinitions;
     }
 
-    private IndexType getIndexTypeFromString(final String indexTypeAsString) {
+    /**
+     * Getting IndexType object from indexType string
+     * @param indexTypeAsString - string returned by the parser corresponding to an IndexType
+     * @return IndexType - an IndexType object
+     */
+    private IndexType getIndexTypeFromString(final String indexTypeAsString)
+    {
         final Integer indexTypeId;
         final IndexType indexType;
 
@@ -1203,49 +1228,130 @@ public class CliClient 
         return indexType;
     }
 
-    private byte[] columnNameAsByteArray(final String column, final String columnFamily)
throws NoSuchFieldException, InstantiationException, IllegalAccessException, UnsupportedEncodingException
+    /**
+     * Converts object represented as string into byte[] according to comparator
+     * @param object - object to convert into byte array
+     * @param comparator - comparator used to convert object
+     * @return byte[] - object in the byte array representation
+     * @throws UnsupportedEncodingException - raised by String.getBytes(encoding)
+     */
+    private byte[] getBytesAccordingToType(final String object, final AbstractType comparator)
throws UnsupportedEncodingException
     {
-        List<String> cfnames = new ArrayList<String>();
-        for (CfDef cfd : keyspacesMap.get(keySpace).cf_defs)
-        {
-            cfnames.add(cfd.name);
-        }
-
-        int idx = cfnames.indexOf(columnFamily);
-        if (idx == -1)
-        {
-            throw new NoSuchFieldException("No such column family: " + columnFamily);
-        }
-
-        final String comparatorClass  = keyspacesMap.get(keySpace).cf_defs.get(idx).comparator_type;
-        final AbstractType comparator = getFormatTypeForColumn(comparatorClass);
-
         if (comparator instanceof LongType)
         {
-            return FBUtilities.toByteArray(Long.valueOf(column));
+            final long longType;
+            try
+            {
+                longType = Long.valueOf(object);
+            }
+            catch (Exception e)
+            {
+                throw new RuntimeException("'" + object + "' could not be translated into
a LongType.");
+            }
+
+            return FBUtilities.toByteArray(longType);
         }
         else if (comparator instanceof IntegerType)
         {
             final BigInteger integerType;
-            
+
             try
             {
-                integerType =  new BigInteger(column);
+                integerType =  new BigInteger(object);
             }
             catch (Exception e)
             {
-                throw new RuntimeException("Column name '" + column + "' could not be translated
into an IntegerType.");
+                throw new RuntimeException("'" + object + "' could not be translated into
an IntegerType.");
             }
-            
+
             return integerType.toByteArray();
         }
         else if (comparator instanceof AsciiType)
         {
-            return column.getBytes("US-ASCII");
+            return object.getBytes("US-ASCII");
         }
         else
         {
-            return column.getBytes("UTF-8");
-        }   
+            return object.getBytes("UTF-8");
+        }
+    }
+    
+    /**
+     * Converts column name into byte[] according to comparator type
+     * @param column - column name from parser
+     * @param columnFamily - column family name from parser
+     * @return byte[] - array of bytes in which column name was converted according to comparator
type
+     * @throws NoSuchFieldException - raised from getFormatTypeForColumn call
+     * @throws InstantiationException - raised from getFormatTypeForColumn call
+     * @throws IllegalAccessException - raised from getFormatTypeForColumn call
+     * @throws UnsupportedEncodingException - raised from getBytes() calls
+     */
+    private byte[] columnNameAsByteArray(final String column, final String columnFamily)
throws NoSuchFieldException, InstantiationException, IllegalAccessException, UnsupportedEncodingException
+    {
+        final CfDef columnFamilyDef   = getCfDef(columnFamily);
+        final String comparatorClass  = columnFamilyDef.comparator_type;
+
+        return getBytesAccordingToType(column, getFormatTypeForColumn(comparatorClass));
  
+    }
+
+    /**
+     * Converts column value into byte[] according to validation class
+     * @param columnName - column name to which value belongs
+     * @param columnFamilyName - column family name
+     * @param columnValue - actual column value
+     * @return byte[] - value in byte array representation
+     */
+    private byte[] columnValueAsByteArray(final byte[] columnName, final String columnFamilyName,
final String columnValue)
+    {
+        final CfDef columnFamilyDef = getCfDef(columnFamilyName);
+        
+        for (ColumnDef columnDefinition : columnFamilyDef.getColumn_metadata())
+        {
+            final byte[] currentColumnName = columnDefinition.getName();
+
+            if (Arrays.equals(currentColumnName, columnName))
+            {
+                try
+                {
+                    final String validationClass = columnDefinition.getValidation_class();
+                    return getBytesAccordingToType(columnValue, getFormatTypeForColumn(validationClass));
+                }
+                catch (Exception e)
+                {
+                    throw new RuntimeException(e.getMessage(), e);
+                }
+            }
+        }
+
+        // if no validation class was set, fall back to a plain .getBytes()
+        return columnValue.getBytes();
+    }
+
+    /**
+     * Get validator for specific column value
+     * @param ColumnFamilyDef - CfDef object representing column family with metadata
+     * @param columnNameInBytes - column name as byte array
+     * @return AbstractType - validator for column value
+     */
+    private AbstractType getValidatorForValue(final CfDef ColumnFamilyDef, final byte[] columnNameInBytes)
+    {
+        for (ColumnDef columnDefinition : ColumnFamilyDef.getColumn_metadata())
+        {
+            final byte[] nameInBytes = columnDefinition.getName();
+
+            if (Arrays.equals(nameInBytes, columnNameInBytes))
+            {
+                try
+                {
+                    return getFormatTypeForColumn(columnDefinition.getValidation_class());
+                }
+                catch (Exception e)
+                {
+                    throw new RuntimeException(e.getMessage(), e);
+                }
+            }
+        }
+
+        return null;
     }
 }

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexSummary.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexSummary.java?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexSummary.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/IndexSummary.java Sat Oct  9
21:00:20 2010
@@ -30,24 +30,45 @@ import org.apache.cassandra.config.Datab
 import org.apache.cassandra.config.Config;
 import org.apache.cassandra.db.DecoratedKey;
 
+/**
+ * Two approaches to building an IndexSummary:
+ * 1. Call maybeAddEntry with every potential index entry
+ * 2. Call shouldAddEntry, [addEntry,] incrementRowid
+ */
 public class IndexSummary
 {
     private ArrayList<KeyPosition> indexPositions;
-    private int keysWritten = 0;
-    private long lastIndexPosition;
+    private long keysWritten = 0;
+
+    public IndexSummary(long expectedKeys)
+    {
+        long expectedEntries = expectedKeys / DatabaseDescriptor.getIndexInterval();
+        if (expectedEntries > Integer.MAX_VALUE)
+            // TODO: that's a _lot_ of keys, or a very low interval
+            throw new RuntimeException("Cannot use index_interval of " + DatabaseDescriptor.getIndexInterval()
+ " with " + expectedKeys + " (expected) keys.");
+        indexPositions = new ArrayList<KeyPosition>((int)expectedEntries);
+    }
+
+    public void incrementRowid()
+    {
+        keysWritten++;
+    }
+
+    public boolean shouldAddEntry()
+    {
+        return keysWritten % DatabaseDescriptor.getIndexInterval() == 0;
+    }
+
+    public void addEntry(DecoratedKey decoratedKey, long indexPosition)
+    {
+        indexPositions.add(new KeyPosition(decoratedKey, indexPosition));
+    }
 
     public void maybeAddEntry(DecoratedKey decoratedKey, long indexPosition)
     {
-        if (keysWritten++ % DatabaseDescriptor.getIndexInterval() == 0)
-        {
-            if (indexPositions == null)
-            {
-                indexPositions  = new ArrayList<KeyPosition>();
-            }
-            KeyPosition info = new KeyPosition(decoratedKey, indexPosition);
-            indexPositions.add(info);
-        }
-        lastIndexPosition = indexPosition;
+        if (shouldAddEntry())
+            addEntry(decoratedKey, indexPosition);
+        incrementRowid();
     }
 
     public List<KeyPosition> getIndexPositions()
@@ -60,18 +81,13 @@ public class IndexSummary
         indexPositions.trimToSize();
     }
 
-    public long getLastIndexPosition()
-    {
-        return lastIndexPosition;
-    }
-
     /**
      * This is a simple container for the index Key and its corresponding position
      * in the index file. Binary search is performed on a list of these objects
      * to find where to start looking for the index entry containing the data position
      * (which will be turned into a PositionSize object)
      */
-    public static class KeyPosition implements Comparable<KeyPosition>
+    public static final class KeyPosition implements Comparable<KeyPosition>
     {
         public final DecoratedKey key;
         public final long indexPosition;

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTable.java Sat Oct  9 21:00:20
2010
@@ -32,7 +32,9 @@ import org.slf4j.LoggerFactory;
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.dht.IPartitioner;
 import org.apache.cassandra.io.util.FileUtils;
+import org.apache.cassandra.io.util.BufferedRandomAccessFile;
 import org.apache.cassandra.utils.EstimatedHistogram;
+import org.apache.cassandra.utils.FBUtilities;
 import org.apache.cassandra.utils.Pair;
 
 /**
@@ -182,6 +184,42 @@ public abstract class SSTable
         return components;
     }
 
+    /** @return An estimate of the number of keys contained in the given data file. */
+    static long estimateRowsFromData(Descriptor desc, BufferedRandomAccessFile dfile) throws
IOException
+    {
+        // collect sizes for the first 1000 keys, or first 100 megabytes of data
+        final int SAMPLES_CAP = 1000, BYTES_CAP = (int)Math.min(100000000, dfile.length());
+        int keys = 0;
+        long dataPosition = 0;
+        while (dataPosition < BYTES_CAP && keys < SAMPLES_CAP)
+        {
+            dfile.seek(dataPosition);
+            FBUtilities.skipShortByteArray(dfile);
+            long dataSize = SSTableReader.readRowSize(dfile, desc);
+            dataPosition = dfile.getFilePointer() + dataSize;
+            keys++;
+        }
+        dfile.seek(0);
+        return dfile.length() / (dataPosition / keys);
+    }
+
+    /** @return An estimate of the number of keys contained in the given index file. */
+    static long estimateRowsFromIndex(BufferedRandomAccessFile ifile) throws IOException
+    {
+        // collect sizes for the first 10000 keys, or first 10 megabytes of data
+        final int SAMPLES_CAP = 10000, BYTES_CAP = (int)Math.min(10000000, ifile.length());
+        int keys = 0;
+        while (ifile.getFilePointer() < BYTES_CAP && keys < SAMPLES_CAP)
+        {
+            FBUtilities.skipShortByteArray(ifile);
+            ifile.skipBytes(8);
+            keys++;
+        }
+        long estimatedRows = ifile.length() / (ifile.getFilePointer() / keys);
+        ifile.seek(0);
+        return estimatedRows;
+    }
+
     public static long getTotalBytes(Iterable<SSTableReader> sstables)
     {
         long sum = 0;

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableReader.java Sat Oct  9
21:00:20 2010
@@ -252,11 +252,11 @@ public class SSTableReader extends SSTab
      */
     private void load(boolean recreatebloom, Set<DecoratedKey> keysToLoadInCache) throws
IOException
     {
+        boolean cacheLoading = keyCache != null && !keysToLoadInCache.isEmpty();
         SegmentedFile.Builder ibuilder = SegmentedFile.getBuilder(DatabaseDescriptor.getIndexAccessMode());
         SegmentedFile.Builder dbuilder = SegmentedFile.getBuilder(DatabaseDescriptor.getDiskAccessMode());
 
         // we read the positions in a BRAF so we don't have to worry about an entry spanning
a mmap boundary.
-        indexSummary = new IndexSummary();
         BufferedRandomAccessFile input = new BufferedRandomAccessFile(descriptor.filenameFor(Component.PRIMARY_INDEX),
"r");
         try
         {
@@ -264,25 +264,36 @@ public class SSTableReader extends SSTab
                 keyCache.updateCapacity(keyCache.getSize() + keysToLoadInCache.size());
 
             long indexSize = input.length();
+            long estimatedKeys = SSTable.estimateRowsFromIndex(input);
+            indexSummary = new IndexSummary(estimatedKeys);
             if (recreatebloom)
                 // estimate key count based on index length
-                bf = BloomFilter.getFilter((int)(input.length() / 32), 15);
+                bf = BloomFilter.getFilter(estimatedKeys, 15);
             while (true)
             {
                 long indexPosition = input.getFilePointer();
                 if (indexPosition == indexSize)
                     break;
 
-                DecoratedKey decoratedKey = decodeKey(partitioner, descriptor, FBUtilities.readShortByteArray(input));
-                if (recreatebloom)
-                    bf.add(decoratedKey.key);
+                boolean shouldAddEntry = indexSummary.shouldAddEntry();
+                byte[] key = (shouldAddEntry || cacheLoading || recreatebloom)
+                             ? FBUtilities.readShortByteArray(input)
+                             : FBUtilities.skipShortByteArray(input);
                 long dataPosition = input.readLong();
+                if (key != null)
+                {
+                    DecoratedKey decoratedKey = decodeKey(partitioner, descriptor, key);
+                    if (recreatebloom)
+                        bf.add(decoratedKey.key);
+                    if (shouldAddEntry)
+                        indexSummary.addEntry(decoratedKey, indexPosition);
+                    if (cacheLoading && keysToLoadInCache.contains(decoratedKey))
+                        keyCache.put(new Pair(descriptor, decoratedKey), dataPosition);
+                }
 
-                indexSummary.maybeAddEntry(decoratedKey, indexPosition);
+                indexSummary.incrementRowid();
                 ibuilder.addPotentialBoundary(indexPosition);
                 dbuilder.addPotentialBoundary(dataPosition);
-                if (keyCache != null && keysToLoadInCache.contains(decoratedKey))
-                    keyCache.put(new Pair<Descriptor, DecoratedKey>(descriptor, decoratedKey),
dataPosition);
             }
             indexSummary.complete();
         }
@@ -292,7 +303,6 @@ public class SSTableReader extends SSTab
         }
 
         // finalize the state of the reader
-        indexSummary.complete();
         ifile = ibuilder.complete(descriptor.filenameFor(Component.PRIMARY_INDEX));
         dfile = dbuilder.complete(descriptor.filenameFor(Component.DATA));
     }

Modified: cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java Sat Oct  9
21:00:20 2010
@@ -189,27 +189,6 @@ public class SSTableWriter extends SSTab
         return dataFile.getFilePointer();
     }
     
-    /**
-     * @return An estimate of the number of keys contained in the given data file.
-     */
-    private static long estimateRows(Descriptor desc, BufferedRandomAccessFile dfile) throws
IOException
-    {
-        // collect sizes for the first 1000 keys, or first 100 megabytes of data
-        final int SAMPLES_CAP = 1000, BYTES_CAP = (int)Math.min(100000000, dfile.length());
-        int keys = 0;
-        long dataPosition = 0;
-        while (dataPosition < BYTES_CAP && keys < SAMPLES_CAP)
-        {
-            dfile.seek(dataPosition);
-            FBUtilities.readShortByteArray(dfile);
-            long dataSize = SSTableReader.readRowSize(dfile, desc);
-            dataPosition = dfile.getFilePointer() + dataSize;
-            keys++;
-        }
-        dfile.seek(0);
-        return dfile.length() / (dataPosition / keys);
-    }
-
     public static Builder createBuilder(Descriptor desc)
     {
         if (!desc.isLatestVersion)
@@ -257,7 +236,7 @@ public class SSTableWriter extends SSTab
             long estimatedRows;
             try
             {
-                estimatedRows = estimateRows(desc, dfile);
+                estimatedRows = SSTable.estimateRowsFromData(desc, dfile);
                 iwriter = new IndexWriter(desc, StorageService.getPartitioner(), estimatedRows);
             }
             catch(IOException e)
@@ -340,7 +319,7 @@ public class SSTableWriter extends SSTab
             this.partitioner = part;
             indexFile = new BufferedRandomAccessFile(desc.filenameFor(SSTable.COMPONENT_INDEX),
"rw", 8 * 1024 * 1024);
             builder = SegmentedFile.getBuilder(DatabaseDescriptor.getIndexAccessMode());
-            summary = new IndexSummary();
+            summary = new IndexSummary(keyCount);
             bf = BloomFilter.getFilter(keyCount, 15);
         }
 

Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/FBUtilities.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/FBUtilities.java?rev=1006225&r1=1006224&r2=1006225&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/FBUtilities.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/FBUtilities.java Sat Oct  9 21:00:20
2010
@@ -292,18 +292,33 @@ public class FBUtilities
         }
     }
 
+    /** @return An unsigned short in an integer. */
+    private static int readShortLength(DataInput in) throws IOException
+    {
+        int length = (in.readByte() & 0xFF) << 8;
+        return length | (in.readByte() & 0xFF);
+    }
+
     public static byte[] readShortByteArray(DataInput in) throws IOException
     {
-        int length = 0;
-        length |= (in.readByte() & 0xFF) << 8;
-        length |= in.readByte() & 0xFF;
-        if (!(0 <= length && length <= MAX_UNSIGNED_SHORT))
-            throw new IOException("Corrupt name length " + length);
-        byte[] bytes = new byte[length];
+        byte[] bytes = new byte[readShortLength(in)];
         in.readFully(bytes);
         return bytes;
     }
 
+    /** @return null */
+    public static byte[] skipShortByteArray(DataInput in) throws IOException
+    {
+        int skip = readShortLength(in);
+        while (skip > 0)
+        {
+            int skipped = in.skipBytes(skip);
+            if (skipped == 0) throw new EOFException();
+            skip -= skipped;
+        }
+        return null;
+    }
+
     public static byte[] hexToBytes(String str)
     {
         if (str.length() % 2 == 1)



Mime
View raw message