cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From slebre...@apache.org
Subject [1/2] cassandra git commit: Properly handle range tombstones when reading old format sstables
Date Wed, 21 Oct 2015 13:46:33 GMT
Repository: cassandra
Updated Branches:
  refs/heads/trunk 05eb6022c -> 1b386c5d5


Properly handle range tombstones when reading old format sstables

patch by slebresne; reviewed by blambov for CASSANDRA-10360


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/1fe594d8
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/1fe594d8
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/1fe594d8

Branch: refs/heads/trunk
Commit: 1fe594d834bb3f8fa72db6a5d38ba5372f889d1b
Parents: c0a1cce
Author: Sylvain Lebresne <sylvain@datastax.com>
Authored: Fri Oct 2 10:39:09 2015 +0200
Committer: Sylvain Lebresne <sylvain@datastax.com>
Committed: Wed Oct 21 15:46:04 2015 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../org/apache/cassandra/db/LegacyLayout.java   |  50 --
 .../cassandra/db/UnfilteredDeserializer.java    | 468 ++++++++++++++-----
 .../columniterator/AbstractSSTableIterator.java |   4 +-
 .../columniterator/SSTableReversedIterator.java |  18 +-
 5 files changed, 361 insertions(+), 180 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/1fe594d8/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 0c5c9c5..0529dd8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0
+ * Fix handling of range tombstones when reading old format sstables (CASSANDRA-10360)
  * Aggregate with Initial Condition fails with C* 3.0 (CASSANDRA-10367)
 Merged from 2.2:
  * Expose phi values from failure detector via JMX and tweak debug

http://git-wip-us.apache.org/repos/asf/cassandra/blob/1fe594d8/src/java/org/apache/cassandra/db/LegacyLayout.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/LegacyLayout.java b/src/java/org/apache/cassandra/db/LegacyLayout.java
index 194b6e8..6cfd5d9 100644
--- a/src/java/org/apache/cassandra/db/LegacyLayout.java
+++ b/src/java/org/apache/cassandra/db/LegacyLayout.java
@@ -2200,54 +2200,4 @@ public abstract class LegacyLayout
             return size;
         }
     }
-
-    public static class TombstoneTracker
-    {
-        private final CFMetaData metadata;
-        private final DeletionTime partitionDeletion;
-        private final List<LegacyRangeTombstone> openTombstones = new ArrayList<>();
-
-        public TombstoneTracker(CFMetaData metadata, DeletionTime partitionDeletion)
-        {
-            this.metadata = metadata;
-            this.partitionDeletion = partitionDeletion;
-        }
-
-        public void update(LegacyAtom atom)
-        {
-            if (atom.isCell())
-            {
-                if (openTombstones.isEmpty())
-                    return;
-
-                Iterator<LegacyRangeTombstone> iter = openTombstones.iterator();
-                while (iter.hasNext())
-                {
-                    LegacyRangeTombstone tombstone = iter.next();
-                    if (metadata.comparator.compare(atom.clustering(), tombstone.stop.bound)
>= 0)
-                        iter.remove();
-                }
-            }
-
-            LegacyRangeTombstone tombstone = atom.asRangeTombstone();
-            if (tombstone.deletionTime.supersedes(partitionDeletion) && !tombstone.isRowDeletion(metadata)
&& !tombstone.isCollectionTombstone())
-                openTombstones.add(tombstone);
-        }
-
-        public boolean isShadowed(LegacyAtom atom)
-        {
-            long timestamp = atom.isCell() ? atom.asCell().timestamp : atom.asRangeTombstone().deletionTime.markedForDeleteAt();
-
-            if (partitionDeletion.deletes(timestamp))
-                return true;
-
-            for (LegacyRangeTombstone tombstone : openTombstones)
-            {
-                if (tombstone.deletionTime.deletes(timestamp))
-                    return true;
-            }
-
-            return false;
-        }
-    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/1fe594d8/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
index ef30289..52de159 100644
--- a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
+++ b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
@@ -18,6 +18,11 @@
 package org.apache.cassandra.db;
 
 import java.io.IOException;
+import java.io.IOError;
+import java.util.*;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.PeekingIterator;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -218,18 +223,18 @@ public abstract class UnfilteredDeserializer
         private final boolean readAllAsDynamic;
         private boolean skipStatic;
 
-        private boolean isDone;
-        private boolean isStart = true;
-
-        private final LegacyLayout.CellGrouper grouper;
-        private LegacyLayout.LegacyAtom nextAtom;
+        // The next Unfiltered to return, computed by hasNext()
+        private Unfiltered next;
+        // A temporary storage for an unfiltered that isn't returned next but should be looked
at just afterwards
+        private Unfiltered saved;
 
-        private boolean staticFinished;
-        private LegacyLayout.LegacyAtom savedAtom;
+        private boolean isFirst = true;
 
-        private final LegacyLayout.TombstoneTracker tombstoneTracker;
+        // The Unfiltered as read from the old format input
+        private final UnfilteredIterator iterator;
 
-        private RangeTombstoneMarker closingMarker;
+        // Tracks which tombstone are opened at any given point of the deserialization. Note
that this
+        // is directly populated by UnfilteredIterator.
 
         private OldFormatDeserializer(CFMetaData metadata,
                                       DataInputPlus in,
@@ -238,9 +243,8 @@ public abstract class UnfilteredDeserializer
                                       boolean readAllAsDynamic)
         {
             super(metadata, in, helper);
+            this.iterator = new UnfilteredIterator(partitionDeletion);
             this.readAllAsDynamic = readAllAsDynamic;
-            this.grouper = new LegacyLayout.CellGrouper(metadata, helper);
-            this.tombstoneTracker = new LegacyLayout.TombstoneTracker(metadata, partitionDeletion);
         }
 
         public void setSkipStatic()
@@ -248,167 +252,381 @@ public abstract class UnfilteredDeserializer
             this.skipStatic = true;
         }
 
-        public boolean hasNext() throws IOException
+        private boolean isStatic(Unfiltered unfiltered)
         {
-            return nextAtom != null || (!isDone && deserializeNextAtom());
+            return unfiltered.isRow() && ((Row)unfiltered).isStatic();
         }
 
-        private boolean deserializeNextAtom() throws IOException
+        public boolean hasNext() throws IOException
         {
-            if (staticFinished && savedAtom != null)
+            try
             {
-                nextAtom = savedAtom;
-                savedAtom = null;
-                return true;
-            }
-
-            while (true)
-            {
-                nextAtom = LegacyLayout.readLegacyAtom(metadata, in, readAllAsDynamic);
-                if (nextAtom == null)
+                while (next == null)
                 {
-                    isDone = true;
-                    return false;
-                }
-                else if (tombstoneTracker.isShadowed(nextAtom))
-                {
-                    // We don't want to return shadowed data because that would fail the
contract
-                    // of UnfilteredRowIterator. However the old format could have shadowed
data, so filter it here.
-                    nextAtom = null;
-                    continue;
-                }
+                    if (saved == null && !iterator.hasNext())
+                        return false;
 
-                tombstoneTracker.update(nextAtom);
+                    next = saved == null ? iterator.next() : saved;
+                    saved = null;
 
-                // For static compact tables, the "column_metadata" columns are supposed
to be static, but in the old
-                // format they are intermingled with other columns. We deal with that with
2 different strategy:
-                //  1) for thrift queries, we basically consider everything as a "dynamic"
cell. This is ok because
-                //     that's basically what we end up with on ThriftResultsMerger has done
its thing.
-                //  2) otherwise, we make sure to extract the "static" columns first (see
AbstractSSTableIterator.readStaticRow
-                //     and SSTableSimpleIterator.readStaticRow) as a first pass. So, when
we do a 2nd pass for dynamic columns
-                //     (which in practice we only do for compactions), we want to ignore
those extracted static columns.
-                if (skipStatic && metadata.isStaticCompactTable() && nextAtom.isCell())
-                {
-                    LegacyLayout.LegacyCell cell = nextAtom.asCell();
-                    if (cell.name.column.isStatic())
+                    // The sstable iterators assume that if there is one, the static row
is the first thing this deserializer will return.
+                    // However, in the old format, a range tombstone with an empty start
would sort before any static cell. So we should
+                    // detect that case and return the static parts first if necessary.
+                    if (isFirst && iterator.hasNext() && isStatic(iterator.peek()))
                     {
-                        nextAtom = null;
-                        continue;
+                        saved = next;
+                        next = iterator.next();
                     }
-                }
+                    isFirst = false;
 
-                // We want to fetch the static row as the first thing this deserializer return.
-                // However, in practice, it's possible to have range tombstone before the
static row cells
-                // if that tombstone has an empty start. So if we do, we save it initially
so we can get
-                // to the static parts (if there is any).
-                if (isStart)
-                {
-                    isStart = false;
-                    if (!nextAtom.isCell())
-                    {
-                        LegacyLayout.LegacyRangeTombstone tombstone = nextAtom.asRangeTombstone();
-                        if (tombstone.start.bound.size() == 0)
-                        {
-                            savedAtom = tombstone;
-                            nextAtom = LegacyLayout.readLegacyAtom(metadata, in, readAllAsDynamic);
-                            if (nextAtom == null)
-                            {
-                                // That was actually the only atom so use it after all
-                                nextAtom = savedAtom;
-                                savedAtom = null;
-                            }
-                            else if (!nextAtom.isStatic())
-                            {
-                                // We don't have anything static. So we do want to send first
-                                // the saved atom, so switch
-                                LegacyLayout.LegacyAtom atom = nextAtom;
-                                nextAtom = savedAtom;
-                                savedAtom = atom;
-                            }
-                        }
-                    }
+                    // When reading old tables, we sometimes want to skip static data (due
to how staticly defined column of compact
+                    // tables are handled).
+                    if (skipStatic && isStatic(next))
+                        next = null;
                 }
-
                 return true;
             }
+            catch (IOError e)
+            {
+                if (e.getCause() != null && e.getCause() instanceof IOException)
+                    throw (IOException)e.getCause();
+                throw e;
+            }
         }
 
-        private void checkReady() throws IOException
+        private boolean isRow(LegacyLayout.LegacyAtom atom)
         {
-            if (nextAtom == null)
-                hasNext();
-            assert !isDone;
+            if (atom.isCell())
+                return true;
+
+            LegacyLayout.LegacyRangeTombstone tombstone = atom.asRangeTombstone();
+            return tombstone.isCollectionTombstone() || tombstone.isRowDeletion(metadata);
         }
 
         public int compareNextTo(Slice.Bound bound) throws IOException
         {
-            checkReady();
-            int cmp = metadata.comparator.compare(nextAtom.clustering(), bound);
-            if (cmp != 0 || nextAtom.isCell() || !nextIsRow())
-                return cmp;
-
-            // Comparing the clustering of the LegacyAtom to the bound work most of the time.
There is the case
-            // of LegacyRangeTombstone that are either a collectionTombstone or a rowDeletion.
In those case, their
-            // clustering will be the inclusive start of the row they are a tombstone for,
which can be equal to
-            // the slice bound. But we don't want to return equality because the LegacyTombstone
should stand for
-            // it's row and should sort accordingly. This matter particularly because SSTableIterator
will skip
-            // equal results for the start bound (see SSTableIterator.handlePreSliceData
for details).
-            return bound.isStart() ? 1 : -1;
+            if (!hasNext())
+                throw new IllegalStateException();
+            return metadata.comparator.compare(next.clustering(), bound);
         }
 
         public boolean nextIsRow() throws IOException
         {
-            checkReady();
-            if (nextAtom.isCell())
-                return true;
-
-            LegacyLayout.LegacyRangeTombstone tombstone = nextAtom.asRangeTombstone();
-            return tombstone.isCollectionTombstone() || tombstone.isRowDeletion(metadata);
+            if (!hasNext())
+                throw new IllegalStateException();
+            return next.isRow();
         }
 
         public boolean nextIsStatic() throws IOException
         {
-            checkReady();
-            return nextAtom.isStatic();
+            return nextIsRow() && ((Row)next).isStatic();
         }
 
         public Unfiltered readNext() throws IOException
         {
-            if (!nextIsRow())
+            if (!hasNext())
+                throw new IllegalStateException();
+            Unfiltered toReturn = next;
+            next = null;
+            return toReturn;
+        }
+
+        public void skipNext() throws IOException
+        {
+            if (!hasNext())
+                throw new UnsupportedOperationException();
+            next = null;
+        }
+
+        public void clearState()
+        {
+            next = null;
+            saved = null;
+            iterator.clearState();
+        }
+
+        // Groups atoms from the input into proper Unfiltered.
+        // Note: this could use guava AbstractIterator except that we want to be able to
clear
+        // the internal state of the iterator so it's cleaner to do it ourselves.
+        private class UnfilteredIterator implements PeekingIterator<Unfiltered>
+        {
+            private final AtomIterator atoms;
+            private final LegacyLayout.CellGrouper grouper;
+            private final TombstoneTracker tombstoneTracker;
+
+            private Unfiltered next;
+
+            private UnfilteredIterator(DeletionTime partitionDeletion)
+            {
+                this.grouper = new LegacyLayout.CellGrouper(metadata, helper);
+                this.tombstoneTracker = new TombstoneTracker(partitionDeletion);
+                this.atoms = new AtomIterator(tombstoneTracker);
+            }
+
+            public boolean hasNext()
+            {
+                // Note that we loop on next == null because TombstoneTracker.openNew() could
return null below.
+                while (next == null)
+                {
+                    if (atoms.hasNext())
+                    {
+                        // If a range tombstone closes strictly before the next row/RT, we
need to return that close (or boundary) marker first.
+                        if (tombstoneTracker.hasClosingMarkerBefore(atoms.peek()))
+                        {
+                            next = tombstoneTracker.popClosingMarker();
+                        }
+                        else
+                        {
+                            LegacyLayout.LegacyAtom atom = atoms.next();
+                            next = isRow(atom) ? readRow(atom) : tombstoneTracker.openNew(atom.asRangeTombstone());
+                        }
+                    }
+                    else if (tombstoneTracker.hasOpenTombstones())
+                    {
+                        next = tombstoneTracker.popClosingMarker();
+                    }
+                    else
+                    {
+                        return false;
+                    }
+                }
+                return next != null;
+            }
+
+            private Unfiltered readRow(LegacyLayout.LegacyAtom first)
+            {
+                LegacyLayout.CellGrouper grouper = first.isStatic()
+                                                 ? LegacyLayout.CellGrouper.staticGrouper(metadata,
helper)
+                                                 : this.grouper;
+                grouper.reset();
+                grouper.addAtom(first);
+                // As long as atoms are part of the same row, consume them. Note that the
call to addAtom() uses
+                // atoms.peek() so that the atom is only consumed (by next) if it's part
of the row (addAtom returns true)
+                while (atoms.hasNext() && grouper.addAtom(atoms.peek()))
+                {
+                    atoms.next();
+                }
+                return grouper.getRow();
+            }
+
+            public Unfiltered next()
             {
-                LegacyLayout.LegacyRangeTombstone tombstone = nextAtom.asRangeTombstone();
-                // TODO: this is actually more complex, we can have repeated markers etc....
-                if (closingMarker == null)
+                if (!hasNext())
                     throw new UnsupportedOperationException();
-                closingMarker = new RangeTombstoneBoundMarker(tombstone.stop.bound, tombstone.deletionTime);
-                return new RangeTombstoneBoundMarker(tombstone.start.bound, tombstone.deletionTime);
+                Unfiltered toReturn = next;
+                next = null;
+                return toReturn;
             }
 
-            LegacyLayout.CellGrouper grouper = nextAtom.isStatic()
-                                             ? LegacyLayout.CellGrouper.staticGrouper(metadata,
helper)
-                                             : this.grouper;
+            public Unfiltered peek()
+            {
+                if (!hasNext())
+                    throw new UnsupportedOperationException();
+                return next;
+            }
 
-            grouper.reset();
-            grouper.addAtom(nextAtom);
-            while (deserializeNextAtom() && grouper.addAtom(nextAtom))
+            public void clearState()
             {
-                // Nothing to do, deserializeNextAtom() changes nextAtom and it's then added
to the grouper
+                atoms.clearState();
+                tombstoneTracker.clearState();
+                next = null;
             }
 
-            // if this was the first static row, we're done with it. Otherwise, we're also
done with static.
-            staticFinished = true;
-            return grouper.getRow();
+            public void remove()
+            {
+                throw new UnsupportedOperationException();
+            }
         }
 
-        public void skipNext() throws IOException
+        // Wraps the input of the deserializer to provide an iterator (and skip shadowed
atoms).
+        // Note: this could use guava AbstractIterator except that we want to be able to
clear
+        // the internal state of the iterator so it's cleaner to do it ourselves.
+        private class AtomIterator implements PeekingIterator<LegacyLayout.LegacyAtom>
         {
-            readNext();
+            private final TombstoneTracker tombstoneTracker;
+            private boolean isDone;
+            private LegacyLayout.LegacyAtom next;
+
+            private AtomIterator(TombstoneTracker tombstoneTracker)
+            {
+                this.tombstoneTracker = tombstoneTracker;
+            }
+
+            public boolean hasNext()
+            {
+                if (isDone)
+                    return false;
+
+                while (next == null)
+                {
+                    next = readAtom();
+                    if (next == null)
+                    {
+                        isDone = true;
+                        return false;
+                    }
+
+                    if (tombstoneTracker.isShadowed(next))
+                        next = null;
+                }
+                return true;
+            }
+
+            private LegacyLayout.LegacyAtom readAtom()
+            {
+                try
+                {
+                    return LegacyLayout.readLegacyAtom(metadata, in, readAllAsDynamic);
+                }
+                catch (IOException e)
+                {
+                    throw new IOError(e);
+                }
+            }
+
+            public LegacyLayout.LegacyAtom next()
+            {
+                if (!hasNext())
+                    throw new UnsupportedOperationException();
+                LegacyLayout.LegacyAtom toReturn = next;
+                next = null;
+                return toReturn;
+            }
+
+            public LegacyLayout.LegacyAtom peek()
+            {
+                if (!hasNext())
+                    throw new UnsupportedOperationException();
+                return next;
+            }
+
+            public void clearState()
+            {
+                this.next = null;
+                this.isDone = false;
+            }
+
+            public void remove()
+            {
+                throw new UnsupportedOperationException();
+            }
         }
 
-        public void clearState()
+        /**
+         * Tracks which range tombstones are open when deserializing the old format.
+         */
+        private class TombstoneTracker
         {
-            isDone = false;
-            nextAtom = null;
+            private final DeletionTime partitionDeletion;
+
+            // Open tombstones sorted by their closing bound (i.e. first tombstone is the
first to close).
+            // As we only track non-fully-shadowed ranges, the first range is necessarily
the currently
+            // open tombstone (the one with the higher timestamp).
+            private final SortedSet<LegacyLayout.LegacyRangeTombstone> openTombstones;
+
+            public TombstoneTracker(DeletionTime partitionDeletion)
+            {
+                this.partitionDeletion = partitionDeletion;
+                this.openTombstones = new TreeSet<>((rt1, rt2) -> metadata.comparator.compare(rt1.stop.bound,
rt2.stop.bound));
+            }
+
+            /**
+             * Checks if the provided atom is fully shadowed by the open tombstones of this
tracker (or the partition deletion).
+             */
+            public boolean isShadowed(LegacyLayout.LegacyAtom atom)
+            {
+                long timestamp = atom.isCell() ? atom.asCell().timestamp : atom.asRangeTombstone().deletionTime.markedForDeleteAt();
+
+                if (partitionDeletion.deletes(timestamp))
+                    return true;
+
+                SortedSet<LegacyLayout.LegacyRangeTombstone> coveringTombstones = isRow(atom)
? openTombstones : openTombstones.tailSet(atom.asRangeTombstone());
+                return Iterables.any(coveringTombstones, tombstone -> tombstone.deletionTime.deletes(timestamp));
+            }
+
+            /**
+             * Whether the currently open marker closes stricly before the provided row/RT.
+             */
+            public boolean hasClosingMarkerBefore(LegacyLayout.LegacyAtom atom)
+            {
+                return !openTombstones.isEmpty()
+                    && metadata.comparator.compare(openTombstones.first().stop.bound,
atom.clustering()) < 0;
+            }
+
+            /**
+             * Returns the unfiltered corresponding to closing the currently open marker
(and update the tracker accordingly).
+             */
+            public Unfiltered popClosingMarker()
+            {
+                assert !openTombstones.isEmpty();
+
+                Iterator<LegacyLayout.LegacyRangeTombstone> iter = openTombstones.iterator();
+                LegacyLayout.LegacyRangeTombstone first = iter.next();
+                iter.remove();
+
+                // If that was the last open tombstone, we just want to close it. Otherwise,
we have a boundary with the
+                // next tombstone
+                if (!iter.hasNext())
+                    return new RangeTombstoneBoundMarker(first.stop.bound, first.deletionTime);
+
+                LegacyLayout.LegacyRangeTombstone next = iter.next();
+                return RangeTombstoneBoundaryMarker.makeBoundary(false, first.stop.bound,
first.stop.bound.invert(), first.deletionTime, next.deletionTime);
+            }
+
+            /**
+             * Update the tracker given the provided newly open tombstone. This return the
Unfiltered corresponding to the opening
+             * of said tombstone: this can be a simple open mark, a boundary (if there was
an open tombstone superseded by this new one)
+             * or even null (if the new tombston start is supersedes by the currently open
tombstone).
+             *
+             * Note that this method assume the added tombstone is not fully shadowed, i.e.
that !isShadowed(tombstone). It also
+             * assumes no opened tombstone closes before that tombstone (so !hasClosingMarkerBefore(tombstone)).
+             */
+            public Unfiltered openNew(LegacyLayout.LegacyRangeTombstone tombstone)
+            {
+                if (openTombstones.isEmpty())
+                {
+                    openTombstones.add(tombstone);
+                    return new RangeTombstoneBoundMarker(tombstone.start.bound, tombstone.deletionTime);
+                }
+
+                Iterator<LegacyLayout.LegacyRangeTombstone> iter = openTombstones.iterator();
+                LegacyLayout.LegacyRangeTombstone first = iter.next();
+                if (tombstone.deletionTime.supersedes(first.deletionTime))
+                {
+                    // We're supperseding the currently open tombstone, so we should produce
a boundary that close the currently open
+                    // one and open the new one. We should also add the tombstone, but if
it stop after the first one, we should
+                    // also remove that first tombstone as it won't be useful anymore.
+                    if (metadata.comparator.compare(tombstone.stop.bound, first.stop.bound)
>= 0)
+                        iter.remove();
+
+                    openTombstones.add(tombstone);
+                    return RangeTombstoneBoundaryMarker.makeBoundary(false, tombstone.start.bound.invert(),
tombstone.start.bound, first.deletionTime, tombstone.deletionTime);
+                }
+                else
+                {
+                    // If the new tombstone don't supersedes the currently open tombstone,
we don't have anything to return, we
+                    // just add the new tombstone (because we know tombstone is not fully
shadowed, this imply the new tombstone
+                    // simply extend after the first one and we'll deal with it later)
+                    assert metadata.comparator.compare(tombstone.start.bound, first.stop.bound)
> 0;
+                    openTombstones.add(tombstone);
+                    return null;
+                }
+            }
+
+            public boolean hasOpenTombstones()
+            {
+                return !openTombstones.isEmpty();
+            }
+
+            private boolean formBoundary(LegacyLayout.LegacyRangeTombstone close, LegacyLayout.LegacyRangeTombstone
open)
+            {
+                return metadata.comparator.compare(close.stop.bound, open.start.bound) ==
0;
+            }
+
+            public void clearState()
+            {
+                openTombstones.clear();
+            }
         }
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/1fe594d8/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java
b/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java
index 837f0a0..8900b31 100644
--- a/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java
@@ -323,7 +323,6 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator
             else
             {
                 file.seek(position);
-                deserializer.clearState();
             }
         }
 
@@ -438,7 +437,10 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator
         public void setToBlock(int blockIdx) throws IOException
         {
             if (blockIdx >= 0 && blockIdx < indexes.size())
+            {
                 reader.seekToPosition(columnOffset(blockIdx));
+                reader.deserializer.clearState();
+            }
 
             currentIndexIdx = blockIdx;
             reader.openMarker = blockIdx > 0 ? indexes.get(blockIdx - 1).endOpenMarker
: null;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/1fe594d8/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
b/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
index 06855e3..01a8fb2 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
@@ -155,17 +155,23 @@ public class SSTableReversedIterator extends AbstractSSTableIterator
             buffer.reset();
 
             boolean isFirst = true;
+            boolean isDone = false;
 
             // If the start might be in this block, skip everything that comes before it.
             if (start != null)
             {
-                while (deserializer.hasNext() && deserializer.compareNextTo(start)
<= 0 && !stopReadingDisk())
+                while (!isDone && deserializer.hasNext() && deserializer.compareNextTo(start)
<= 0)
                 {
                     isFirst = false;
                     if (deserializer.nextIsRow())
                         deserializer.skipNext();
                     else
                         updateOpenMarker((RangeTombstoneMarker)deserializer.readNext());
+
+                    // Note that because 'deserializer.hasNext()' may advance our file pointer,
we need to always check stopReadingDisk() before any call to it,
+                    // i.e. just after we've called readNext/skipNext
+                    if (stopReadingDisk())
+                        isDone = true;
                 }
             }
 
@@ -177,14 +183,17 @@ public class SSTableReversedIterator extends AbstractSSTableIterator
             }
 
             // Now deserialize everything until we reach our requested end (if we have one)
-            while (deserializer.hasNext()
-                   && (end == null || deserializer.compareNextTo(end) <= 0)
-                   && !stopReadingDisk())
+            while (!isDone
+                   && deserializer.hasNext()
+                   && (end == null || deserializer.compareNextTo(end) <= 0))
             {
                 Unfiltered unfiltered = deserializer.readNext();
                 if (!isFirst || includeFirst)
                     buffer.add(unfiltered);
 
+                if (stopReadingDisk())
+                    isDone = true;
+
                 isFirst = false;
 
                 if (unfiltered.isRangeTombstoneMarker())
@@ -317,6 +326,7 @@ public class SSTableReversedIterator extends AbstractSSTableIterator
                 ClusteringPrefix firstOfCurrent = indexState.index(currentBlock).firstName;
                 includeFirst = metadata().comparator.compare(lastOfPrevious, firstOfCurrent)
!= 0;
             }
+
             loadFromDisk(canIncludeSliceStart ? slice.start() : null, canIncludeSliceEnd
? slice.end() : null, includeFirst);
         }
 


Mime
View raw message