kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject [1/3] kudu git commit: KUDU-1677. Scanners can return out-of-order rows during a compaction
Date Tue, 08 Nov 2016 16:54:13 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 25d2efb06 -> 141de3377


KUDU-1677. Scanners can return out-of-order rows during a compaction

This fixes a bug where a scanner could return out-of-order rows if
started during the "duplicating" phase of a compaction.

The issue was that DuplicatingRowSet would create a UnionIterator rather
than a MergeIterator despite the fact that the root iterator was
supposed to be in an ordered mode.

Fixing this involved passing the OrderMode down into the NewRowIterator
call. This argument is only used by the DuplicatingRowSet for now, but
may be useful for other rowset types down the road (e.g. the MemRowSet
might be able to scan faster if it could scan in insertion order instead
of in key order).

I also removed the redundant Tablet::OrderMode enum and instead just
passed the protobuf-based enum (from common.proto) all the way through,
which simplified things a little bit.

Change-Id: I6d5c373e2d56039a3b8be272cb4fea3d5d840e5b
Reviewed-on: http://gerrit.cloudera.org:8080/4983
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f65924de
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f65924de
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f65924de

Branch: refs/heads/master
Commit: f65924de7b2b707a5a248e63c202dcc8a4540837
Parents: 25d2efb
Author: Todd Lipcon <todd@apache.org>
Authored: Mon Nov 7 16:08:50 2016 -0800
Committer: Todd Lipcon <todd@apache.org>
Committed: Tue Nov 8 02:07:23 2016 +0000

----------------------------------------------------------------------
 src/kudu/tablet/diskrowset-test-base.h          |   6 +-
 src/kudu/tablet/diskrowset-test.cc              |   4 +-
 src/kudu/tablet/diskrowset.cc                   |   1 +
 src/kudu/tablet/diskrowset.h                    |   3 +-
 src/kudu/tablet/major_delta_compaction-test.cc  |   5 +-
 src/kudu/tablet/memrowset.cc                    |   1 +
 src/kudu/tablet/memrowset.h                     |   1 +
 src/kudu/tablet/mock-rowsets.h                  |   7 +-
 .../tablet/mt-rowset_delta_compaction-test.cc   |   1 +
 src/kudu/tablet/rowset.cc                       |  37 ++++---
 src/kudu/tablet/rowset.h                        |   2 +
 src/kudu/tablet/tablet-test-base.h              |   9 +-
 src/kudu/tablet/tablet-test-util.h              |   9 +-
 src/kudu/tablet/tablet-test.cc                  | 100 +++++++++++++------
 src/kudu/tablet/tablet.cc                       |  11 +-
 src/kudu/tablet/tablet.h                        |   7 +-
 src/kudu/tablet/tablet_bootstrap-test.cc        |   2 +-
 src/kudu/tserver/tablet_service.cc              |   9 +-
 18 files changed, 129 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/diskrowset-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset-test-base.h b/src/kudu/tablet/diskrowset-test-base.h
index 41fd79e..6245fc8 100644
--- a/src/kudu/tablet/diskrowset-test-base.h
+++ b/src/kudu/tablet/diskrowset-test-base.h
@@ -216,7 +216,7 @@ class TestRowSet : public KuduRowSetTest {
     Schema proj_val = CreateProjection(schema_, { "val" });
     MvccSnapshot snap = MvccSnapshot::CreateSnapshotIncludingAllTransactions();
     gscoped_ptr<RowwiseIterator> row_iter;
-    CHECK_OK(rs.NewRowIterator(&proj_val, snap, &row_iter));
+    CHECK_OK(rs.NewRowIterator(&proj_val, snap, UNORDERED, &row_iter));
     CHECK_OK(row_iter->Init(NULL));
     Arena arena(1024, 1024*1024);
     int batch_size = 10000;
@@ -263,7 +263,7 @@ class TestRowSet : public KuduRowSetTest {
 
     MvccSnapshot snap = MvccSnapshot::CreateSnapshotIncludingAllTransactions();
     gscoped_ptr<RowwiseIterator> row_iter;
-    CHECK_OK(rs.NewRowIterator(&schema_, snap, &row_iter));
+    CHECK_OK(rs.NewRowIterator(&schema_, snap, UNORDERED, &row_iter));
     CHECK_OK(row_iter->Init(&spec));
     vector<string> rows;
     IterateToStringList(row_iter.get(), &rows);
@@ -277,7 +277,7 @@ class TestRowSet : public KuduRowSetTest {
                                 int expected_rows, bool do_log = true) {
     MvccSnapshot snap = MvccSnapshot::CreateSnapshotIncludingAllTransactions();
     gscoped_ptr<RowwiseIterator> row_iter;
-    CHECK_OK(rs.NewRowIterator(&schema, snap, &row_iter));
+    CHECK_OK(rs.NewRowIterator(&schema, snap, UNORDERED, &row_iter));
     CHECK_OK(row_iter->Init(NULL));
 
     int batch_size = 1000;

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/diskrowset-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset-test.cc b/src/kudu/tablet/diskrowset-test.cc
index fa36df4..bc5396d 100644
--- a/src/kudu/tablet/diskrowset-test.cc
+++ b/src/kudu/tablet/diskrowset-test.cc
@@ -361,7 +361,7 @@ TEST_F(TestRowSet, TestFlushedUpdatesRespectMVCC) {
   for (int i = 0; i < 5; i++) {
     SCOPED_TRACE(i);
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(rs->NewRowIterator(&schema_, snaps[i], &iter));
+    ASSERT_OK(rs->NewRowIterator(&schema_, snaps[i], UNORDERED, &iter));
     string data = InitAndDumpIterator(std::move(iter));
     EXPECT_EQ(StringPrintf("(string key=row, uint32 val=%d)", i + 1), data);
   }
@@ -373,7 +373,7 @@ TEST_F(TestRowSet, TestFlushedUpdatesRespectMVCC) {
   for (int i = 0; i < 5; i++) {
     SCOPED_TRACE(i);
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(rs->NewRowIterator(&schema_, snaps[i], &iter));
+    ASSERT_OK(rs->NewRowIterator(&schema_, snaps[i], UNORDERED, &iter));
     string data = InitAndDumpIterator(std::move(iter));
     EXPECT_EQ(StringPrintf("(string key=row, uint32 val=%d)", i + 1), data);
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/diskrowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index 16c7031..7cd312a 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -580,6 +580,7 @@ Status DiskRowSet::NewMajorDeltaCompaction(const vector<ColumnId>&
col_ids,
 
 Status DiskRowSet::NewRowIterator(const Schema *projection,
                                   const MvccSnapshot &mvcc_snap,
+                                  OrderMode /*order*/,
                                   gscoped_ptr<RowwiseIterator>* out) const {
   DCHECK(open_);
   shared_lock<rw_spinlock> l(component_lock_.get_lock());

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/diskrowset.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset.h b/src/kudu/tablet/diskrowset.h
index feb5afd..2398c77 100644
--- a/src/kudu/tablet/diskrowset.h
+++ b/src/kudu/tablet/diskrowset.h
@@ -313,7 +313,8 @@ class DiskRowSet : public RowSet {
   // Read functions.
   ////////////////////
   virtual Status NewRowIterator(const Schema *projection,
-                                const MvccSnapshot &snap,
+                                const MvccSnapshot &mvcc_snap,
+                                OrderMode order,
                                 gscoped_ptr<RowwiseIterator>* out) const OVERRIDE;
 
   virtual Status NewCompactionInput(const Schema* projection,

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/major_delta_compaction-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/major_delta_compaction-test.cc b/src/kudu/tablet/major_delta_compaction-test.cc
index 0c609c2..7f7276e 100644
--- a/src/kudu/tablet/major_delta_compaction-test.cc
+++ b/src/kudu/tablet/major_delta_compaction-test.cc
@@ -142,11 +142,10 @@ class TestMajorDeltaCompaction : public KuduRowSetTest {
     VerifyDataWithMvccAndExpectedState(snap, expected_state_);
   }
 
-  void VerifyDataWithMvccAndExpectedState(MvccSnapshot& snap,
+  void VerifyDataWithMvccAndExpectedState(const MvccSnapshot& snap,
                                           const vector<ExpectedRow>& passed_expected_state)
{
       gscoped_ptr<RowwiseIterator> row_iter;
-      ASSERT_OK(tablet()->NewRowIterator(client_schema_, snap,
-                                                Tablet::UNORDERED, &row_iter));
+      ASSERT_OK(tablet()->NewRowIterator(client_schema_, snap, UNORDERED, &row_iter));
       ASSERT_OK(row_iter->Init(nullptr));
 
       vector<string> results;

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/memrowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/memrowset.cc b/src/kudu/tablet/memrowset.cc
index 2ea5e60..11c4e0c 100644
--- a/src/kudu/tablet/memrowset.cc
+++ b/src/kudu/tablet/memrowset.cc
@@ -298,6 +298,7 @@ MemRowSet::Iterator *MemRowSet::NewIterator() const {
 
 Status MemRowSet::NewRowIterator(const Schema *projection,
                                  const MvccSnapshot &snap,
+                                 OrderMode /*order*/,
                                  gscoped_ptr<RowwiseIterator>* out) const {
   out->reset(NewIterator(projection, snap));
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/memrowset.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/memrowset.h b/src/kudu/tablet/memrowset.h
index 8f81110..dc7e53d 100644
--- a/src/kudu/tablet/memrowset.h
+++ b/src/kudu/tablet/memrowset.h
@@ -277,6 +277,7 @@ class MemRowSet : public RowSet,
   // Alias to conform to DiskRowSet interface
   virtual Status NewRowIterator(const Schema* projection,
                                 const MvccSnapshot& snap,
+                                OrderMode order,
                                 gscoped_ptr<RowwiseIterator>* out) const OVERRIDE;
 
   // Create compaction input.

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/mock-rowsets.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/mock-rowsets.h b/src/kudu/tablet/mock-rowsets.h
index cc7f0f4..2b5622d 100644
--- a/src/kudu/tablet/mock-rowsets.h
+++ b/src/kudu/tablet/mock-rowsets.h
@@ -46,9 +46,10 @@ class MockRowSet : public RowSet {
     LOG(FATAL) << "Unimplemented";
     return Status::OK();
   }
-  virtual Status NewRowIterator(const Schema *projection,
-                                const MvccSnapshot &snap,
-                                gscoped_ptr<RowwiseIterator>* out) const OVERRIDE {
+  virtual Status NewRowIterator(const Schema* /*projection*/,
+                                const MvccSnapshot& /*snap*/,
+                                OrderMode /*order*/,
+                                gscoped_ptr<RowwiseIterator>* /*out*/) const OVERRIDE
{
     LOG(FATAL) << "Unimplemented";
     return Status::OK();
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/mt-rowset_delta_compaction-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/mt-rowset_delta_compaction-test.cc b/src/kudu/tablet/mt-rowset_delta_compaction-test.cc
index ac710a9..e159ac0 100644
--- a/src/kudu/tablet/mt-rowset_delta_compaction-test.cc
+++ b/src/kudu/tablet/mt-rowset_delta_compaction-test.cc
@@ -92,6 +92,7 @@ class TestMultiThreadedRowSetDeltaCompaction : public TestRowSet {
     gscoped_ptr<RowwiseIterator> iter;
     ASSERT_OK(rs->NewRowIterator(&schema_,
                                  MvccSnapshot::CreateSnapshotIncludingAllTransactions(),
+                                 UNORDERED,
                                  &iter));
     uint32_t expected = NoBarrier_Load(&update_counter_);
     ASSERT_OK(iter->Init(nullptr));

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/rowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset.cc b/src/kudu/tablet/rowset.cc
index 71a686d..73adb0c 100644
--- a/src/kudu/tablet/rowset.cc
+++ b/src/kudu/tablet/rowset.cc
@@ -71,25 +71,34 @@ string DuplicatingRowSet::ToString() const {
 
 Status DuplicatingRowSet::NewRowIterator(const Schema *projection,
                                          const MvccSnapshot &snap,
+                                         OrderMode order,
                                          gscoped_ptr<RowwiseIterator>* out) const {
   // Use the original rowset.
   if (old_rowsets_.size() == 1) {
-    return old_rowsets_[0]->NewRowIterator(projection, snap, out);
-  } else {
-    // Union between them
-
-    vector<shared_ptr<RowwiseIterator> > iters;
-    for (const shared_ptr<RowSet> &rowset : old_rowsets_) {
-      gscoped_ptr<RowwiseIterator> iter;
-      RETURN_NOT_OK_PREPEND(rowset->NewRowIterator(projection, snap, &iter),
-                            Substitute("Could not create iterator for rowset $0",
-                                       rowset->ToString()));
-      iters.push_back(shared_ptr<RowwiseIterator>(iter.release()));
-    }
+    return old_rowsets_[0]->NewRowIterator(projection, snap, order, out);
+  }
+  // Union or merge between them
+
+  vector<shared_ptr<RowwiseIterator> > iters;
+  for (const shared_ptr<RowSet> &rowset : old_rowsets_) {
+    gscoped_ptr<RowwiseIterator> iter;
+    RETURN_NOT_OK_PREPEND(rowset->NewRowIterator(projection, snap, order, &iter),
+                          Substitute("Could not create iterator for rowset $0",
+                                     rowset->ToString()));
+    iters.push_back(shared_ptr<RowwiseIterator>(iter.release()));
+  }
 
-    out->reset(new UnionIterator(iters));
-    return Status::OK();
+  switch (order) {
+    case ORDERED:
+      out->reset(new MergeIterator(*projection, iters));
+      break;
+    case UNORDERED:
+      out->reset(new UnionIterator(iters));
+      break;
+    default:
+      LOG(FATAL) << "unknown order: " << order;
   }
+  return Status::OK();
 }
 
 Status DuplicatingRowSet::NewCompactionInput(const Schema* projection,

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/rowset.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset.h b/src/kudu/tablet/rowset.h
index 6e55bf0..094e76c 100644
--- a/src/kudu/tablet/rowset.h
+++ b/src/kudu/tablet/rowset.h
@@ -86,6 +86,7 @@ class RowSet {
   // The returned iterator is not Initted.
   virtual Status NewRowIterator(const Schema *projection,
                                 const MvccSnapshot &snap,
+                                OrderMode order,
                                 gscoped_ptr<RowwiseIterator>* out) const = 0;
 
   // Create the input to be used for a compaction.
@@ -263,6 +264,7 @@ class DuplicatingRowSet : public RowSet {
 
   virtual Status NewRowIterator(const Schema *projection,
                                 const MvccSnapshot &snap,
+                                OrderMode order,
                                 gscoped_ptr<RowwiseIterator>* out) const OVERRIDE;
 
   virtual Status NewCompactionInput(const Schema* projection,

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/tablet-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-test-base.h b/src/kudu/tablet/tablet-test-base.h
index a00c62b..0a7e5c1 100644
--- a/src/kudu/tablet/tablet-test-base.h
+++ b/src/kudu/tablet/tablet-test-base.h
@@ -411,7 +411,7 @@ class TabletTestBase : public KuduTabletTest {
   }
 
   void VerifyTestRowsWithVerifier(int32_t first_row, uint64_t expected_count,
-                                  boost::optional<TestRowVerifier> verifier) {
+                                  const boost::optional<TestRowVerifier>& verifier)
{
     gscoped_ptr<RowwiseIterator> iter;
     ASSERT_OK(tablet()->NewRowIterator(client_schema_, &iter));
     VerifyTestRowsWithRowIteratorAndVerifier(first_row, expected_count, std::move(iter),
verifier);
@@ -419,16 +419,15 @@ class TabletTestBase : public KuduTabletTest {
 
   void VerifyTestRowsWithTimestampAndVerifier(int32_t first_row, uint64_t expected_count,
                                               Timestamp timestamp,
-                                              boost::optional<TestRowVerifier> verifier)
{
+                                              const boost::optional<TestRowVerifier>&
verifier) {
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(tablet()->NewRowIterator(client_schema_, MvccSnapshot(timestamp), Tablet::UNORDERED,
-                                       &iter));
+    ASSERT_OK(tablet()->NewRowIterator(client_schema_, MvccSnapshot(timestamp), UNORDERED,
&iter));
     VerifyTestRowsWithRowIteratorAndVerifier(first_row, expected_count, std::move(iter),
verifier);
   }
 
   void VerifyTestRowsWithRowIteratorAndVerifier(int32_t first_row, uint64_t expected_row_count,
                                                 gscoped_ptr<RowwiseIterator> iter,
-                                                boost::optional<TestRowVerifier> verifier)
{
+                                                const boost::optional<TestRowVerifier>&
verifier) {
     ASSERT_OK(iter->Init(NULL));
     int batch_size = std::max<size_t>(1, std::min<size_t>(expected_row_count
/ 10,
                                                           4L * 1024 * 1024 / schema_.byte_size()));

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/tablet-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-test-util.h b/src/kudu/tablet/tablet-test-util.h
index 977987f..4db2113 100644
--- a/src/kudu/tablet/tablet-test-util.h
+++ b/src/kudu/tablet/tablet-test-util.h
@@ -192,10 +192,7 @@ static inline void CollectRowsForSnapshots(Tablet* tablet,
   for (const MvccSnapshot& snapshot : snaps) {
     DVLOG(1) << "Snapshot: " <<  snapshot.ToString();
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(tablet->NewRowIterator(schema,
-                                            snapshot,
-                                            Tablet::UNORDERED,
-                                            &iter));
+    ASSERT_OK(tablet->NewRowIterator(schema, snapshot, UNORDERED, &iter));
     ASSERT_OK(iter->Init(NULL));
     auto collector = new vector<string>();
     ASSERT_OK(IterateToStringList(iter.get(), collector));
@@ -219,7 +216,7 @@ static inline void VerifySnapshotsHaveSameResult(Tablet* tablet,
     gscoped_ptr<RowwiseIterator> iter;
     ASSERT_OK(tablet->NewRowIterator(schema,
                                             snapshot,
-                                            Tablet::UNORDERED,
+                                            UNORDERED,
                                             &iter));
     ASSERT_OK(iter->Init(NULL));
     vector<string> collector;
@@ -244,7 +241,7 @@ static inline Status DumpRowSet(const RowSet &rs,
                                 vector<string> *out,
                                 int limit = INT_MAX) {
   gscoped_ptr<RowwiseIterator> iter;
-  RETURN_NOT_OK(rs.NewRowIterator(&projection, snap, &iter));
+  RETURN_NOT_OK(rs.NewRowIterator(&projection, snap, UNORDERED, &iter));
   RETURN_NOT_OK(iter->Init(NULL));
   RETURN_NOT_OK(IterateToStringList(iter.get(), out, limit));
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/tablet-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-test.cc b/src/kudu/tablet/tablet-test.cc
index 73ae522..90bc5ef 100644
--- a/src/kudu/tablet/tablet-test.cc
+++ b/src/kudu/tablet/tablet-test.cc
@@ -399,6 +399,22 @@ TYPED_TEST(TestTablet, TestRowIteratorSimple) {
   ASSERT_FALSE(iter->HasNext());
 }
 
+// Hook implementation which runs a lambda function during the 'duplicating'
+// phase of compaction.
+template<class HookFunc>
+class RunDuringDuplicatingRowSetPhase : public Tablet::FlushCompactCommonHooks {
+ public:
+  explicit RunDuringDuplicatingRowSetPhase(HookFunc hook)
+      : hook_(std::move(hook)) {}
+
+  Status PostSwapInDuplicatingRowSet() override {
+    hook_();
+    return Status::OK();
+  }
+ private:
+  const HookFunc hook_;
+};
+
 TYPED_TEST(TestTablet, TestRowIteratorOrdered) {
   // Create interleaved keys in each rowset, so they are clearly not in order
   const int kNumRows = 128;
@@ -415,43 +431,65 @@ TYPED_TEST(TestTablet, TestRowIteratorOrdered) {
     }
   }
 
-  MvccSnapshot snap(*this->tablet()->mvcc_manager());
-  // Iterate through with a few different block sizes.
-  for (int numBlocks = 1; numBlocks < 5; numBlocks*=2) {
-    const int rowsPerBlock = kNumRows / numBlocks;
-    // Make a new ordered iterator for the current snapshot.
-    gscoped_ptr<RowwiseIterator> iter;
-
-    ASSERT_OK(this->tablet()->NewRowIterator(this->client_schema_, snap, Tablet::ORDERED,
&iter));
-    ASSERT_OK(iter->Init(nullptr));
-
-    // Iterate the tablet collecting rows.
-    vector<shared_ptr<faststring> > rows;
-    for (int i = 0; i < numBlocks; i++) {
-      RowBlock block(this->schema_, rowsPerBlock, &this->arena_);
-      ASSERT_TRUE(iter->HasNext());
-      ASSERT_OK(iter->NextBlock(&block));
-      ASSERT_EQ(rowsPerBlock, block.nrows()) << "unexpected number of rows returned";
-      for (int j = 0; j < rowsPerBlock; j++) {
-        RowBlockRow row = block.row(j);
-        shared_ptr<faststring> encoded(new faststring());
-        this->client_schema_.EncodeComparableKey(row, encoded.get());
-        rows.push_back(encoded);
+  // We'll test ordered scans a few times, covering before, during, and
+  // after a compaction.
+  auto RunScans = [this, kNumRows]() {
+    MvccSnapshot snap(*this->tablet()->mvcc_manager());
+    // Iterate through with a few different block sizes.
+    for (int numBlocks = 1; numBlocks < 5; numBlocks*=2) {
+      const int rowsPerBlock = kNumRows / numBlocks;
+      // Make a new ordered iterator for the current snapshot.
+      gscoped_ptr<RowwiseIterator> iter;
+
+      ASSERT_OK(this->tablet()->NewRowIterator(this->client_schema_, snap, ORDERED,
&iter));
+      ASSERT_OK(iter->Init(nullptr));
+
+      // Iterate the tablet collecting rows.
+      vector<shared_ptr<faststring> > rows;
+      for (int i = 0; i < numBlocks; i++) {
+        RowBlock block(this->schema_, rowsPerBlock, &this->arena_);
+        ASSERT_TRUE(iter->HasNext());
+        ASSERT_OK(iter->NextBlock(&block));
+        ASSERT_EQ(rowsPerBlock, block.nrows()) << "unexpected number of rows returned";
+        for (int j = 0; j < rowsPerBlock; j++) {
+          RowBlockRow row = block.row(j);
+          shared_ptr<faststring> encoded(new faststring());
+          this->client_schema_.EncodeComparableKey(row, encoded.get());
+          rows.push_back(encoded);
+        }
       }
+      // Verify the collected rows, checking that they are sorted.
+      for (int j = 1; j < rows.size(); j++) {
+        // Use the schema for comparison, since this test is run with different schemas.
+        ASSERT_LT((*rows[j-1]).ToString(), (*rows[j]).ToString());
+      }
+      ASSERT_FALSE(iter->HasNext());
+      ASSERT_EQ(kNumRows, rows.size());
     }
-    // Verify the collected rows, checking that they are sorted.
-    for (int j = 1; j < rows.size(); j++) {
-      // Use the schema for comparison, since this test is run with different schemas.
-      ASSERT_LT((*rows[j-1]).ToString(), (*rows[j]).ToString());
-    }
-    ASSERT_FALSE(iter->HasNext());
-    ASSERT_EQ(kNumRows, rows.size());
+  };
+
+  {
+    SCOPED_TRACE("With no compaction");
+    NO_FATALS(RunScans());
+  }
+
+  {
+    SCOPED_TRACE("With duplicating rowset");
+    shared_ptr<Tablet::FlushCompactCommonHooks> hooks_shared(
+        new RunDuringDuplicatingRowSetPhase<decltype(RunScans)>(RunScans));
+    this->tablet()->SetFlushCompactCommonHooksForTests(hooks_shared);
+    ASSERT_OK(this->tablet()->Compact(Tablet::FORCE_COMPACT_ALL));
+    NO_FATALS();
   }
-}
 
+  {
+    SCOPED_TRACE("After compaction");
+    NO_FATALS(RunScans());
+  }
+}
 
 template<class SETUP>
-bool TestSetupExpectsNulls(int32_t key_idx) {
+bool TestSetupExpectsNulls(int32_t /*key_idx*/) {
   return false;
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 27d6be7..bcde9d0 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -274,7 +274,7 @@ Status Tablet::NewRowIterator(const Schema &projection,
                               gscoped_ptr<RowwiseIterator> *iter) const {
   // Yield current rows.
   MvccSnapshot snap(mvcc_);
-  return NewRowIterator(projection, snap, Tablet::UNORDERED, iter);
+  return NewRowIterator(projection, snap, UNORDERED, iter);
 }
 
 
@@ -1347,6 +1347,7 @@ Status Tablet::CaptureConsistentIterators(
   const Schema *projection,
   const MvccSnapshot &snap,
   const ScanSpec *spec,
+  OrderMode order,
   vector<shared_ptr<RowwiseIterator> > *iters) const {
   shared_lock<rw_spinlock> l(component_lock_);
 
@@ -1356,7 +1357,7 @@ Status Tablet::CaptureConsistentIterators(
 
   // Grab the memrowset iterator.
   gscoped_ptr<RowwiseIterator> ms_iter;
-  RETURN_NOT_OK(components_->memrowset->NewRowIterator(projection, snap, &ms_iter));
+  RETURN_NOT_OK(components_->memrowset->NewRowIterator(projection, snap, order, &ms_iter));
   ret.push_back(shared_ptr<RowwiseIterator>(ms_iter.release()));
 
   // Cull row-sets in the case of key-range queries.
@@ -1372,7 +1373,7 @@ Status Tablet::CaptureConsistentIterators(
         &interval_sets);
     for (const RowSet *rs : interval_sets) {
       gscoped_ptr<RowwiseIterator> row_it;
-      RETURN_NOT_OK_PREPEND(rs->NewRowIterator(projection, snap, &row_it),
+      RETURN_NOT_OK_PREPEND(rs->NewRowIterator(projection, snap, order, &row_it),
                             Substitute("Could not create iterator for rowset $0",
                                        rs->ToString()));
       ret.push_back(shared_ptr<RowwiseIterator>(row_it.release()));
@@ -1385,7 +1386,7 @@ Status Tablet::CaptureConsistentIterators(
   // fall back to grabbing all rowset iterators
   for (const shared_ptr<RowSet> &rs : components_->rowsets->all_rowsets())
{
     gscoped_ptr<RowwiseIterator> row_it;
-    RETURN_NOT_OK_PREPEND(rs->NewRowIterator(projection, snap, &row_it),
+    RETURN_NOT_OK_PREPEND(rs->NewRowIterator(projection, snap, order, &row_it),
                           Substitute("Could not create iterator for rowset $0",
                                      rs->ToString()));
     ret.push_back(shared_ptr<RowwiseIterator>(row_it.release()));
@@ -1682,7 +1683,7 @@ Status Tablet::Iterator::Init(ScanSpec *spec) {
 
   vector<shared_ptr<RowwiseIterator>> iters;
 
-  RETURN_NOT_OK(tablet_->CaptureConsistentIterators(&projection_, snap_, spec, &iters));
+  RETURN_NOT_OK(tablet_->CaptureConsistentIterators(&projection_, snap_, spec, order_,
&iters));
 
   switch (order_) {
     case ORDERED:

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/tablet.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h
index 77bc9fe..2986345 100644
--- a/src/kudu/tablet/tablet.h
+++ b/src/kudu/tablet/tablet.h
@@ -192,12 +192,6 @@ class Tablet {
   Status NewRowIterator(const Schema &projection,
                         gscoped_ptr<RowwiseIterator> *iter) const;
 
-  // Whether the iterator should return results in order.
-  enum OrderMode {
-    UNORDERED = 0,
-    ORDERED = 1
-  };
-
   // Create a new row iterator for some historical snapshot.
   Status NewRowIterator(const Schema &projection,
                         const MvccSnapshot &snap,
@@ -437,6 +431,7 @@ class Tablet {
   Status CaptureConsistentIterators(const Schema *projection,
                                     const MvccSnapshot &snap,
                                     const ScanSpec *spec,
+                                    OrderMode order,
                                     vector<std::shared_ptr<RowwiseIterator> >
*iters) const;
 
   Status PickRowSetsToCompact(RowSetsInCompaction *picked,

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tablet/tablet_bootstrap-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc
index 86b688b..d7468ef 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -144,7 +144,7 @@ class BootstrapTest : public LogTestBase {
     // see the bootstrapped operation. This is likely due to KUDU-138 -- perhaps
     // we aren't properly setting up the clock after bootstrap.
     MvccSnapshot snap = MvccSnapshot::CreateSnapshotIncludingAllTransactions();
-    ASSERT_OK(tablet->NewRowIterator(schema_, snap, Tablet::UNORDERED, &iter));
+    ASSERT_OK(tablet->NewRowIterator(schema_, snap, UNORDERED, &iter));
     ASSERT_OK(iter->Init(nullptr));
     ASSERT_OK(IterateToStringList(iter.get(), results));
     for (const string& result : *results) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/f65924de/src/kudu/tserver/tablet_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index 740908b..592e8d1 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -1791,13 +1791,10 @@ Status TabletServiceImpl::HandleScanAtSnapshot(const NewScanRequestPB&
scan_pb,
   tablet->metrics()->snapshot_read_inflight_wait_duration->Increment(duration_usec);
   TRACE("All operations in snapshot committed. Waited for $0 microseconds", duration_usec);
 
-  tablet::Tablet::OrderMode order;
-  switch (scan_pb.order_mode()) {
-    case UNORDERED: order = tablet::Tablet::UNORDERED; break;
-    case ORDERED: order = tablet::Tablet::ORDERED; break;
-    default: LOG(FATAL) << "Unexpected order mode.";
+  if (scan_pb.order_mode() == UNKNOWN_ORDER_MODE) {
+    return Status::InvalidArgument("Unknown order mode specified");
   }
-  RETURN_NOT_OK(tablet->NewRowIterator(projection, snap, order, iter));
+  RETURN_NOT_OK(tablet->NewRowIterator(projection, snap, scan_pb.order_mode(), iter));
   *snap_timestamp = tmp_snap_timestamp;
   return Status::OK();
 }


Mime
View raw message