kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject kudu git commit: KUDU-2251: rowset size can overflow int in RowSetInfo
Date Thu, 11 Jan 2018 22:16:07 GMT
Repository: kudu
Updated Branches:
  refs/heads/branch-1.5.x 0d2a6da33 -> bf6b982fc


KUDU-2251: rowset size can overflow int in RowSetInfo

This overflow causes a CHECK failure from rowset compaction planning in
tablets with rowsets with more than 2GiB of REDO deltafiles:

*** SIGABRT (@0x3ce00007614) received by PID 30228 (TID 0x7fbb52a5e700) from PID 30228; stack
trace: ***
    @     0x7fbb977cb100 (unknown)
    @     0x7fbb95a985f7 __GI_raise
    @     0x7fbb95a99ce8 __GI_abort
    @          0x1af56d9 (unknown)
    @           0x8baf3d google::LogMessage::Fail()
    @           0x8bce93 google::LogMessage::SendToLog()
    @           0x8baa99 google::LogMessage::Flush()
    @           0x8bd81f google::LogMessageFatal::~LogMessageFatal()
    @           0x9f71d6 kudu::tablet::RowSetInfo::CollectOrdered()
    @           0x9d42d9 kudu::tablet::BudgetedCompactionPolicy::SetupKnapsackInput()
    @           0x9d5a3a kudu::tablet::BudgetedCompactionPolicy::PickRowSets()
    @           0x98e28f kudu::tablet::Tablet::UpdateCompactionStats()
    @           0x9aff08 kudu::tablet::CompactRowSetsOp::UpdateStats()
    @          0x1ae02b5 kudu::MaintenanceManager::FindBestOp()
    @          0x1ae2bce kudu::MaintenanceManager::RunSchedulerThread()
    @          0x1b27eda kudu::Thread::SuperviseThread()
    @     0x7fbb977c3dc5 start_thread
    @     0x7fbb95b5921d __clone
    @                0x0 (unknown)

Testing: included is a targeted unit-test which reproduces the overflow
quickly and deterministically. I also reproduced the issue using an
integration test, however that test exposed other issues which need to
be addressed before it can land (KUDU-2253). I'll be working on that in
a follow-up commit.

Change-Id: I598344e22fc1ecbb482bfe85ea3867ddf63963b4
Reviewed-on: http://gerrit.cloudera.org:8080/8941
Tested-by: Kudu Jenkins
Reviewed-by: Jean-Daniel Cryans <jdcryans@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/bf6b982f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/bf6b982f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/bf6b982f

Branch: refs/heads/branch-1.5.x
Commit: bf6b982fc28d2377c8fbe54cd938f58b6156b17b
Parents: 0d2a6da
Author: Dan Burkert <danburkert@apache.org>
Authored: Thu Jan 4 10:14:55 2018 -0800
Committer: Jean-Daniel Cryans <jdcryans@apache.org>
Committed: Thu Jan 11 22:15:40 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/compaction_policy-test.cc | 22 ++++++++++++++++++++++
 src/kudu/tablet/mock-rowsets.h            |  2 +-
 src/kudu/tablet/rowset_info.h             |  9 ++++++---
 3 files changed, 29 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/bf6b982f/src/kudu/tablet/compaction_policy-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/compaction_policy-test.cc b/src/kudu/tablet/compaction_policy-test.cc
index 587d2f4..99e8c09 100644
--- a/src/kudu/tablet/compaction_policy-test.cc
+++ b/src/kudu/tablet/compaction_policy-test.cc
@@ -127,5 +127,27 @@ TEST(TestCompactionPolicy, TestYcsbCompaction) {
       << qualities;
 }
 
+// Regression test for KUDU-2251 which ensures that large (> 2GiB) rowsets don't
+// cause integer overflow in compaction planning.
+TEST(TestCompactionPolicy, KUDU2251) {
+  RowSetVector vec = {
+    std::make_shared<MockDiskRowSet>("C", "c", 1L << 31),
+    std::make_shared<MockDiskRowSet>("B", "a", 1L << 32),
+    std::make_shared<MockDiskRowSet>("A", "b", 1L << 33)
+  };
+
+  RowSetTree tree;
+  ASSERT_OK(tree.Reset(vec));
+
+  const int kBudgetMb = 1L << 30; // enough to select all
+  BudgetedCompactionPolicy policy(kBudgetMb);
+
+  unordered_set<RowSet*> picked;
+  double quality = 0;
+  ASSERT_OK(policy.PickRowSets(tree, &picked, &quality, nullptr));
+  ASSERT_EQ(3, picked.size());
+  ASSERT_GE(quality, 1.0);
+}
+
 } // namespace tablet
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/bf6b982f/src/kudu/tablet/mock-rowsets.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/mock-rowsets.h b/src/kudu/tablet/mock-rowsets.h
index 62deb55..c55bff0 100644
--- a/src/kudu/tablet/mock-rowsets.h
+++ b/src/kudu/tablet/mock-rowsets.h
@@ -153,7 +153,7 @@ class MockRowSet : public RowSet {
 class MockDiskRowSet : public MockRowSet {
  public:
   MockDiskRowSet(std::string first_key, std::string last_key,
-                 int size = 1000000)
+                 uint64_t size = 1000000)
       : first_key_(std::move(first_key)),
         last_key_(std::move(last_key)),
         size_(size) {}

http://git-wip-us.apache.org/repos/asf/kudu/blob/bf6b982f/src/kudu/tablet/rowset_info.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index 1ece56f..619e054 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -17,9 +17,12 @@
 #ifndef KUDU_TABLET_ROWSET_INFO_H_
 #define KUDU_TABLET_ROWSET_INFO_H_
 
+#include <cstdint>
 #include <string>
 #include <vector>
 
+#include "kudu/gutil/ref_counted.h"
+
 namespace kudu {
 namespace tablet {
 
@@ -41,7 +44,7 @@ class RowSetInfo {
                              std::vector<RowSetInfo>* min_key,
                              std::vector<RowSetInfo>* max_key);
 
-  int size_bytes() const { return size_bytes_; }
+  uint64_t size_bytes() const { return size_bytes_; }
   int size_mb() const { return size_mb_; }
 
   // Return the value of the CDF at the minimum key of this candidate.
@@ -83,8 +86,8 @@ class RowSetInfo {
 
   RowSet* const rowset_;
 
-  // Cached version of rowset_->OnDiskDataSize().
-  const int size_bytes_;
+  // Cached version of rowset_->OnDiskDataSizeNoUndos().
+  const uint64_t size_bytes_;
 
   // The size in MB, already clamped so that all rowsets have size at least
   // 1MB. This is cached to avoid the branch during the selection hot path.


Mime
View raw message