kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject [3/3] kudu git commit: KUDU-2251: rowset size can overflow int in RowSetInfo
Date Thu, 11 Jan 2018 22:16:25 GMT
KUDU-2251: rowset size can overflow int in RowSetInfo

This overflow causes a CHECK failure from rowset compaction planning in
tablets with rowsets with more than 2GiB of REDO deltafiles:

*** SIGABRT (@0x3ce00007614) received by PID 30228 (TID 0x7fbb52a5e700) from PID 30228; stack
trace: ***
    @     0x7fbb977cb100 (unknown)
    @     0x7fbb95a985f7 __GI_raise
    @     0x7fbb95a99ce8 __GI_abort
    @          0x1af56d9 (unknown)
    @           0x8baf3d google::LogMessage::Fail()
    @           0x8bce93 google::LogMessage::SendToLog()
    @           0x8baa99 google::LogMessage::Flush()
    @           0x8bd81f google::LogMessageFatal::~LogMessageFatal()
    @           0x9f71d6 kudu::tablet::RowSetInfo::CollectOrdered()
    @           0x9d42d9 kudu::tablet::BudgetedCompactionPolicy::SetupKnapsackInput()
    @           0x9d5a3a kudu::tablet::BudgetedCompactionPolicy::PickRowSets()
    @           0x98e28f kudu::tablet::Tablet::UpdateCompactionStats()
    @           0x9aff08 kudu::tablet::CompactRowSetsOp::UpdateStats()
    @          0x1ae02b5 kudu::MaintenanceManager::FindBestOp()
    @          0x1ae2bce kudu::MaintenanceManager::RunSchedulerThread()
    @          0x1b27eda kudu::Thread::SuperviseThread()
    @     0x7fbb977c3dc5 start_thread
    @     0x7fbb95b5921d __clone
    @                0x0 (unknown)

Testing: included is a targeted unit-test which reproduces the overflow
quickly and deterministically. I also reproduced the issue using an
integration test, however that test exposed other issues which need to
be addressed before it can land (KUDU-2253). I'll be working on that in
a follow-up commit.

Change-Id: I74975cdab605b51617d93d1ae98ef72ce87e35cb
Reviewed-on: http://gerrit.cloudera.org:8080/8951
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <davidralves@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/56107ac8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/56107ac8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/56107ac8

Branch: refs/heads/master
Commit: 56107ac806a171db33f09c6bdce7909bb8c9bd4b
Parents: 236fc67
Author: Dan Burkert <danburkert@apache.org>
Authored: Thu Jan 4 10:14:55 2018 -0800
Committer: Dan Burkert <danburkert@apache.org>
Committed: Thu Jan 11 20:59:43 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/compaction_policy-test.cc | 17 +++++++++++++++++
 src/kudu/tablet/mock-rowsets.h            |  2 +-
 src/kudu/tablet/rowset_info.h             | 11 ++++++-----
 3 files changed, 24 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/56107ac8/src/kudu/tablet/compaction_policy-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/compaction_policy-test.cc b/src/kudu/tablet/compaction_policy-test.cc
index 9b19230..fe548dd 100644
--- a/src/kudu/tablet/compaction_policy-test.cc
+++ b/src/kudu/tablet/compaction_policy-test.cc
@@ -200,5 +200,22 @@ TEST_F(TestCompactionPolicy, TestYcsbCompaction) {
       << qualities;
 }
 
+// Regression test for KUDU-2251 which ensures that large (> 2GiB) rowsets don't
+// cause integer overflow in compaction planning.
+TEST_F(TestCompactionPolicy, KUDU2251) {
+  RowSetVector vec = {
+    std::make_shared<MockDiskRowSet>("C", "c", 1L << 31),
+    std::make_shared<MockDiskRowSet>("B", "a", 1L << 32),
+    std::make_shared<MockDiskRowSet>("A", "b", 1L << 33)
+  };
+
+  const int kBudgetMb = 1L << 30; // enough to select all
+  unordered_set<RowSet*> picked;
+  double quality = 0;
+  NO_FATALS(RunTestCase(vec, kBudgetMb, &picked, &quality));
+  ASSERT_EQ(3, picked.size());
+  ASSERT_GE(quality, 1.0);
+}
+
 } // namespace tablet
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/56107ac8/src/kudu/tablet/mock-rowsets.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/mock-rowsets.h b/src/kudu/tablet/mock-rowsets.h
index f488302..51763d6 100644
--- a/src/kudu/tablet/mock-rowsets.h
+++ b/src/kudu/tablet/mock-rowsets.h
@@ -164,7 +164,7 @@ class MockRowSet : public RowSet {
 class MockDiskRowSet : public MockRowSet {
  public:
   MockDiskRowSet(std::string first_key, std::string last_key,
-                 int size = 1000000)
+                 uint64_t size = 1000000)
       : first_key_(std::move(first_key)),
         last_key_(std::move(last_key)),
         size_(size) {}

http://git-wip-us.apache.org/repos/asf/kudu/blob/56107ac8/src/kudu/tablet/rowset_info.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index 6cc5588..03fb9b7 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -17,11 +17,12 @@
 #ifndef KUDU_TABLET_ROWSET_INFO_H_
 #define KUDU_TABLET_ROWSET_INFO_H_
 
-#include "kudu/gutil/ref_counted.h"
-
+#include <cstdint>
 #include <string>
 #include <vector>
 
+#include "kudu/gutil/ref_counted.h"
+
 namespace kudu {
 namespace tablet {
 
@@ -43,7 +44,7 @@ class RowSetInfo {
                              std::vector<RowSetInfo>* min_key,
                              std::vector<RowSetInfo>* max_key);
 
-  int size_bytes() const { return extra_->size_bytes; }
+  uint64_t size_bytes() const { return extra_->size_bytes; }
   int size_mb() const { return size_mb_; }
 
   // Return the value of the CDF at the minimum key of this candidate.
@@ -97,8 +98,8 @@ class RowSetInfo {
   //
   // These are ref-counted so that RowSetInfo is copyable.
   struct ExtraData : public RefCounted<ExtraData> {
-    // Cached version of rowset_->OnDiskBaseDataSize().
-    int size_bytes;
+    // Cached version of rowset_->OnDiskBaseDataSizeWithRedos().
+    uint64_t size_bytes;
 
     // True if the RowSet has known bounds.
     // MemRowSets in particular do not.


Mime
View raw message