kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [kudu] branch master updated: KUDU-2807 Crash when flush or compaction overlaps with another compaction
Date Mon, 13 May 2019 19:31:57 GMT
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new f6f8bbf  KUDU-2807 Crash when flush or compaction overlaps with another compaction
f6f8bbf is described below

commit f6f8bbf35aa33e668f9cc5ce9b1e80d202a7f736
Author: Will Berkeley <wdberkeley@gmail.com>
AuthorDate: Tue May 7 09:31:49 2019 -0700

    KUDU-2807 Crash when flush or compaction overlaps with another compaction
    
    Commit d3684a7b2add8f06b7189adb9ce9222b8ae1eff5 introduced a metric for
    average rowset height. Computing this requires examining the rowsets in
    the rowset tree and briefly taking each one's `compact_flush_lock_`.
    However, any time a thread takes the `compact_flush_lock_` of a rowset,
    it must hold the `compact_select_lock_` of the tablet that rowset
    belongs to. This was not happening in two of the three places where the
    average height is computed:
    
    1. When opening the tablet.
    2. When updating the rowset tree during a flush or compaction.
    
    The first case is benign (as far as I know). The second case could cause
    a crash like
    
    F0429 07:26:56.918041 34043 tablet.cc:2268] Check failed: lock.owns_lock() RowSet(24130)
unable to lock compact_flush_lock
    
    MM ops enforced the invariant above by try-locking the
    `compact_flush_lock_` and checking that they obtained the lock, while
    holding the `compact_select_lock_`. So, if a MM op try-locked a rowset
    at the same time as another MM op was holding its `compact_flush_lock_`,
    the above crash would result.
    
    This patch fixes the crash by ensuring that the `compact_select_lock_`
    is held whenever `ComputeCdfAndCheckOrdered`, which computes the average
    rowset height, is called. I also made a small modification to the scope
    of a `component_lock_` to avoid having to define a lock order for
    `component_lock_` and `compact_select_lock_`.
    
    Change-Id: Ic255f0466aa2c158fa32e8e38428eddfcf901b99
    Reviewed-on: http://gerrit.cloudera.org:8080/13264
    Reviewed-by: Adar Dembo <adar@cloudera.com>
    Tested-by: Will Berkeley <wdberkeley@gmail.com>
---
 src/kudu/tablet/rowset_info.h |  2 ++
 src/kudu/tablet/tablet.cc     | 57 +++++++++++++++++++++++--------------------
 src/kudu/tablet/tablet.h      |  5 +++-
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index af2d07d..f133e3e 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -51,6 +51,8 @@ class RowSetInfo {
   // rowset tree is set into 'average_height', if it is not nullptr.
   // If one of 'info_by_min_key' and 'info_by_max_key' is nullptr, the other
   // must be.
+  // Requires holding the compact_select_lock_ for the tablet that the
+  // rowsets in 'tree' references.
   static void ComputeCdfAndCollectOrdered(const RowSetTree& tree,
                                           double* average_height,
                                           std::vector<RowSetInfo>* info_by_min_key,
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 7cb1053..daee9a5 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -266,7 +266,6 @@ Status Tablet::Open() {
   TRACE_EVENT0("tablet", "Tablet::Open");
   RETURN_IF_STOPPED_OR_CHECK_STATE(kInitialized);
 
-  std::lock_guard<rw_spinlock> lock(component_lock_);
   CHECK(schema()->has_column_ids());
 
   next_mrs_id_ = metadata_->last_durable_mrs_id() + 1;
@@ -293,15 +292,6 @@ Status Tablet::Open() {
 
   shared_ptr<RowSetTree> new_rowset_tree(new RowSetTree());
   CHECK_OK(new_rowset_tree->Reset(rowsets_opened));
-  if (metrics_) {
-    // Compute the initial average height of the rowset tree.
-    double avg_height;
-    RowSetInfo::ComputeCdfAndCollectOrdered(*new_rowset_tree,
-                                            &avg_height,
-                                            nullptr,
-                                            nullptr);
-    metrics_->average_diskrowset_height->set_value(avg_height);
-  }
 
   // Now that the current state is loaded, create the new MemRowSet with the next id.
   shared_ptr<MemRowSet> new_mrs;
@@ -309,7 +299,13 @@ Status Tablet::Open() {
                                   log_anchor_registry_.get(),
                                   mem_trackers_.tablet_tracker,
                                   &new_mrs));
-  components_ = new TabletComponents(new_mrs, new_rowset_tree);
+  {
+    std::lock_guard<rw_spinlock> lock(component_lock_);
+    components_ = new TabletComponents(new_mrs, new_rowset_tree);
+  }
+
+  // Compute the initial average rowset height.
+  UpdateAverageRowsetHeight();
 
   {
     std::lock_guard<simple_spinlock> l(state_lock_);
@@ -1055,10 +1051,10 @@ void Tablet::ModifyRowSetTree(const RowSetTree& old_tree,
   CHECK_OK(new_tree->Reset(post_swap));
 }
 
-void Tablet::AtomicSwapRowSets(const RowSetVector &old_rowsets,
-                               const RowSetVector &new_rowsets) {
+void Tablet::AtomicSwapRowSets(const RowSetVector &to_remove,
+                               const RowSetVector &to_add) {
   std::lock_guard<rw_spinlock> lock(component_lock_);
-  AtomicSwapRowSetsUnlocked(old_rowsets, new_rowsets);
+  AtomicSwapRowSetsUnlocked(to_remove, to_add);
 }
 
 void Tablet::AtomicSwapRowSetsUnlocked(const RowSetVector &to_remove,
@@ -1070,19 +1066,6 @@ void Tablet::AtomicSwapRowSetsUnlocked(const RowSetVector &to_remove,
                    to_remove, to_add, new_tree.get());
 
   components_ = new TabletComponents(components_->memrowset, new_tree);
-
-  // Recompute the average rowset height.
-  // TODO(wdberkeley): We should be able to cache the computation of the CDF
-  // and average height and efficiently recompute it instead of doing it from
-  // scratch.
-  if (metrics_) {
-    double avg_height;
-    RowSetInfo::ComputeCdfAndCollectOrdered(*new_tree,
-                                            &avg_height,
-                                            nullptr,
-                                            nullptr);
-    metrics_->average_diskrowset_height->set_value(avg_height);
-  }
 }
 
 Status Tablet::DoMajorDeltaCompaction(const vector<ColumnId>& col_ids,
@@ -1725,6 +1708,7 @@ Status Tablet::DoMergeCompactionOrFlush(const RowSetsInCompaction &input,
   // Replace the compacted rowsets with the new on-disk rowsets, making them visible now
that
   // their metadata was written to disk.
   AtomicSwapRowSets({ inprogress_rowset }, new_disk_rowsets);
+  UpdateAverageRowsetHeight();
 
   const auto rows_written = drsw.rows_written_count();
   const auto drs_written = drsw.drs_written_count();
@@ -1755,9 +1739,28 @@ Status Tablet::HandleEmptyCompactionOrFlush(const RowSetVector&
rowsets,
                         "Failed to flush new tablet metadata");
 
   AtomicSwapRowSets(rowsets, RowSetVector());
+  UpdateAverageRowsetHeight();
   return Status::OK();
 }
 
+void Tablet::UpdateAverageRowsetHeight() {
+  if (!metrics_) {
+    return;
+  }
+  // TODO(wdberkeley): We should be able to cache the computation of the CDF
+  // and average height and efficiently recompute it instead of doing it from
+  // scratch.
+  scoped_refptr<TabletComponents> comps;
+  GetComponents(&comps);
+  std::lock_guard<std::mutex> l(compact_select_lock_);
+  double avg_height;
+  RowSetInfo::ComputeCdfAndCollectOrdered(*comps->rowsets,
+                                          &avg_height,
+                                          nullptr,
+                                          nullptr);
+  metrics_->average_diskrowset_height->set_value(avg_height);
+}
+
 Status Tablet::Compact(CompactFlags flags) {
   RETURN_IF_STOPPED_OR_CHECK_STATE(kOpen);
 
diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h
index 87448f5..05c7c9b 100644
--- a/src/kudu/tablet/tablet.h
+++ b/src/kudu/tablet/tablet.h
@@ -347,7 +347,6 @@ class Tablet {
   // memrowset in the current implementation.
   Status CountRows(uint64_t *count) const;
 
-
   // Verbosely dump this entire tablet to the logs. This is only
   // really useful when debugging unit tests failures where the tablet
   // has a very small number of rows.
@@ -599,6 +598,10 @@ class Tablet {
   Status HandleEmptyCompactionOrFlush(const RowSetVector& rowsets,
                                       int mrs_being_flushed);
 
+  // Updates the average rowset height metric. Acquires the tablet's
+  // compact_select_lock_.
+  void UpdateAverageRowsetHeight();
+
   Status FlushMetadata(const RowSetVector& to_remove,
                        const RowSetMetadataVector& to_add,
                        int64_t mrs_being_flushed);


Mime
View raw message