kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aw...@apache.org
Subject [kudu] 02/03: tablet: detach metrics first in destructor
Date Tue, 07 Jan 2020 17:49:29 GMT
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 790c66f9302ac645aafe81b394538b1e5e07e724
Author: Andrew Wong <awong@cloudera.com>
AuthorDate: Mon Jan 6 17:49:01 2020 -0800

    tablet: detach metrics first in destructor
    
    Previously the FunctionGaugeDetacher in a Tablet could be destructed
    after some of the state in the Tablet had already been destructed.
    
    Specifically, the metrics would be detached after destructing
    'last_rw_time_lock_' et al. This led to some TSAN warnings[1].
    
    Without this patch, RaftConsensusNumLeadersMetricTest
    TestNumLeadersMetric, which deletes tablets and then immediately fetches
    metrics, would fail 29/1000 times in TSAN mode. With it, it only failed
    7/1000, those due to an unrelated timeout (not addressed in this patch).
    
    I went ahead and updated other misordered instances of
    FunctionGaugeDetachers. As far as I can tell, there aren't any other
    affected tests.
    
    [1]:
    ==================
    WARNING: ThreadSanitizer: data race (pid=30286)
      Write of size 1 at 0x7b58000521c8 by thread T156 (mutexes: write M3961, write M2995):
        #0 AnnotateRWLockDestroy /data/8/awong/kudu/thirdparty/src/llvm-9.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cc:264
(kudu-tserver+0x4ab83e)
        #1 kudu::rw_spinlock::~rw_spinlock() ../src/kudu/util/locks.h:89:5 (libtserver.so+0x262d4b)
        #2 kudu::tablet::Tablet::~Tablet() ../src/kudu/tablet/tablet.cc:270:1 (libtablet.so+0x174886)
        #3 std::__1::default_delete<kudu::tablet::Tablet>::operator()(kudu::tablet::Tablet*)
const /data/8/awong/kudu/thirdparty/installed/tsan/include/c++/v1/memory:2338:5 (libtablet.so+0x233026)
        #4 std::__1::__shared_ptr_pointer<kudu::tablet::Tablet*, std::__1::default_delete<kudu::tablet::Tablet>,
std::__1::allocator<kudu::tablet::Tablet> >::__on_zero_shared() /data/8/awong/kudu/thirdparty/installed/tsan/include/c++/v1/memory:3511:5
(libtablet.so+0x2343bf)
        #5 std::__1::__shared_count::__release_shared() /data/8/awong/kudu/thirdparty/installed/tsan/include/c++/v1/memory:3415:9
(libtserver.so+0x125c9c)
        #6 std::__1::__shared_weak_count::__release_shared() /data/8/awong/kudu/thirdparty/installed/tsan/include/c++/v1/memory:3457:27
(libtserver.so+0x125c12)
        #7 std::__1::shared_ptr<kudu::tablet::Tablet>::~shared_ptr() /data/8/awong/kudu/thirdparty/installed/tsan/include/c++/v1/memory:4393:19
(libtserver.so+0x1478b7)
        #8 std::__1::shared_ptr<kudu::tablet::Tablet>::reset() /data/8/awong/kudu/thirdparty/installed/tsan/include/c++/v1/memory:4528:5
(libtablet.so+0x2622c6)
        #9 kudu::tablet::TabletReplica::Stop() ../src/kudu/tablet/tablet_replica.cc:318:13
(libtablet.so+0x2578bd)
        #10 kudu::tserver::TSTabletManager::DeleteTablet(std::__1::basic_string<char, std::__1::char_traits<char>,
std::__1::allocator<char> > const&, kudu::tablet::TabletDataState, boost::optional<long>
const&, kudu::tserver::TabletServerErrorPB_Code*) ../src/kudu/tserver/ts_tablet_manager.cc:972:12
(libtserver.so+0x25a568)
        #11 kudu::tserver::DeleteTabletRunnable::Run() ../src/kudu/tserver/ts_tablet_manager.cc:882:36
(libtserver.so+0x27caa8)
        #12 kudu::ThreadPool::DispatchThread() ../src/kudu/util/threadpool.cc:685:22 (libkudu_util.so+0x40e2f6)
        #13 boost::_mfi::mf0<void, kudu::ThreadPool>::operator()(kudu::ThreadPool*)
const ../thirdparty/installed/tsan/include/boost/bind/mem_fn_template.hpp:49:29 (libkudu_util.so+0x4357bc)
        #14 void boost::_bi::list1<boost::_bi::value<kudu::ThreadPool*> >::operator()<boost::_mfi::mf0<void,
kudu::ThreadPool>, boost::_bi::list0>(boost::_bi::type<void>, boost::_mfi::mf0<void,
kudu::ThreadPool>&, boost::_bi::list0&, int) ../thirdparty/installed/tsan/include/boost/bind/bind.hpp:259:9
(libkudu_util.so+0x43566d)
        #15 boost::_bi::bind_t<void, boost::_mfi::mf0<void, kudu::ThreadPool>, boost::_bi::list1<boost::_bi::value<kudu::ThreadPool*>
> >::operator()() ../thirdparty/installed/tsan/include/boost/bind/bind.hpp:1222:16 (libkudu_util.so+0x4355b1)
        #16 boost::detail::function::void_function_obj_invoker0<boost::_bi::bind_t<void,
boost::_mfi::mf0<void, kudu::ThreadPool>, boost::_bi::list1<boost::_bi::value<kudu::ThreadPool*>
> >, void>::invoke(boost::detail::function::function_buffer&) ../thirdparty/installed/tsan/include/boost/function/function_template.hpp:159:11
(libkudu_util.so+0x4351e0)
        #17 boost::function0<void>::operator()() const ../thirdparty/installed/tsan/include/boost/function/function_template.hpp:770:14
(libkrpc.so+0x13b9c1)
        #18 kudu::Thread::SuperviseThread(void*) ../src/kudu/util/thread.cc:675:3 (libkudu_util.so+0x3eed69)
    
      Previous atomic write of size 4 at 0x7b58000521c8 by thread T12 (mutexes: write M261907054171829296):
        #0 __tsan_atomic32_compare_exchange_strong /data/8/awong/kudu/thirdparty/src/llvm-9.0.0.src/projects/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cc:780
(kudu-tserver+0x4b475e)
        #1 base::subtle::Release_CompareAndSwap(int volatile*, int, int) ../src/kudu/gutil/atomicops-internals-tsan.h:93:3
(libtablet.so+0x1d4842)
        #2 kudu::rw_semaphore::unlock_shared() ../src/kudu/util/rw_semaphore.h:91:19 (libtablet.so+0x1d4766)
        #3 kudu::rw_spinlock::unlock_shared() ../src/kudu/util/locks.h:99:10 (libtablet.so+0x1d45da)
        #4 kudu::shared_lock<kudu::rw_spinlock>::~shared_lock() ../src/kudu/util/locks.h:283:11
(libtablet.so+0x1a3bf5)
        #5 kudu::tablet::Tablet::LastReadElapsedSeconds() const ../src/kudu/tablet/tablet.cc:1989:1
(libtablet.so+0x17457c)
        #6 kudu::internal::RunnableAdapter<unsigned long (kudu::tablet::Tablet::*)() const>::Run(kudu::tablet::Tablet
const*) ../src/kudu/gutil/bind_internal.h:155:12 (libtablet.so+0x1e5c1c)
        #7 kudu::internal::InvokeHelper<false, unsigned long, kudu::internal::RunnableAdapter<unsigned
long (kudu::tablet::Tablet::*)() const>, void (kudu::tablet::Tablet*)>::MakeItSo(kudu::internal::RunnableAdapter<unsigned
long (kudu::tablet::Tablet::*)() const>, kudu::tablet::Tablet*) ../src/kudu/gutil/bind_internal.h:865:21
(libtablet.so+0x1e5a8c)
        #8 kudu::internal::Invoker<1, kudu::internal::BindState<kudu::internal::RunnableAdapter<unsigned
long (kudu::tablet::Tablet::*)() const>, unsigned long (kudu::tablet::Tablet const*), void
(kudu::internal::UnretainedWrapper<kudu::tablet::Tablet>)>, unsigned long (kudu::tablet::Tablet
const*)>::Run(kudu::internal::BindStateBase*) ../src/kudu/gutil/bind_internal.h:1065:12
(libtablet.so+0x1e59a7)
        #9 kudu::Callback<unsigned long ()>::Run() const ../src/kudu/gutil/callback.h:396:12
(libtserver.so+0x15acfb)
        #10 kudu::FunctionGauge<unsigned long>::value() const ../src/kudu/util/metrics.h:1239:22
(libtserver.so+0x15ab1f)
        #11 kudu::FunctionGauge<unsigned long>::WriteValue(kudu::JsonWriter*) const
../src/kudu/util/metrics.h:1243:19 (libtserver.so+0x15a7bc)
        #12 kudu::Gauge::WriteAsJson(kudu::JsonWriter*, kudu::MetricJsonOptions const&)
const ../src/kudu/util/metrics.cc:716:3 (libkudu_util.so+0x31da17)
        #13 void kudu::WriteMetricsToJson<std::__1::unordered_map<kudu::MetricPrototype
const*, scoped_refptr<kudu::Metric>, std::__1::hash<kudu::MetricPrototype const*>,
std::__1::equal_to<kudu::MetricPrototype const*>, std::__1::allocator<std::__1::pair<kudu::MetricPrototype
const* const, scoped_refptr<kudu::Metric> > > > >(kudu::JsonWriter*, std::__1::unordered_map<kudu::MetricPrototype
const*, scoped_refptr<kudu::Metric>, std::__1::hash<kudu::MetricPrototype const*>,
std::__1::equal_t [...]
        #14 kudu::MetricEntity::WriteAsJson(kudu::JsonWriter*, kudu::MetricJsonOptions const&)
const ../src/kudu/util/metrics.cc:388:3 (libkudu_util.so+0x31a9f4)
        #15 kudu::MetricRegistry::WriteAsJson(kudu::JsonWriter*, kudu::MetricJsonOptions const&)
const ../src/kudu/util/metrics.cc:517:7 (libkudu_util.so+0x31c34a)
        #16 kudu::server::DiagnosticsLog::LogMetrics() ../src/kudu/server/diagnostics_log.cc:345:3
(libserver_process.so+0xac51a)
        #17 kudu::server::DiagnosticsLog::RunThread() ../src/kudu/server/diagnostics_log.cc:225:7
(libserver_process.so+0xabc50)
        #18 boost::_mfi::mf0<void, kudu::server::DiagnosticsLog>::operator()(kudu::server::DiagnosticsLog*)
const ../thirdparty/installed/tsan/include/boost/bind/mem_fn_template.hpp:49:29 (libserver_process.so+0xb88ac)
        #19 void boost::_bi::list1<boost::_bi::value<kudu::server::DiagnosticsLog*>
>::operator()<boost::_mfi::mf0<void, kudu::server::DiagnosticsLog>, boost::_bi::list0>(boost::_bi::type<void>,
boost::_mfi::mf0<void, kudu::server::DiagnosticsLog>&, boost::_bi::list0&, int)
../thirdparty/installed/tsan/include/boost/bind/bind.hpp:259:9 (libserver_process.so+0xb875d)
        #20 boost::_bi::bind_t<void, boost::_mfi::mf0<void, kudu::server::DiagnosticsLog>,
boost::_bi::list1<boost::_bi::value<kudu::server::DiagnosticsLog*> > >::operator()()
../thirdparty/installed/tsan/include/boost/bind/bind.hpp:1222:16 (libserver_process.so+0xb8671)
        #21 boost::detail::function::void_function_obj_invoker0<boost::_bi::bind_t<void,
boost::_mfi::mf0<void, kudu::server::DiagnosticsLog>, boost::_bi::list1<boost::_bi::value<kudu::server::DiagnosticsLog*>
> >, void>::invoke(boost::detail::function::function_buffer&) ../thirdparty/installed/tsan/include/boost/function/function_template.hpp:159:11
(libserver_process.so+0xb82a0)
        #22 boost::function0<void>::operator()() const ../thirdparty/installed/tsan/include/boost/function/function_template.hpp:770:14
(libkrpc.so+0x13b9c1)
        #23 kudu::Thread::SuperviseThread(void*) ../src/kudu/util/thread.cc:675:3 (libkudu_util.so+0x3eed69)
    
    Change-Id: Ib32120178a68b5389e167643e9bb8b89f8c625b9
    Reviewed-on: http://gerrit.cloudera.org:8080/14979
    Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/consensus/raft_consensus.h  | 6 ++++--
 src/kudu/master/ts_manager.h         | 6 ++++--
 src/kudu/tablet/tablet.h             | 5 ++++-
 src/kudu/tablet/tablet_replica.h     | 6 ++++--
 src/kudu/tserver/ts_tablet_manager.h | 6 ++++--
 5 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/kudu/consensus/raft_consensus.h b/src/kudu/consensus/raft_consensus.h
index 900e792..b3a8980 100644
--- a/src/kudu/consensus/raft_consensus.h
+++ b/src/kudu/consensus/raft_consensus.h
@@ -931,8 +931,6 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
   // The number of times Update() has been called, used for some test assertions.
   AtomicInt<int32_t> update_calls_for_tests_;
 
-  FunctionGaugeDetacher metric_detacher_;
-
   // The wrapping into std::atomic<> is to simplify the synchronization between
   // consensus-related writers and readers of the attached metric gauge.
   std::atomic<int64_t> last_leader_communication_time_micros_;
@@ -941,6 +939,10 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
   scoped_refptr<AtomicGauge<int64_t>> term_metric_;
   scoped_refptr<AtomicGauge<int64_t>> num_failed_elections_metric_;
 
+  // NOTE: it's important that this is the first member to be destructed. This
+  // ensures we do not attempt to collect metrics while calling the destructor.
+  FunctionGaugeDetacher metric_detacher_;
+
   DISALLOW_COPY_AND_ASSIGN(RaftConsensus);
 };
 
diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h
index ec18758..831320d 100644
--- a/src/kudu/master/ts_manager.h
+++ b/src/kudu/master/ts_manager.h
@@ -140,8 +140,6 @@ class TSManager {
   // to recheck any ignored failures.
   void SetAllTServersNeedFullTabletReports();
 
-  FunctionGaugeDetacher metric_detacher_;
-
   // Protects 'servers_by_id_'.
   mutable rw_spinlock lock_;
 
@@ -163,6 +161,10 @@ class TSManager {
 
   LocationCache* location_cache_;
 
+  // NOTE: it's important that this is the first member to be destructed. This
+  // ensures we do not attempt to collect metrics while calling the destructor.
+  FunctionGaugeDetacher metric_detacher_;
+
   DISALLOW_COPY_AND_ASSIGN(TSManager);
 };
 
diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h
index 653ccf3..d4997be 100644
--- a/src/kudu/tablet/tablet.h
+++ b/src/kudu/tablet/tablet.h
@@ -721,7 +721,6 @@ class Tablet {
 
   scoped_refptr<MetricEntity> metric_entity_;
   gscoped_ptr<TabletMetrics> metrics_;
-  FunctionGaugeDetacher metric_detacher_;
 
   std::unique_ptr<Throttler> throttler_;
 
@@ -769,6 +768,10 @@ class Tablet {
   MonoTime last_write_time_;
   mutable MonoTime last_read_time_;
 
+  // NOTE: it's important that this is the first member to be destructed. This
+  // ensures we do not attempt to collect metrics while calling the destructor.
+  FunctionGaugeDetacher metric_detacher_;
+
   DISALLOW_COPY_AND_ASSIGN(Tablet);
 };
 
diff --git a/src/kudu/tablet/tablet_replica.h b/src/kudu/tablet/tablet_replica.h
index e2254f0..9cc38ec 100644
--- a/src/kudu/tablet/tablet_replica.h
+++ b/src/kudu/tablet/tablet_replica.h
@@ -391,11 +391,13 @@ class TabletReplica : public RefCountedThreadSafe<TabletReplica>,
   // The result tracker for writes.
   scoped_refptr<rpc::ResultTracker> result_tracker_;
 
-  FunctionGaugeDetacher metric_detacher_;
-
   // Cached stats for the tablet replica.
   ReportedTabletStatsPB stats_pb_;
 
+  // NOTE: it's important that this is the first member to be destructed. This
+  // ensures we do not attempt to collect metrics while calling the destructor.
+  FunctionGaugeDetacher metric_detacher_;
+
   DISALLOW_COPY_AND_ASSIGN(TabletReplica);
 };
 
diff --git a/src/kudu/tserver/ts_tablet_manager.h b/src/kudu/tserver/ts_tablet_manager.h
index 7e1fc15..68cc4b7 100644
--- a/src/kudu/tserver/ts_tablet_manager.h
+++ b/src/kudu/tserver/ts_tablet_manager.h
@@ -401,12 +401,14 @@ class TSTabletManager : public tserver::TabletReplicaLookupIf {
   // Thread pool used to delete tablets asynchronously.
   std::unique_ptr<ThreadPool> delete_tablet_pool_;
 
-  FunctionGaugeDetacher metric_detacher_;
-
   // Ensures that we only update stats from a single thread at a time.
   mutable rw_spinlock lock_update_;
   MonoTime next_update_time_;
 
+  // NOTE: it's important that this is the first member to be destructed. This
+  // ensures we do not attempt to collect metrics while calling the destructor.
+  FunctionGaugeDetacher metric_detacher_;
+
   DISALLOW_COPY_AND_ASSIGN(TSTabletManager);
 };
 


Mime
View raw message