kudu-commits mailing list archives

From t...@apache.org
Subject [1/5] kudu git commit: KUDU-2287 Expose election failures as metrics
Date Tue, 05 Jun 2018 23:29:02 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 5f9a2f523 -> 76bbdd4a0


KUDU-2287 Expose election failures as metrics

This patch exposes two new tablet-level metrics: the number of failed
elections since the tablet last had a stable leader, and the time elapsed
since the last heartbeat received from the leader.
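
Below is a minimal, self-contained sketch of the pattern the patch follows.
The class and function names here are illustrative only and are not Kudu's
actual metric classes: an atomic timestamp records the last leader heartbeat,
the milliseconds-since value is derived lazily at read time (and is zero when
no heartbeat has been seen, e.g. on a leader), and a failed-election counter
is bumped on each lost election and reset once a stable leader is observed.

#include <atomic>
#include <chrono>
#include <cstdint>
#include <iostream>

// Microseconds on a monotonic clock; stands in for Kudu's GetMonoTimeMicros().
static int64_t NowMicros() {
  using namespace std::chrono;
  return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

class ElectionMetrics {
 public:
  // Called whenever a heartbeat/update arrives from the current leader.
  void OnLeaderHeartbeat() { last_leader_comm_micros_.store(NowMicros()); }

  // Called when this replica becomes leader; a leader reports 0 for the gauge.
  void OnBecomeLeader() { last_leader_comm_micros_.store(0); }

  // Called from the election callback when the vote is denied.
  void OnElectionFailed() { ++failed_elections_since_stable_leader_; }

  // Called when a (new) leader is observed, i.e. the leader UUID is set.
  void OnStableLeader() { failed_elections_since_stable_leader_ = 0; }

  // Value behind the time_since_last_leader_heartbeat gauge, computed at
  // read time rather than refreshed on a timer.
  int64_t MillisSinceLastLeaderHeartbeat() const {
    int64_t last = last_leader_comm_micros_.load();
    return last == 0 ? 0 : (NowMicros() - last) / 1000;
  }

  int64_t failed_elections() const { return failed_elections_since_stable_leader_; }

 private:
  // Atomic so a metrics reader can sample it without taking the consensus lock.
  std::atomic<int64_t> last_leader_comm_micros_{0};
  int64_t failed_elections_since_stable_leader_{0};
};

int main() {
  ElectionMetrics m;
  m.OnLeaderHeartbeat();
  m.OnElectionFailed();
  std::cout << m.MillisSinceLastLeaderHeartbeat() << " ms, "
            << m.failed_elections() << " failed election(s)\n";
  m.OnStableLeader();
  std::cout << m.failed_elections() << " failed election(s) after a stable leader\n";
  return 0;
}

Computing the heartbeat age at read time (a function gauge in the actual
patch) avoids keeping the metric fresh from a background thread, and holding
the timestamp in a std::atomic lets the metrics path read it without the
Raft lock.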

Change-Id: I1b25df258cdba7bdae7bb2d7b4eb3d73b53425c3
Reviewed-on: http://gerrit.cloudera.org:8080/10076
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mpercy@apache.org>
Reviewed-by: Will Berkeley <wdberkeley@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/91881e2b
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/91881e2b
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/91881e2b

Branch: refs/heads/master
Commit: 91881e2b5fdf853f3c1b76879ba3702e6460bf22
Parents: 5f9a2f5
Author: Attila Bukor <abukor@cloudera.com>
Authored: Fri Jun 1 08:42:54 2018 +0200
Committer: Will Berkeley <wdberkeley@gmail.com>
Committed: Fri Jun 1 21:12:50 2018 +0000

----------------------------------------------------------------------
 src/kudu/consensus/raft_consensus.cc            | 33 ++++++++
 src/kudu/consensus/raft_consensus.h             | 18 ++--
 .../integration-tests/raft_consensus-itest.cc   | 86 ++++++++++++++++++++
 3 files changed, 131 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/91881e2b/src/kudu/consensus/raft_consensus.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.cc b/src/kudu/consensus/raft_consensus.cc
index c8569ac..68f1859 100644
--- a/src/kudu/consensus/raft_consensus.cc
+++ b/src/kudu/consensus/raft_consensus.cc
@@ -47,12 +47,14 @@
 #include "kudu/consensus/pending_rounds.h"
 #include "kudu/consensus/quorum_util.h"
 #include "kudu/gutil/bind.h"
+#include "kudu/gutil/bind_helpers.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/stl_util.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/gutil/strings/substitute.h"
+#include "kudu/gutil/walltime.h"
 #include "kudu/rpc/periodic.h"
 #include "kudu/util/async_util.h"
 #include "kudu/util/debug/trace_event.h"
@@ -149,6 +151,18 @@ METRIC_DEFINE_gauge_int64(tablet, raft_term,
                           kudu::MetricUnit::kUnits,
                           "Current Term of the Raft Consensus algorithm. This number increments
"
                           "each time a leader election is started.");
+METRIC_DEFINE_gauge_int64(tablet, failed_elections_since_stable_leader,
+                          "Failed Elections Since Stable Leader",
+                          kudu::MetricUnit::kUnits,
+                          "Number of failed elections on this node since there was a stable "
+                          "leader. This number increments on each failed election and resets on "
+                          "each successful one.");
+METRIC_DEFINE_gauge_int64(tablet, time_since_last_leader_heartbeat,
+                          "Time Since Last Leader Heartbeat",
+                          kudu::MetricUnit::kMilliseconds,
+                          "The time elapsed since the last heartbeat from the leader "
+                          "in milliseconds. This metric is identically zero on a leader replica.");
+
 
 using boost::optional;
 using google::protobuf::util::MessageDifferencer;
@@ -228,6 +242,14 @@ Status RaftConsensus::Start(const ConsensusBootstrapInfo& info,
   follower_memory_pressure_rejections_ =
       metric_entity->FindOrCreateCounter(&METRIC_follower_memory_pressure_rejections);
 
+  num_failed_elections_metric_ =
+      metric_entity->FindOrCreateGauge(&METRIC_failed_elections_since_stable_leader,
+                                       failed_elections_since_stable_leader_);
+
+  METRIC_time_since_last_leader_heartbeat.InstantiateFunctionGauge(
+    metric_entity, Bind(&RaftConsensus::GetMillisSinceLastLeaderHeartbeat, Unretained(this)))
+    ->AutoDetach(&metric_detacher_);
+
   // A single Raft thread pool token is shared between RaftConsensus and
   // PeerManager. Because PeerManager is owned by RaftConsensus, it receives a
   // raw pointer to the token, to emphasize that RaftConsensus is responsible
@@ -595,6 +617,8 @@ Status RaftConsensus::BecomeLeaderUnlocked() {
       &DoNothingStatusCB,
       std::placeholders::_1));
 
+  last_leader_communication_time_micros_ = 0;
+
   return AppendNewRoundToQueueUnlocked(round);
 }
 
@@ -1292,6 +1316,8 @@ Status RaftConsensus::UpdateReplica(const ConsensusRequestPB* request,
     // sanity check.
     SnoozeFailureDetector();
 
+    last_leader_communication_time_micros_ = GetMonoTimeMicros();
+
     // We update the lag metrics here in addition to after appending to the queue so the
     // metrics get updated even when the operation is rejected.
     queue_->UpdateLastIndexAppendedToLeader(request->last_idx_appended_to_leader());
@@ -2281,6 +2307,7 @@ const char* RaftConsensus::State_Name(State state) {
 void RaftConsensus::SetLeaderUuidUnlocked(const string& uuid) {
   DCHECK(lock_.is_locked());
   failed_elections_since_stable_leader_ = 0;
+  num_failed_elections_metric_->set_value(failed_elections_since_stable_leader_);
   cmeta_->set_leader_uuid(uuid);
   MarkDirty(Substitute("New leader $0", uuid));
 }
@@ -2441,6 +2468,7 @@ void RaftConsensus::DoElectionCallback(ElectionReason reason, const ElectionResu
 
   if (result.decision == VOTE_DENIED) {
     failed_elections_since_stable_leader_++;
+    num_failed_elections_metric_->set_value(failed_elections_since_stable_leader_);
 
     // If we called an election and one of the voters had a higher term than we did,
     // we should bump our term before we potentially try again. This is particularly
@@ -2970,6 +2998,11 @@ ConsensusMetadata* RaftConsensus::consensus_metadata_for_tests() const {
   return cmeta_.get();
 }
 
+int64_t RaftConsensus::GetMillisSinceLastLeaderHeartbeat() const {
+    return last_leader_communication_time_micros_ == 0 ?
+        0 : (GetMonoTimeMicros() - last_leader_communication_time_micros_) / 1000;
+}
+
 ////////////////////////////////////////////////////////////////////////
 // ConsensusBootstrapInfo
 ////////////////////////////////////////////////////////////////////////

http://git-wip-us.apache.org/repos/asf/kudu/blob/91881e2b/src/kudu/consensus/raft_consensus.h
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/raft_consensus.h b/src/kudu/consensus/raft_consensus.h
index a92ebc1..33e8196 100644
--- a/src/kudu/consensus/raft_consensus.h
+++ b/src/kudu/consensus/raft_consensus.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <cstdint>
 #include <iosfwd>
 #include <memory>
@@ -57,10 +58,9 @@ namespace kudu {
 typedef std::lock_guard<simple_spinlock> Lock;
 typedef gscoped_ptr<Lock> ScopedLock;
 
+class Status;
 class ThreadPool;
 class ThreadPoolToken;
-class Status;
-
 template <typename Sig>
 class Callback;
 
@@ -72,11 +72,10 @@ namespace consensus {
 
 class ConsensusMetadataManager;
 class ConsensusRound;
-class PeerProxyFactory;
 class PeerManager;
+class PeerProxyFactory;
 class PendingRounds;
 class ReplicaTransactionFactory;
-
 struct ConsensusBootstrapInfo;
 struct ElectionResult;
 
@@ -359,6 +358,8 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
   // Return the on-disk size of the consensus metadata, in bytes.
   int64_t MetadataOnDiskSize() const;
 
+  int64_t GetMillisSinceLastLeaderHeartbeat() const;
+
  protected:
   RaftConsensus(ConsensusOptions options,
                 RaftPeerPB local_peer_pb,
@@ -849,7 +850,7 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
   // The number of times this node has called and lost a leader election since
   // the last time it saw a stable leader (either itself or another node).
   // This is used to calculate back-off of the election timeout.
-  int failed_elections_since_stable_leader_;
+  int64_t failed_elections_since_stable_leader_;
 
   Callback<void(const std::string& reason)> mark_dirty_clbk_;
 
@@ -861,8 +862,13 @@ class RaftConsensus : public std::enable_shared_from_this<RaftConsensus>,
   // The number of times Update() has been called, used for some test assertions.
   AtomicInt<int32_t> update_calls_for_tests_;
 
+  FunctionGaugeDetacher metric_detacher_;
+
+  std::atomic<int64_t> last_leader_communication_time_micros_;
+
   scoped_refptr<Counter> follower_memory_pressure_rejections_;
-  scoped_refptr<AtomicGauge<int64_t> > term_metric_;
+  scoped_refptr<AtomicGauge<int64_t>> term_metric_;
+  scoped_refptr<AtomicGauge<int64_t>> num_failed_elections_metric_;
 
   DISALLOW_COPY_AND_ASSIGN(RaftConsensus);
 };

http://git-wip-us.apache.org/repos/asf/kudu/blob/91881e2b/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 09f464f..d1425d0 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -95,6 +95,8 @@ DECLARE_int32(rpc_timeout);
 
 METRIC_DECLARE_entity(tablet);
 METRIC_DECLARE_counter(transaction_memory_pressure_rejections);
+METRIC_DECLARE_gauge_int64(time_since_last_leader_heartbeat);
+METRIC_DECLARE_gauge_int64(failed_elections_since_stable_leader);
 
 using kudu::client::KuduInsert;
 using kudu::client::KuduSession;
@@ -2642,6 +2644,90 @@ TEST_F(RaftConsensusITest, TestCorruptReplicaMetadata) {
                                    kTimeout));
 }
 
+int64_t GetFailedElectionsSinceStableLeader(const ExternalTabletServer* ets,
+                                            const std::string& tablet_id) {
+  int64_t ret;
+  CHECK_OK(GetInt64Metric(
+        ets->bound_http_hostport(),
+        &METRIC_ENTITY_tablet,
+        tablet_id.c_str(),
+        &METRIC_failed_elections_since_stable_leader,
+        "value",
+        &ret));
+  return ret;
+}
+
+int64_t GetTimeSinceLastLeaderHeartbeat(const ExternalTabletServer* ets,
+                                        const std::string& tablet_id) {
+  int64_t ret;
+  CHECK_OK(GetInt64Metric(
+        ets->bound_http_hostport(),
+        &METRIC_ENTITY_tablet,
+        tablet_id.c_str(),
+        &METRIC_time_since_last_leader_heartbeat,
+        "value",
+        &ret));
+  return ret;
+}
+
+TEST_F(RaftConsensusITest, TestElectionMetrics) {
+  const vector<string> kTsFlags = {
+    // Make leader elections faster so we can test
+    // failed_elections_since_stable_leader faster.
+    "--raft_heartbeat_interval_ms=100",
+  };
+
+  FLAGS_num_replicas = 3;
+  FLAGS_num_tablet_servers = 3;
+  NO_FATALS(BuildAndStart(kTsFlags));
+
+  vector<TServerDetails*> tservers;
+  AppendValuesFromMap(tablet_servers_, &tservers);
+
+  ASSERT_OK(StartElection(tservers[0], tablet_id_, MonoDelta::FromSeconds(10)));
+  ASSERT_OK(WaitForServersToAgree(MonoDelta::FromSeconds(10), tablet_servers_, tablet_id_, 1));
+
+  int num_tablet_servers = cluster_->num_tablet_servers();
+  int leader_idx = cluster_->tablet_server_index_by_uuid(tservers[0]->uuid());
+  ExternalTabletServer* leader = cluster_->tablet_server(leader_idx);
+  ExternalTabletServer* follower = cluster_->tablet_server((leader_idx + 1) % num_tablet_servers);
+
+  // Leader should always report 0 since last leader heartbeat.
+  ASSERT_EQ(0, GetTimeSinceLastLeaderHeartbeat(leader, tablet_id_));
+  ASSERT_EQ(0, GetFailedElectionsSinceStableLeader(leader, tablet_id_));
+
+  // Let's shut down all tablet servers except our chosen follower to make sure we don't
+  // have a leader.
+  for (auto i = 0; i < num_tablet_servers; ++i) {
+    if (cluster_->tablet_server(i) != follower) {
+      cluster_->tablet_server(i)->Shutdown();
+    }
+  }
+
+  // Get two measurements with 500 ms sleep between them and see if the
+  // difference between them is at least 500ms.
+  int64_t time_before_wait = GetTimeSinceLastLeaderHeartbeat(follower, tablet_id_);
+  SleepFor(MonoDelta::FromMilliseconds(500));
+  int64_t time_after_wait = GetTimeSinceLastLeaderHeartbeat(follower, tablet_id_);
+  ASSERT_TRUE(time_after_wait >= time_before_wait + 500);
+
+  // Verify failed_elections_since_stable_leader is advanced eventually.
+  ASSERT_EVENTUALLY([&]() {
+    ASSERT_TRUE(GetFailedElectionsSinceStableLeader(follower, tablet_id_) > 0);
+  });
+
+  // Start the servers back up and verify failed_elections_since_stable_leader
+  // is reset to 0.
+  for (auto i = 0; i < num_tablet_servers; ++i) {
+    if (cluster_->tablet_server(i) != follower) {
+      cluster_->tablet_server(i)->Start();
+    }
+  }
+  ASSERT_EVENTUALLY([&]() {
+    ASSERT_EQ(0, GetFailedElectionsSinceStableLeader(follower, tablet_id_));
+  });
+}
+
 // Test that an IOError when writing to the write-ahead log is a fatal error.
 // First, we test that failed replicates are fatal. Then, we test that failed
 // commits are fatal.

