kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aw...@apache.org
Subject [kudu] 02/03: KUDU-2069 p2: stop placement onto servers in maintenance mode
Date Wed, 18 Sep 2019 23:27:49 GMT
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit e8362ca1bb48ed0150cf134d93899d7a2f74caf3
Author: Andrew Wong <awong@apache.org>
AuthorDate: Sat Sep 14 23:28:49 2019 -0700

    KUDU-2069 p2: stop placement onto servers in maintenance mode
    
    This patch stops placement of replicas on tablet servers that are in
    maintenance mode.
    
    Change-Id: Icc3ca41a3b3c8625bec0470e9604df86e91b4952
    Reviewed-on: http://gerrit.cloudera.org:8080/14220
    Reviewed-by: Andrew Wong <awong@cloudera.com>
    Tested-by: Andrew Wong <awong@cloudera.com>
---
 src/kudu/master/catalog_manager.cc |  6 +--
 src/kudu/master/ts_manager.cc      | 34 +++++++++++-----
 src/kudu/master/ts_manager.h       | 16 ++++++--
 src/kudu/master/ts_state-test.cc   | 82 +++++++++++++++++++++++++++++++++++---
 4 files changed, 117 insertions(+), 21 deletions(-)

diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index 0933d89..3bad2cb 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -1574,7 +1574,7 @@ Status CatalogManager::CreateTable(const CreateTableRequestPB* orig_req,
   // Verify that the number of replicas isn't larger than the number of live tablet
   // servers.
   TSDescriptorVector ts_descs;
-  master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
+  master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs);
   const auto num_live_tservers = ts_descs.size();
   if (FLAGS_catalog_manager_check_ts_count_for_create_table && num_replicas >
num_live_tservers) {
     // Note: this error message is matched against in master-stress-test.
@@ -3766,7 +3766,7 @@ bool AsyncAddReplicaTask::SendRequest(int attempt) {
     }
 
     TSDescriptorVector ts_descs;
-    master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
+    master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs);
 
     // Get the dimension of the tablet. Otherwise, it will be none.
     optional<string> dimension = none;
@@ -4649,7 +4649,7 @@ Status CatalogManager::ProcessPendingAssignments(
   // For those tablets which need to be created in this round, assign replicas.
   {
     TSDescriptorVector ts_descs;
-    master_->ts_manager()->GetAllLiveDescriptors(&ts_descs);
+    master_->ts_manager()->GetDescriptorsAvailableForPlacement(&ts_descs);
     PlacementPolicy policy(std::move(ts_descs), &rng_);
     for (auto& tablet : deferred.needs_create_rpc) {
       // NOTE: if we fail to select replicas on the first pass (due to
diff --git a/src/kudu/master/ts_manager.cc b/src/kudu/master/ts_manager.cc
index 6eef6c1..7106ed3 100644
--- a/src/kudu/master/ts_manager.cc
+++ b/src/kudu/master/ts_manager.cc
@@ -197,24 +197,24 @@ void TSManager::GetAllDescriptors(TSDescriptorVector* descs) const {
   AppendValuesFromMap(servers_by_id_, descs);
 }
 
-void TSManager::GetAllLiveDescriptors(TSDescriptorVector* descs) const {
-  descs->clear();
+int TSManager::GetCount() const {
+  shared_lock<rw_spinlock> l(lock_);
+  return servers_by_id_.size();
+}
 
+void TSManager::GetDescriptorsAvailableForPlacement(TSDescriptorVector* descs) const {
+  descs->clear();
   shared_lock<rw_spinlock> l(lock_);
+  shared_lock<RWMutex> tsl(ts_state_lock_);
   descs->reserve(servers_by_id_.size());
   for (const TSDescriptorMap::value_type& entry : servers_by_id_) {
     const shared_ptr<TSDescriptor>& ts = entry.second;
-    if (!ts->PresumedDead()) {
+    if (AvailableForPlacementUnlocked(*ts)) {
       descs->push_back(ts);
     }
   }
 }
 
-int TSManager::GetCount() const {
-  shared_lock<rw_spinlock> l(lock_);
-  return servers_by_id_.size();
-}
-
 Status TSManager::SetTServerState(const string& ts_uuid,
                                   TServerStatePB ts_state,
                                   SysCatalogTable* sys_catalog) {
@@ -238,9 +238,14 @@ Status TSManager::SetTServerState(const string& ts_uuid,
   return Status::OK();
 }
 
+TServerStatePB TSManager::GetTServerStateUnlocked(const string& ts_uuid) const {
+  ts_state_lock_.AssertAcquired();
+  return FindWithDefault(ts_state_by_uuid_, ts_uuid, TServerStatePB::NONE);
+}
+
 TServerStatePB TSManager::GetTServerState(const string& ts_uuid) const {
   shared_lock<RWMutex> l(ts_state_lock_);
-  return FindWithDefault(ts_state_by_uuid_, ts_uuid, TServerStatePB::NONE);
+  return GetTServerStateUnlocked(ts_uuid);
 }
 
 Status TSManager::ReloadTServerStates(SysCatalogTable* sys_catalog) {
@@ -266,6 +271,17 @@ int TSManager::ClusterSkew() const {
   return max_count - min_count;
 }
 
+bool TSManager::AvailableForPlacementUnlocked(const TSDescriptor& ts) const {
+  DCHECK(lock_.is_locked());
+  // TODO(KUDU-1827): this should also be used when decommissioning a server.
+  if (GetTServerStateUnlocked(ts.permanent_uuid()) == TServerStatePB::MAINTENANCE_MODE) {
+    return false;
+  }
+  // If the tablet server has heartbeated recently enough, it is considered
+  // alive and available for placement.
+  return !ts.PresumedDead();
+}
+
 } // namespace master
 } // namespace kudu
 
diff --git a/src/kudu/master/ts_manager.h b/src/kudu/master/ts_manager.h
index cd803fa..7d3603f 100644
--- a/src/kudu/master/ts_manager.h
+++ b/src/kudu/master/ts_manager.h
@@ -84,9 +84,11 @@ class TSManager {
   // list.
   void GetAllDescriptors(TSDescriptorVector* descs) const;
 
-  // Return all of the currently registered TS descriptors that have sent a
-  // heartbeat recently, indicating that they're alive and well.
-  void GetAllLiveDescriptors(TSDescriptorVector* descs) const;
+  // Return all of the currently registered TS descriptors that are available
+  // for replica placement -- they have sent a heartbeat recently, indicating
+  // that they're alive and well, and they aren't in a mode that would block
+  // replication to them (e.g. maintenance mode).
+  void GetDescriptorsAvailableForPlacement(TSDescriptorVector* descs) const;
 
   // Get the TS count.
   int GetCount() const;
@@ -108,6 +110,14 @@ class TSManager {
 
   int ClusterSkew() const;
 
+  // Return the tserver state for the given tablet server UUID, or NONE if one
+  // doesn't exist. Must hold 'ts_state_lock_' to call.
+  TServerStatePB GetTServerStateUnlocked(const std::string& ts_uuid) const;
+
+  // Returns whether the given server can have replicas placed on it (e.g. it
+  // is not dead, not in maintenance mode).
+  bool AvailableForPlacementUnlocked(const TSDescriptor& ts) const;
+
   mutable rw_spinlock lock_;
 
   FunctionGaugeDetacher metric_detacher_;
diff --git a/src/kudu/master/ts_state-test.cc b/src/kudu/master/ts_state-test.cc
index c0c45ab..cfeba76 100644
--- a/src/kudu/master/ts_state-test.cc
+++ b/src/kudu/master/ts_state-test.cc
@@ -19,12 +19,15 @@
 #include <memory>
 #include <string>
 #include <thread>
+#include <utility>
 #include <vector>
 
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
 #include "kudu/common/common.pb.h"
+#include "kudu/common/wire_protocol-test-util.h"
+#include "kudu/common/wire_protocol.h"
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/consensus/replica_management.pb.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -33,6 +36,7 @@
 #include "kudu/master/master.pb.h"
 #include "kudu/master/master.proxy.h"
 #include "kudu/master/mini_master.h"
+#include "kudu/master/ts_descriptor.h"
 #include "kudu/master/ts_manager.h"
 #include "kudu/rpc/messenger.h"
 #include "kudu/rpc/rpc_controller.h"
@@ -90,10 +94,10 @@ class TServerStateTest : public KuduTest {
     mini_master_.reset(new MiniMaster(GetTestPath("master"),
                                       HostPort("127.0.0.1", 0)));
     ASSERT_OK(mini_master_->Start());
-    Master* master = mini_master_->master();
-    ASSERT_OK(master->WaitUntilCatalogManagerIsLeaderAndReadyForTests(
+    master_ = mini_master_->master();
+    ASSERT_OK(master_->WaitUntilCatalogManagerIsLeaderAndReadyForTests(
         MonoDelta::FromSeconds(30)));
-    ts_manager_ = master->ts_manager();
+    ts_manager_ = master_->ts_manager();
     MessengerBuilder builder("client");
     ASSERT_OK(builder.Build(&client_messenger_));
     proxy_.reset(new MasterServiceProxy(client_messenger_,
@@ -103,9 +107,8 @@ class TServerStateTest : public KuduTest {
 
   // Sets the tserver state for the given tablet server to 'state'.
   Status SetTServerState(const string& tserver_uuid, TServerStatePB state) {
-    Master* master = mini_master_->master();
-    return master->ts_manager()->SetTServerState(
-        tserver_uuid, state, master->catalog_manager()->sys_catalog());
+    return master_->ts_manager()->SetTServerState(
+        tserver_uuid, state, master_->catalog_manager()->sys_catalog());
   }
 
   // Pretends to be a tserver by sending a heartbeat to the master from the
@@ -130,8 +133,28 @@ class TServerStateTest : public KuduTest {
     return proxy_->TSHeartbeat(req, &resp_pb, &rpc);
   }
 
+  // Creates a table with the given name with a simple schema, a default
+  // replication factor, and a single partition.
+  Status CreateTable(const string& table_name) {
+    RpcController rpc;
+    CreateTableResponsePB resp;
+    CreateTableRequestPB req;
+    req.set_name(table_name);
+    RETURN_NOT_OK(SchemaToPB(GetSimpleTestSchema(), req.mutable_schema()));
+
+    RETURN_NOT_OK(proxy_->CreateTable(req, &resp, &rpc));
+    if (resp.has_error()) {
+      RETURN_NOT_OK(StatusFromPB(resp.error().status()));
+    }
+    if (!resp.has_table_id()) {
+      return Status::RuntimeError("expected table id");
+    }
+    return Status::OK();
+  }
+
  protected:
   unique_ptr<MiniMaster> mini_master_;
+  Master* master_;
   TSManager* ts_manager_;
   unique_ptr<MasterServiceProxy> proxy_;
   shared_ptr<Messenger> client_messenger_;
@@ -237,5 +260,52 @@ TEST_F(TServerStateTest, TestConcurrentSetTServerState) {
   }
 }
 
+// Test that tablet servers that are in maintenance mode don't get tablet
+// replicas placed on them.
+TEST_F(TServerStateTest, MaintenanceModeTServerDoesntGetNewReplicas) {
+  const int kNumTServers = 4;
+  vector<string> tserver_ids;
+
+  // Report to the master from a few tablet servers to register them.
+  for (int i = 0; i < kNumTServers; i++) {
+    string tserver_id = Substitute("$0-$1", kTServer, i);
+    ASSERT_OK(SendHeartbeat(tserver_id));
+    tserver_ids.emplace_back(std::move(tserver_id));
+  }
+
+  // Put one of the tablet servers in maintenance mode and create a few tables.
+  const string& first_maintenance_tserver = tserver_ids[0];
+  ASSERT_OK(SetTServerState(first_maintenance_tserver, TServerStatePB::MAINTENANCE_MODE));
+  const int kNumTables = 10;
+  for (int i = 0; i < kNumTables; i++) {
+    ASSERT_OK(CreateTable(Substitute("table-$0", i)));
+  }
+  TSDescriptorVector descs;
+  master_->ts_manager()->GetAllDescriptors(&descs);
+  for (const auto& desc : descs) {
+    if (desc->permanent_uuid() == first_maintenance_tserver) {
+      // The tablet server in maintenance mode should have had no replicas
+      // placed on it because it's in maintenance mode.
+      ASSERT_EQ(0, desc->RecentReplicaCreations());
+    } else {
+      // All others should have some. Note that we can't compare an exact
+      // number because the replica creations has a decay factor built into it.
+      ASSERT_LT(0, desc->RecentReplicaCreations());
+    }
+  }
+  // If we put another tserver in maintenance mode, we'll only have two
+  // remaining tservers, and won't be able to create new tables with the
+  // default replication factor.
+  ASSERT_OK(SetTServerState(tserver_ids[1], TServerStatePB::MAINTENANCE_MODE));
+  string sad_table_id;
+  Status s = CreateTable("sad-table");
+  ASSERT_TRUE(s.IsInvalidArgument()) << s.ToString();
+  ASSERT_STR_CONTAINS(s.ToString(), "not enough live tablet servers");
+
+  // And if we exit maintenance mode, we should be able to create tables again.
+  ASSERT_OK(SetTServerState(tserver_ids[1], TServerStatePB::NONE));
+  ASSERT_OK(CreateTable("happy-table"));
+}
+
 } // namespace master
 } // namespace kudu


Mime
View raw message