kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject incubator-kudu git commit: KUDU-1288. Release WAL file descriptors when deleting tablets
Date Mon, 25 Jan 2016 05:56:10 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/master a10e3739a -> ffd84cf9c


KUDU-1288. Release WAL file descriptors when deleting tablets

This is a bug fix for an issue where we would leak WAL file FDs when
deleting tablets. Over time, this would cause Kudu to run out of file
descriptors and crash.

Change-Id: I97dfac521bcd61d2b286c9a437310f40cbe7b288
Reviewed-on: http://gerrit.cloudera.org:8080/1715
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/ffd84cf9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/ffd84cf9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/ffd84cf9

Branch: refs/heads/master
Commit: ffd84cf9c34b1c4fee74cafc04515ba313b6fd9b
Parents: a10e373
Author: Mike Percy <mpercy@apache.org>
Authored: Tue Jan 5 17:16:52 2016 -0800
Committer: Todd Lipcon <todd@apache.org>
Committed: Mon Jan 25 05:55:41 2016 +0000

----------------------------------------------------------------------
 src/kudu/consensus/log-test.cc                  | 10 ++-
 src/kudu/consensus/log.cc                       | 18 +++---
 src/kudu/consensus/mt-log-test.cc               |  5 +-
 src/kudu/integration-tests/delete_table-test.cc | 66 ++++++++++++++++++++
 4 files changed, 89 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ffd84cf9/src/kudu/consensus/log-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/log-test.cc b/src/kudu/consensus/log-test.cc
index 9433706..1d4313d 100644
--- a/src/kudu/consensus/log-test.cc
+++ b/src/kudu/consensus/log-test.cc
@@ -360,7 +360,10 @@ TEST_F(LogTest, TestSegmentRollover) {
   ASSERT_FALSE(segments.back()->HasFooter());
   ASSERT_OK(log_->Close());
 
-  ASSERT_OK(log_->GetLogReader()->GetSegmentsSnapshot(&segments));
+  gscoped_ptr<LogReader> reader;
+  ASSERT_OK(LogReader::Open(fs_manager_.get(), NULL, kTestTablet, NULL, &reader));
+  ASSERT_OK(reader->GetSegmentsSnapshot(&segments));
+
   ASSERT_TRUE(segments.back()->HasFooter());
 
   for (const scoped_refptr<ReadableLogSegment>& entry : segments) {
@@ -690,7 +693,10 @@ TEST_F(LogTest, TestWriteManyBatches) {
     uint32_t num_entries = 0;
 
     vector<scoped_refptr<ReadableLogSegment> > segments;
-    ASSERT_OK(log_->GetLogReader()->GetSegmentsSnapshot(&segments));
+
+    gscoped_ptr<LogReader> reader;
+    ASSERT_OK(LogReader::Open(fs_manager_.get(), NULL, kTestTablet, NULL, &reader));
+    ASSERT_OK(reader->GetSegmentsSnapshot(&segments));
 
     for (const scoped_refptr<ReadableLogSegment> entry : segments) {
       STLDeleteElements(&entries_);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ffd84cf9/src/kudu/consensus/log.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/log.cc b/src/kudu/consensus/log.cc
index f5c6960..05abfd8 100644
--- a/src/kudu/consensus/log.cc
+++ b/src/kudu/consensus/log.cc
@@ -810,20 +810,24 @@ Status Log::Close() {
       RETURN_NOT_OK(ReplaceSegmentInReaderUnlocked());
       log_state_ = kLogClosed;
       VLOG(1) << "Log closed";
+
+      // Release FDs held by these objects.
+      log_index_.reset();
+      reader_.reset();
+
+      if (log_hooks_) {
+        RETURN_NOT_OK_PREPEND(log_hooks_->PostClose(),
+                              "PostClose hook failed");
+      }
       return Status::OK();
+
     case kLogClosed:
       VLOG(1) << "Log already closed";
       return Status::OK();
+
     default:
       return Status::IllegalState(Substitute("Bad state for Close() $0", log_state_));
   }
-
-  if (log_hooks_) {
-    RETURN_NOT_OK_PREPEND(log_hooks_->PostClose(),
-                          "PostClose hook failed");
-  }
-
-  return Status::OK();
 }
 
 Status Log::DeleteOnDiskData(FsManager* fs_manager, const string& tablet_id) {

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ffd84cf9/src/kudu/consensus/mt-log-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/mt-log-test.cc b/src/kudu/consensus/mt-log-test.cc
index 66a8402..1b08c4d 100644
--- a/src/kudu/consensus/mt-log-test.cc
+++ b/src/kudu/consensus/mt-log-test.cc
@@ -24,6 +24,7 @@
 #include <algorithm>
 #include <vector>
 
+#include "kudu/consensus/log_index.h"
 #include "kudu/gutil/algorithm.h"
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/gutil/strings/substitute.h"
@@ -158,8 +159,10 @@ TEST_F(MultiThreadedLogTest, TestAppends) {
   }
   ASSERT_OK(log_->Close());
 
+  gscoped_ptr<LogReader> reader;
+  ASSERT_OK(LogReader::Open(fs_manager_.get(), NULL, kTestTablet, NULL, &reader));
   SegmentSequence segments;
-  ASSERT_OK(log_->GetLogReader()->GetSegmentsSnapshot(&segments));
+  ASSERT_OK(reader->GetSegmentsSnapshot(&segments));
 
   for (const SegmentSequence::value_type& entry : segments) {
     ASSERT_OK(entry->ReadEntries(&entries_));

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/ffd84cf9/src/kudu/integration-tests/delete_table-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/delete_table-test.cc b/src/kudu/integration-tests/delete_table-test.cc
index 4edbba8..49144d9 100644
--- a/src/kudu/integration-tests/delete_table-test.cc
+++ b/src/kudu/integration-tests/delete_table-test.cc
@@ -25,6 +25,7 @@
 #include "kudu/client/client-test-util.h"
 #include "kudu/common/wire_protocol-test-util.h"
 #include "kudu/gutil/stl_util.h"
+#include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/integration-tests/cluster_verifier.h"
 #include "kudu/integration-tests/external_mini_cluster-itest-base.h"
@@ -32,6 +33,7 @@
 #include "kudu/tablet/tablet.pb.h"
 #include "kudu/tserver/tserver.pb.h"
 #include "kudu/util/curl_util.h"
+#include "kudu/util/subprocess.h"
 
 using kudu::client::KuduClient;
 using kudu::client::KuduClientBuilder;
@@ -822,6 +824,70 @@ TEST_F(DeleteTableTest, TestOrphanedBlocksClearedOnDelete) {
   ASSERT_EQ(0, superblock_pb.orphaned_blocks_size()) << superblock_pb.DebugString();
 }
 
+vector<const string*> Grep(const string& needle, const vector<string>&
haystack) {
+  vector<const string*> results;
+  for (const string& s : haystack) {
+    if (s.find(needle) != string::npos) {
+      results.push_back(&s);
+    }
+  }
+  return results;
+}
+
+vector<string> ListOpenFiles(pid_t pid) {
+  string cmd = strings::Substitute("export PATH=$$PATH:/usr/bin:/usr/sbin; lsof -n -p $0",
pid);
+  vector<string> argv = { "bash", "-c", cmd };
+  string out;
+  CHECK_OK(Subprocess::Call(argv, &out));
+  vector<string> lines = strings::Split(out, "\n");
+  return lines;
+}
+
+int PrintOpenTabletFiles(pid_t pid, const string& tablet_id) {
+  vector<string> lines = ListOpenFiles(pid);
+  vector<const string*> wal_lines = Grep(tablet_id, lines);
+  LOG(INFO) << "There are " << wal_lines.size() << " open WAL files for
pid " << pid << ":";
+  for (const string* l : wal_lines) {
+    LOG(INFO) << *l;
+  }
+  return wal_lines.size();
+}
+
+// Regression test for tablet deletion FD leak. See KUDU-1288.
+TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
+  const MonoDelta timeout = MonoDelta::FromSeconds(30);
+
+  vector<string> ts_flags, master_flags;
+  NO_FATALS(StartCluster(ts_flags, master_flags, 1));
+
+  // Create the table.
+  TestWorkload workload(cluster_.get());
+  workload.set_num_replicas(1);
+  workload.Setup();
+  workload.Start();
+  while (workload.rows_inserted() < 1000) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  workload.StopAndJoin();
+
+  // Figure out the tablet id of the created tablet.
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+  ASSERT_OK(WaitForNumTabletsOnTS(ts_map_.begin()->second, 1, timeout, &tablets));
+  const string& tablet_id = tablets[0].tablet_status().tablet_id();
+
+  // Tombstone the tablet and then ensure that lsof does not list any
+  // tablet-related paths.
+  ExternalTabletServer* ets = cluster_->tablet_server(0);
+  ASSERT_OK(itest::DeleteTablet(ts_map_[ets->uuid()],
+                                tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
+  ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
+
+  // Restart the TS after deletion and then do the same lsof check again.
+  ets->Shutdown();
+  ASSERT_OK(ets->Restart());
+  ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
+}
+
 // Parameterized test case for TABLET_DATA_DELETED deletions.
 class DeleteTableDeletedParamTest : public DeleteTableTest,
                                     public ::testing::WithParamInterface<const char*>
{


Mime
View raw message