kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject kudu git commit: fs_manager: optimize tmp file deletion
Date Sat, 13 May 2017 00:11:23 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 6fc8cc3a6 -> c26e09ef5


fs_manager: optimize tmp file deletion

In a run of dense_node-itest, the bulk of CPU time[1] was spent
canonicalizing paths while cleaning up temporary files. This patch optimizes
that in two ways:
- Stop canonicalizing paths. We already canonicalize the WAL and data dir
  roots; that's good enough for admin-provided symlinks.
- Split WAL and data root cleaning, parallelizing the latter through the
  DataDirManager.

1. Though admittedly the majority of wall clock time was waiting on IO.

Change-Id: I07830f0cc3fd5da847361607c62a369c39e677d2
Reviewed-on: http://gerrit.cloudera.org:8080/6837
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c26e09ef
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c26e09ef
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c26e09ef

Branch: refs/heads/master
Commit: c26e09ef510f2fd4cfab7cde0bb811127ee14b7c
Parents: 6fc8cc3
Author: Adar Dembo <adar@cloudera.com>
Authored: Tue May 9 16:17:35 2017 -0700
Committer: Adar Dembo <adar@cloudera.com>
Committed: Sat May 13 00:05:40 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/data_dirs.cc  | 16 ++++++++++
 src/kudu/fs/fs_manager.cc | 71 ++++++++++--------------------------------
 src/kudu/fs/fs_manager.h  |  5 +--
 src/kudu/util/env_util.cc | 26 ++++++++++++++++
 src/kudu/util/env_util.h  |  6 ++++
 5 files changed, 67 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/c26e09ef/src/kudu/fs/data_dirs.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/data_dirs.cc b/src/kudu/fs/data_dirs.cc
index 44e9f1b..64a52a4 100644
--- a/src/kudu/fs/data_dirs.cc
+++ b/src/kudu/fs/data_dirs.cc
@@ -135,6 +135,14 @@ Status CheckHolePunch(Env* env, const string& path) {
   return Status::OK();
 }
 
+// Wrapper for env_util::DeleteTmpFilesRecursively that is suitable for parallel
+// execution on a data directory's thread pool (which requires the return value
+// be void).
+void DeleteTmpFilesRecursively(Env* env, const string& path) {
+  WARN_NOT_OK(env_util::DeleteTmpFilesRecursively(env, path),
+              "Error while deleting temp files");
+}
+
 } // anonymous namespace
 
 #define GINIT(x) x(METRIC_##x.Instantiate(entity, 0))
@@ -360,6 +368,14 @@ Status DataDirManager::Open(int max_data_dirs, LockMode mode) {
                         Substitute("Could not verify integrity of files: $0",
                                    JoinStrings(paths_, ",")));
 
+  // Use the per-dir thread pools to delete temporary files in parallel.
+  for (const auto& dd : dds) {
+    dd->ExecClosure(Bind(&DeleteTmpFilesRecursively, env_, dd->dir()));
+  }
+  for (const auto& dd : dds) {
+    dd->WaitOnClosures();
+  }
+
   // Build uuid index and data directory maps.
   UuidIndexMap dd_by_uuid_idx;
   ReverseUuidIndexMap uuid_idx_by_dd;

http://git-wip-us.apache.org/repos/asf/kudu/blob/c26e09ef/src/kudu/fs/fs_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/fs_manager.cc b/src/kudu/fs/fs_manager.cc
index d229fcf..14ca730 100644
--- a/src/kudu/fs/fs_manager.cc
+++ b/src/kudu/fs/fs_manager.cc
@@ -234,12 +234,10 @@ void FsManager::InitBlockManager() {
 Status FsManager::Open(FsReport* report) {
   RETURN_NOT_OK(Init());
 
-  // Remove leftover tmp files and fix permissions.
-  if (!read_only_) {
-    CleanTmpFiles();
-    CheckAndFixPermissions();
-  }
-
+  // Load and verify the instance metadata files.
+  //
+  // Done first to minimize side effects in the case that the configured roots
+  // are not yet initialized on disk.
   for (const string& root : canonicalized_all_fs_roots_) {
     gscoped_ptr<InstanceMetadataPB> pb(new InstanceMetadataPB);
     RETURN_NOT_OK(pb_util::ReadPBContainerFromPath(env_, GetInstanceMetadataPath(root),
@@ -253,6 +251,15 @@ Status FsManager::Open(FsReport* report) {
     }
   }
 
+  // Remove leftover temporary files from the WAL root and fix permissions.
+  //
+  // Temporary files in the data directory roots will be removed by the block
+  // manager.
+  if (!read_only_) {
+    CleanTmpFilesInWalRoot();
+    CheckAndFixPermissions();
+  }
+
   LOG_TIMING(INFO, "opening block manager") {
     RETURN_NOT_OK(block_manager_->Open(report));
   }
@@ -466,56 +473,10 @@ string FsManager::GetWalSegmentFileName(const string& tablet_id,
                                               StringPrintf("%09" PRIu64, sequence_number)));
 }
 
-void FsManager::CleanTmpFiles() {
+void FsManager::CleanTmpFilesInWalRoot() {
   DCHECK(!read_only_);
-  string canonized_path;
-  vector<string> children;
-  unordered_set<string> checked_dirs;
-  stack<string> paths;
-  for (const string& root : canonicalized_all_fs_roots_) {
-    paths.push(root);
-  }
-
-  while (!paths.empty()) {
-    string path = paths.top();
-    paths.pop();
-
-    Status s = env_->GetChildren(path, &children);
-    if (s.ok()) {
-      for (const string& child : children) {
-        if (child == "." || child == "..") continue;
-
-        // Canonicalize in case of symlinks
-        s = env_->Canonicalize(JoinPathSegments(path, child), &canonized_path);
-        if (!s.ok()) {
-          LOG(WARNING) << "Unable to get the real path: " << s.ToString();
-          continue;
-        }
-
-        bool is_directory;
-        s = env_->IsDirectory(canonized_path, &is_directory);
-        if (!s.ok()) {
-          LOG(WARNING) << "Unable to get information about file: " << s.ToString();
-          continue;
-        }
-
-        if (is_directory) {
-          // Check if we didn't handle this path yet
-          if (!ContainsKey(checked_dirs, canonized_path)) {
-            checked_dirs.insert(canonized_path);
-            paths.push(canonized_path);
-          }
-        } else if (child.find(kTmpInfix) != string::npos) {
-          s = env_->DeleteFile(canonized_path);
-          if (!s.ok()) {
-            LOG(WARNING) << "Unable to delete tmp file: " << s.ToString();
-          }
-        }
-      }
-    } else {
-      LOG(WARNING) << "Unable to read directory: " << s.ToString();
-    }
-  }
+  WARN_NOT_OK(env_util::DeleteTmpFilesRecursively(env_, canonicalized_wal_fs_root_),
+              "Error while deleting tmp files");
 }
 
 void FsManager::CheckAndFixPermissions() {

http://git-wip-us.apache.org/repos/asf/kudu/blob/c26e09ef/src/kudu/fs/fs_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/fs_manager.h b/src/kudu/fs/fs_manager.h
index e189463..3ade3ff 100644
--- a/src/kudu/fs/fs_manager.h
+++ b/src/kudu/fs/fs_manager.h
@@ -244,9 +244,10 @@ class FsManager {
                           const std::string& path,
                           const std::vector<std::string>& objects);
 
-  // Deletes temporary files left from previous execution (e.g., after a crash).
+  // Deletes leftover temporary files in canonicalized_wal_fs_root_.
+  //
   // Logs warnings in case of errors.
-  void CleanTmpFiles();
+  void CleanTmpFilesInWalRoot();
 
   // Checks that the permissions of the root data directories conform to the
   // configured umask, and tightens them as necessary if they do not.

http://git-wip-us.apache.org/repos/asf/kudu/blob/c26e09ef/src/kudu/util/env_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.cc b/src/kudu/util/env_util.cc
index 44f687a..e9117a4 100644
--- a/src/kudu/util/env_util.cc
+++ b/src/kudu/util/env_util.cc
@@ -26,6 +26,7 @@
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 
+#include "kudu/gutil/bind.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/numbers.h"
 #include "kudu/gutil/strings/split.h"
@@ -238,6 +239,31 @@ Status DeleteExcessFilesByPattern(Env* env, const string& pattern,
int max_match
   return Status::OK();
 }
 
+// Callback for DeleteTmpFilesRecursively().
+//
+// Tests 'basename' for the Kudu-specific tmp file infix, and if found,
+// deletes the file.
+static Status DeleteTmpFilesRecursivelyCb(Env* env,
+                                          Env::FileType file_type,
+                                          const string& dirname,
+                                          const string& basename) {
+  if (file_type != Env::FILE_TYPE) {
+    // Skip directories.
+    return Status::OK();
+  }
+
+  if (basename.find(kTmpInfix) != string::npos) {
+    string filename = JoinPathSegments(dirname, basename);
+    WARN_NOT_OK(env->DeleteFile(filename),
+                Substitute("Failed to remove temporary file $0", filename));
+  }
+  return Status::OK();
+}
+
+Status DeleteTmpFilesRecursively(Env* env, const string& path) {
+  return env->Walk(path, Env::PRE_ORDER, Bind(&DeleteTmpFilesRecursivelyCb, env));
+}
+
 ScopedFileDeleter::ScopedFileDeleter(Env* env, std::string path)
     : env_(DCHECK_NOTNULL(env)), path_(std::move(path)), should_delete_(true) {}
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/c26e09ef/src/kudu/util/env_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/env_util.h b/src/kudu/util/env_util.h
index 5c93584..f93d81b 100644
--- a/src/kudu/util/env_util.h
+++ b/src/kudu/util/env_util.h
@@ -74,6 +74,12 @@ Status CopyFile(Env* env, const std::string& source_path, const std::string&
des
 // defined which file will be deleted first.
 Status DeleteExcessFilesByPattern(Env* env, const std::string& pattern, int max_matches);
 
+// Traverses 'path' recursively and deletes all files matching the special Kudu
+// tmp file infix. Does not follow symlinks.
+//
+// Deletion errors generate warnings but do not halt the traversal.
+Status DeleteTmpFilesRecursively(Env* env, const std::string& path);
+
 // Deletes a file or directory when this object goes out of scope.
 //
 // The deletion may be cancelled by calling .Cancel().


Mime
View raw message