kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject kudu git commit: tool: port cfile-dump to 'kudu fs dump_cfile'
Date Mon, 29 Aug 2016 22:25:06 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 25f5c215e -> b7d2780ff


tool: port cfile-dump to 'kudu fs dump_cfile'

Some non-cosmetic changes:
- I changed the block_id conversion into something nicer than a CHECK.
- The block_id parameter is expected in base 10, not base 16. To be honest,
  cfile-dump should have used base 10 for quite some time, because that's
  how they're printed in dumped PBs.
- I dropped the num_iterations parameter because it didn't seem useful.

Change-Id: I30cbaa6552e88348cebbf3059390a4c252eb7f8e
Reviewed-on: http://gerrit.cloudera.org:8080/4151
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b7d2780f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b7d2780f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b7d2780f

Branch: refs/heads/master
Commit: b7d2780ffee2b5d8b88ff1801d3e40d54c773f06
Parents: 25f5c21
Author: Adar Dembo <adar@cloudera.com>
Authored: Sun Aug 28 12:18:49 2016 -0700
Committer: Adar Dembo <adar@cloudera.com>
Committed: Mon Aug 29 22:23:44 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/CMakeLists.txt    |  4 --
 src/kudu/cfile/cfile-dump.cc     | 93 -----------------------------------
 src/kudu/cfile/cfile-test-base.h |  6 +--
 src/kudu/tools/kudu-tool-test.cc | 64 ++++++++++++++++++++++++
 src/kudu/tools/tool_action_fs.cc | 70 +++++++++++++++++++++++---
 5 files changed, 131 insertions(+), 106 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/cfile/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/CMakeLists.txt b/src/kudu/cfile/CMakeLists.txt
index 2bd5ca4..ad2a961 100644
--- a/src/kudu/cfile/CMakeLists.txt
+++ b/src/kudu/cfile/CMakeLists.txt
@@ -65,7 +65,3 @@ ADD_KUDU_TEST(bloomfile-test)
 ADD_KUDU_TEST(mt-bloomfile-test)
 ADD_KUDU_TEST(block_cache-test)
 ADD_KUDU_TEST(compression-test)
-
-# Tools
-add_executable(cfile-dump cfile-dump.cc)
-target_link_libraries(cfile-dump cfile ${KUDU_BASE_LIBS})

http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/cfile/cfile-dump.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile-dump.cc b/src/kudu/cfile/cfile-dump.cc
deleted file mode 100644
index 2b279d0..0000000
--- a/src/kudu/cfile/cfile-dump.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-#include <iostream>
-
-#include "kudu/cfile/cfile_reader.h"
-#include "kudu/cfile/cfile_util.h"
-#include "kudu/fs/block_id.h"
-#include "kudu/fs/fs_manager.h"
-#include "kudu/util/logging.h"
-#include "kudu/util/flags.h"
-
-DEFINE_bool(print_meta, true, "print the header and footer from the file");
-DEFINE_bool(iterate_rows, true, "iterate each row in the file");
-DEFINE_int32(num_iterations, 1, "number of times to iterate the file");
-
-namespace kudu {
-namespace cfile {
-
-using std::string;
-using std::cout;
-using std::endl;
-
-Status DumpFile(const string& block_id_str) {
-  // Allow read-only access to live blocks.
-  FsManagerOpts fs_opts;
-  fs_opts.read_only = true;
-  FsManager fs_manager(Env::Default(), fs_opts);
-  RETURN_NOT_OK(fs_manager.Open());
-
-  uint64_t numeric_id;
-  CHECK(safe_strtou64_base(block_id_str, &numeric_id, 16));
-  BlockId block_id(numeric_id);
-  gscoped_ptr<fs::ReadableBlock> block;
-  RETURN_NOT_OK(fs_manager.OpenBlock(block_id, &block));
-
-  gscoped_ptr<CFileReader> reader;
-  RETURN_NOT_OK(CFileReader::Open(std::move(block), ReaderOptions(), &reader));
-
-  if (FLAGS_print_meta) {
-    cout << "Header:\n" << reader->header().DebugString() << endl;
-    cout << "Footer:\n" << reader->footer().DebugString() << endl;
-  }
-
-  if (FLAGS_iterate_rows) {
-    gscoped_ptr<CFileIterator> it;
-    RETURN_NOT_OK(reader->NewIterator(&it, CFileReader::DONT_CACHE_BLOCK));
-
-    for (int i = 0; i < FLAGS_num_iterations; i++) {
-      RETURN_NOT_OK(it->SeekToFirst());
-      RETURN_NOT_OK(DumpIterator(*reader, it.get(), &cout, 0, 0));
-    }
-  }
-
-  return Status::OK();
-}
-
-} // namespace cfile
-} // namespace kudu
-
-int main(int argc, char **argv) {
-  kudu::ParseCommandLineFlags(&argc, &argv, true);
-  kudu::InitGoogleLoggingSafe(argv[0]);
-  if (argc != 2) {
-    std::cerr << "usage: " << argv[0]
-              << " -fs_wal_dir <dir> -fs_data_dirs <dirs> <block id>" << std::endl;
-    return 1;
-  }
-
-  kudu::Status s = kudu::cfile::DumpFile(argv[1]);
-  if (!s.ok()) {
-    std::cerr << "Error: " << s.ToString() << std::endl;
-    return 1;
-  }
-
-  return 0;
-}

http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/cfile/cfile-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h
index a5e1b9b..7213bfa 100644
--- a/src/kudu/cfile/cfile-test-base.h
+++ b/src/kudu/cfile/cfile-test-base.h
@@ -388,7 +388,7 @@ SumType FastSum(const Indexable &data, size_t n) {
 }
 
 template<DataType Type, typename SumType>
-static void TimeReadFileForDataType(gscoped_ptr<CFileIterator> &iter, int &count) {
+void TimeReadFileForDataType(gscoped_ptr<CFileIterator> &iter, int &count) {
   ScopedColumnBlock<Type> cb(8192);
 
   SumType sum = 0;
@@ -404,7 +404,7 @@ static void TimeReadFileForDataType(gscoped_ptr<CFileIterator> &iter, int &count
 }
 
 template<DataType Type>
-static void ReadBinaryFile(CFileIterator* iter, int* count) {
+void ReadBinaryFile(CFileIterator* iter, int* count) {
   ScopedColumnBlock<Type> cb(100);
   uint64_t sum_lens = 0;
   while (iter->HasNext()) {
@@ -420,7 +420,7 @@ static void ReadBinaryFile(CFileIterator* iter, int* count) {
   LOG(INFO) << "Count: " << *count;
 }
 
-static void TimeReadFile(FsManager* fs_manager, const BlockId& block_id, size_t *count_ret) {
+void TimeReadFile(FsManager* fs_manager, const BlockId& block_id, size_t *count_ret) {
   Status s;
 
   gscoped_ptr<fs::ReadableBlock> source;

http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/tools/kudu-tool-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index 877439f..637cdf9 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -21,7 +21,12 @@
 #include <gtest/gtest.h>
 #include <glog/stl_logging.h>
 
+#include "kudu/cfile/cfile-test-base.h"
+#include "kudu/cfile/cfile_util.h"
+#include "kudu/cfile/cfile_writer.h"
+#include "kudu/fs/block_manager.h"
 #include "kudu/fs/fs_manager.h"
+#include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/strings/split.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/env.h"
@@ -34,6 +39,10 @@
 namespace kudu {
 namespace tools {
 
+using cfile::CFileWriter;
+using cfile::StringDataGenerator;
+using cfile::WriterOptions;
+using fs::WritableBlock;
 using std::string;
 using std::vector;
 using strings::Substitute;
@@ -258,5 +267,60 @@ TEST_F(ToolTest, TestPbcDump) {
   }
 }
 
+TEST_F(ToolTest, TestFsDumpCFile) {
+  const int kNumEntries = 8192;
+  const string kTestDir = GetTestPath("test");
+  FsManager fs(env_.get(), kTestDir);
+  ASSERT_OK(fs.CreateInitialFileSystemLayout());
+  ASSERT_OK(fs.Open());
+
+  gscoped_ptr<WritableBlock> block;
+  ASSERT_OK(fs.CreateNewBlock(&block));
+  BlockId block_id = block->id();
+  StringDataGenerator<false> generator("hello %04d");
+  WriterOptions opts;
+  opts.write_posidx = true;
+  CFileWriter writer(opts, GetTypeInfo(generator.kDataType),
+                     generator.has_nulls(), std::move(block));
+  ASSERT_OK(writer.Start());
+  generator.Build(kNumEntries);
+  ASSERT_OK_FAST(writer.AppendEntries(generator.values(), kNumEntries));
+  ASSERT_OK(writer.Finish());
+
+  vector<string> stdout;
+  {
+    NO_FATALS(RunTestAction(Substitute(
+        "fs dump_cfile --fs_wal_dir=$0 $1 --noprint_meta --noprint_rows",
+        kTestDir, block_id.ToString()), &stdout));
+    SCOPED_TRACE(stdout);
+    ASSERT_TRUE(stdout.empty());
+  }
+  {
+    NO_FATALS(RunTestAction(Substitute(
+        "fs dump_cfile --fs_wal_dir=$0 $1 --noprint_rows",
+        kTestDir, block_id.ToString()), &stdout));
+    SCOPED_TRACE(stdout);
+    ASSERT_GE(stdout.size(), 4);
+    ASSERT_EQ(stdout[0], "Header:");
+    ASSERT_EQ(stdout[3], "Footer:");
+  }
+  {
+    NO_FATALS(RunTestAction(Substitute(
+        "fs dump_cfile --fs_wal_dir=$0 $1 --noprint_meta",
+        kTestDir, block_id.ToString()), &stdout));
+    SCOPED_TRACE(stdout);
+    ASSERT_EQ(kNumEntries, stdout.size());
+  }
+  {
+    NO_FATALS(RunTestAction(Substitute(
+        "fs dump_cfile --fs_wal_dir=$0 $1",
+        kTestDir, block_id.ToString()), &stdout));
+    SCOPED_TRACE(stdout);
+    ASSERT_GT(stdout.size(), kNumEntries);
+    ASSERT_EQ(stdout[0], "Header:");
+    ASSERT_EQ(stdout[3], "Footer:");
+  }
+}
+
 } // namespace tools
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/b7d2780f/src/kudu/tools/tool_action_fs.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_fs.cc b/src/kudu/tools/tool_action_fs.cc
index 3b9d269..63fd702 100644
--- a/src/kudu/tools/tool_action_fs.cc
+++ b/src/kudu/tools/tool_action_fs.cc
@@ -24,20 +24,31 @@
 #include <boost/optional/optional.hpp>
 #include <gflags/gflags.h>
 
+#include "kudu/cfile/cfile_reader.h"
+#include "kudu/cfile/cfile_util.h"
 #include "kudu/fs/fs_manager.h"
+#include "kudu/gutil/strings/numbers.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/util/status.h"
 
+DEFINE_bool(print_meta, true,
+            "Print the header and footer from the CFile");
+DEFINE_bool(print_rows, true,
+            "Print each row in the CFile");
+DEFINE_string(uuid, "",
+              "The uuid to use in the filesystem. If not provided, one is generated");
+namespace kudu {
+namespace tools {
+
+using cfile::CFileReader;
+using cfile::CFileIterator;
+using cfile::ReaderOptions;
 using std::cout;
 using std::endl;
 using std::string;
 using std::unique_ptr;
 using std::vector;
-
-DEFINE_string(uuid, "",
-              "The uuid to use in the filesystem. If not provided, one is generated");
-
-namespace kudu {
-namespace tools {
+using strings::Substitute;
 
 namespace {
 
@@ -59,6 +70,42 @@ Status PrintUuid(const RunnerContext& context) {
   return Status::OK();
 }
 
+Status DumpCFile(const RunnerContext& context) {
+  string block_id_str = FindOrDie(context.required_args, "block_id");
+  uint64_t numeric_id;
+  if (!safe_strtou64(block_id_str, &numeric_id)) {
+    return Status::InvalidArgument(Substitute(
+        "Could not parse $0 as numeric block ID", block_id_str));
+  }
+  BlockId block_id(numeric_id);
+
+  FsManagerOpts fs_opts;
+  fs_opts.read_only = true;
+  FsManager fs_manager(Env::Default(), fs_opts);
+  RETURN_NOT_OK(fs_manager.Open());
+
+  gscoped_ptr<fs::ReadableBlock> block;
+  RETURN_NOT_OK(fs_manager.OpenBlock(block_id, &block));
+
+  gscoped_ptr<CFileReader> reader;
+  RETURN_NOT_OK(CFileReader::Open(std::move(block), ReaderOptions(), &reader));
+
+  if (FLAGS_print_meta) {
+    cout << "Header:\n" << reader->header().DebugString() << endl;
+    cout << "Footer:\n" << reader->footer().DebugString() << endl;
+  }
+
+  if (FLAGS_print_rows) {
+    gscoped_ptr<CFileIterator> it;
+    RETURN_NOT_OK(reader->NewIterator(&it, CFileReader::DONT_CACHE_BLOCK));
+    RETURN_NOT_OK(it->SeekToFirst());
+
+    RETURN_NOT_OK(DumpIterator(*reader, it.get(), &cout, 0, 0));
+  }
+
+  return Status::OK();
+}
+
 } // anonymous namespace
 
 unique_ptr<Mode> BuildFsMode() {
@@ -77,10 +124,21 @@ unique_ptr<Mode> BuildFsMode() {
       .AddOptionalParameter("fs_data_dirs")
       .Build();
 
+  unique_ptr<Action> dump_cfile =
+      ActionBuilder("dump_cfile", &DumpCFile)
+      .Description("Dump the contents of a CFile (column file)")
+      .AddRequiredParameter({ "block_id", "block identifier" })
+      .AddOptionalParameter("fs_wal_dir")
+      .AddOptionalParameter("fs_data_dirs")
+      .AddOptionalParameter("print_meta")
+      .AddOptionalParameter("print_rows")
+      .Build();
+
   return ModeBuilder("fs")
       .Description("Operate on a local Kudu filesystem")
       .AddAction(std::move(format))
       .AddAction(std::move(print_uuid))
+      .AddAction(std::move(dump_cfile))
       .Build();
 }
 


Mime
View raw message