kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [1/2] kudu git commit: ksck: colorize and clean up output
Date Fri, 26 Aug 2016 22:26:07 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 362743c34 -> db6d22da4


ksck: colorize and clean up output

Dan and I were looking at some ksck output earlier and found it somewhat
hard to read. This colorizes the output and also cleans up the format to
be slightly less messy. The output now goes to stdout instead of stderr
as well, so it's easier to pipe to a file or 'less', which is a common
use case.

Example output:

Tablet 0e4e79de2c2547c091779b13735d2b73 of table 'my_table' is under-replicated: 1 replica(s)
not RUNNING
  63c4785c3b6f47f5a1c23ffbf6e52b71 (ts-023.foo.com:7050): RUNNING
  cd20c68cd23c4cf986eda0936a5691cc (ts-004.foo.com:7050): RUNNING [LEADER]
  5edf82f0516b4897b3a7991a7e67d71c (ts-028.foo.com:7050): bad state
    State:       NOT_STARTED
    Data state:  TABLET_DATA_COPYING
    Last status: TabletCopy: Downloading block 4611685629730158289 (3491/9569)

Change-Id: Icc5196d63cbcbcbb2a9aba1ff17377b678c104bd
Reviewed-on: http://gerrit.cloudera.org:8080/4129
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cf0eb4dd
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cf0eb4dd
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cf0eb4dd

Branch: refs/heads/master
Commit: cf0eb4ddae4fbcff23cc1a39171ce7ae997f1997
Parents: 362743c
Author: Todd Lipcon <todd@apache.org>
Authored: Thu Aug 25 18:23:50 2016 -0700
Committer: Adar Dembo <adar@cloudera.com>
Committed: Fri Aug 26 22:24:05 2016 +0000

----------------------------------------------------------------------
 src/kudu/tools/CMakeLists.txt         |   2 +
 src/kudu/tools/color.cc               |  80 ++++++++++
 src/kudu/tools/color.h                |  37 +++++
 src/kudu/tools/ksck-test.cc           |  70 ++++----
 src/kudu/tools/ksck.cc                | 247 ++++++++++++++++-------------
 src/kudu/tools/ksck.h                 |   9 +-
 src/kudu/tools/tool_action_cluster.cc |   1 +
 7 files changed, 299 insertions(+), 147 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tools/CMakeLists.txt b/src/kudu/tools/CMakeLists.txt
index 614bf11..fe95bad 100644
--- a/src/kudu/tools/CMakeLists.txt
+++ b/src/kudu/tools/CMakeLists.txt
@@ -30,6 +30,7 @@ set(LINK_LIBS
 )
 
 add_library(kudu_tools_util
+  color.cc
   data_gen_util.cc)
 target_link_libraries(kudu_tools_util
   ${LINK_LIBS})
@@ -75,6 +76,7 @@ add_library(ksck
 )
 target_link_libraries(ksck
   consensus
+  kudu_tools_util
   master_proto
   server_base_proto
   tserver_proto

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/color.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/color.cc b/src/kudu/tools/color.cc
new file mode 100644
index 0000000..e90761d
--- /dev/null
+++ b/src/kudu/tools/color.cc
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/tools/color.h"
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <unistd.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/flag_tags.h"
+
+DEFINE_string(color, "auto",
+              "Specifies whether output should be colorized. The default value "
+              "'auto' colorizes output if the output is a terminal. The other "
+              "valid values are 'always' or 'never'.");
+TAG_FLAG(color, stable);
+
+static bool ValidateColorFlag(const char* flagname, const string& value) {
+  if (value == "always" ||
+      value == "auto" ||
+      value == "never") {
+    return true;
+  }
+  LOG(ERROR) << "option 'color' expects \"always\", \"auto\", or \"never\"";
+  return false;
+}
+static bool dummy = google::RegisterFlagValidator(
+    &FLAGS_color, &ValidateColorFlag);
+
+
+namespace kudu {
+namespace tools {
+
+namespace {
+bool UseColor() {
+  if (FLAGS_color == "never") return false;
+  if (FLAGS_color == "always") return true;
+  return isatty(STDOUT_FILENO);
+}
+
+const char* StringForCode(AnsiCode color) {
+  if (!UseColor()) return "";
+
+  // Codes from: https://en.wikipedia.org/wiki/ANSI_escape_code
+  switch (color) {
+    case AnsiCode::RED:    return "\x1b[31m";
+    case AnsiCode::GREEN:  return "\x1b[32m";
+    case AnsiCode::YELLOW: return "\x1b[33m";
+    case AnsiCode::BLUE:   return "\x1b[34m";
+    case AnsiCode::RESET:  return "\x1b[m";
+  }
+  LOG(FATAL);
+  return "";
+}
+} // anonymous namespace
+
+string Color(AnsiCode color, StringPiece s) {
+  return strings::Substitute("$0$1$2",
+                             StringForCode(color),
+                             s,
+                             StringForCode(AnsiCode::RESET));
+}
+
+} // namespace tools
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/color.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/color.h b/src/kudu/tools/color.h
new file mode 100644
index 0000000..ef0afbe
--- /dev/null
+++ b/src/kudu/tools/color.h
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <string>
+
+#include "kudu/gutil/strings/stringpiece.h"
+
+namespace kudu {
+namespace tools {
+
+enum class AnsiCode {
+  RED,
+  YELLOW,
+  GREEN,
+  BLUE,
+  RESET
+};
+
+std::string Color(AnsiCode color, StringPiece s);
+
+} // namespace tools
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 1a7a040..aae4656 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <gflags/gflags.h>
 #include <gtest/gtest.h>
 #include <memory>
 #include <unordered_map>
@@ -25,6 +26,8 @@
 #include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/test_util.h"
 
+DECLARE_string(color);
+
 namespace kudu {
 namespace tools {
 
@@ -112,6 +115,7 @@ class KsckTest : public KuduTest {
       : master_(new MockKsckMaster()),
         cluster_(new KsckCluster(static_pointer_cast<KsckMaster>(master_))),
         ksck_(new Ksck(cluster_)) {
+    FLAGS_color = "never";
     unordered_map<string, shared_ptr<KsckTabletServer>> tablet_servers;
     for (int i = 0; i < 3; i++) {
       string name = Substitute("ts-id-$0", i);
@@ -289,25 +293,22 @@ TEST_F(KsckTest, TestBadTabletServer) {
       "ts-id-1 (<mock>): Network error: Network failure");
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-0 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Should have a replica on TS ts-id-1 (<mock>), but TS is unavailable\n"
-      "INFO: OK state on TS ts-id-0 (<mock>): RUNNING\n"
-      "INFO: OK state on TS ts-id-2 (<mock>): RUNNING\n");
+      "Tablet tablet-id-0 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): RUNNING [LEADER]\n"
+      "  ts-id-1 (<mock>): TS unavailable\n"
+      "  ts-id-2 (<mock>): RUNNING\n");
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-1 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Should have a replica on TS ts-id-1 (<mock>), but TS is unavailable\n"
-      "INFO: OK state on TS ts-id-0 (<mock>): RUNNING\n"
-      "INFO: OK state on TS ts-id-2 (<mock>): RUNNING\n");
+      "Tablet tablet-id-1 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): RUNNING [LEADER]\n"
+      "  ts-id-1 (<mock>): TS unavailable\n"
+      "  ts-id-2 (<mock>): RUNNING\n");
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-2 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Should have a replica on TS ts-id-1 (<mock>), but TS is unavailable\n"
-      "INFO: OK state on TS ts-id-0 (<mock>): RUNNING\n"
-      "INFO: OK state on TS ts-id-2 (<mock>): RUNNING\n");
+      "Tablet tablet-id-2 of table 'test' is under-replicated: 1 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): RUNNING [LEADER]\n"
+      "  ts-id-1 (<mock>): TS unavailable\n"
+      "  ts-id-2 (<mock>): RUNNING\n");
 }
 
 TEST_F(KsckTest, TestZeroTabletReplicasCheck) {
@@ -340,7 +341,7 @@ TEST_F(KsckTest, TestOneSmallReplicatedTable) {
   Status s = ksck_->ChecksumData(ChecksumOptions());
   EXPECT_EQ("Not found: No table found. Filter: table_filters=xyz", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
-                      "INFO: The cluster doesn't have any matching tables");
+                      "The cluster doesn't have any matching tables");
 
   // Test filtering with a matching table pattern.
   err_stream_.str("");
@@ -365,7 +366,8 @@ TEST_F(KsckTest, TestOneOneTabletBrokenTable) {
   Status s = RunKsck();
   EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
-                      "Tablet tablet-id-1 of table 'test' has 2 instead of 3 replicas");
+                      "Tablet tablet-id-1 of table 'test' is under-replicated: "
+                      "configuration has 2 replicas vs desired 3");
 }
 
 TEST_F(KsckTest, TestMismatchedAssignments) {
@@ -377,9 +379,11 @@ TEST_F(KsckTest, TestMismatchedAssignments) {
   Status s = RunKsck();
   EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(err_stream_.str(),
-                      "WARNING: Detected problems with Tablet tablet-id-2 of table 'test'\n"
-                      "------------------------------------------------------------\n"
-                      "WARNING: Missing a tablet replica on tablet server ts-id-0 (<mock>)\n");
+                      "Tablet tablet-id-2 of table 'test' is under-replicated: "
+                      "1 replica(s) not RUNNING\n"
+                      "  ts-id-0 (<mock>): missing [LEADER]\n"
+                      "  ts-id-1 (<mock>): RUNNING\n"
+                      "  ts-id-2 (<mock>): RUNNING\n");
 }
 
 TEST_F(KsckTest, TestTabletNotRunning) {
@@ -389,19 +393,19 @@ TEST_F(KsckTest, TestTabletNotRunning) {
   EXPECT_EQ("Corruption: 1 table(s) are bad", s.ToString());
   ASSERT_STR_CONTAINS(
       err_stream_.str(),
-      "WARNING: Detected problems with Tablet tablet-id-0 of table 'test'\n"
-      "------------------------------------------------------------\n"
-      "WARNING: Bad state on TS ts-id-0 (<mock>): FAILED\n"
-      "  Last status: \n"
-      "  Data state:  TABLET_DATA_UNKNOWN\n"
-      "WARNING: Bad state on TS ts-id-1 (<mock>): FAILED\n"
-      "  Last status: \n"
-      "  Data state:  TABLET_DATA_UNKNOWN\n"
-      "WARNING: Bad state on TS ts-id-2 (<mock>): FAILED\n"
-      "  Last status: \n"
-      "  Data state:  TABLET_DATA_UNKNOWN\n"
-      "ERROR: Tablet tablet-id-0 of table 'test' does not have a majority of "
-      "replicas in RUNNING state\n");
+      "Tablet tablet-id-0 of table 'test' is unavailable: 3 replica(s) not RUNNING\n"
+      "  ts-id-0 (<mock>): bad state [LEADER]\n"
+      "    State:       FAILED\n"
+      "    Data state:  TABLET_DATA_UNKNOWN\n"
+      "    Last status: \n"
+      "  ts-id-1 (<mock>): bad state\n"
+      "    State:       FAILED\n"
+      "    Data state:  TABLET_DATA_UNKNOWN\n"
+      "    Last status: \n"
+      "  ts-id-2 (<mock>): bad state\n"
+      "    State:       FAILED\n"
+      "    Data state:  TABLET_DATA_UNKNOWN\n"
+      "    Last status: \n");
 }
 
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 8d7e4f5..7165280 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -18,8 +18,10 @@
 #include "kudu/tools/ksck.h"
 
 #include <algorithm>
+#include <boost/optional.hpp>
 #include <glog/logging.h>
 #include <iostream>
+#include <map>
 #include <mutex>
 
 #include "kudu/consensus/quorum_util.h"
@@ -29,6 +31,7 @@
 #include "kudu/gutil/strings/human_readable.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
+#include "kudu/tools/color.h"
 #include "kudu/util/atomic.h"
 #include "kudu/util/blocking_queue.h"
 #include "kudu/util/locks.h"
@@ -38,7 +41,6 @@
 namespace kudu {
 namespace tools {
 
-using std::cerr;
 using std::cout;
 using std::endl;
 using std::ostream;
@@ -60,26 +62,23 @@ DEFINE_uint64(checksum_snapshot_timestamp, ChecksumOptions::kCurrentTimestamp,
 DEFINE_int32(fetch_replica_info_concurrency, 20,
              "Number of concurrent tablet servers to fetch replica info from.");
 
-// The stream to write output to. If this is NULL, defaults to cerr.
+// The stream to write output to. If this is NULL, defaults to cout.
 // This is used by tests to capture output.
 ostream* g_err_stream = NULL;
 
-// Print an informational message to cerr.
+// Print an informational message to cout.
 static ostream& Out() {
-  return (g_err_stream ? *g_err_stream : cerr);
-}
-static ostream& Info() {
-  return Out() << "INFO: ";
+  return (g_err_stream ? *g_err_stream : cout);
 }
 
-// Print a warning message to cerr.
+// Print a warning message to cout.
 static ostream& Warn() {
-  return Out() << "WARNING: ";
+  return Out() << Color(AnsiCode::YELLOW, "WARNING: ");
 }
 
-// Print an error message to cerr.
+// Print an error message to cout.
 static ostream& Error() {
-  return Out() << "ERROR: ";
+  return Out() << Color(AnsiCode::RED, "WARNING: ");
 }
 
 namespace {
@@ -151,7 +150,7 @@ Status Ksck::CheckMasterRunning() {
   VLOG(1) << "Connecting to the Master";
   Status s = cluster_->master()->Connect();
   if (s.ok()) {
-    Info() << "Connected to the Master" << endl;
+    Out() << "Connected to the Master" << endl;
   }
   return s;
 }
@@ -188,7 +187,7 @@ Status Ksck::FetchInfoFromTabletServers() {
   pool->Wait();
 
   if (bad_servers.Load() == 0) {
-    Info() << Substitute("Fetched info from all $0 Tablet Servers", servers_count)
<< endl;
+    Out() << Substitute("Fetched info from all $0 Tablet Servers", servers_count) <<
endl;
     return Status::OK();
   } else {
     Warn() << Substitute("Fetched info from $0 Tablet Servers, $1 weren't reachable",
@@ -221,15 +220,16 @@ Status Ksck::CheckTablesConsistency() {
     if (!VerifyTable(table)) {
       bad_tables_count++;
     }
+    Out() << endl;
   }
 
   if (tables_checked == 0) {
-    Info() << "The cluster doesn't have any matching tables" << endl;
+    Out() << "The cluster doesn't have any matching tables" << endl;
     return Status::OK();
   }
 
   if (bad_tables_count == 0) {
-    Info() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked)
<< endl;
+    Out() << Substitute("The metadata for $0 table(s) is HEALTHY", tables_checked)
<< endl;
     return Status::OK();
   } else {
     Warn() << Substitute("$0 out of $1 table(s) are not in a healthy state",
@@ -289,7 +289,7 @@ class ChecksumResultReporter : public RefCountedThreadSafe<ChecksumResultReporte
       done = responses_.WaitFor(MonoDelta::FromMilliseconds(std::min(rem_ms, 5000)));
       string status = done ? "finished in " : "running for ";
       int run_time_sec = (MonoTime::Now() - start).ToSeconds();
-      Info() << "Checksum " << status << run_time_sec << "s: "
+      Out() << "Checksum " << status << run_time_sec << "s: "
              << responses_.count() << "/" << expected_count_ << "
replicas remaining ("
              << HumanReadableNumBytes::ToString(disk_bytes_summed_.Load()) <<
" from disk, "
              << HumanReadableInt::ToString(rows_summed_.Load()) << " rows summed)"
@@ -453,7 +453,7 @@ Status Ksck::ChecksumData(const ChecksumOptions& opts) {
       return Status::ServiceUnavailable(
           "No tablet servers were available to fetch the current timestamp");
     }
-    Info() << "Using snapshot timestamp: " << options.snapshot_timestamp <<
endl;
+    Out() << "Using snapshot timestamp: " << options.snapshot_timestamp <<
endl;
   }
 
   // Kick off checksum scans in parallel. For each tablet server, we start
@@ -546,7 +546,6 @@ Status Ksck::ChecksumData(const ChecksumOptions& opts) {
 }
 
 bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table) {
-  bool good_table = true;
   const auto all_tablets = table->tablets();
   vector<shared_ptr<KsckTablet>> tablets;
   std::copy_if(all_tablets.begin(), all_tablets.end(), std::back_inserter(tablets),
@@ -555,124 +554,146 @@ bool Ksck::VerifyTable(const shared_ptr<KsckTable>& table)
{
                  });
 
   int table_num_replicas = table->num_replicas();
-  VLOG(1) << Substitute("Verifying $0 tablets for table $1 configured with num_replicas
= $2",
+  VLOG(1) << Substitute("Verifying $0 tablet(s) for table $1 configured with num_replicas
= $2",
                         tablets.size(), table->name(), table_num_replicas);
 
-  int bad_tablets_count = 0;
-  for (const shared_ptr<KsckTablet> &tablet : tablets) {
-    if (!VerifyTablet(tablet, table_num_replicas)) {
-      bad_tablets_count++;
-    }
+  map<CheckResult, int> result_counts;
+  for (const auto& tablet : tablets) {
+    auto tablet_result = VerifyTablet(tablet, table_num_replicas);
+    result_counts[tablet_result]++;
   }
-  if (bad_tablets_count == 0) {
-    Info() << Substitute("Table $0 is HEALTHY ($1 tablets checked)",
-                         table->name(), tablets.size()) << endl;
+  if (result_counts[CheckResult::OK] == tablets.size()) {
+    Out() << Substitute("Table $0 is $1 ($2 tablet(s) checked)",
+                        table->name(),
+                        Color(AnsiCode::GREEN, "HEALTHY"),
+                        tablets.size()) << endl;
+    return true;
   } else {
-    Warn() << Substitute("Table $0 has $1 bad tablets", table->name(), bad_tablets_count)
<< endl;
-    good_table = false;
+    if (result_counts[CheckResult::UNAVAILABLE] > 0) {
+      Out() << Substitute("Table $0 has $1 $2 tablet(s)",
+                          table->name(),
+                          result_counts[CheckResult::UNAVAILABLE],
+                          Color(AnsiCode::RED, "unavailable")) << endl;
+    }
+    if (result_counts[CheckResult::UNDER_REPLICATED] > 0) {
+      Out() << Substitute("Table $0 has $1 $2 tablet(s)",
+                          table->name(),
+                          result_counts[CheckResult::UNDER_REPLICATED],
+                          Color(AnsiCode::YELLOW, "under-replicated")) << endl;
+    }
+    return false;
   }
-  return good_table;
 }
 
-bool Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int table_num_replicas)
{
-  string tablet_str = Substitute("Tablet $0 of table '$1'",
+Ksck::CheckResult Ksck::VerifyTablet(const shared_ptr<KsckTablet>& tablet, int
table_num_replicas) {
+  const string tablet_str = Substitute("Tablet $0 of table '$1'",
                                  tablet->id(), tablet->table()->name());
-  vector<shared_ptr<KsckTabletReplica> > replicas = tablet->replicas();
-  vector<string> warnings, errors, infos;
-  if (check_replica_count_ && replicas.size() != table_num_replicas) {
-    warnings.push_back(Substitute("$0 has $1 instead of $2 replicas",
-                                  tablet_str, replicas.size(), table_num_replicas));
-  }
-  int leaders_count = 0;
-  int followers_count = 0;
-  int alive_count = 0;
-  int running_count = 0;
-  for (const shared_ptr<KsckTabletReplica> replica : replicas) {
+
+  // Consolidate the state of each replica into a simple struct for easier analysis.
+  struct ReplicaState {
+    KsckTabletReplica* replica;
+    KsckTabletServer* ts = nullptr;
+    tablet::TabletStatePB state = tablet::UNKNOWN;
+    boost::optional<tablet::TabletStatusPB> status_pb;
+  };
+
+  vector<ReplicaState> replica_states;
+  for (const shared_ptr<KsckTabletReplica> replica : tablet->replicas()) {
+    replica_states.emplace_back();
+    auto* repl_state = &replica_states.back();
+    repl_state->replica = replica.get();
+
     VLOG(1) << Substitute("A replica of tablet $0 is on live tablet server $1",
                           tablet->id(), replica->ts_uuid());
     // Check for agreement on tablet assignment and state between the master
     // and the tablet server.
     auto ts = FindPtrOrNull(cluster_->tablet_servers(), replica->ts_uuid());
-    if (ts && ts->is_healthy()) {
-      alive_count++;
-      auto state = ts->ReplicaState(tablet->id());
-      if (state != tablet::UNKNOWN) {
-        VLOG(1) << Substitute("Tablet server $0 agrees that it hosts a replica of $1",
-                              ts->ToString(), tablet_str);
-      }
+    repl_state->ts = ts.get();
 
-      switch (state) {
-        case tablet::RUNNING:
-          VLOG(1) << Substitute("Tablet replica for $0 on TS $1 is RUNNING",
-                                tablet_str, ts->ToString());
-          running_count++;
-          infos.push_back(Substitute("OK state on TS $0: $1",
-                                     ts->ToString(), tablet::TabletStatePB_Name(state)));
-          break;
-
-        case tablet::UNKNOWN:
-          warnings.push_back(Substitute("Missing a tablet replica on tablet server $0",
-                                        ts->ToString()));
-          break;
-
-        default: {
-          const auto& status_pb = ts->tablet_status_map().at(tablet->id());
-          warnings.push_back(
-              Substitute("Bad state on TS $0: $1\n"
-                         "  Last status: $2\n"
-                         "  Data state:  $3",
-                         ts->ToString(), tablet::TabletStatePB_Name(state),
-                         status_pb.last_status(),
-                         tablet::TabletDataState_Name(status_pb.tablet_data_state())));
-          break;
-        }
+    if (ts && ts->is_healthy()) {
+      repl_state->state = ts->ReplicaState(tablet->id());
+      if (ContainsKey(ts->tablet_status_map(), tablet->id())) {
+        repl_state->status_pb = ts->tablet_status_map().at(tablet->id());
       }
-    } else {
-      // no TS or unhealthy TS
-      warnings.push_back(Substitute("Should have a replica on TS $0, but TS is unavailable",
-                                    ts ? ts->ToString() : replica->ts_uuid()));
-    }
-    if (replica->is_leader()) {
-      VLOG(1) << Substitute("Replica at $0 is a LEADER", replica->ts_uuid());
-      leaders_count++;
-    } else if (replica->is_follower()) {
-      VLOG(1) << Substitute("Replica at $0 is a FOLLOWER", replica->ts_uuid());
-      followers_count++;
     }
   }
-  if (leaders_count == 0) {
-    errors.push_back("No leader detected");
-  }
-  VLOG(1) << Substitute("$0 has $1 leader and $2 followers",
-                        tablet_str, leaders_count, followers_count);
-  int majority_size = consensus::MajoritySize(table_num_replicas);
-  if (alive_count < majority_size) {
-    errors.push_back(Substitute("$0 does not have a majority of replicas on live tablet servers",
-                                tablet_str));
-  } else if (running_count < majority_size) {
-    errors.push_back(Substitute("$0 does not have a majority of replicas in RUNNING state",
-                                tablet_str));
-  }
-
-  bool has_issues = !warnings.empty() || !errors.empty();
-  if (has_issues) {
-    Warn() << "Detected problems with " << tablet_str << endl
-           << "------------------------------------------------------------" <<
endl;
-    for (const auto& s : warnings) {
-      Warn() << s << endl;
+
+  // Summarize the states.
+  int leaders_count = 0;
+  int running_count = 0;
+  for (const auto& r : replica_states) {
+    if (r.replica->is_leader()) {
+      leaders_count++;
     }
-    for (const auto& s : errors) {
-      Error() << s << endl;
+    if (r.state == tablet::RUNNING) {
+      running_count++;
     }
-    // We only print the 'INFO' messages on tablets that have some issues.
-    // Otherwise, it's a bit verbose.
-    for (const auto& s : infos) {
-      Info() << s << endl;
+  }
+
+  // Determine the overall health state of the tablet.
+  CheckResult result = CheckResult::OK;
+  int num_voters = replica_states.size();
+  int majority_size = consensus::MajoritySize(num_voters);
+  if (running_count < majority_size) {
+    Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
+                        tablet_str,
+                        Color(AnsiCode::RED, "unavailable"),
+                        num_voters - running_count) << endl;
+    result = CheckResult::UNAVAILABLE;
+  } else if (running_count < num_voters) {
+    Out() << Substitute("$0 is $1: $2 replica(s) not RUNNING",
+                        tablet_str,
+                        Color(AnsiCode::YELLOW, "under-replicated"),
+                        num_voters - running_count) << endl;
+    result = CheckResult::UNDER_REPLICATED;
+  } else if (check_replica_count_ && num_voters < table_num_replicas) {
+    Out() << Substitute("$0 is $1: configuration has $2 replicas vs desired $3",
+                        tablet_str,
+                        Color(AnsiCode::YELLOW, "under-replicated"),
+                        num_voters,
+                        table_num_replicas) << endl;
+    result = CheckResult::UNDER_REPLICATED;
+  } else if (leaders_count != 1) {
+    Out() << Substitute("$0 is $1: expected one LEADER replica",
+                        tablet_str, Color(AnsiCode::RED, "unavailable")) << endl;
+    result = CheckResult::UNAVAILABLE;
+  }
+
+  // In the case that we found something wrong, dump info on all the replicas
+  // to make it easy to debug.
+  if (result != CheckResult::OK) {
+    for (const ReplicaState& r : replica_states) {
+      string ts_str = r.ts ? r.ts->ToString() : r.replica->ts_uuid();
+      const char* leader_str = r.replica->is_leader() ? " [LEADER]" : "";
+
+      Out() << "  " << ts_str << ": ";
+      if (!r.ts || !r.ts->is_healthy()) {
+        Out() << Color(AnsiCode::YELLOW, "TS unavailable") << leader_str <<
endl;
+        continue;
+      }
+      if (r.state == tablet::RUNNING) {
+        Out() << Color(AnsiCode::GREEN, "RUNNING") << leader_str << endl;
+        continue;
+      }
+      if (r.status_pb == boost::none) {
+        Out() << Color(AnsiCode::YELLOW, "missing") << leader_str << endl;
+        continue;
+      }
+
+      Out() << Color(AnsiCode::YELLOW, "bad state") << leader_str << endl;
+      Out() << Substitute(
+          "    State:       $0\n"
+          "    Data state:  $1\n"
+          "    Last status: $2\n",
+          Color(AnsiCode::BLUE, tablet::TabletStatePB_Name(r.state)),
+          Color(AnsiCode::BLUE, tablet::TabletDataState_Name(r.status_pb->tablet_data_state())),
+          Color(AnsiCode::BLUE, r.status_pb->last_status()));
     }
+
     Out() << endl;
   }
 
-  return !has_issues;
+  return result;
 }
 
 } // namespace tools

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index cb78686..c512831 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -375,11 +375,18 @@ class Ksck {
   Status ChecksumData(const ChecksumOptions& options);
 
  private:
+  enum class CheckResult {
+    OK,
+    UNDER_REPLICATED,
+    UNAVAILABLE
+  };
+
   bool VerifyTable(const std::shared_ptr<KsckTable>& table);
   bool VerifyTableWithTimeout(const std::shared_ptr<KsckTable>& table,
                               const MonoDelta& timeout,
                               const MonoDelta& retry_interval);
-  bool VerifyTablet(const std::shared_ptr<KsckTablet>& tablet, int table_num_replicas);
+  CheckResult VerifyTablet(const std::shared_ptr<KsckTablet>& tablet,
+                           int table_num_replicas);
 
   const std::shared_ptr<KsckCluster> cluster_;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/cf0eb4dd/src/kudu/tools/tool_action_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_cluster.cc b/src/kudu/tools/tool_action_cluster.cc
index d198a9b..93ed5db 100644
--- a/src/kudu/tools/tool_action_cluster.cc
+++ b/src/kudu/tools/tool_action_cluster.cc
@@ -135,6 +135,7 @@ unique_ptr<Mode> BuildClusterMode() {
     .AddRequiredParameter({ "master_address", "Kudu Master RPC address of form hostname:port"
})
     .AddOptionalParameter("checksum_scan")
     .AddOptionalParameter("checksum_snapshot")
+    .AddOptionalParameter("color")
     .AddOptionalParameter("tables")
     .AddOptionalParameter("tablets")
     .Build();


Mime
View raw message