kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject [1/5] kudu git commit: Account for REINSERTs in delta stats
Date Wed, 09 Nov 2016 14:09:42 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 279b72916 -> e3a8f64b9


Account for REINSERTs in delta stats

This makes DeltaStats also account for REINSERTs. The count is not
currently used outside of tests, but it would be silly not to
count this type of delta. In the future it might be useful for
selecting undo deltas for minor delta compaction.

The protobuf field is optional to preserve data format
compatibility.

Change-Id: Idd60f6c1c12803d339f5f8d96c6b089fab21b13f
Reviewed-on: http://gerrit.cloudera.org:8080/4932
Tested-by: Kudu Jenkins
Reviewed-by: Jean-Daniel Cryans <jdcryans@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3c68deac
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3c68deac
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3c68deac

Branch: refs/heads/master
Commit: 3c68deacc05f6b5a9b82522baa38e046744b1815
Parents: 279b729
Author: David Alves <dralves@apache.org>
Authored: Mon Nov 7 21:46:42 2016 -0800
Committer: David Ribeiro Alves <dralves@apache.org>
Committed: Tue Nov 8 23:38:14 2016 +0000

----------------------------------------------------------------------
 src/kudu/tablet/delta_stats.cc | 48 ++++++++++++++++++++++++++-----------
 src/kudu/tablet/delta_stats.h  |  9 ++++++-
 src/kudu/tablet/tablet.proto   |  4 ++++
 3 files changed, 46 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3c68deac/src/kudu/tablet/delta_stats.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/delta_stats.cc b/src/kudu/tablet/delta_stats.cc
index f46becb..25b7ec8 100644
--- a/src/kudu/tablet/delta_stats.cc
+++ b/src/kudu/tablet/delta_stats.cc
@@ -24,6 +24,8 @@
 #include "kudu/tablet/tablet.pb.h"
 #include "kudu/util/bitmap.h"
 
+using strings::Substitute;
+
 namespace kudu {
 
 using std::vector;
@@ -32,6 +34,7 @@ namespace tablet {
 
 DeltaStats::DeltaStats()
     : delete_count_(0),
+      reinsert_count_(0),
       max_timestamp_(Timestamp::kMin),
       min_timestamp_(Timestamp::kMax) {
 }
@@ -45,21 +48,35 @@ void DeltaStats::IncrDeleteCount(int64_t delete_count) {
   delete_count_ += delete_count;
 }
 
+void DeltaStats::IncrReinsertCount(int64_t reinsert_count) {
+  reinsert_count_ += reinsert_count;
+}
+
 Status DeltaStats::UpdateStats(const Timestamp& timestamp,
                                const RowChangeList& update) {
-  // Decode the update, incrementing the update count for each of the
+  // Decode the mutation incrementing the update count for each of the
   // columns we find present.
-  RowChangeListDecoder update_decoder(update);
-  RETURN_NOT_OK(update_decoder.Init());
-  if (PREDICT_FALSE(update_decoder.is_delete())) {
-    IncrDeleteCount(1);
-  } else if (PREDICT_TRUE(update_decoder.is_update())) {
-    vector<ColumnId> col_ids;
-    RETURN_NOT_OK(update_decoder.GetIncludedColumnIds(&col_ids));
-    for (ColumnId col_id : col_ids) {
-      IncrUpdateCount(col_id, 1);
+  RowChangeListDecoder decoder(update);
+  RETURN_NOT_OK(decoder.Init());
+  switch (decoder.get_type()) {
+    case RowChangeList::kReinsert: {
+      IncrReinsertCount(1);
+    } // FALLTHROUGH INTENDED. REINSERTs contain column updates so we need to account
+      // for those in the updated column stats.
+    case RowChangeList::kUpdate: {
+      vector<ColumnId> col_ids;
+      RETURN_NOT_OK(decoder.GetIncludedColumnIds(&col_ids));
+      for (const ColumnId& col_id : col_ids) {
+        IncrUpdateCount(col_id, 1);
+      }
+      break;
+    }
+    case RowChangeList::kDelete: {
+      IncrDeleteCount(1);
+      break;
     }
-  } // Don't handle re-inserts
+    default: LOG(FATAL) << "Invalid mutation type: " << decoder.get_type();
+  }
 
   if (min_timestamp_.CompareTo(timestamp) > 0) {
     min_timestamp_ = timestamp;
@@ -76,6 +93,8 @@ string DeltaStats::ToString() const {
       "ts range=[$0, $1]",
       min_timestamp_.ToString(),
       max_timestamp_.ToString());
+  ret.append(Substitute(", delete_count=[$0]", delete_count_));
+  ret.append(Substitute(", reinsert_count=[$0]", reinsert_count_));
   ret.append(", update_counts_by_col_id=[");
   ret.append(JoinKeysAndValuesIterator(update_counts_by_col_id_.begin(),
                                        update_counts_by_col_id_.end(),
@@ -88,8 +107,8 @@ string DeltaStats::ToString() const {
 void DeltaStats::ToPB(DeltaStatsPB* pb) const {
   pb->Clear();
   pb->set_delete_count(delete_count_);
-  typedef std::pair<ColumnId, int64_t> entry;
-  for (const entry& e : update_counts_by_col_id_) {
+  pb->set_reinsert_count(reinsert_count_);
+  for (const auto& e : update_counts_by_col_id_) {
     DeltaStatsPB::ColumnStats* stats = pb->add_column_stats();
     stats->set_col_id(e.first);
     stats->set_update_count(e.second);
@@ -101,8 +120,9 @@ void DeltaStats::ToPB(DeltaStatsPB* pb) const {
 
 Status DeltaStats::InitFromPB(const DeltaStatsPB& pb) {
   delete_count_ = pb.delete_count();
+  reinsert_count_ = pb.reinsert_count();
   update_counts_by_col_id_.clear();
-  for (const DeltaStatsPB::ColumnStats stats : pb.column_stats()) {
+  for (const DeltaStatsPB::ColumnStats& stats : pb.column_stats()) {
     IncrUpdateCount(ColumnId(stats.col_id()), stats.update_count());
   }
   max_timestamp_.FromUint64(pb.max_timestamp());

http://git-wip-us.apache.org/repos/asf/kudu/blob/3c68deac/src/kudu/tablet/delta_stats.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/delta_stats.h b/src/kudu/tablet/delta_stats.h
index d10f4b2..a5213c0 100644
--- a/src/kudu/tablet/delta_stats.h
+++ b/src/kudu/tablet/delta_stats.h
@@ -47,6 +47,9 @@ class DeltaStats {
   // Increment the per-store delete count by 'delete_count'.
   void IncrDeleteCount(int64_t delete_count);
 
+  // Increment the per-store reinsert count by 'reinsert_count'.
+  void IncrReinsertCount(int64_t reinsert_count);
+
   // Increment delete and update counts based on changes contained in
   // 'update'.
   Status UpdateStats(const Timestamp& timestamp,
@@ -55,8 +58,11 @@ class DeltaStats {
   // Return the number of deletes in the current delta store.
   int64_t delete_count() const { return delete_count_; }
 
+  // Return the number of reinserts in the current delta store.
+  int64_t reinsert_count() const { return reinsert_count_; }
+
   // Returns number of updates for a given column.
-  int64_t update_count_for_col_id(ColumnId col_id) const {
+  int64_t update_count_for_col_id(const ColumnId& col_id) const {
     return FindWithDefault(update_counts_by_col_id_, col_id, 0);
   }
 
@@ -95,6 +101,7 @@ class DeltaStats {
  private:
   std::unordered_map<ColumnId, int64_t> update_counts_by_col_id_;
   uint64_t delete_count_;
+  uint64_t reinsert_count_;
   Timestamp max_timestamp_;
   Timestamp min_timestamp_;
 };

http://git-wip-us.apache.org/repos/asf/kudu/blob/3c68deac/src/kudu/tablet/tablet.proto
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.proto b/src/kudu/tablet/tablet.proto
index 468add3..975c3be 100644
--- a/src/kudu/tablet/tablet.proto
+++ b/src/kudu/tablet/tablet.proto
@@ -64,6 +64,10 @@ message DeltaStatsPB {
   // Number of deletes (deletes result in deletion of an entire row)
   required int64 delete_count = 1;
 
+  // Number of reinserts.
+  // Optional for data format compatibility.
+  optional int64 reinsert_count = 6 [ default = 0];
+
   // REMOVED: replaced by column_stats, which maps by column ID,
   // whereas this older version mapped by index.
   // repeated int64 per_column_update_count = 2;


Mime
View raw message