kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From granthe...@apache.org
Subject [kudu] 01/04: [backup] Add more metadata fields
Date Thu, 02 May 2019 12:45:52 GMT
This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 889086d44d72e6157e08e7089af6e946af5e6955
Author: Grant Henke <granthenke@apache.org>
AuthorDate: Fri Apr 26 11:59:26 2019 -0500

    [backup] Add more metadata fields
    
    This patch adds more metadata fields to the metadata
    file for each backup.
    
    The following was added:
    - table_id: Can be used in the future to handle dropped
    or renamed tables gracefully.
    - comment: Used to back up and restore column comments.
    - column_ids: Can be used in the future to handle
    dropped or renamed columns gracefully.
    
    Change-Id: I42458f598a523596acb9f18558e6f518719a969b
    Reviewed-on: http://gerrit.cloudera.org:8080/13130
    Tested-by: Grant Henke <granthenke@apache.org>
    Reviewed-by: Adar Dembo <adar@cloudera.com>
    Reviewed-by: Mike Percy <mpercy@apache.org>
---
 java/kudu-backup/src/main/protobuf/backup.proto     | 14 ++++++++++----
 .../org/apache/kudu/backup/TableMetadata.scala      |  8 ++++++++
 .../org/apache/kudu/backup/TestKuduBackup.scala     |  1 +
 .../src/main/java/org/apache/kudu/Schema.java       | 21 +++++++++++++++++++++
 .../java/org/apache/kudu/util/SchemaGenerator.java  |  3 ++-
 5 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/java/kudu-backup/src/main/protobuf/backup.proto b/java/kudu-backup/src/main/protobuf/backup.proto
index da2c794..5971e02 100644
--- a/java/kudu-backup/src/main/protobuf/backup.proto
+++ b/java/kudu-backup/src/main/protobuf/backup.proto
@@ -35,7 +35,6 @@ message ColumnTypeAttributesMetadataPB {
 
 // Maps to the ColumnSchema class.
 // The fields are effectively 1 to 1 mappings of those in ColumnSchema.
-// TODO (KUDU-2788): How do we handle column additions?
 message ColumnMetadataPB {
   string name = 1;
   string type = 2;
@@ -48,6 +47,7 @@ message ColumnMetadataPB {
   string encoding = 7;
   string compression = 8;
   int32 block_size = 9;
+  string comment = 10;
 }
 
 // A human readable string representation of a column value for use
@@ -110,10 +110,16 @@ message TableMetadataPB {
   string data_format = 4;
   // The name of the table.
   string table_name = 5;
+  // The internal id of the table.
+  // This is useful for detecting dropped and added tables.
+  string table_id = 6;
   // The replication factor of this table.
-  int32 num_replicas = 6;
+  int32 num_replicas = 7;
   // The metadata for the table's columns.
-  repeated ColumnMetadataPB columns = 7;
+  repeated ColumnMetadataPB columns = 8;
+  // A map of column name to internal column id.
+  // This is useful for detecting dropped and added columns.
+  map<string, int32> column_ids = 9;
   // The metadata for the table's partitions.
-  PartitionMetadataPB partitions = 8;
+  PartitionMetadataPB partitions = 10;
 }
\ No newline at end of file
diff --git a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala
index 0d2c315..f09e3d2 100644
--- a/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala
+++ b/java/kudu-backup/src/main/scala/org/apache/kudu/backup/TableMetadata.scala
@@ -17,6 +17,7 @@
 package org.apache.kudu.backup
 
 import java.math.BigDecimal
+import java.util
 
 import com.google.protobuf.StringValue
 import org.apache.commons.net.util.Base64
@@ -35,6 +36,7 @@ import org.apache.yetus.audience.InterfaceAudience
 import org.apache.yetus.audience.InterfaceStability
 
 import scala.collection.JavaConverters._
+import scala.collection.mutable
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -44,7 +46,9 @@ object TableMetadata {
   val MetadataVersion = 1
 
   def getTableMetadata(table: KuduTable, options: BackupOptions): TableMetadataPB = {
+    val columnIds = new util.HashMap[String, Integer]()
     val columns = table.getSchema.getColumns.asScala.map { col =>
+      columnIds.put(col.getName, table.getSchema.getColumnId(col.getName))
       val builder = ColumnMetadataPB
         .newBuilder()
         .setName(col.getName)
@@ -54,6 +58,7 @@ object TableMetadata {
         .setEncoding(col.getEncoding.toString)
         .setCompression(col.getCompressionAlgorithm.toString)
         .setBlockSize(col.getDesiredBlockSize)
+        .setComment(col.getComment)
       if (col.getTypeAttributes != null) {
         builder.setTypeAttributes(getTypeAttributesMetadata(col))
       }
@@ -70,7 +75,9 @@ object TableMetadata {
       .setToMs(options.toMs)
       .setDataFormat(options.format)
       .setTableName(table.getName)
+      .setTableId(table.getTableId)
       .addAllColumns(columns.asJava)
+      .putAllColumnIds(columnIds)
       .setNumReplicas(table.getNumReplicas)
       .setPartitions(getPartitionMetadata(table))
       .build()
@@ -174,6 +181,7 @@ object TableMetadata {
         .encoding(Encoding.valueOf(col.getEncoding))
         .compressionAlgorithm(CompressionAlgorithm.valueOf(col.getCompression))
         .desiredBlockSize(col.getBlockSize)
+        .comment(col.getComment)
 
       if (col.hasDefaultValue) {
         val value = valueFromString(col.getDefaultValue.getValue, colType)
diff --git a/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala b/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala
index 239194f..7533251 100644
--- a/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala
+++ b/java/kudu-backup/src/test/scala/org/apache/kudu/backup/TestKuduBackup.scala
@@ -425,6 +425,7 @@ class TestKuduBackup extends KuduTestSuite {
     Objects
       .equal(before.getCompressionAlgorithm, after.getCompressionAlgorithm) &&
     Objects.equal(before.getTypeAttributes, after.getTypeAttributes)
+    Objects.equal(before.getComment, after.getComment)
   }
 
   // Special handling because default values can be a byte array which is not
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/Schema.java b/java/kudu-client/src/main/java/org/apache/kudu/Schema.java
index 0be52aa..630a68d 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/Schema.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/Schema.java
@@ -60,6 +60,11 @@ public class Schema {
   private final Map<Integer, Integer> columnsById;
 
   /**
+   * Mapping of column name to column ID, or null if the schema does not have assigned column IDs.
+   */
+  private final Map<String, Integer> columnIdByName;
+
+  /**
    * Mapping of column index to backing byte array offset.
    */
   private final int[] columnOffsets;
@@ -105,6 +110,7 @@ public class Schema {
     this.columnOffsets = new int[columns.size()];
     this.columnsByName = new HashMap<>(columns.size());
     this.columnsById = hasColumnIds ? new HashMap<Integer, Integer>(columnIds.size()) : null;
+    this.columnIdByName = hasColumnIds ? new HashMap<String, Integer>(columnIds.size()) : null;
     int offset = 0;
     boolean hasNulls = false;
     int isDeletedIndex = NO_IS_DELETED_INDEX;
@@ -131,6 +137,10 @@ public class Schema {
           throw new IllegalArgumentException(
               String.format("Column IDs must be unique: %s", columnIds));
         }
+        if (this.columnIdByName.put(column.getName(), columnIds.get(index)) != null) {
+          throw new IllegalArgumentException(
+              String.format("Column names must be unique: %s", columnIds));
+        }
       }
 
       // If this is the IS_DELETED virtual column, set `hasIsDeleted` and `isDeletedIndex`.
@@ -308,6 +318,17 @@ public class Schema {
   }
 
   /**
+   * Get the internal column ID for a column name.
+   * @param columnName column's name
+   * @return the column ID
+   */
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  public int getColumnId(String columnName) {
+    return columnIdByName.get(columnName);
+  }
+
+  /**
    * Creates a new partial row for the schema.
    * @return a new partial row
    */
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java b/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java
index 6ac21ec..bb5c13d 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/util/SchemaGenerator.java
@@ -113,7 +113,8 @@ public class SchemaGenerator {
         .nullable(random.nextBoolean() && !key)
         .compressionAlgorithm(randomCompression())
         .desiredBlockSize(randomBlockSize())
-        .encoding(randomEncoding(type));
+        .encoding(randomEncoding(type))
+        .comment("A " + type.getName() + " column for " + name);
 
     ColumnTypeAttributes typeAttributes = null;
     if (type == Type.DECIMAL) {


Mime
View raw message