kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ale...@apache.org
Subject [kudu] 01/03: [backup] Add a basic CLI tool
Date Wed, 22 May 2019 18:31:11 GMT
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8776b742d8c2cbabaaab280b8654ecf3020b14a9
Author: Will Berkeley <wdberkeley@gmail.com>
AuthorDate: Wed May 15 16:32:59 2019 -0700

    [backup] Add a basic CLI tool
    
    This adds a CLI tool. For a set of tables, it can
    - Print out the latest backup
    - Print out the sequence of backups that would be used by a restore job
    - Print out all backups, ordered by end time
    
    An example invocation looks like
    
    $ java -cp <elided> org.apache.kudu.backup.KuduBackupCLI \
      --rootPath=hdfs:///kudu-backups \
      list_restore_sequence \
      default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e foo
     table name                                            | table id                         | end time                     | start timestamp | end timestamp | type
    -------------------------------------------------------+----------------------------------+------------------------------+-----------------+---------------+-------------
     default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T09:38:54.642-0700 | 0               | 1557765534642 | full
     default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T10:33:23.669-0700 | 1557765534642   | 1557768803669 | incremental
     default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T11:07:25.358-0700 | 1557768803669   | 1557770845358 | incremental
    
     table name                                            | table id                         | end time                     | start timestamp | end timestamp | type
    -------------------------------------------------------+----------------------------------+------------------------------+-----------------+---------------+------
     default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | 1e0ab43cb925473fbc5acf86db12eb56 | 2019-05-13T14:09:01.518-0700 | 0               | 1557781741518 | full
    
    No backups were found for 1 table(s):
    foo
    
    Notice how, due to table renames, more than one table can match a table
    name, and, of course, there may be no table for a name.
    
    Another example:
    
    $ java -cp <elided> org.apache.kudu.backup.KuduBackupCLI \
      --rootPath=hdfs:///kudu-backup-tests-again \
      list_latest \
      default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e
     table name                                            | table id                         | end time                     | start timestamp | end timestamp | type
    -------------------------------------------------------+----------------------------------+------------------------------+-----------------+---------------+-------------
     default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | b46191d75c8d46bab6dc76ba0bd345ba | 2019-05-13T11:07:25.358-0700 | 1557768803669   | 1557770845358 | incremental
     default.loadgen_auto_69c4838542ea481d81e03e8f66f7731e | 1e0ab43cb925473fbc5acf86db12eb56 | 2019-05-13T14:09:01.518-0700 | 0               | 1557781741518 | full
    
    The tool also supports TSV and CSV output, and listing out information
    about all backups by omitting an explicit list of tables.
    
    Change-Id: Ib5a4d3b44c77a06fa1b5d2b09506ba5a6e1b52c1
    Reviewed-on: http://gerrit.cloudera.org:8080/13356
    Reviewed-by: Mike Percy <mpercy@apache.org>
    Tested-by: Will Berkeley <wdberkeley@gmail.com>
---
 .../scala/org/apache/kudu/backup/BackupGraph.scala |   7 +
 .../scala/org/apache/kudu/backup/BackupIO.scala    |  15 +-
 .../org/apache/kudu/backup/KuduBackupCLI.scala     | 193 ++++++++++++++++++
 .../org/apache/kudu/backup/TestKuduBackupCLI.scala | 219 +++++++++++++++++++++
 4 files changed, 430 insertions(+), 4 deletions(-)

diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala
index 13bb9e3..be6207f 100644
--- a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala
+++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupGraph.scala
@@ -64,6 +64,13 @@ class BackupGraph(val tableId: String) {
   }
 
   /**
+   * @return all the backups in the graph.
+   */
+  def allBackups: Seq[BackupNode] = {
+    adjacencyList.values.flatten.toSeq
+  }
+
+  /**
    * @return the most recent full backup.
    * @throws IllegalStateException if no full backup exists.
    */
diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala
index 43a359d..1cf3140 100644
--- a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala
+++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/BackupIO.scala
@@ -68,9 +68,9 @@ class BackupIO(val conf: Configuration, rootPathStr: String) {
   /**
    * Return the path to the table directory.
    */
-  def tablePath(table: KuduTable): Path = {
-    val tableName = URLEncoder.encode(table.getName, "UTF-8")
-    val dirName = s"${table.getTableId}-$tableName"
+  def tablePath(tableId: String, tableName: String): Path = {
+    val encodedTableName = URLEncoder.encode(tableName, "UTF-8")
+    val dirName = s"$tableId-$encodedTableName"
     new Path(rootPath, dirName)
   }
 
@@ -78,7 +78,7 @@ class BackupIO(val conf: Configuration, rootPathStr: String) {
    * Return the backup path for a table and time.
    */
   def backupPath(table: KuduTable, timestampMs: Long): Path = {
-    new Path(tablePath(table), timestampMs.toString)
+    new Path(tablePath(table.getTableId, table.getName), timestampMs.toString)
   }
 
   /**
@@ -101,6 +101,13 @@ class BackupIO(val conf: Configuration, rootPathStr: String) {
   }
 
   /**
+   * Reads all of the backup graphs.
+   */
+  def readAllBackupGraphs(): Seq[BackupGraph] = {
+    buildBackupGraphs(listAllTableDirs(), System.currentTimeMillis())
+  }
+
+  /**
    * Reads all of the backup graphs for a given list of table names and a time filter.
    */
   def readBackupGraphsByTableName(
diff --git a/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala
new file mode 100644
index 0000000..c77abc3
--- /dev/null
+++ b/java/kudu-backup-tools/src/main/scala/org/apache/kudu/backup/KuduBackupCLI.scala
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.kudu.backup
+
+import java.text.SimpleDateFormat
+
+import org.apache.hadoop.conf.Configuration
+import scopt.OptionParser
+
+// The possible backup CLI tool actions.
+object Action extends Enumeration {
+  val LIST_LATEST, LIST_RESTORE_SEQUENCE, LIST_ALL = Value
+}
+
+// The possible backup CLI print formats.
+object Format extends Enumeration {
+  val PRETTY, TSV, CSV = Value
+}
+
+case class BackupCLIOptions(
+    action: Action.Value,
+    format: Format.Value,
+    tables: Seq[String],
+    rootPath: String)
+
+object BackupCLIOptions {
+
+  val ProgramName: String =
+    KuduBackupCLI.getClass.getCanonicalName.dropRight(1) // Remove trailing `$`
+
+  val parser: OptionParser[BackupCLIOptions] =
+    new OptionParser[BackupCLIOptions](ProgramName) {
+      opt[String]("rootPath")
+        .action((v, o) => o.copy(rootPath = v))
+        .text("The root path to search for backups. Accepts any Hadoop compatible path.")
+        .required()
+
+      arg[String]("format")
+        .validate(validateEnumeratedOption("format", Format.values.map(_.toString.toLowerCase)))
+        .action((v, o) => o.copy(format = Format.withName(v.toUpperCase)))
+        .text("The output format. One of 'pretty', 'tsv', 'csv'.")
+        .optional()
+
+      arg[String]("<action>")
+        .validate(validateEnumeratedOption("action", Action.values.map(_.toString.toLowerCase)))
+        .action((v, o) => o.copy(action = Action.withName(v.toUpperCase)))
+        .text("The action to perform. One of 'list_latest', 'list_restore_sequence', 'list_all'.")
+
+      arg[String]("<table>...")
+        .unbounded()
+        .action((v, o) => o.copy(tables = o.tables :+ v))
+        .text("A list of tables about which to print backup information. Specifying no tables includes all tables.")
+        .optional()
+
+      help("help").text("Prints this usage text")
+    }
+
+  def validateEnumeratedOption(
+      name: String,
+      optionStrings: Iterable[String]): String => Either[String, Unit] =
+    (v: String) => {
+      if (optionStrings.exists(_.equalsIgnoreCase(v))) {
+        Right(())
+      } else {
+        Left(s"$name must be one of ${optionStrings.mkString(", ")}: $v")
+      }
+    }
+
+  def parse(args: Seq[String]): Option[BackupCLIOptions] = {
+    parser.parse(args, BackupCLIOptions(null, Format.PRETTY, Seq(), null))
+  }
+}
+
+object KuduBackupCLI {
+
+  // The header for all tables printed by the tool.
+  val HEADER: Seq[String] =
+    Seq("table name", "table id", "end time", "start timestamp", "end timestamp", "type")
+
+  // Run the backup CLI tool with the given options. Like a command, returns 0 if successful, or
+  // a nonzero error code.
+  def run(options: BackupCLIOptions): Int = {
+    // Sort by table name for a consistent ordering (at least if there's no duplicate names).
+    val sortedTables = options.tables.sorted
+
+    val io: BackupIO = new BackupIO(new Configuration(), options.rootPath)
+    val backupGraphs =
+      if (sortedTables.isEmpty)
+        io.readAllBackupGraphs()
+      else
+        io.readBackupGraphsByTableName(sortedTables)
+
+    options.action match {
+      case Action.LIST_LATEST => {
+        val rows = backupGraphs.map(graph => rowForBackupNode(graph.restorePath.lastBackup))
+        printTable(options.format, rows)
+      }
+      case Action.LIST_RESTORE_SEQUENCE => {
+        val tablesOfBackups =
+          backupGraphs.map(_.restorePath.backups.map(node => rowForBackupNode(node)))
+        tablesOfBackups.foreach(table => printTable(options.format, table))
+      }
+      case Action.LIST_ALL => {
+        val tablesOfBackups = backupGraphs.map(
+          _.allBackups.sortBy(node => node.metadata.getToMs).map(node => rowForBackupNode(node)))
+        tablesOfBackups.foreach(table => printTable(options.format, table))
+      }
+    }
+    // Because of renames, one table name might map to multiple backup directories, so it's not
+    // sufficient to check the size of 'options.tables' against the size of 'backupGraphs'.
+    val foundTables = backupGraphs.map(graph => graph.backupBase.metadata.getTableName).toSet
+    val notFoundTables = options.tables.filter(table => !foundTables.contains(table))
+    if (notFoundTables.nonEmpty) {
+      Console.err.println(s"No backups were found for ${notFoundTables.size} table(s):")
+      notFoundTables.foreach(Console.err.println)
+      return 1
+    }
+    0
+  }
+
+  private def rowForBackupNode(backup: BackupNode): Seq[String] = {
+    val metadata = backup.metadata
+    val tableName = metadata.getTableName
+    val tableId = metadata.getTableId
+    val fromMs = metadata.getFromMs
+    val toMs = metadata.getToMs
+    val toDatetime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(toMs)
+    val backupType = if (fromMs == 0) "full" else "incremental"
+    Seq(tableName, tableId, toDatetime, s"$fromMs", s"$toMs", backupType)
+  }
+
+  private def formatDsv(delimiter: String, table: Seq[Seq[String]]): String = {
+    table.map(_.mkString(delimiter)).mkString("\n")
+  }
+
+  private def formatPrettyTable(table: Seq[Seq[String]]): String = {
+    if (table.isEmpty) {
+      return ""
+    }
+    // The width of a column is the width of largest cell, plus a padding of 2.
+    val colWidths = table.transpose.map(_.map(_.length).max + 2)
+    val rows = table.map { row =>
+      (row, colWidths).zipped
+        .map {
+          // 1 space on left, then pad to (padding - 1) spaces.
+          case (cell, width) => s" %-${width - 1}s".format(cell)
+        }
+        .mkString("|")
+    }
+    val separatorRow = colWidths.map("-" * _).mkString("+")
+    (rows.head +: separatorRow +: rows.tail).mkString("\n")
+  }
+
+  private def printTable(format: Format.Value, rows: Seq[Seq[String]]): Unit = {
+    if (rows.isEmpty) {
+      return
+    }
+    val table = HEADER +: rows
+    format match {
+      case Format.PRETTY => {
+        println(formatPrettyTable(table))
+      }
+      case Format.TSV => {
+        println(formatDsv("\t", table))
+      }
+      case Format.CSV => {
+        println(formatDsv(",", table))
+      }
+    }
+    println() // Spacing after the table.
+  }
+
+  def main(args: Array[String]): Unit = {
+    val options = BackupCLIOptions
+      .parse(args)
+      .getOrElse(throw new IllegalArgumentException("could not parse the arguments"))
+    System.exit(run(options))
+  }
+}
diff --git a/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCLI.scala b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCLI.scala
new file mode 100644
index 0000000..8302c8c
--- /dev/null
+++ b/java/kudu-backup-tools/src/test/scala/org/apache/kudu/backup/TestKuduBackupCLI.scala
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.kudu.backup
+
+import java.io.ByteArrayOutputStream
+import java.io.PrintStream
+import java.nio.file.Files
+import java.nio.file.Path
+import java.text.SimpleDateFormat
+
+import org.apache.commons.io.FileUtils
+import org.apache.hadoop.fs.{Path => HPath}
+import org.apache.hadoop.conf.Configuration
+import org.junit.After
+import org.junit.Assert._
+import org.junit.Before
+import org.junit.Test
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+import org.apache.kudu.backup.Backup.TableMetadataPB
+import org.apache.kudu.backup.TableMetadata.MetadataVersion
+
+class TestKuduBackupCLI {
+  val log: Logger = LoggerFactory.getLogger(getClass)
+
+  var rootDir: Path = _
+
+  @Before
+  def setUp(): Unit = {
+    rootDir = Files.createTempDirectory("backupcli")
+  }
+
+  @After
+  def tearDown(): Unit = {
+    FileUtils.deleteDirectory(rootDir.toFile)
+  }
+
+  // Create dummy table metadata and write it to the test directory.
+  private def createTableMetadata(
+      io: BackupIO,
+      tableName: String,
+      fromMs: Long,
+      toMs: Long): Unit = {
+    // Create dummy table metadata with just enough information to be used to create a BackupGraph.
+    val tableId = s"id_$tableName"
+    val metadata = TableMetadataPB
+      .newBuilder()
+      .setVersion(MetadataVersion)
+      .setFromMs(fromMs)
+      .setToMs(toMs)
+      .setTableName(tableName)
+      .setTableId(tableId)
+      .build()
+    val backupPath = new HPath(io.tablePath(tableId, tableName), s"$toMs")
+    val metadataPath = io.backupMetadataPath(backupPath)
+    io.writeTableMetadata(metadata, metadataPath)
+  }
+
+  // Helper to write a standard collection of backup metadata useful for a few tests.
+  private def createStandardTableMetadata(io: BackupIO): Unit = {
+    Seq(
+      // Two fulls and one incremental for 'taco' table.
+      ("taco", 0, 100),
+      ("taco", 0, 1000),
+      ("taco", 100, 2000),
+      // One full and two incrementals for 'pizza' table.
+      ("pizza", 0, 200),
+      ("pizza", 200, 400),
+      ("pizza", 400, 600)
+    ).foreach {
+      case (tableName: String, fromMs: Int, toMs: Int) =>
+        createTableMetadata(io, tableName, fromMs, toMs)
+    }
+  }
+
+  // Helper to format the end time column, since its value depends on the timezone of the machine
+  // where the tool is run.
+  private def endTime(toMs: Long): String = {
+    new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ").format(toMs)
+  }
+
+  @Test
+  def testListAllBackups(): Unit = {
+    val io = new BackupIO(new Configuration(), rootDir.toUri.toString)
+    createStandardTableMetadata(io)
+
+    val options =
+      BackupCLIOptions(Action.LIST_ALL, Format.CSV, Seq(), rootDir.toUri.toString)
+    val stdout = new ByteArrayOutputStream
+    Console.withOut(new PrintStream(stdout)) {
+      assertEquals(0, KuduBackupCLI.run(options))
+    }
+
+    val headerString = KuduBackupCLI.HEADER.mkString(",")
+    val expected = Seq(
+      headerString,
+      s"pizza,id_pizza,${endTime(200)},0,200,full",
+      s"pizza,id_pizza,${endTime(400)},200,400,incremental",
+      s"pizza,id_pizza,${endTime(600)},400,600,incremental",
+      "",
+      headerString,
+      s"taco,id_taco,${endTime(100)},0,100,full",
+      s"taco,id_taco,${endTime(1000)},0,1000,full",
+      s"taco,id_taco,${endTime(2000)},100,2000,incremental"
+    ).mkString("\n")
+    assertEquals(expected, stdout.toString.trim)
+  }
+
+  @Test
+  def testListLatestBackups(): Unit = {
+    val io = new BackupIO(new Configuration(), rootDir.toUri.toString)
+    createStandardTableMetadata(io)
+
+    val options =
+      BackupCLIOptions(Action.LIST_LATEST, Format.CSV, Seq(), rootDir.toUri.toString)
+    val stdout = new ByteArrayOutputStream
+    Console.withOut(new PrintStream(stdout)) {
+      assertEquals(0, KuduBackupCLI.run(options))
+    }
+
+    val headerString = KuduBackupCLI.HEADER.mkString(",")
+    val expected = Seq(
+      headerString,
+      s"pizza,id_pizza,${endTime(600)},400,600,incremental",
+      s"taco,id_taco,${endTime(2000)},100,2000,incremental"
+    ).mkString("\n")
+    assertEquals(expected, stdout.toString.trim)
+  }
+
+  @Test
+  def testListRestorePath(): Unit = {
+    val io = new BackupIO(new Configuration(), rootDir.toUri.toString)
+    createStandardTableMetadata(io)
+
+    val options =
+      BackupCLIOptions(Action.LIST_RESTORE_SEQUENCE, Format.CSV, Seq(), rootDir.toUri.toString)
+    val stdout = new ByteArrayOutputStream
+    Console.withOut(new PrintStream(stdout)) {
+      assertEquals(0, KuduBackupCLI.run(options))
+    }
+
+    val headerString = KuduBackupCLI.HEADER.mkString(",")
+    val expected = Seq(
+      headerString,
+      s"pizza,id_pizza,${endTime(200)},0,200,full",
+      s"pizza,id_pizza,${endTime(400)},200,400,incremental",
+      s"pizza,id_pizza,${endTime(600)},400,600,incremental",
+      "",
+      headerString,
+      s"taco,id_taco,${endTime(100)},0,100,full",
+      s"taco,id_taco,${endTime(2000)},100,2000,incremental"
+    ).mkString("\n")
+    assertEquals(expected, stdout.toString.trim)
+  }
+
+  @Test
+  def testTableFilter(): Unit = {
+    val io = new BackupIO(new Configuration(), rootDir.toUri.toString)
+    createStandardTableMetadata(io)
+
+    val options =
+      BackupCLIOptions(Action.LIST_ALL, Format.CSV, Seq("taco"), rootDir.toUri.toString)
+    val stdout = new ByteArrayOutputStream
+    Console.withOut(new PrintStream(stdout)) {
+      assertEquals(0, KuduBackupCLI.run(options))
+    }
+
+    val headerString = KuduBackupCLI.HEADER.mkString(",")
+    val expected = Seq(
+      headerString,
+      s"taco,id_taco,${endTime(100)},0,100,full",
+      s"taco,id_taco,${endTime(1000)},0,1000,full",
+      s"taco,id_taco,${endTime(2000)},100,2000,incremental"
+    ).mkString("\n")
+    assertEquals(expected, stdout.toString.trim)
+  }
+
+  @Test
+  def testMissingTable(): Unit = {
+    val io = new BackupIO(new Configuration(), rootDir.toUri.toString)
+    createStandardTableMetadata(io)
+
+    val options =
+      BackupCLIOptions(Action.LIST_ALL, Format.CSV, Seq("pizza", "nope"), rootDir.toUri.toString)
+    val stdout = new ByteArrayOutputStream
+    val stderr = new ByteArrayOutputStream
+    Console.withOut(new PrintStream(stdout)) {
+      Console.withErr(new PrintStream(stderr)) {
+        assertEquals(1, KuduBackupCLI.run(options))
+      }
+    }
+
+    val headerString = KuduBackupCLI.HEADER.mkString(",")
+    val expected = Seq(
+      headerString,
+      s"pizza,id_pizza,${endTime(200)},0,200,full",
+      s"pizza,id_pizza,${endTime(400)},200,400,incremental",
+      s"pizza,id_pizza,${endTime(600)},400,600,incremental"
+    ).mkString("\n")
+    assertEquals(expected, stdout.toString.trim)
+
+    assertEquals("No backups were found for 1 table(s):\nnope", stderr.toString.trim)
+  }
+}


Mime
View raw message