hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (Jira)" <j...@apache.org>
Subject [jira] [Work logged] (HIVE-22869) Add locking benchmark to metastore-tools/metastore-benchmarks
Date Thu, 11 Jun 2020 15:45:00 GMT

     [ https://issues.apache.org/jira/browse/HIVE-22869?focusedWorklogId=444355&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-444355
]

ASF GitHub Bot logged work on HIVE-22869:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 11/Jun/20 15:44
            Start Date: 11/Jun/20 15:44
    Worklog Time Spent: 10m 
      Work Description: deniskuzZ commented on a change in pull request #1073:
URL: https://github.com/apache/hive/pull/1073#discussion_r438868981



##########
File path: standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/ACIDBenchmarks.java
##########
@@ -0,0 +1,247 @@
+package org.apache.hadoop.hive.metastore.tools;
+
+import org.apache.hadoop.hive.metastore.api.DataOperationType;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
+import org.apache.hadoop.hive.metastore.api.LockRequest;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.config.Configuration;
+import org.apache.thrift.TException;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.hadoop.hive.metastore.tools.BenchmarkUtils.createManyTables;
+import static org.apache.hadoop.hive.metastore.tools.BenchmarkUtils.dropManyTables;
+import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper;
+
+public class ACIDBenchmarks {
+
+  private static final Logger LOG = LoggerFactory.getLogger(CoreContext.class);
+
+  @State(Scope.Benchmark)
+  public static class CoreContext {
+    @Param("1")
+    protected int howMany;
+
+    @State(Scope.Thread)
+    public static class ThreadState {
+      HMSClient client;
+
+      @Setup
+      public void doSetup() throws Exception {
+        LOG.debug("Creating client");
+        client = HMSConfig.getInstance().newClient();
+      }
+
+      @TearDown
+      public void doTearDown() throws Exception {
+        client.close();
+        LOG.debug("Closed a connection to metastore.");
+      }
+    }
+
+    @Setup
+    public void setup() {
+      LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
+      Configuration ctxConfig = ctx.getConfiguration();
+      ctxConfig.getLoggerConfig(CoreContext.class.getName()).setLevel(Level.INFO);
+      ctx.updateLoggers(ctxConfig);
+    }
+  }
+
+  @State(Scope.Benchmark)
+  public static class TestOpenTxn extends CoreContext {
+
+    @State(Scope.Thread)
+    public static class ThreadState extends CoreContext.ThreadState {
+      List<Long> openTxns = new ArrayList<>();
+
+      @TearDown
+      public void doTearDown() throws Exception {
+        client.abortTxns(openTxns);
+        LOG.debug("aborted all opened txns");
+      }
+
+      void addTxn(List<Long> openTxn) {
+        openTxns.addAll(openTxn);
+      }
+    }
+
+    @Benchmark
+    public void openTxn(TestOpenTxn.ThreadState state) throws TException {
+      state.addTxn(state.client.openTxn(howMany));
+      LOG.debug("opened txns, count=", howMany);
+    }
+  }
+
+  @State(Scope.Benchmark)
+  public static class TestLocking extends CoreContext {
+    private int nTables;
+
+    @Param("0")
+    private int nPartitions;
+
+    private List<LockComponent> lockComponents;
+
+    @Setup
+    public void setup() {
+      this.nTables = (nPartitions != 0) ? howMany / nPartitions : howMany;
+      createLockComponents();
+    }
+
+    @State(Scope.Thread)
+    public static class ThreadState extends CoreContext.ThreadState {
+      List<Long> openTxns = new ArrayList<>();
+      long txnId;
+
+      @Setup(org.openjdk.jmh.annotations.Level.Invocation)
+      public void iterSetup() {
+        txnId = executeOpenTxnAndGetTxnId(client);
+        LOG.debug("opened txn, id={}", txnId);
+        openTxns.add(txnId);
+      }
+
+      @TearDown
+      public void doTearDown() throws Exception {
+        client.abortTxns(openTxns);
+        if (BenchmarkUtils.checkTxnsCleaned(client, openTxns) == false) {
+          LOG.error("Something went wrong with the cleanup of txns");
+        }
+        LOG.debug("aborted all opened txns");
+      }
+    }
+
+    @Benchmark
+    public void lock(TestLocking.ThreadState state) {
+      LOG.debug("sending lock request");
+      executeLock(state.client, state.txnId, lockComponents);
+    }
+
+    private void createLockComponents() {
+      lockComponents = new ArrayList<>();
+
+      for (int i = 0; i < nTables; i++) {
+        for (int j = 0; j < nPartitions - (nPartitions > 1 ? 1 : 0); j++) {
+          lockComponents.add(
+            new Util.LockComponentBuilder()
+              .setDbName("default")
+              .setTableName(String.format("tmp_table_%d", i))
+              .setPartitionName("p_" + j)
+              .setShared()
+              .setOperationType(DataOperationType.SELECT)
+              .build());
+        }
+        if (nPartitions != 1) {
+          lockComponents.add(
+            new Util.LockComponentBuilder()
+              .setDbName("default")
+              .setTableName(String.format("tmp_table_%d", i))
+              .setShared()
+              .setOperationType(DataOperationType.SELECT)
+              .build());
+        }
+      }
+    }
+
+    private static long executeOpenTxnAndGetTxnId(HMSClient client) {
+      return throwingSupplierWrapper(() -> client.openTxn(1).get(0));
+    }
+
+    private void executeLock(HMSClient client, long txnId, List<LockComponent> lockComponents)
{
+      LockRequest req = new LockRequest(lockComponents, "hclient", "localhost");
+      req.setTxnid(txnId);
+      throwingSupplierWrapper(() -> client.lock(req));
+    }
+  }
+
+  @State(Scope.Benchmark)
+  public static class TestAllocateTableWriteIds extends CoreContext {
+    String dbName = "test_db";
+    String tblName = "tmp_table";
+
+    @State(Scope.Thread)
+    public static class ThreadState extends CoreContext.ThreadState {
+      List<Long> openTxns = new ArrayList<>();
+      long txnId;
+
+      @Setup
+      public void iterSetup() {

Review comment:
       rename to `doSetup`, as it is global (setup-scoped) rather than iteration-scoped

##########
File path: standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkUtils.java
##########
@@ -0,0 +1,72 @@
+package org.apache.hadoop.hive.metastore.tools;
+
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.TxnInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.IntStream;
+
+import static org.apache.hadoop.hive.metastore.tools.Util.createSchema;
+import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper;
+
+public class BenchmarkUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(BenchmarkUtils.class);
+

Review comment:
       There are too many consecutive blank lines here; please reduce them to a single one.

##########
File path: standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkUtils.java
##########
@@ -0,0 +1,72 @@
+package org.apache.hadoop.hive.metastore.tools;
+
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.TxnInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.IntStream;
+
+import static org.apache.hadoop.hive.metastore.tools.Util.createSchema;
+import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper;
+
+public class BenchmarkUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(BenchmarkUtils.class);
+
+
+  static void createManyTables(HMSClient client, int howMany, String dbName, String format)
{
+    List<FieldSchema> columns = createSchema(new ArrayList<>(Arrays.asList("name",
"string")));

Review comment:
       `Arrays.asList` already returns a `List`, so why pass it into the constructor of another `ArrayList`?

##########
File path: standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkUtils.java
##########
@@ -0,0 +1,72 @@
+package org.apache.hadoop.hive.metastore.tools;
+
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.TxnInfo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.IntStream;
+
+import static org.apache.hadoop.hive.metastore.tools.Util.createSchema;
+import static org.apache.hadoop.hive.metastore.tools.Util.throwingSupplierWrapper;
+
+public class BenchmarkUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(BenchmarkUtils.class);
+
+
+  static void createManyTables(HMSClient client, int howMany, String dbName, String format)
{
+    List<FieldSchema> columns = createSchema(new ArrayList<>(Arrays.asList("name",
"string")));
+    List<FieldSchema> partitions = createSchema(new ArrayList<>(Arrays.asList("date",
"string")));
+    IntStream.range(0, howMany)
+        .forEach(i ->
+            throwingSupplierWrapper(() -> client.createTable(
+                new Util.TableBuilder(dbName, String.format(format, i))
+                    .withType(TableType.MANAGED_TABLE)
+                    .withColumns(columns)
+                    .withPartitionKeys(partitions)
+                    .build())));
+  }
+
+  static void dropManyTables(HMSClient client, int howMany, String dbName, String format)
{
+    IntStream.range(0, howMany)
+        .forEach(i ->
+            throwingSupplierWrapper(() -> client.dropTable(dbName, String.format(format,
i))));
+  }
+
+  // Create a simple table with a single column and single partition
+  static void createPartitionedTable(HMSClient client, String dbName, String tableName) {
+    throwingSupplierWrapper(() -> client.createTable(
+        new Util.TableBuilder(dbName, tableName)
+            .withType(TableType.MANAGED_TABLE)
+            .withColumns(createSchema(Collections.singletonList("name:string")))
+            .withPartitionKeys(createSchema(Collections.singletonList("date")))
+            .build()));
+  }
+
+  static boolean checkTxnsCleaned(HMSClient client, List<Long> txnsOpenedByBenchmark)
throws InterruptedException {
+    // let's wait the default cleaner run period
+    Thread.sleep(100000);
+    List<Long> notCleanedTxns = new ArrayList<>();
+    throwingSupplierWrapper(() -> {
+      List<TxnInfo> txnInfos = client.getOpenTxnsInfo();

Review comment:
       You can use `txnInfos.stream().anyMatch(txnsOpenedByBenchmark::contains)`, and change `txnsOpenedByBenchmark` to a `Set`.

##########
File path: standalone-metastore/metastore-tools/tools-common/src/main/java/org/apache/hadoop/hive/metastore/tools/HMSClient.java
##########
@@ -345,21 +348,44 @@ boolean openTxn(int numTxns) throws TException {
     return openTxns;
   }
 
+  List<TxnInfo> getOpenTxnsInfo() throws TException {
+    return client.get_open_txns_info().getOpen_txns();
+  }
+
   boolean commitTxn(long txnId) throws TException {
     client.commit_txn(new CommitTxnRequest(txnId));
     return true;
   }
 
-  boolean abortTxn(long txnId) throws TException {
-    client.abort_txn(new AbortTxnRequest(txnId));
+  boolean abortTxns(List<Long> txnIds) throws TException {
+    client.abort_txns(new AbortTxnsRequest(txnIds));
     return true;
   }
 
-  boolean abortTxns(List<Long> txnIds) throws TException {
-    client.abort_txns(new AbortTxnsRequest(txnIds));
+  boolean allocateTableWriteIds(String dbName, String tableName, List<Long> openTxns)
throws TException {
+    AllocateTableWriteIdsRequest awiRqst = new AllocateTableWriteIdsRequest(dbName, tableName);
+    openTxns.forEach(t -> {
+      awiRqst.addToTxnIds(t);
+    });
+
+    client.allocate_table_write_ids(awiRqst);
     return true;
   }
 
+  boolean getValidWriteIds(List<String> fullTableNames) throws TException {

Review comment:
       I would expect this method to return the list of valid write IDs, not just `true`. Should we rename it?

##########
File path: standalone-metastore/metastore-tools/metastore-benchmarks/src/main/java/org/apache/hadoop/hive/metastore/tools/BenchmarkTool.java
##########
@@ -141,12 +175,62 @@ private static void saveDataFile(String location, String name,
     }
   }
 
-
   @Override
   public void run() {
-    LOG.info("Using warmup " + warmup +
-        " spin " + spinCount + " nparams " + nParameters + " threads " + nThreads);
+    LOG.info("Using warmup " + warmup + " spin " + spinCount + " nparams " + Arrays.toString(nParameters)
+ " threads "
+        + nThreads);
+    HMSConfig.getInstance().init(host, port, confDir);
+
+    if (runMode == RunModes.ALL) {

Review comment:
       I would use `switch` here and go with ALL by default




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 444355)
    Time Spent: 0.5h  (was: 20m)

> Add locking benchmark to metastore-tools/metastore-benchmarks
> -------------------------------------------------------------
>
>                 Key: HIVE-22869
>                 URL: https://issues.apache.org/jira/browse/HIVE-22869
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Zoltan Chovan
>            Assignee: Zoltan Chovan
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-22869.2.patch, HIVE-22869.3.patch, HIVE-22869.4.patch, HIVE-22869.5.patch,
HIVE-22869.6.patch, HIVE-22869.7.patch, HIVE-22869.8.patch, HIVE-22869.9.patch, HIVE-22869.patch
>
>          Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> Add the possibility to run benchmarks on opening lock in the HMS



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Mime
View raw message