hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (Jira)" <j...@apache.org>
Subject [jira] [Work logged] (HIVE-25374) Fix TestReplicationOnHDFSEncryptedZones
Date Mon, 26 Jul 2021 08:54:00 GMT

     [ https://issues.apache.org/jira/browse/HIVE-25374?focusedWorklogId=627590&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-627590
]

ASF GitHub Bot logged work on HIVE-25374:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 26/Jul/21 08:53
            Start Date: 26/Jul/21 08:53
    Worklog Time Spent: 10m 
      Work Description: ayushtkn commented on a change in pull request #2519:
URL: https://github.com/apache/hive/pull/2519#discussion_r676273599



##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java
##########
@@ -113,46 +129,96 @@ public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws
Throwable {
             "'" + HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname + "'='"
                     + UserGroupInformation.getCurrentUser().getUserName() +"'");
     WarehouseInstance.Tuple tuple =
-        primary.run("use " + primaryDbName)
-            .run("create table encrypted_table (id int, value string)")
-            .run("insert into table encrypted_table values (1,'value1')")
-            .run("insert into table encrypted_table values (2,'value2')")
+            primary.run("use " + primaryDbName)
+                    .run("create table encrypted_table (id int, value string)")
+                    .run("insert into table encrypted_table values (1,'value1')")
+                    .run("insert into table encrypted_table values (2,'value2')")
+                    .dump(primaryDbName, dumpWithClause);
+
+    replica
+            .run("repl load " + primaryDbName + " into " + replicatedDbName
+                    + " with('hive.repl.add.raw.reserved.namespace'='true', "
+                    + "'hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot
+ "', "
+                    + "'hive.exec.copyfile.maxsize'='0', 'distcp.options.skipcrccheck'='')")
+            .run("use " + replicatedDbName)
+            .run("repl status " + replicatedDbName)
+            .verifyResult(tuple.lastReplicationId);
+
+    try {
+      replica
+              .run("select value from encrypted_table")
+              .verifyResults(new String[] { "value1", "value2" });
+      Assert.fail("Src EZKey shouldn't be present on target");
+    } catch (IOException e) {
+      Assert.assertTrue(e.getCause().getMessage().contains("KeyVersion name 'test_key@0'
does not exist"));
+    }
+
+    //read should pass without raw-byte distcp
+    dumpWithClause = Arrays.asList( "'" + HiveConf.ConfVars.REPL_EXTERNAL_TABLE_BASE_DIR.varname
+ "'='"
+            + replica.externalTableWarehouseRoot + "'");
+    tuple = primary.run("use " + primaryDbName)
+            .run("create external table encrypted_table2 (id int, value string)")
+            .run("insert into table encrypted_table2 values (1,'value1')")
+            .run("insert into table encrypted_table2 values (2,'value2')")
             .dump(primaryDbName, dumpWithClause);
 
     replica
-        .run("repl load " + primaryDbName + " into " + replicatedDbName
-                + " with('hive.repl.add.raw.reserved.namespace'='true', "
-                + "'hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot
+ "', "
-                + "'distcp.options.pugpbx'='', 'distcp.options.skipcrccheck'='')")
-        .run("use " + replicatedDbName)
-        .run("repl status " + replicatedDbName)
-        .verifyResult(tuple.lastReplicationId)
-        .run("select value from encrypted_table")
-        .verifyFailure(new String[] { "value1", "value2" });
+            .run("repl load " + primaryDbName + " into " + replicatedDbName
+                    + " with('hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot
+ "', "
+                    + "'hive.exec.copyfile.maxsize'='0', 'distcp.options.skipcrccheck'='')")
+            .run("use " + replicatedDbName)
+            .run("repl status " + replicatedDbName)
+            .verifyResult(tuple.lastReplicationId)
+            .run("select value from encrypted_table2")
+            .verifyResults(new String[] { "value1", "value2" });
   }
 
   @Ignore("this is ignored as minidfs cluster as of writing this test looked like did not
copy the "
               + "files correctly")
   @Test
   public void targetAndSourceHaveSameEncryptionZoneKeys() throws Throwable {
-    WarehouseInstance replica = new WarehouseInstance(LOG, miniDFSCluster,
+    String replicaBaseDir = Files.createTempDirectory("replica2").toFile().getAbsolutePath();

Review comment:
       The test is still disabled; the `@Ignore` annotation needs to be removed.

##########
File path: itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationOnHDFSEncryptedZones.java
##########
@@ -113,46 +129,96 @@ public void targetAndSourceHaveDifferentEncryptionZoneKeys() throws
Throwable {
             "'" + HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname + "'='"
                     + UserGroupInformation.getCurrentUser().getUserName() +"'");
     WarehouseInstance.Tuple tuple =
-        primary.run("use " + primaryDbName)
-            .run("create table encrypted_table (id int, value string)")
-            .run("insert into table encrypted_table values (1,'value1')")
-            .run("insert into table encrypted_table values (2,'value2')")
+            primary.run("use " + primaryDbName)
+                    .run("create table encrypted_table (id int, value string)")
+                    .run("insert into table encrypted_table values (1,'value1')")
+                    .run("insert into table encrypted_table values (2,'value2')")
+                    .dump(primaryDbName, dumpWithClause);
+
+    replica
+            .run("repl load " + primaryDbName + " into " + replicatedDbName
+                    + " with('hive.repl.add.raw.reserved.namespace'='true', "
+                    + "'hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot
+ "', "
+                    + "'hive.exec.copyfile.maxsize'='0', 'distcp.options.skipcrccheck'='')")
+            .run("use " + replicatedDbName)
+            .run("repl status " + replicatedDbName)
+            .verifyResult(tuple.lastReplicationId);
+
+    try {
+      replica
+              .run("select value from encrypted_table")
+              .verifyResults(new String[] { "value1", "value2" });
+      Assert.fail("Src EZKey shouldn't be present on target");
+    } catch (IOException e) {
+      Assert.assertTrue(e.getCause().getMessage().contains("KeyVersion name 'test_key@0'
does not exist"));
+    }
+
+    //read should pass without raw-byte distcp
+    dumpWithClause = Arrays.asList( "'" + HiveConf.ConfVars.REPL_EXTERNAL_TABLE_BASE_DIR.varname
+ "'='"
+            + replica.externalTableWarehouseRoot + "'");
+    tuple = primary.run("use " + primaryDbName)
+            .run("create external table encrypted_table2 (id int, value string)")
+            .run("insert into table encrypted_table2 values (1,'value1')")
+            .run("insert into table encrypted_table2 values (2,'value2')")
             .dump(primaryDbName, dumpWithClause);
 
     replica
-        .run("repl load " + primaryDbName + " into " + replicatedDbName
-                + " with('hive.repl.add.raw.reserved.namespace'='true', "
-                + "'hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot
+ "', "
-                + "'distcp.options.pugpbx'='', 'distcp.options.skipcrccheck'='')")
-        .run("use " + replicatedDbName)
-        .run("repl status " + replicatedDbName)
-        .verifyResult(tuple.lastReplicationId)
-        .run("select value from encrypted_table")
-        .verifyFailure(new String[] { "value1", "value2" });
+            .run("repl load " + primaryDbName + " into " + replicatedDbName
+                    + " with('hive.repl.replica.external.table.base.dir'='" + replica.externalTableWarehouseRoot
+ "', "
+                    + "'hive.exec.copyfile.maxsize'='0', 'distcp.options.skipcrccheck'='')")
+            .run("use " + replicatedDbName)
+            .run("repl status " + replicatedDbName)
+            .verifyResult(tuple.lastReplicationId)
+            .run("select value from encrypted_table2")
+            .verifyResults(new String[] { "value1", "value2" });
   }
 
   @Ignore("this is ignored as minidfs cluster as of writing this test looked like did not
copy the "
               + "files correctly")
   @Test
   public void targetAndSourceHaveSameEncryptionZoneKeys() throws Throwable {
-    WarehouseInstance replica = new WarehouseInstance(LOG, miniDFSCluster,
+    String replicaBaseDir = Files.createTempDirectory("replica2").toFile().getAbsolutePath();
+    Configuration replicaConf = new Configuration();
+    replicaConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, replicaBaseDir);
+    replicaConf.set("dfs.client.use.datanode.hostname", "true");
+    replicaConf.set("hadoop.proxyuser." + Utils.getUGI().getShortUserName() + ".hosts", "*");
+    replicaConf.set("hadoop.security.key.provider.path", "jceks://file" + jksFile);
+    replicaConf.setBoolean("dfs.namenode.delegation.token.always-use", true);
+

Review comment:
       Add the following; otherwise the test fails:
   ```
    System.setProperty("jceks.key.serialFilter",
           "java.lang.Enum;java.security.KeyRep;java.security.KeyRep$Type;javax.crypto.spec.SecretKeySpec;org.apache.hadoop.crypto.key.JavaKeyStoreProvider$KeyMetadata;!*");
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 627590)
    Time Spent: 20m  (was: 10m)

> Fix TestReplicationOnHDFSEncryptedZones
> ---------------------------------------
>
>                 Key: HIVE-25374
>                 URL: https://issues.apache.org/jira/browse/HIVE-25374
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Arko Sharma
>            Assignee: Arko Sharma
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 20m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Mime
View raw message