helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jiajunw...@apache.org
Subject [helix] branch helix-0.9.x updated: Fix flaky test testGetChildrenOnLargeNumChildren (#1194)
Date Fri, 09 Oct 2020 20:18:48 GMT
This is an automated email from the ASF dual-hosted git repository.

jiajunwang pushed a commit to branch helix-0.9.x
in repository https://gitbox.apache.org/repos/asf/helix.git


The following commit(s) were added to refs/heads/helix-0.9.x by this push:
     new 75b9688  Fix flaky test testGetChildrenOnLargeNumChildren (#1194)
75b9688 is described below

commit 75b9688c7e089f6e5b06a75237e6fa84a4d995cc
Author: Huizhi Lu <ihuizhi.lu@gmail.com>
AuthorDate: Wed Aug 5 10:56:55 2020 -0700

    Fix flaky test testGetChildrenOnLargeNumChildren (#1194)
    
    testGetChildrenOnLargeNumChildren becomes flaky after more commits are checked in because
reflection doesn't work as expected. This commit fixes it by replacing the reflection with
the creation of 110K children for the test.
---
 .../helix/manager/zk/zookeeper/ZkClient.java       | 10 +--
 .../apache/helix/manager/zk/TestRawZkClient.java   | 75 +++++++++-------------
 2 files changed, 33 insertions(+), 52 deletions(-)

diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java b/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
index 89b3c7a..6f7c8ef 100644
--- a/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
+++ b/helix-core/src/main/java/org/apache/helix/manager/zk/zookeeper/ZkClient.java
@@ -77,8 +77,9 @@ public class ZkClient implements Watcher {
 
   // If number of children exceeds this limit, getChildren() should not retry on connection loss.
   // This is a workaround for exiting retry on connection loss because of large number of children.
+  // 100K is specific for helix messages which use UUID, making packet length just below 4 MB.
   // TODO: remove it once we have a better way to exit retry for this case
-  private static final int NUM_CHILDREN_LIMIT;
+  private static final int NUM_CHILDREN_LIMIT = 100 * 1000;
 
   private final IZkConnection _connection;
   private final long _operationRetryTimeoutInMillis;
@@ -96,13 +97,6 @@ public class ZkClient implements Watcher {
   private PathBasedZkSerializer _pathBasedZkSerializer;
   private ZkClientMonitor _monitor;
 
-  static {
-    // 100K is specific for helix messages which use UUID, making packet length just below 4 MB.
-    // Set it here for unit test to use reflection to change value
-    // because compilers optimize constants by replacing them inline.
-    NUM_CHILDREN_LIMIT = 100 * 1000;
-  }
-
   private class IZkDataListenerEntry {
     final IZkDataListener _dataListener;
     final boolean _prefetchData;
diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
index 9cea4c2..1cc2f2f 100644
--- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
+++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestRawZkClient.java
@@ -27,6 +27,7 @@ import org.apache.helix.SystemPropertyKeys;
 import org.apache.helix.TestHelper;
 import org.apache.helix.ZNRecord;
 import org.apache.helix.ZkUnitTestBase;
+import org.apache.helix.common.ZkTestBase;
 import org.apache.helix.manager.zk.zookeeper.ZkConnection;
 import org.apache.helix.monitoring.mbeans.MBeanRegistrar;
 import org.apache.helix.monitoring.mbeans.MonitorDomainNames;
@@ -44,8 +45,8 @@ import org.testng.annotations.Test;
 import javax.management.MBeanServer;
 import javax.management.ObjectName;
 import java.lang.management.ManagementFactory;
-import java.lang.reflect.Field;
-import java.lang.reflect.Modifier;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Random;
 import java.util.UUID;
 import java.util.concurrent.ExecutorService;
@@ -413,66 +414,52 @@ public class TestRawZkClient extends ZkUnitTestBase {
     }
   }
 
-
   /*
    * Tests getChildren() when there are an excessive number of children and connection loss happens,
    * the operation should terminate and exit retry loop.
    */
-  @Test
+  @Test(timeOut = 30 * 1000L)
   public void testGetChildrenOnLargeNumChildren() throws Exception {
-    // Default packetLen is 4M. It is static final and initialized
-    // when first zkClient is created.
-    // So we could not just set "jute.maxbuffer" to change the value.
-    // Reflection is needed to change the value.
-    // Remove "final" modifier
-    Field modifiersField = Field.class.getDeclaredField("modifiers");
-    boolean isModifierAccessible = modifiersField.isAccessible();
-    modifiersField.setAccessible(true);
-
-    Field packetLenField = ClientCnxn.class.getDeclaredField("packetLen");
-    Field childrenLimitField =
-            org.apache.helix.manager.zk.zookeeper.ZkClient.class.getDeclaredField("NUM_CHILDREN_LIMIT");
-    modifiersField.setInt(packetLenField, packetLenField.getModifiers() & ~Modifier.FINAL);
-    modifiersField.setInt(childrenLimitField, childrenLimitField.getModifiers() & ~Modifier.FINAL);
-
-    boolean isPacketLenAccessible = packetLenField.isAccessible();
-    packetLenField.setAccessible(true);
-    int originPacketLen = packetLenField.getInt(null);
-    // Keep 150 bytes for successfully creating each child node.
-    packetLenField.set(null, 150);
-
-    boolean isChildrenLimitAccessible = childrenLimitField.isAccessible();
-    childrenLimitField.setAccessible(true);
-    int originChildrenLimit = childrenLimitField.getInt(null);
-    childrenLimitField.set(null, 2);
-
-    String path = "/" + TestHelper.getTestMethodName();
-    // Create 5 children to make packet length of children exceed 150 bytes
+    final String methodName = TestHelper.getTestMethodName();
+    System.out.println("Start test: " + methodName);
+    // Create 110K children to make packet length of children exceed 4 MB
     // and cause connection loss for getChildren() operation
-    for (int i = 0; i < 5; i++) {
-      _zkClient.createPersistent(path + "/" + UUID.randomUUID().toString(), true);
+    String path = "/" + methodName;
+
+    _zkClient.createPersistent(path);
+
+    for (int i = 0; i < 110; i++) {
+      List<Op> ops = new ArrayList<>(1000);
+      for (int j = 0; j < 1000; j++) {
+        String childPath = path + "/" + UUID.randomUUID().toString();
+        // Create ephemeral nodes so closing zkClient deletes them for cleanup
+        ops.add(
+                Op.create(childPath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL));
+      }
+      // Reduce total creation time by batch creating znodes
+      _zkClient.multi(ops);
     }
 
     try {
       _zkClient.getChildren(path);
-      Assert.fail("Should not successfully get children.");
+      Assert.fail("Should not successfully get children because of connection loss.");
     } catch (ZkException expected) {
       Assert.assertEquals(expected.getMessage(),
               "org.apache.zookeeper.KeeperException$MarshallingErrorException: "
                       + "KeeperErrorCode = MarshallingError");
     } finally {
-      packetLenField.set(null, originPacketLen);
-      packetLenField.setAccessible(isPacketLenAccessible);
-
-      childrenLimitField.set(null, originChildrenLimit);
-      childrenLimitField.setAccessible(isChildrenLimitAccessible);
-
-      modifiersField.setAccessible(isModifierAccessible);
+      // Delete children ephemeral znodes
+      _zkClient.close();
+      _zkClient = new ZkClient(ZkTestBase.ZK_ADDR);
 
       Assert.assertTrue(TestHelper.verify(() -> {
-        _zkClient.deleteRecursively(path);
-        return !_zkClient.exists(path);
+        try {
+          return _zkClient.delete(path);
+        } catch (ZkException e) {
+          return false;
+        }
       }, TestHelper.WAIT_DURATION));
     }
+    System.out.println("End test: " + methodName);
   }
 }


Mime
View raw message