avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rskr...@apache.org
Subject [avro] branch master updated: AVRO-2905: Fix Utf8 hash cache (#955)
Date Wed, 30 Sep 2020 15:27:18 GMT
This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a6aa43  AVRO-2905: Fix Utf8 hash cache (#955)
9a6aa43 is described below

commit 9a6aa43c8699ccde7bef485206928acb35378bdb
Author: Kyoungha Min <kyoungha@molocoads.com>
AuthorDate: Wed Sep 30 08:27:07 2020 -0700

    AVRO-2905: Fix Utf8 hash cache (#955)
    
    * AVRO-2905: Fix Utf8 hash cache
    
    * AVRO-2905: Reflect Comment, add length check
---
 .../src/main/java/org/apache/avro/util/Utf8.java   | 52 ++++++++++++++--------
 .../test/java/org/apache/avro/util/TestUtf8.java   | 34 +++++++++++++-
 2 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 4a8f2ae..879a897 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -47,18 +47,21 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
     MAX_LENGTH = i;
   }
 
-  private byte[] bytes = EMPTY;
-  private int hash = 0;
-  private boolean hasHash = false;
+  private byte[] bytes;
+  private int hash;
   private int length;
   private String string;
 
   public Utf8() {
+    bytes = EMPTY;
   }
 
   public Utf8(String string) {
-    this.bytes = getBytesFor(string);
-    this.length = bytes.length;
+    byte[] bytes = getBytesFor(string);
+    int length = bytes.length;
+    checkLength(length);
+    this.bytes = bytes;
+    this.length = length;
     this.string = string;
   }
 
@@ -66,11 +69,14 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
     this.length = other.length;
     this.bytes = Arrays.copyOf(other.bytes, other.length);
     this.string = other.string;
+    this.hash = other.hash;
   }
 
   public Utf8(byte[] bytes) {
+    int length = bytes.length;
+    checkLength(length);
     this.bytes = bytes;
-    this.length = bytes.length;
+    this.length = length;
   }
 
   /**
@@ -111,24 +117,25 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
    * length does not change, as this also clears the cached String.
    */
   public Utf8 setByteLength(int newLength) {
-    if (newLength > MAX_LENGTH) {
-      throw new AvroRuntimeException("String length " + newLength + " exceeds maximum allowed");
-    }
+    checkLength(newLength);
     if (this.bytes.length < newLength) {
       this.bytes = Arrays.copyOf(this.bytes, newLength);
     }
     this.length = newLength;
     this.string = null;
-    this.hasHash = false;
+    this.hash = 0;
     return this;
   }
 
   /** Set to the contents of a String. */
   public Utf8 set(String string) {
-    this.bytes = getBytesFor(string);
-    this.length = bytes.length;
+    byte[] bytes = getBytesFor(string);
+    int length = bytes.length;
+    checkLength(length);
+    this.bytes = bytes;
+    this.length = length;
     this.string = string;
-    this.hasHash = false;
+    this.hash = 0;
     return this;
   }
 
@@ -140,7 +147,6 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
     System.arraycopy(other.bytes, 0, bytes, 0, length);
     this.string = other.string;
     this.hash = other.hash;
-    this.hasHash = other.hasHash;
     return this;
   }
 
@@ -172,13 +178,16 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
 
   @Override
   public int hashCode() {
-    if (!hasHash) {
+    int h = hash;
+    if (h == 0) {
+      byte[] bytes = this.bytes;
+      int length = this.length;
       for (int i = 0; i < length; i++) {
-        hash = hash * 31 + bytes[i];
+        h = h * 31 + bytes[i];
       }
-      hasHash = true;
+      this.hash = h;
     }
-    return hash;
+    return h;
   }
 
   @Override
@@ -202,9 +211,14 @@ public class Utf8 implements Comparable<Utf8>, CharSequence {
     return toString().subSequence(start, end);
   }
 
+  private static void checkLength(int length) {
+    if (length > MAX_LENGTH) {
+      throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed");
+    }
+  }
+
   /** Gets the UTF-8 bytes for a String */
   public static byte[] getBytesFor(String str) {
     return str.getBytes(StandardCharsets.UTF_8);
   }
-
 }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index 60c8f71..e62982b 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -17,6 +17,7 @@
  */
 package org.apache.avro.util;
 
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertEquals;
 
@@ -51,13 +52,42 @@ public class TestUtf8 {
 
   @Test
   public void testHashCodeReused() {
+    assertEquals(97, new Utf8("a").hashCode());
+    assertEquals(3904, new Utf8("zz").hashCode());
+    assertEquals(122, new Utf8("z").hashCode());
+    assertEquals(99162322, new Utf8("hello").hashCode());
+    assertEquals(3198781, new Utf8("hell").hashCode());
+
     Utf8 u = new Utf8("a");
     assertEquals(97, u.hashCode());
+    assertEquals(97, u.hashCode());
 
+    u.set("a");
+    assertEquals(97, u.hashCode());
+
+    u.setByteLength(1);
+    assertEquals(97, u.hashCode());
     u.setByteLength(2);
+    assertNotEquals(97, u.hashCode());
+
     u.set("zz");
+    assertEquals(3904, u.hashCode());
+    u.setByteLength(1);
+    assertEquals(122, u.hashCode());
+
+    u.set("hello");
+    assertEquals(99162322, u.hashCode());
+    u.setByteLength(4);
+    assertEquals(3198781, u.hashCode());
 
-    assertEquals(97121, u.hashCode());
-    assertEquals(97121, u.hashCode());
+    u.set(new Utf8("zz"));
+    assertEquals(3904, u.hashCode());
+    u.setByteLength(1);
+    assertEquals(122, u.hashCode());
+
+    u.set(new Utf8("hello"));
+    assertEquals(99162322, u.hashCode());
+    u.setByteLength(4);
+    assertEquals(3198781, u.hashCode());
   }
 }


Mime
View raw message