datafu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mha...@apache.org
Subject [2/3] git commit: DATAFU-68: SampleByKey can throw NullPointerException
Date Mon, 03 Nov 2014 22:13:50 GMT
DATAFU-68: SampleByKey can throw NullPointerException

https://issues.apache.org/jira/browse/DATAFU-68

Signed-off-by: Matthew Hayes <matthew.terence.hayes@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-datafu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-datafu/commit/2fef6eab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-datafu/tree/2fef6eab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-datafu/diff/2fef6eab

Branch: refs/heads/master
Commit: 2fef6eab201ef574d9c7ba8a54efac0b4c977c4d
Parents: 3b6554a
Author: Jarek Jarcec Cecho <jarcec@apache.org>
Authored: Mon Sep 8 10:51:16 2014 +0200
Committer: Matt Hayes <mhayes@linkedin.com>
Committed: Mon Nov 3 14:12:47 2014 -0800

----------------------------------------------------------------------
 .../java/datafu/pig/sampling/SampleByKey.java   | 29 ++++++++++----------
 1 file changed, 15 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-datafu/blob/2fef6eab/datafu-pig/src/main/java/datafu/pig/sampling/SampleByKey.java
----------------------------------------------------------------------
diff --git a/datafu-pig/src/main/java/datafu/pig/sampling/SampleByKey.java b/datafu-pig/src/main/java/datafu/pig/sampling/SampleByKey.java
index d94a038..9c8cfd7 100644
--- a/datafu-pig/src/main/java/datafu/pig/sampling/SampleByKey.java
+++ b/datafu-pig/src/main/java/datafu/pig/sampling/SampleByKey.java
@@ -39,11 +39,11 @@ import org.apache.pig.data.Tuple;
  *
  * <p>
  * The only required parameter is the sampling probability.  This may be followed
- * by an optional seed value to control the random number generation.  
+ * by an optional seed value to control the random number generation.
  * </p>
  *
  * <p>
- * SampleByKey will work deterministically as long as the same seed is provided.  
+ * SampleByKey will work deterministically as long as the same seed is provided.
  * </p>
  *
  * Example:
@@ -51,12 +51,12 @@ import org.apache.pig.data.Tuple;
  * <pre>
  * {@code
  * DEFINE SampleByKey datafu.pig.sampling.SampleByKey('0.5');
- * 
+ *
  *-- input: (A,1), (A,2), (A,3), (B,1), (B,3)
- * 
+ *
  * data = LOAD 'input' AS (A_id:chararray, B_id:chararray, C:int);
  * output = FILTER data BY SampleByKey(A_id);
- * 
+ *
  * --output: (B,1), (B,3)
  * }
  * </pre>
@@ -67,14 +67,14 @@ import org.apache.pig.data.Tuple;
 public class SampleByKey extends FilterFunc
 {
   final static int PRIME_NUMBER = 31;
-  
+
   Integer seed = null;
   double probability;
-  
+
   public SampleByKey(String probability) {
     this.probability = Double.parseDouble(probability);
   }
-  
+
   public SampleByKey(String probability, String salt) {
     this(probability);
     this.seed = salt.hashCode();
@@ -83,20 +83,21 @@ public class SampleByKey extends FilterFunc
   @Override
   public void setUDFContextSignature(String signature)
   {
-    if (this.seed == null)
-      this.seed = signature.hashCode();
+    if (this.seed == null && signature != null) {
+        this.seed = signature.hashCode();
+    }
     super.setUDFContextSignature(signature);
   }
 
   @Override
-  public Boolean exec(Tuple input) throws IOException 
+  public Boolean exec(Tuple input) throws IOException
   {
     int hashCode = 0;
     for(int i=0; i<input.size(); i++) {
       Object each = input.get(i);
       hashCode = hashCode*PRIME_NUMBER + each.hashCode();
     }
-      
+
     try {
       return intToRandomDouble(hashCode) <= probability;
     }
@@ -105,7 +106,7 @@ public class SampleByKey extends FilterFunc
       throw new RuntimeException("Exception on intToRandomDouble");
     }
   }
-  
+
   private Double intToRandomDouble(int input) throws Exception
   {
     MessageDigest hasher = MessageDigest.getInstance("sha-1");
@@ -113,7 +114,7 @@ public class SampleByKey extends FilterFunc
     ByteBuffer b = ByteBuffer.allocate(4+4);
     ByteBuffer b2 = ByteBuffer.allocate(20);
 
-    b.putInt(seed);
+    b.putInt(seed == null ? PRIME_NUMBER : seed);
     b.putInt(input);
     byte[] digest = hasher.digest(b.array());
     b.clear();


Mime
View raw message