DRILL-6217: NaN/Inf NestedLoopJoin processes NaN values incorrectly
- Changed logic for equality functions to handle NaN values as the biggest ones
closes #1154
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d09efb93
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d09efb93
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d09efb93
Branch: refs/heads/master
Commit: d09efb931f183d989516d1dd098ade546bbe3f16
Parents: f47af65
Author: Vladimir Tkach <vovatkach75@gmail.com>
Authored: Tue Mar 6 11:42:02 2018 +0200
Committer: Ben-Zvi <bben-zvi@mapr.com>
Committed: Fri Mar 9 18:51:15 2018 -0800
----------------------------------------------------------------------
.../codegen/templates/ComparisonFunctions.java | 39 +++++++++
.../fn/impl/TestMathFunctionsWithNanInf.java | 6 +-
.../vector/complex/writer/TestJsonNanInf.java | 88 ++++++++++++++++++--
3 files changed, 123 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
index 12a4ef2..502418e 100644
--- a/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
+++ b/exec/java-exec/src/main/codegen/templates/ComparisonFunctions.java
@@ -249,7 +249,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
public void eval() {
<#if typeGroup.mode == "primitive">
+ // NaN is the biggest possible value, and NaN == NaN
+ if (Double.isNaN(left.value) || ( Double.isNaN(left.value) && Double.isNaN(right.value)))
{
+ out.value=0;
+ } else if (Double.isNaN(right.value) && !Double.isNaN(left.value)) {
+ out.value = 1;
+ } else {
out.value = left.value < right.value ? 1 : 0;
+ }
<#elseif typeGroup.mode == "varString"
|| typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" >
int cmp;
@@ -280,7 +287,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
public void eval() {
<#if typeGroup.mode == "primitive">
+ // NaN is the biggest possible value, and NaN == NaN
+ if (Double.isNaN(right.value)){
+ out.value = 1;
+ } else if (!Double.isNaN(right.value) && Double.isNaN(left.value)) {
+ out.value = 0;
+ } else {
out.value = left.value <= right.value ? 1 : 0;
+ }
<#elseif typeGroup.mode == "varString"
|| typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" >
int cmp;
@@ -311,7 +325,14 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
public void eval() {
<#if typeGroup.mode == "primitive">
+ // NaN is the biggest possible value, and NaN == NaN
+ if (Double.isNaN(right.value) || ( Double.isNaN(left.value) && Double.isNaN(right.value)))
{
+ out.value = 0;
+ } else if (Double.isNaN(left.value) && !Double.isNaN(right.value)) {
+ out.value = 1;
+ } else {
out.value = left.value > right.value ? 1 : 0;
+ }
<#elseif typeGroup.mode == "varString"
|| typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" >
int cmp;
@@ -342,7 +363,15 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
public void eval() {
<#if typeGroup.mode == "primitive">
+ // NaN is the biggest possible value, and NaN == NaN
+ if (Double.isNaN(left.value)){
+ out.value=1;
+ } else if (!Double.isNaN(left.value) && Double.isNaN(right.value)) {
+ out.value = 0;
+ } else {
out.value = left.value >= right.value ? 1 : 0;
+ }
+
<#elseif typeGroup.mode == "varString"
|| typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" >
int cmp;
@@ -373,7 +402,12 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
public void eval() {
<#if typeGroup.mode == "primitive">
+ // NaN is the biggest possible value, and NaN == NaN
+ if (Double.isNaN(left.value) && Double.isNaN(right.value)) {
+ out.value = 1;
+ } else {
out.value = left.value == right.value ? 1 : 0;
+ }
<#elseif typeGroup.mode == "varString" >
out.value = org.apache.drill.exec.expr.fn.impl.ByteFunctionHelpers.equal(
left.buffer, left.start, left.end, right.buffer, right.start, right.end);
@@ -406,7 +440,12 @@ public class GCompare${leftTypeBase}Vs${rightTypeBase} {
public void eval() {
<#if typeGroup.mode == "primitive">
+ // NaN is the biggest possible value, and NaN == NaN
+ if (Double.isNaN(left.value) && Double.isNaN(right.value)) {
+ out.value = 0;
+ } else {
out.value = left.value != right.value ? 1 : 0;
+ }
<#elseif typeGroup.mode == "varString"
|| typeGroup.mode == "intervalNameThis" || typeGroup.mode == "intervalDay" >
int cmp;
http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
index b692c9f..8131195 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java
@@ -54,7 +54,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery {
String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
String query = String.format("select equal(nan_col, nan_col) as nan_col, equal(inf_col,
inf_col) as inf_col from dfs.`%s`", table_name);
String[] columns = {"nan_col", "inf_col"};
- Object[] values = {false, true};
+ Object[] values = {true, true};
evalTest(table_name, json, query, columns, values);
}
@@ -65,7 +65,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery {
String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
String query = String.format("select not_equal(nan_col, nan_col) as nan_col, not_equal(inf_col,
inf_col) as inf_col from dfs.`%s`", table_name);
String[] columns = {"nan_col", "inf_col"};
- Object[] values = {true, false};
+ Object[] values = {false, false};
evalTest(table_name, json, query, columns, values);
}
@@ -85,7 +85,7 @@ public class TestMathFunctionsWithNanInf extends BaseTestQuery {
String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
String query = String.format("select greater_than(nan_col, 5) as nan_col, greater_than(inf_col,
5) as inf_col from dfs.`%s`", table_name);
String[] columns = {"nan_col", "inf_col"};
- Object[] values = {false, true};
+ Object[] values = {true, true};
evalTest(table_name, json, query, columns, values);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/d09efb93/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
index 95848c4..3ff5ba2 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
@@ -18,6 +18,8 @@
package org.apache.drill.exec.vector.complex.writer;
import org.apache.commons.io.FileUtils;
+import org.apache.drill.exec.physical.impl.join.JoinTestBase;
+import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.test.BaseTestQuery;
import org.apache.drill.common.exceptions.UserRemoteException;
import org.apache.drill.common.expression.SchemaPath;
@@ -304,12 +306,20 @@ public class TestJsonNanInf extends BaseTestQuery {
}
@Test
- public void testInnerJoinWithNaN() throws Exception {
+ public void testNestedLoopJoinWithNaN() throws Exception {
String table_name = "nan_test.json";
- String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
- "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n"
+
- "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n"
+
- "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}";
+ String json = "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"object1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"object2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n"
+
+ "{\"name\":\"object2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n"
+
+ "{\"name\":\"object3\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n"
+
+ "{\"name\":\"object3\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}\n"
+
+ "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"object4\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":Infinity}";
+ JoinTestBase.enableJoin(false, false, true);
String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s`
" +
" tt on t.attr4 = tt.attr4 ", table_name, table_name);
@@ -321,12 +331,76 @@ public class TestJsonNanInf extends BaseTestQuery {
.sqlQuery(query)
.ordered()
.baselineColumns("name")
- .baselineValues("obj1")
- .baselineValues("obj2")
+ .baselineValues("object1")
+ .baselineValues("object2")
+ .baselineValues("object3")
+ .baselineValues("object4")
.build()
.run();
} finally {
test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+ JoinTestBase.resetJoinOptions();
+ FileUtils.deleteQuietly(file);
+ }
+ }
+
+ @Test
+ public void testHashJoinWithNaN() throws Exception {
+ String table_name = "nan_test.json";
+ String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n"
+
+ "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n"
+
+ "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}";
+ JoinTestBase.enableJoin(true, false, false);
+ String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s`
" +
+ " tt on t.attr4 = tt.attr4 ", table_name, table_name);
+
+ File file = new File(dirTestWatcher.getRootDir(), table_name);
+ try {
+ FileUtils.writeStringToFile(file, json);
+ test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("name")
+ .baselineValues("obj1")
+ .baselineValues("obj2")
+ .build()
+ .run();
+ } finally {
+ test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+ JoinTestBase.resetJoinOptions();
+ FileUtils.deleteQuietly(file);
+ }
+ }
+
+
+ @Test
+ public void testMergeJoinWithNaN() throws Exception {
+ String table_name = "nan_test.json";
+ String json = "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}\n"
+
+ "{\"name\":\"obj1\", \"attr1\":1, \"attr2\":2, \"attr3\":4, \"attr4\":Infinity}\n"
+
+ "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":5, \"attr4\":-Infinity}\n"
+
+ "{\"name\":\"obj2\", \"attr1\":1, \"attr2\":2, \"attr3\":3, \"attr4\":NaN}";
+ JoinTestBase.enableJoin(false, true, false);
+ String query = String.format("select distinct t.name from dfs.`%s` t inner join dfs.`%s`
" +
+ " tt on t.attr4 = tt.attr4 ", table_name, table_name);
+
+ File file = new File(dirTestWatcher.getRootDir(), table_name);
+ try {
+ FileUtils.writeStringToFile(file, json);
+ test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("name")
+ .baselineValues("obj1")
+ .baselineValues("obj2")
+ .build()
+ .run();
+ } finally {
+ test("alter session set `%s` = false", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+ JoinTestBase.resetJoinOptions();
FileUtils.deleteQuietly(file);
}
}
|