tez-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "LuYao_whysomanydups (JIRA)" <j...@apache.org>
Subject [jira] [Created] (TEZ-3863) Possible logic problem when calculating an if statement of a nullable column
Date Sun, 12 Nov 2017 03:45:00 GMT
LuYao_whysomanydups created TEZ-3863:

             Summary: Possible logic problem when calculating an if statement of a nullable column
                 Key: TEZ-3863
                 URL: https://issues.apache.org/jira/browse/TEZ-3863
             Project: Apache Tez
          Issue Type: Bug
    Affects Versions: 0.6.2
         Environment: Hive on Tez (Hive's original CLI, not beeline):
Hadoop 2.6.2
Hive 1.2.1
Tez 0.6.2
            Reporter: LuYao_whysomanydups
            Priority: Minor

To test count(if(...)) logic, a Hive (MR) CLI and a Hive (Tez) CLI were each started and the
following HiveQL snippet was executed in both:
select o1.k, o2.k, (if(o2.k>0, o2.k, null)) 
from (
    select 1 as k union all select 0 union all select null union all select 2
) o1 
left outer join (
    select 1 as k union all select 0 union all select null
) o2 
on (o1.k = o2.k) ;

Hive(MR) returns:
1	1	1
0	0	NULL

Hive(Tez) returns:
1	1	1
0	0	NULL
2	NULL	2

The last line of Hive(Tez)'s result is quite strange, as o2.k can never have a value of 2
(o2 only contains 1, 0 and null).
Could this be a bug, or something that has not yet been patched in version 0.6.2 of Tez?

The Tez session was initialised with the following configuration (hiveconf):
set mapreduce.map.memory.mb=4096;
set mapreduce.map.java.opts=-Xmx3280m;

set mapreduce.reduce.memory.mb=4096;
set mapreduce.reduce.java.opts=-Xmx3280m;

set mapreduce.framework.name=yarn-tez;
set hive.execution.engine=tez;

set tez.am.resource.memory.mb=4096;
set tez.task.resource.memory.mb=4096;
set hive.tez.container.size=4096;

set hive.auto.convert.sortmerge.join=false;
set hive.auto.convert.sortmerge.join.to.mapjoin=false;
set hive.convert.join.bucket.mapjoin.tez=false;

set hive.optimize.bucketmapjoin = false;
set hive.optimize.bucketmapjoin.sortedmerge = false;

set hive.enforce.sortmergebucketmapjoin=false;
set hive.exec.submit.local.task.via.child=true;
set hive.mapjoin.bucket.cache.size=10000;
set hive.mapjoin.optimized.hashtable=true;

set hive.tez.dynamic.partition.pruning=false;

set hive.tez.java.opts=-Xmx3280m;
set tez.am.java.opts=-Xmx3280m;
set tez.runtime.io.sort.mb=1638;
set tez.runtime.unordered.output.buffer.size-mb=409;

set hive.auto.convert.join.noconditionaltask.size=1365;
set tez.runtime.unordered.output.buffer.size-mb=409;

set tez.grouping.min-size=16777216;
set tez.grouping.max-size=1073741824;

set hive.exec.reducers.max=1099;
set hive.exec.reducers.bytes.per.reducer=258998272;

set mapred.max.split.size=100000000;

set hive.support.sql11.reserved.keywords=false; 
set hive.cbo.enable=true;
set hive.compute.query.using.stats=true;
set hive.stats.fetch.column.stats=true;
set hive.stats.fetch.partition.stats=true;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled = true;
set hive.vectorized.execution.reduce.groupby.enabled = true;
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=16;
set hive.tez.auto.reducer.parallelism=true;
set hive.tez.min.partition.factor=0.25;
set hive.tez.max.partition.factor=2.0;
set mapred.reduce.tasks=-1;
set tez.shuffle-vertex-manager.min-src-fraction=0.25;
set tez.shuffle-vertex-manager.max-src-fraction=0.75;

set hive.tez.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;

This message was sent by Atlassian JIRA

View raw message