Bob Rumsby created DRILL-1394:
---------------------------------
Summary: COUNT(*) with UNION subquery returns two rows
Key: DRILL-1394
URL: https://issues.apache.org/jira/browse/DRILL-1394
Project: Apache Drill
Issue Type: Bug
Components: Query Planning & Optimization
Affects Versions: 0.5.0
Reporter: Bob Rumsby
The following COUNT(*) query with a UNION subquery returns two rows, one count for each side
of the union. Run by itself, the subquery returns 70000 rows.
0: jdbc:drill:> select count(*) from (select trans_id from `clicks/clicks.campaign.json`
union all select trans_id from `clicks/clicks.json`);
+------------+
| EXPR$0 |
+------------+
| 40000 |
| 30000 |
+------------+
2 rows selected (5.896 seconds)
0: jdbc:drill:> explain plan for select count(*) from (select trans_id from `clicks/clicks.campaign.json`
union all select trans_id from `clicks/clicks.json`);
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 StreamAgg(group=[{}], EXPR$0=[COUNT()])
00-02 Project($f0=[0])
00-03 UnionAll(all=[true])
00-05 Project(trans_id=[$1])
00-07 Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/demorig/data/nested/clicks/clicks.campaign.json,
columns = null]])
00-04 Project(trans_id=[$1])
00-06 Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/demorig/data/nested/clicks/clicks.json,
columns = null]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 7,
"files" : [ "maprfs:/mapr/demorig/data/nested/clicks/clicks.campaign.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/mapr/demorig/data",
"writable" : false,
"storageformat" : null
},
"nested" : {
"location" : "/mapr/demorig/data/nested",
"writable" : true,
"storageformat" : "parquet"
},
"flat" : {
"location" : "/mapr/demorig/data/flat",
"writable" : true,
"storageformat" : "parquet"
},
"views" : {
"location" : "/mapr/demorig/data/views",
"writable" : true,
"storageformat" : "parquet"
},
"yelp" : {
"location" : "/mapr/demorig/data/yelp",
"writable" : true,
"storageformat" : "json"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"selectionRoot" : "/mapr/demorig/data/nested/clicks/clicks.campaign.json",
"cost" : 7876.0
}, {
"pop" : "project",
"@id" : 5,
"exprs" : [ {
"ref" : "`trans_id`",
"expr" : "`trans_id`"
} ],
"child" : 7,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 7876.0
}, {
"pop" : "fs-scan",
"@id" : 6,
"files" : [ "maprfs:/mapr/demorig/data/nested/clicks/clicks.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/mapr/demorig/data",
"writable" : false,
"storageformat" : null
},
"nested" : {
"location" : "/mapr/demorig/data/nested",
"writable" : true,
"storageformat" : "parquet"
},
"flat" : {
"location" : "/mapr/demorig/data/flat",
"writable" : true,
"storageformat" : "parquet"
},
"views" : {
"location" : "/mapr/demorig/data/views",
"writable" : true,
"storageformat" : "parquet"
},
"yelp" : {
"location" : "/mapr/demorig/data/yelp",
"writable" : true,
"storageformat" : "json"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"selectionRoot" : "/mapr/demorig/data/nested/clicks/clicks.json",
"cost" : 5097.0
}, {
"pop" : "project",
"@id" : 4,
"exprs" : [ {
"ref" : "`trans_id`",
"expr" : "`trans_id`"
} ],
"child" : 6,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 5097.0
}, {
"pop" : "union-all",
"@id" : 3,
"children" : [ 5, 4 ],
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 12973.0
}, {
"pop" : "project",
"@id" : 2,
"exprs" : [ {
"ref" : "`$f0`",
"expr" : "0"
} ],
"child" : 3,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 12973.0
}, {
"pop" : "streaming-aggregate",
"@id" : 1,
"child" : 2,
"keys" : [ ],
"exprs" : [ {
"ref" : "`EXPR$0`",
"expr" : "count(1) "
} ],
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1.0
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1297.3
} ]
} |
+------------+------------+
1 row selected (0.142 seconds)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
|