drill-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Zelaine Fong <zf...@maprtech.com>
Subject Re: View causing poor performance on row queries on MapR-DB (and perhaps HBASE)
Date Sun, 07 Feb 2016 19:09:58 GMT
It looks like in the case of the non-view query, the WHERE clause is being
pushed down to MapR-DB.  The pushdown isn't happening in the case of the
view.  You can tell that's the case because in the case of the view query,
the explain plan has a Filter operation, whereas in the case of the
non-view query you do.  As you noted, in the case of the non-view, the Scan
has a startRow/stopRow/filter, which corresponds to the pushdown of the
filter.

I'm wondering if the problem is related to the CONVERT_FROM() in the WHERE
clause, or if all filters on views aren't being pushed down?

-- Zelaine

On Sat, Feb 6, 2016 at 2:02 PM, John Omernik <john@omernik.com> wrote:

> Hey all, I was running some queries on a MapR-DB Table I have.  I created a
> view to avoid forcing users to write queries that always included the
> CONVERT_FROM statements. (I am a huge advocate of making things easy for
> the the users and writing queries with CONVERT_FROM statements isn't easy).
>
> I ran a query the other day on one of these views and noticed that a query
> that took 30 seconds really shouldn't take 30 seconds.  What do I mean?
> well I wanted to get part of a record by looking up the MapR-DB Row key
> (equiv. to HBASE row key)  That should be an instant lookup.  Sure enough,
> when I tried it in the hbase shell that returns instantly.  So why did
> Drill take 30 seconds?  I shot an email to Ted and Jim at MapR to ask this
> very question. Ted suggested that I try the query without a view.  Sure
> enough, If I use the convert_from in a direct query, it's an instant (sub
> second) return.  Thus it appears something in the view is not allowing the
> query to short circuit the read.
>
> Ted suggests I post here  (I am curious if anyone who has HBASE setup is
> seeing this same issue with views) but also include the EXPLAIN plan.
> Basically, using my very limited ability to read EXPLAIN plans (If someone
> has a pointer to a blog post or docs on how to read EXPLAIN I would love
> that!) it looks like in the view the startRow and stopRow in the
> hbaseScanSpec are not set, seeming to cause a scan.  Is there any away to
> assist the planner when running this through a view so that we can get the
> performance of the query without the view but with the easy of
> use/readability of using the view?
>
> Thanks!!!
>
> John
>
> View Creation
>
> CREATE VIEW view_testpaste as
> SELECT
> CONVERT_FROM(row_key, 'UTF8') AS pasteid,
> CONVERT_FROM(pastes.pdata.lang, 'UTF8') AS lang,
> CONVERT_FROM(pastes.raw.paste, 'UTF8') AS paste
> FROM dfs.`pastes`.`/pastes` pastes;
>
>
> Select from view takes 32 seconds (seems to be a scan)
>
> > select paste from view_testpaste where pasteid = 'djHEHcPM'
>
> 1 row selected (32.302 seconds)
>
>
> Just a direct select returns very fast (0.486 seconds)
>
> > select CONVERT_FROM(pastes.raw.paste, 'UTF8') AS paste
> FROM dfs.`pastes`.`/pastes` pastes where
> CONVERT_FROM(row_key, 'UTF8') = 'djHEHcPM';
>
> 1 row selected (0.486 seconds)
>
>
>
>
> EXPLAIN PLAN FOR select paste from view_testpaste where pasteid =
> 'djHEHcPM'
>
> +------+------+
> | text | json |
> +------+------+
> | 00-00    Screen
> 00-01      UnionExchange
> 01-01        Project(paste=[CONVERT_FROMUTF8($1)])
> 01-02          SelectionVectorRemover
> 01-03            Filter(condition=[=(CONVERT_FROMUTF8($0), 'djHEHcPM')])
> 01-04              Project(row_key=[$1], ITEM=[ITEM($0, 'paste')])
> 01-05                Scan(groupscan=[MapRDBGroupScan
> [HBaseScanSpec=HBaseScanSpec [tableName=maprfs:///data/pastebiner/pastes,
> startRow=null, stopRow=null, filter=null], columns=[`row_key`,
> `raw`.`paste`]]])
>  | {
>   "head" : {
>     "version" : 1,
>     "generator" : {
>       "type" : "ExplainHandler",
>       "info" : ""
>     },
>     "type" : "APACHE_DRILL_PHYSICAL",
>     "options" : [ ],
>     "queue" : 0,
>     "resultMode" : "EXEC"
>   },
>   "graph" : [ {
>     "pop" : "maprdb-scan",
>     "@id" : 65541,
>     "userName" : "darkness",
>     "hbaseScanSpec" : {
>       "tableName" : "maprfs:///data/pastebiner/pastes",
>       "startRow" : "",
>       "stopRow" : "",
>       "serializedFilter" : null
>     },
>     "storage" : {
>       "type" : "file",
>       "enabled" : true,
>       "connection" : "maprfs:///",
>       "workspaces" : {
>         "root" : {
>           "location" : "/",
>           "writable" : false,
>           "defaultInputFormat" : null
>         },
>          "pastes" : {
>           "location" : "/data/pastebiner",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "dev" : {
>           "location" : "/data/dev",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "hive" : {
>           "location" : "/user/hive",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "tmp" : {
>           "location" : "/tmp",
>           "writable" : true,
>           "defaultInputFormat" : null
>         }
>       },
>       "formats" : {
>         "psv" : {
>           "type" : "text",
>           "extensions" : [ "tbl" ],
>           "delimiter" : "|"
>         },
>         "csv" : {
>           "type" : "text",
>           "extensions" : [ "csv" ],
>           "escape" : "`",
>           "delimiter" : ","
>         },
>         "tsv" : {
>           "type" : "text",
>           "extensions" : [ "tsv" ],
>           "delimiter" : "\t"
>         },
>         "parquet" : {
>           "type" : "parquet"
>         },
>         "json" : {
>           "type" : "json"
>         },
>         "maprdb" : {
>           "type" : "maprdb"
>         }
>       }
>     },
>     "columns" : [ "`row_key`", "`raw`.`paste`" ],
>     "cost" : 573950.0
>   }, {
>     "pop" : "project",
>     "@id" : 65540,
>     "exprs" : [ {
>       "ref" : "`row_key`",
>       "expr" : "`row_key`"
>     }, {
>       "ref" : "`ITEM`",
>       "expr" : "`raw`.`paste`"
>     } ],
>     "child" : 65541,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 573950.0
>   }, {
>     "pop" : "filter",
>     "@id" : 65539,
>     "child" : 65540,
>     "expr" : "equal(convert_fromutf8(`row_key`) , 'djHEHcPM') ",
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 86092.5
>   }, {
>     "pop" : "selection-vector-remover",
>     "@id" : 65538,
>     "child" : 65539,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 86092.5
>   }, {
>     "pop" : "project",
>     "@id" : 65537,
>     "exprs" : [ {
>       "ref" : "`paste`",
>       "expr" : "convert_fromutf8(`ITEM`) "
>     } ],
>     "child" : 65538,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 86092.5
>   }, {
>     "pop" : "union-exchange",
>     "@id" : 1,
>     "child" : 65537,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 86092.5
>   }, {
>     "pop" : "screen",
>     "@id" : 0,
>     "child" : 1,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 86092.5
>   } ]
> } |
> +------+------+
> 1 row selected (0.42 seconds)
>
>
>
> EXPLAIN PLAN FOR select CONVERT_FROM(pastes.raw.paste, 'UTF8') AS paste
> FROM dfs.`pastes`.`/pastes` pastes where
> CONVERT_FROM(row_key, 'UTF8') = 'djHEHcPM';
>
> +------+------+
> | text | json |
> +------+------+
> | 00-00    Screen
> 00-01      Project(paste=[CONVERT_FROMUTF8($1)])
> 00-02        Project(row_key=[$1], ITEM=[ITEM($0, 'paste')])
> 00-03          Scan(groupscan=[MapRDBGroupScan [HBaseScanSpec=HBaseScanSpec
> [tableName=maprfs:///data/pastebiner/pastes, startRow=djHEHcPM,
> stopRow=djHEHcPM\x00, filter=RowFilter (EQUAL, djHEHcPM)],
> columns=[`row_key`, `raw`.`paste`]]])
>  | {
>   "head" : {
>     "version" : 1,
>     "generator" : {
>       "type" : "ExplainHandler",
>       "info" : ""
>     },
>     "type" : "APACHE_DRILL_PHYSICAL",
>     "options" : [ ],
>     "queue" : 0,
>     "resultMode" : "EXEC"
>   },
>   "graph" : [ {
>     "pop" : "maprdb-scan",
>     "@id" : 3,
>     "userName" : "darkness",
>     "hbaseScanSpec" : {
>       "tableName" : "maprfs:///data/pastebiner/pastes",
>       "startRow" : "ZGpIRUhjUE0=",
>       "stopRow" : "ZGpIRUhjUE0A",
>       "serializedFilter" :
>
> "CihvcmcuYXBhY2hlLmhhZG9vcC5oYmFzZS5maWx0ZXIuUm93RmlsdGVyEkUKQwgCEj8KL29yZy5hcGFjaGUuaGFkb29wLmhiYXNlLmZpbHRlci5CaW5hcnlDb21wYXJhdG9yEgwKCgoIZGpIRUhjUE0="
>     },
>     "storage" : {
>       "type" : "file",
>       "enabled" : true,
>       "connection" : "maprfs:///",
>       "workspaces" : {
>         "root" : {
>           "location" : "/",
>           "writable" : false,
>           "defaultInputFormat" : null
>         },
>         "pastes" : {
>           "location" : "/data/pastebiner",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "dev" : {
>           "location" : "/data/dev",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "hive" : {
>           "location" : "/user/hive",
>           "writable" : true,
>           "defaultInputFormat" : null
>         },
>         "tmp" : {
>           "location" : "/tmp",
>           "writable" : true,
>           "defaultInputFormat" : null
>         }
>       },
>       "formats" : {
>         "psv" : {
>           "type" : "text",
>           "extensions" : [ "tbl" ],
>           "delimiter" : "|"
>         },
>         "csv" : {
>           "type" : "text",
>           "extensions" : [ "csv" ],
>           "escape" : "`",
>           "delimiter" : ","
>         },
>         "tsv" : {
>           "type" : "text",
>           "extensions" : [ "tsv" ],
>           "delimiter" : "\t"
>         },
>         "parquet" : {
>           "type" : "parquet"
>         },
>         "json" : {
>           "type" : "json"
>         },
>         "maprdb" : {
>           "type" : "maprdb"
>         }
>       }
>     },
>     "columns" : [ "`row_key`", "`raw`.`paste`" ],
>     "cost" : 286975.0
>   }, {
>     "pop" : "project",
>     "@id" : 2,
>     "exprs" : [ {
>       "ref" : "`row_key`",
>       "expr" : "`row_key`"
>     }, {
>       "ref" : "`ITEM`",
>       "expr" : "`raw`.`paste`"
>     } ],
>     "child" : 3,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 286975.0
>   }, {
>     "pop" : "project",
>     "@id" : 1,
>     "exprs" : [ {
>       "ref" : "`paste`",
>       "expr" : "convert_fromutf8(`ITEM`) "
>     } ],
>     "child" : 2,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 286975.0
>   }, {
>     "pop" : "screen",
>     "@id" : 0,
>     "child" : 1,
>     "initialAllocation" : 1000000,
>     "maxAllocation" : 10000000000,
>     "cost" : 286975.0
>   } ]
> } |
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message