drill-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From John Omernik <j...@omernik.com>
Subject View causing poor performance on row queries on MapR-DB (and perhaps HBASE)
Date Sat, 06 Feb 2016 22:02:16 GMT
Hey all, I was running some queries on a MapR-DB Table I have.  I created a
view to avoid forcing users to write queries that always included the
CONVERT_FROM statements. (I am a huge advocate of making things easy for
the the users and writing queries with CONVERT_FROM statements isn't easy).

I ran a query the other day on one of these views and noticed that a query
that took 30 seconds really shouldn't take 30 seconds.  What do I mean?
well I wanted to get part of a record by looking up the MapR-DB Row key
(equiv. to HBASE row key)  That should be an instant lookup.  Sure enough,
when I tried it in the hbase shell that returns instantly.  So why did
Drill take 30 seconds?  I shot an email to Ted and Jim at MapR to ask this
very question. Ted suggested that I try the query without a view.  Sure
enough, If I use the convert_from in a direct query, it's an instant (sub
second) return.  Thus it appears something in the view is not allowing the
query to short circuit the read.

Ted suggests I post here  (I am curious if anyone who has HBASE setup is
seeing this same issue with views) but also include the EXPLAIN plan.
Basically, using my very limited ability to read EXPLAIN plans (If someone
has a pointer to a blog post or docs on how to read EXPLAIN I would love
that!) it looks like in the view the startRow and stopRow in the
hbaseScanSpec are not set, seeming to cause a scan.  Is there any away to
assist the planner when running this through a view so that we can get the
performance of the query without the view but with the easy of
use/readability of using the view?

Thanks!!!

John

View Creation

CREATE VIEW view_testpaste as
SELECT
CONVERT_FROM(row_key, 'UTF8') AS pasteid,
CONVERT_FROM(pastes.pdata.lang, 'UTF8') AS lang,
CONVERT_FROM(pastes.raw.paste, 'UTF8') AS paste
FROM dfs.`pastes`.`/pastes` pastes;


Select from view takes 32 seconds (seems to be a scan)

> select paste from view_testpaste where pasteid = 'djHEHcPM'

1 row selected (32.302 seconds)


Just a direct select returns very fast (0.486 seconds)

> select CONVERT_FROM(pastes.raw.paste, 'UTF8') AS paste
FROM dfs.`pastes`.`/pastes` pastes where
CONVERT_FROM(row_key, 'UTF8') = 'djHEHcPM';

1 row selected (0.486 seconds)




EXPLAIN PLAN FOR select paste from view_testpaste where pasteid = 'djHEHcPM'

+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      UnionExchange
01-01        Project(paste=[CONVERT_FROMUTF8($1)])
01-02          SelectionVectorRemover
01-03            Filter(condition=[=(CONVERT_FROMUTF8($0), 'djHEHcPM')])
01-04              Project(row_key=[$1], ITEM=[ITEM($0, 'paste')])
01-05                Scan(groupscan=[MapRDBGroupScan
[HBaseScanSpec=HBaseScanSpec [tableName=maprfs:///data/pastebiner/pastes,
startRow=null, stopRow=null, filter=null], columns=[`row_key`,
`raw`.`paste`]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "maprdb-scan",
    "@id" : 65541,
    "userName" : "darkness",
    "hbaseScanSpec" : {
      "tableName" : "maprfs:///data/pastebiner/pastes",
      "startRow" : "",
      "stopRow" : "",
      "serializedFilter" : null
    },
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "defaultInputFormat" : null
        },
         "pastes" : {
          "location" : "/data/pastebiner",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "dev" : {
          "location" : "/data/dev",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "hive" : {
          "location" : "/user/hive",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : null
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "escape" : "`",
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        },
        "maprdb" : {
          "type" : "maprdb"
        }
      }
    },
    "columns" : [ "`row_key`", "`raw`.`paste`" ],
    "cost" : 573950.0
  }, {
    "pop" : "project",
    "@id" : 65540,
    "exprs" : [ {
      "ref" : "`row_key`",
      "expr" : "`row_key`"
    }, {
      "ref" : "`ITEM`",
      "expr" : "`raw`.`paste`"
    } ],
    "child" : 65541,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 573950.0
  }, {
    "pop" : "filter",
    "@id" : 65539,
    "child" : 65540,
    "expr" : "equal(convert_fromutf8(`row_key`) , 'djHEHcPM') ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 86092.5
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 65538,
    "child" : 65539,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 86092.5
  }, {
    "pop" : "project",
    "@id" : 65537,
    "exprs" : [ {
      "ref" : "`paste`",
      "expr" : "convert_fromutf8(`ITEM`) "
    } ],
    "child" : 65538,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 86092.5
  }, {
    "pop" : "union-exchange",
    "@id" : 1,
    "child" : 65537,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 86092.5
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 86092.5
  } ]
} |
+------+------+
1 row selected (0.42 seconds)



EXPLAIN PLAN FOR select CONVERT_FROM(pastes.raw.paste, 'UTF8') AS paste
FROM dfs.`pastes`.`/pastes` pastes where
CONVERT_FROM(row_key, 'UTF8') = 'djHEHcPM';

+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(paste=[CONVERT_FROMUTF8($1)])
00-02        Project(row_key=[$1], ITEM=[ITEM($0, 'paste')])
00-03          Scan(groupscan=[MapRDBGroupScan [HBaseScanSpec=HBaseScanSpec
[tableName=maprfs:///data/pastebiner/pastes, startRow=djHEHcPM,
stopRow=djHEHcPM\x00, filter=RowFilter (EQUAL, djHEHcPM)],
columns=[`row_key`, `raw`.`paste`]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "maprdb-scan",
    "@id" : 3,
    "userName" : "darkness",
    "hbaseScanSpec" : {
      "tableName" : "maprfs:///data/pastebiner/pastes",
      "startRow" : "ZGpIRUhjUE0=",
      "stopRow" : "ZGpIRUhjUE0A",
      "serializedFilter" :
"CihvcmcuYXBhY2hlLmhhZG9vcC5oYmFzZS5maWx0ZXIuUm93RmlsdGVyEkUKQwgCEj8KL29yZy5hcGFjaGUuaGFkb29wLmhiYXNlLmZpbHRlci5CaW5hcnlDb21wYXJhdG9yEgwKCgoIZGpIRUhjUE0="
    },
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "defaultInputFormat" : null
        },
        "pastes" : {
          "location" : "/data/pastebiner",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "dev" : {
          "location" : "/data/dev",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "hive" : {
          "location" : "/user/hive",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : null
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "escape" : "`",
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        },
        "maprdb" : {
          "type" : "maprdb"
        }
      }
    },
    "columns" : [ "`row_key`", "`raw`.`paste`" ],
    "cost" : 286975.0
  }, {
    "pop" : "project",
    "@id" : 2,
    "exprs" : [ {
      "ref" : "`row_key`",
      "expr" : "`row_key`"
    }, {
      "ref" : "`ITEM`",
      "expr" : "`raw`.`paste`"
    } ],
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 286975.0
  }, {
    "pop" : "project",
    "@id" : 1,
    "exprs" : [ {
      "ref" : "`paste`",
      "expr" : "convert_fromutf8(`ITEM`) "
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 286975.0
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 286975.0
  } ]
} |

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message