drill-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Nitin Pawar <nitinpawar...@gmail.com>
Subject Re: This query is so slow
Date Tue, 07 Aug 2018 18:00:25 GMT
Depending how many unique keys are there it can cause issues

can you try following query with tweaking settings around hash_join

select * from dfs.`/iswdata/rj/201805` t1 join (select unique_key,
count(unique_key) as total from dfs.`/iswdata/rj/201805` group by
unique_key)t on t1.unique_key = t.unique_key where t.total > 1

On Tue, Aug 7, 2018 at 9:23 PM, Peter Edike <
peter.edike@interswitchgroup.com> wrote:

> Did an explain plan on the query since the query never completes
> successfully (I can't see the profile because of that), Here is the result
> of the explain plan query
>
> "head":{
>    "version":1,
>    "generator":{
>       "type":"ExplainHandler",
>       "info":""
>    },
>    "type":"APACHE_DRILL_PHYSICAL",
>    "options":[
>
>    ],
>    "queue":0,
>    "hasResourcePlan":false,
>    "resultMode":"EXEC"
> },
> "graph":[
>    {
>       "pop":"parquet-scan",
>       "@id":327683,
>       "userName":"mapr",
>       "entries":[
>          {
>             "path":"maprfs:///iswdata/rj"
>          }
>       ],
>       "storage":{
>          "type":"file",
>          "enabled":true,
>          "connection":"maprfs:///",
>          "config":null,
>          "workspaces":{
>             "root":{
>                "location":"/",
>                "writable":false,
>                "defaultInputFormat":null,
>                "allowAccessOutsideWorkspace":false
>             },
>             "tmp":{
>                "location":"/tmp",
>                "writable":true,
>                "defaultInputFormat":null,
>                "allowAccessOutsideWorkspace":false
>             }
>          },
>          "formats":{
>             "psv":{
>                "type":"text",
>                "extensions":[
>                   "tbl"
>                ],
>                "delimiter":"|"
>             },
>             "csv":{
>                "type":"text",
>                "extensions":[
>                   "csv"
>                ],
>                "delimiter":","
>             },
>             "tsv":{
>                "type":"text",
>                "extensions":[
>                   "tsv"
>                ],
>                "delimiter":"\t"
>             },
>             "parquet":{
>                "type":"parquet"
>             },
>             "json":{
>                "type":"json",
>                "extensions":[
>                   "json"
>                ]
>             },
>             "maprdb":{
>                "type":"maprdb"
>             }
>          }
>       },
>       "format":{
>          "type":"parquet"
>       },
>       "columns":[
>          "`unique_key`"
>       ],
>       "selectionRoot":"maprfs:/iswdata/rj",
>       "filter":"true",
>       "fileSet":[
>          "/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet",
>          "/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet",
>          "/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet",
>          "/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet",
>          "/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet",
>          "/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet",
>          "/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet",
>          "/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet",
>          "/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet"
>       ],
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"hash-aggregate",
>       "@id":327682,
>       "child":327683,
>       "cardinality":1.0,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "aggPhase":"PHASE_1of2",
>       "groupByExprs":[
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          }
>       ],
>       "aggrExprs":[
>          {
>             "ref":"`$f1`",
>             "expr":"count(`unique_key`) "
>          }
>       ],
>       "cost":2.36912098E8
>    },
>    {
>       "pop":"project",
>       "@id":327681,
>       "exprs":[
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          },
>          {
>             "ref":"`$f1`",
>             "expr":"`$f1`"
>          },
>          {
>             "ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
>             "expr":"hash32asdouble(`unique_key`, 1301011) "
>          }
>       ],
>       "child":327682,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E7
>    },
>    {
>       "pop":"unordered-mux-exchange",
>       "@id":196609,
>       "child":327681,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E7
>    },
>    {
>       "pop":"hash-to-random-exchange",
>       "@id":65548,
>       "child":196609,
>       "expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E7
>    },
>    {
>       "pop":"project",
>       "@id":65547,
>       "exprs":[
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          },
>          {
>             "ref":"`$f1`",
>             "expr":"`$f1`"
>          }
>       ],
>       "child":65548,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E7
>    },
>    {
>       "pop":"hash-aggregate",
>       "@id":65546,
>       "child":65547,
>       "cardinality":1.0,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "aggPhase":"PHASE_2of2",
>       "groupByExprs":[
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          }
>       ],
>       "aggrExprs":[
>          {
>             "ref":"`$f1`",
>             "expr":"$sum0(`$f1`) "
>          }
>       ],
>       "cost":2.36912098E7
>    },
>    {
>       "pop":"filter",
>       "@id":65545,
>       "child":65546,
>       "expr":"greater_than(`$f1`, 1) ",
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":2369120.98
>    },
>    {
>       "pop":"selection-vector-remover",
>       "@id":65544,
>       "child":65545,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":2369120.98
>    },
>    {
>       "pop":"project",
>       "@id":65542,
>       "exprs":[
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          }
>       ],
>       "child":65544,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":2369120.98
>    },
>    {
>       "pop":"project",
>       "@id":65540,
>       "exprs":[
>          {
>             "ref":"`unique_key0`",
>             "expr":"`unique_key`"
>          }
>       ],
>       "child":65542,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":2369120.98
>    },
>    {
>       "pop":"parquet-scan",
>       "@id":262147,
>       "userName":"mapr",
>       "entries":[
>          {
>             "path":"maprfs:///iswdata/rj"
>          }
>       ],
>       "storage":{
>          "type":"file",
>          "enabled":true,
>          "connection":"maprfs:///",
>          "config":null,
>          "workspaces":{
>             "root":{
>                "location":"/",
>                "writable":false,
>                "defaultInputFormat":null,
>                "allowAccessOutsideWorkspace":false
>             },
>             "tmp":{
>                "location":"/tmp",
>                "writable":true,
>                "defaultInputFormat":null,
>                "allowAccessOutsideWorkspace":false
>             }
>          },
>          "formats":{
>             "psv":{
>                "type":"text",
>                "extensions":[
>                   "tbl"
>                ],
>                "delimiter":"|"
>             },
>             "csv":{
>                "type":"text",
>                "extensions":[
>                   "csv"
>                ],
>                "delimiter":","
>             },
>             "tsv":{
>                "type":"text",
>                "extensions":[
>                   "tsv"
>                ],
>                "delimiter":"\t"
>             },
>             "parquet":{
>                "type":"parquet"
>             },
>             "json":{
>                "type":"json",
>                "extensions":[
>                   "json"
>                ]
>             },
>             "maprdb":{
>                "type":"maprdb"
>             }
>          }
>       },
>       "format":{
>          "type":"parquet"
>       },
>       "columns":[
>          "`**`"
>       ],
>       "selectionRoot":"maprfs:/iswdata/rj",
>       "filter":"true",
>       "fileSet":[
>          "/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet",
>          "/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet",
>          "/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet",
>          "/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet",
>          "/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet",
>          "/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet",
>          "/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet",
>          "/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet",
>          "/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet"
>       ],
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"project",
>       "@id":262146,
>       "exprs":[
>          {
>             "ref":"`T0¦¦**`",
>             "expr":"`**`"
>          },
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          }
>       ],
>       "child":262147,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"project",
>       "@id":262145,
>       "exprs":[
>          {
>             "ref":"`T0¦¦**`",
>             "expr":"`T0¦¦**`"
>          },
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          },
>          {
>             "ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
>             "expr":"hash32asdouble(`unique_key`, 1301011) "
>          }
>       ],
>       "child":262146,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"unordered-mux-exchange",
>       "@id":131073,
>       "child":262145,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"hash-to-random-exchange",
>       "@id":65543,
>       "child":131073,
>       "expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"project",
>       "@id":65541,
>       "exprs":[
>          {
>             "ref":"`T0¦¦**`",
>             "expr":"`T0¦¦**`"
>          },
>          {
>             "ref":"`unique_key`",
>             "expr":"`unique_key`"
>          }
>       ],
>       "child":65543,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"hash-join",
>       "@id":65539,
>       "left":65541,
>       "right":65540,
>       "conditions":[
>          {
>             "relationship":"EQUALS",
>             "left":"`unique_key`",
>             "right":"`unique_key0`"
>          }
>       ],
>       "joinType":"INNER",
>       "isRowKeyJoin":false,
>       "joinControl":0,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":4.73824196E8
>    },
>    {
>       "pop":"limit",
>       "@id":65538,
>       "child":65539,
>       "first":0,
>       "last":40,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"selection-vector-remover",
>       "@id":65537,
>       "child":65538,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"union-exchange",
>       "@id":5,
>       "child":65537,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"limit",
>       "@id":4,
>       "child":5,
>       "first":0,
>       "last":40,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"selection-vector-remover",
>       "@id":3,
>       "child":4,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"project",
>       "@id":2,
>       "exprs":[
>          {
>             "ref":"`T0¦¦**`",
>             "expr":"`T0¦¦**`"
>          }
>       ],
>       "child":3,
>       "outputProj":true,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"project",
>       "@id":1,
>       "exprs":[
>          {
>             "ref":"`**`",
>             "expr":"`T0¦¦**`"
>          }
>       ],
>       "child":2,
>       "outputProj":false,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    },
>    {
>       "pop":"screen",
>       "@id":0,
>       "child":1,
>       "initialAllocation":1000000,
>       "maxAllocation":10000000000,
>       "cost":40.0
>    }
> ]
> }
>
> Please what exactly Am I looking for as to why it never completes. I can
> see outofmemory errors but then Drill is running with 16gb on each node and
> there are five nodes in the cluster
>
> Best regards,
> Peter Edike
>
> Senior Software Engineer
> Interswitch
>
> Tel.  | Mobile.  | IP Phone.
> Fax.  | mailto:peter.edike@interswitchgroup.com | http://
>
> http://www.interswitchgroup.com
>
> InterswitchThis e-mail and all attachments transmitted with it remain the
> property of InterSwitch Limited , the information contained herein  are
> private  confidential and intended solely for the use of the addressee. If
> you have received this e-mail in error, kindly notify the sender. If you
> are not the addressee, you should not disseminate, distribute or copy this
> e-mail. Kindly notify InterSwitch immediately by email if you have received
> this email in error and delete this email and any attachment from your
> system  Emails cannot be guaranteed to be secure or error free as the
> message and any attachments could be intercepted, corrupted, lost, delayed,
> incomplete or amended. the contents of this email or its attachments have
> been scanned for all viruses and all reasonable measures have been taken to
> ensure that no viruses are present.  InterSwitch Limited and its
> subsidiaries do not accept liability for damage caused by this email or any
> attachments.
>
>
>
> This message has been marked as CONFIDENTIAL on Tuesday, August 7, 2018 @
> 4:54:16 PM
>
> -----Original Message-----
> From: Kunal Khatua <kunal@apache.org>
> Sent: Monday, August 6, 2018 6:41 PM
> To: user@drill.apache.org
> Subject: Re: This query is so slow
>
> Hi Peter
>
> What does the profile for the query indicate?
>
> Take a look at the operator overview. It will indicate which operator is
> using the most CPU cycles. If the average and max processing times vary
> wildly, it might be a problem of skew, where some fragments are doing a
> relatively excessive work.
> If that is the case, within that operator's profile segment, you can see
> the distribution of the fragments and see which ones are the long pole in
> your run.
>
> ~ KK
>
> On 8/6/2018 9:01:25 AM, Peter Edike <peter.edike@interswitchgroup.com>
> wrote:
> Hello all,
>
> The following query takes ages to complete in drill and more often that
> not, fails
>
> select * from dfs.`/iswdata/rj/201805` where unique_key in
>     (
>          select unique_key from dfs.`/iswdata/rj/201805` group by
> unique_key having count(unique_key) > 1
>     )
> limit 40
>
> Please what can I do to improve the performance of this query
>
> Kind Regards
>
> Peter Edike
> Senior Software Engineer
> Research and Development, ENG
> Engineering
>
> Office  NO:
> Mobile NO:
> Email: peter.edike@interswitchgroup.com [mailto:peter.edike@
> interswitchgroup.com]
> Interswitch Limited
> 1648C Oko-Awo Street, Victoria Island Lagos Customer Contact Centre
> 0700-9065000 ü http://www.interswitchgroup.com [
> http://www.interswitchgroup.com/]
> [https://www.quickteller.com/loan-request]
> This e-mail and all  attachments transmitted with it remain the property
> of Interswitch Limited , the information contained herein  are private
> confidential and intended solely for the use of the addressee. If you have
> received this e-mail in error, kindly notify the sender. If you are not the
> addressee, you should not disseminate, distribute or copy this e-mail.
> Kindly notify Interswitch immediately by email if you have received this
> email in error and delete this email and any attachment from your system
> Emails cannot be guaranteed to be secure or error free as the message and
> any attachments could be intercepted, corrupted, lost, delayed, incomplete
> or amended. the contents of this email or its attachments have been scanned
> for all viruses and all reasonable measures have been taken to ensure that
> no viruses are present. Interswitch Limited and its subsidiaries do not
> accept liability for damage caused by this email or any attachments.
>



-- 
Nitin Pawar

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message