drill-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Peter Edike <peter.ed...@interswitchgroup.com>
Subject RE: This query is so slow
Date Tue, 07 Aug 2018 15:53:57 GMT
Did an explain plan on the query since the query never completes successfully (I can't see
the profile because of that), Here is the result of the explain plan query 

"head":{  
   "version":1,
   "generator":{  
      "type":"ExplainHandler",
      "info":""
   },
   "type":"APACHE_DRILL_PHYSICAL",
   "options":[  

   ],
   "queue":0,
   "hasResourcePlan":false,
   "resultMode":"EXEC"
},
"graph":[  
   {  
      "pop":"parquet-scan",
      "@id":327683,
      "userName":"mapr",
      "entries":[  
         {  
            "path":"maprfs:///iswdata/rj"
         }
      ],
      "storage":{  
         "type":"file",
         "enabled":true,
         "connection":"maprfs:///",
         "config":null,
         "workspaces":{  
            "root":{  
               "location":"/",
               "writable":false,
               "defaultInputFormat":null,
               "allowAccessOutsideWorkspace":false
            },
            "tmp":{  
               "location":"/tmp",
               "writable":true,
               "defaultInputFormat":null,
               "allowAccessOutsideWorkspace":false
            }
         },
         "formats":{  
            "psv":{  
               "type":"text",
               "extensions":[  
                  "tbl"
               ],
               "delimiter":"|"
            },
            "csv":{  
               "type":"text",
               "extensions":[  
                  "csv"
               ],
               "delimiter":","
            },
            "tsv":{  
               "type":"text",
               "extensions":[  
                  "tsv"
               ],
               "delimiter":"\t"
            },
            "parquet":{  
               "type":"parquet"
            },
            "json":{  
               "type":"json",
               "extensions":[  
                  "json"
               ]
            },
            "maprdb":{  
               "type":"maprdb"
            }
         }
      },
      "format":{  
         "type":"parquet"
      },
      "columns":[  
         "`unique_key`"
      ],
      "selectionRoot":"maprfs:/iswdata/rj",
      "filter":"true",
      "fileSet":[  
         "/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet",
         "/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet",
         "/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet",
         "/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet",
         "/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet",
         "/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet",
         "/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet",
         "/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet",
         "/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet"
      ],
      "cost":4.73824196E8
   },
   {  
      "pop":"hash-aggregate",
      "@id":327682,
      "child":327683,
      "cardinality":1.0,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "aggPhase":"PHASE_1of2",
      "groupByExprs":[  
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         }
      ],
      "aggrExprs":[  
         {  
            "ref":"`$f1`",
            "expr":"count(`unique_key`) "
         }
      ],
      "cost":2.36912098E8
   },
   {  
      "pop":"project",
      "@id":327681,
      "exprs":[  
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         },
         {  
            "ref":"`$f1`",
            "expr":"`$f1`"
         },
         {  
            "ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
            "expr":"hash32asdouble(`unique_key`, 1301011) "
         }
      ],
      "child":327682,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E7
   },
   {  
      "pop":"unordered-mux-exchange",
      "@id":196609,
      "child":327681,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E7
   },
   {  
      "pop":"hash-to-random-exchange",
      "@id":65548,
      "child":196609,
      "expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E7
   },
   {  
      "pop":"project",
      "@id":65547,
      "exprs":[  
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         },
         {  
            "ref":"`$f1`",
            "expr":"`$f1`"
         }
      ],
      "child":65548,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E7
   },
   {  
      "pop":"hash-aggregate",
      "@id":65546,
      "child":65547,
      "cardinality":1.0,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "aggPhase":"PHASE_2of2",
      "groupByExprs":[  
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         }
      ],
      "aggrExprs":[  
         {  
            "ref":"`$f1`",
            "expr":"$sum0(`$f1`) "
         }
      ],
      "cost":2.36912098E7
   },
   {  
      "pop":"filter",
      "@id":65545,
      "child":65546,
      "expr":"greater_than(`$f1`, 1) ",
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":2369120.98
   },
   {  
      "pop":"selection-vector-remover",
      "@id":65544,
      "child":65545,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":2369120.98
   },
   {  
      "pop":"project",
      "@id":65542,
      "exprs":[  
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         }
      ],
      "child":65544,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":2369120.98
   },
   {  
      "pop":"project",
      "@id":65540,
      "exprs":[  
         {  
            "ref":"`unique_key0`",
            "expr":"`unique_key`"
         }
      ],
      "child":65542,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":2369120.98
   },
   {  
      "pop":"parquet-scan",
      "@id":262147,
      "userName":"mapr",
      "entries":[  
         {  
            "path":"maprfs:///iswdata/rj"
         }
      ],
      "storage":{  
         "type":"file",
         "enabled":true,
         "connection":"maprfs:///",
         "config":null,
         "workspaces":{  
            "root":{  
               "location":"/",
               "writable":false,
               "defaultInputFormat":null,
               "allowAccessOutsideWorkspace":false
            },
            "tmp":{  
               "location":"/tmp",
               "writable":true,
               "defaultInputFormat":null,
               "allowAccessOutsideWorkspace":false
            }
         },
         "formats":{  
            "psv":{  
               "type":"text",
               "extensions":[  
                  "tbl"
               ],
               "delimiter":"|"
            },
            "csv":{  
               "type":"text",
               "extensions":[  
                  "csv"
               ],
               "delimiter":","
            },
            "tsv":{  
               "type":"text",
               "extensions":[  
                  "tsv"
               ],
               "delimiter":"\t"
            },
            "parquet":{  
               "type":"parquet"
            },
            "json":{  
               "type":"json",
               "extensions":[  
                  "json"
               ]
            },
            "maprdb":{  
               "type":"maprdb"
            }
         }
      },
      "format":{  
         "type":"parquet"
      },
      "columns":[  
         "`**`"
      ],
      "selectionRoot":"maprfs:/iswdata/rj",
      "filter":"true",
      "fileSet":[  
         "/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet",
         "/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet",
         "/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet",
         "/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet",
         "/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet",
         "/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet",
         "/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet",
         "/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet",
         "/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet"
      ],
      "cost":4.73824196E8
   },
   {  
      "pop":"project",
      "@id":262146,
      "exprs":[  
         {  
            "ref":"`T0¦¦**`",
            "expr":"`**`"
         },
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         }
      ],
      "child":262147,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E8
   },
   {  
      "pop":"project",
      "@id":262145,
      "exprs":[  
         {  
            "ref":"`T0¦¦**`",
            "expr":"`T0¦¦**`"
         },
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         },
         {  
            "ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
            "expr":"hash32asdouble(`unique_key`, 1301011) "
         }
      ],
      "child":262146,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E8
   },
   {  
      "pop":"unordered-mux-exchange",
      "@id":131073,
      "child":262145,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E8
   },
   {  
      "pop":"hash-to-random-exchange",
      "@id":65543,
      "child":131073,
      "expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E8
   },
   {  
      "pop":"project",
      "@id":65541,
      "exprs":[  
         {  
            "ref":"`T0¦¦**`",
            "expr":"`T0¦¦**`"
         },
         {  
            "ref":"`unique_key`",
            "expr":"`unique_key`"
         }
      ],
      "child":65543,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E8
   },
   {  
      "pop":"hash-join",
      "@id":65539,
      "left":65541,
      "right":65540,
      "conditions":[  
         {  
            "relationship":"EQUALS",
            "left":"`unique_key`",
            "right":"`unique_key0`"
         }
      ],
      "joinType":"INNER",
      "isRowKeyJoin":false,
      "joinControl":0,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":4.73824196E8
   },
   {  
      "pop":"limit",
      "@id":65538,
      "child":65539,
      "first":0,
      "last":40,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"selection-vector-remover",
      "@id":65537,
      "child":65538,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"union-exchange",
      "@id":5,
      "child":65537,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"limit",
      "@id":4,
      "child":5,
      "first":0,
      "last":40,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"selection-vector-remover",
      "@id":3,
      "child":4,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"project",
      "@id":2,
      "exprs":[  
         {  
            "ref":"`T0¦¦**`",
            "expr":"`T0¦¦**`"
         }
      ],
      "child":3,
      "outputProj":true,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"project",
      "@id":1,
      "exprs":[  
         {  
            "ref":"`**`",
            "expr":"`T0¦¦**`"
         }
      ],
      "child":2,
      "outputProj":false,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   },
   {  
      "pop":"screen",
      "@id":0,
      "child":1,
      "initialAllocation":1000000,
      "maxAllocation":10000000000,
      "cost":40.0
   }
]
}

Please what exactly Am I looking for as to why it never completes. I can see outofmemory errors
but then Drill is running with 16gb on each node and there are five nodes in the cluster

Best regards,
Peter Edike

Senior Software Engineer
Interswitch

Tel.  | Mobile.  | IP Phone. 
Fax.  | mailto:peter.edike@interswitchgroup.com | http://

http://www.interswitchgroup.com

InterswitchThis e-mail and all attachments transmitted with it remain the property of InterSwitch
Limited , the information contained herein  are private  confidential and intended solely
for the use of the addressee. If you have received this e-mail in error, kindly notify the
sender. If you are not the addressee, you should not disseminate, distribute or copy this
e-mail. Kindly notify InterSwitch immediately by email if you have received this email in
error and delete this email and any attachment from your system  Emails cannot be guaranteed
to be secure or error free as the message and any attachments could be intercepted, corrupted,
lost, delayed, incomplete or amended. the contents of this email or its attachments have been
scanned for all viruses and all reasonable measures have been taken to ensure that no viruses
are present.  InterSwitch Limited and its subsidiaries do not accept liability for damage
caused by this email or any attachments.



This message has been marked as CONFIDENTIAL on Tuesday, August 7, 2018 @ 4:54:16 PM

-----Original Message-----
From: Kunal Khatua <kunal@apache.org> 
Sent: Monday, August 6, 2018 6:41 PM
To: user@drill.apache.org
Subject: Re: This query is so slow

Hi Peter

What does the profile for the query indicate?

Take a look at the operator overview. It will indicate which operator is using the most CPU
cycles. If the average and max processing times vary wildly, it might be a problem of skew,
where some fragments are doing a relatively excessive work.
If that is the case, within that operator's profile segment, you can see the distribution
of the fragments and see which ones are the long pole in your run.

~ KK 

On 8/6/2018 9:01:25 AM, Peter Edike <peter.edike@interswitchgroup.com> wrote:
Hello all,
 
The following query takes ages to complete in drill and more often that not, fails
 
select * from dfs.`/iswdata/rj/201805` where unique_key in
    (
         select unique_key from dfs.`/iswdata/rj/201805` group by unique_key having
count(unique_key) > 1
    )
limit 40
 
Please what can I do to improve the performance of this query
 
Kind Regards
 
Peter Edike
Senior Software Engineer
Research and Development, ENG
Engineering

Office  NO:
Mobile NO:
Email: peter.edike@interswitchgroup.com [mailto:peter.edike@interswitchgroup.com]
Interswitch Limited
1648C Oko-Awo Street, Victoria Island Lagos Customer Contact Centre 0700-9065000 ü http://www.interswitchgroup.com
[http://www.interswitchgroup.com/]
[https://www.quickteller.com/loan-request]
This e-mail and all  attachments transmitted with it remain the property of Interswitch Limited
, the information contained herein  are private  confidential and intended solely for the
use of the addressee. If you have received this e-mail in error, kindly notify the sender.
If you are not the addressee, you should not disseminate, distribute or copy this e-mail.
Kindly notify Interswitch immediately by email if you have received this email in error and
delete this email and any attachment from your system  Emails cannot be guaranteed to be
secure or error free as the message and any attachments could be intercepted, corrupted, lost,
delayed, incomplete or amended. the contents of this email or its attachments have been scanned
for all viruses and all reasonable measures have been taken to ensure that no viruses are
present. Interswitch Limited and its subsidiaries do not accept liability for damage caused
by this email or any attachments.
Mime
View raw message