drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Neeraja (JIRA)" <j...@apache.org>
Subject [jira] [Created] (DRILL-1396) Query with EXISTS clause and correlation fails
Date Wed, 10 Sep 2014 13:46:28 GMT
Neeraja created DRILL-1396:
------------------------------

             Summary: Query with EXISTS clause and correlation fails
                 Key: DRILL-1396
                 URL: https://issues.apache.org/jira/browse/DRILL-1396
             Project: Apache Drill
          Issue Type: Bug
            Reporter: Neeraja
            Priority: Critical


The following query fails.
//Get the clickstream activity for all the customers who have an order total > 100

select t.trans_info.purch_flag,
 t.user_info.cust_id, t.trans_info.prod_id
from `Clickstream.clicks`.`/json/clicks.json` t 
where  exists (select * from hive.orders o where o.cust_id = t.user_info.cust_id and o.order_total
> 100)

Query failed: Failure while running fragment. Failure finding function that runtime code generation
expected.  Signature: compare_to( MAP:REQUIREDMAP:REQUIRED,  ) returns INT:REQUIRED [d6401ddd-f9bc-496d-ae0c-b5cde35bf289]


Below is the explain plan:


+------------+------------+
|    text    |    json    |
+------------+------------+
| 00-00    Screen
00-01      Project(EXPR$0=[$0], EXPR$1=[$1], EXPR$2=[$2])
00-02        Project(EXPR$0=[ITEM($2, 'purch_flag')], EXPR$1=[ITEM($1, 'cust_id')], EXPR$2=[ITEM($2,
'prod_id')])
00-03          SelectionVectorRemover
00-04            Filter(condition=[IS TRUE($4)])
00-05              HashJoin(condition=[=($1, $3)], joinType=[left])
00-07                Project(T24¦¦*=[$0], T24¦¦user_info=[$1], T24¦¦trans_info=[$2])
00-09                  Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/my.cluster.com/demo/clicks/json/clicks.json,
columns = null]])
00-06                HashAgg(group=[{0}], agg#0=[MIN($1)])
00-08                  Project(T25¦¦user_info=[$1], $f0=[true])
00-10                    HashJoin(condition=[=($0, $2)], joinType=[inner])
00-12                      Project($f7=[CAST($0):ANY])
00-14                        SelectionVectorRemover
00-16                          Filter(condition=[>($1, 100)])
00-18                            Project(cust_id=[$1], order_total=[$0])
00-20                              Scan(groupscan=[HiveScan [table=Table(tableName:orders,
dbName:default, owner:root, createTime:1409956843, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:order_id,
type:bigint, comment:null), FieldSchema(name:month, type:string, comment:null), FieldSchema(name:purchdate,
type:timestamp, comment:null), FieldSchema(name:cust_id, type:bigint, comment:null), FieldSchema(name:state,
type:string, comment:null), FieldSchema(name:prod_id, type:bigint, comment:null), FieldSchema(name:order_total,
type:int, comment:null)], location:maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders,
inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat,
compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe,
parameters:{serialization.format=,, field.delim=,}), bucketCols:[], sortCols:[], parameters:{},
skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}),
storedAsSubDirectories:false), partitionKeys:[], parameters:{EXTERNAL=TRUE, transient_lastDdlTime=1409956843},
viewOriginalText:null, viewExpandedText:null, tableType:EXTERNAL_TABLE), inputSplits=[maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month2.agg.orders.csv:0+640155,
maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month5.agg.orders.csv:0+775506, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month6.agg.orders.csv:0+791685,
maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month8.agg.orders.csv:0+805072, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month4.agg.orders.csv:0+603886,
maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month9.agg.orders.csv:0+846270, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month1.agg.orders.csv:0+461090,
maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month7.agg.orders.csv:0+771399, maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders/month3.agg.orders.csv:0+806738],
columns=[SchemaPath [`cust_id`], SchemaPath [`order_total`]]]])
00-11                      Project(T25¦¦user_info=[$0], $f1=[ITEM($0, 'cust_id')])
00-13                        HashAgg(group=[{0}])
00-15                          Project(T25¦¦user_info=[$1])
00-17                            Project(T25¦¦*=[$0], T25¦¦user_info=[$1], T25¦¦trans_info=[$2])
00-19                              Scan(groupscan=[EasyGroupScan [selectionRoot=/mapr/my.cluster.com/demo/clicks/json/clicks.json,
columns = null]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "hive-scan",
    "@id" : 20,
    "hive-table" : {
      "table" : {
        "tableName" : "orders",
        "dbName" : "default",
        "owner" : "root",
        "createTime" : 1409956843,
        "lastAccessTime" : 0,
        "retention" : 0,
        "sd" : {
          "cols" : [ {
            "name" : "order_id",
            "type" : "bigint",
            "comment" : null
          }, {
            "name" : "month",
            "type" : "string",
            "comment" : null
          }, {
            "name" : "purchdate",
            "type" : "timestamp",
            "comment" : null
          }, {
            "name" : "cust_id",
            "type" : "bigint",
            "comment" : null
          }, {
            "name" : "state",
            "type" : "string",
            "comment" : null
          }, {
            "name" : "prod_id",
            "type" : "bigint",
            "comment" : null
          }, {
            "name" : "order_total",
            "type" : "int",
            "comment" : null
          } ],
          "location" : "maprfs:/mapr/my.cluster.com/drill-beta-demo/data/orders",
          "inputFormat" : "org.apache.hadoop.mapred.TextInputFormat",
          "outputFormat" : "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
          "compressed" : false,
          "numBuckets" : -1,
          "serDeInfo" : {
            "name" : null,
            "serializationLib" : "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
            "parameters" : {
              "serialization.format" : ",",
              "field.delim" : ","
            }
          },
          "sortCols" : [ ],
          "parameters" : { }
        },
        "partitionKeys" : [ ],
        "parameters" : {
          "EXTERNAL" : "TRUE",
          "transient_lastDdlTime" : "1409956843"
        },
        "viewOriginalText" : null,
        "viewExpandedText" : null,
        "tableType" : "EXTERNAL_TABLE"
      },
      "partitions" : null,
      "hiveConfigOverride" : {
        "hive.metastore.uris" : "thrift://192.168.208.143:9083",
        "hive.metastore.sasl.enabled" : "false"
      }
    },
    "storage-plugin" : "hive",
    "columns" : [ "`cust_id`", "`order_total`" ],
    "cost" : 6349.0
  }, {
    "pop" : "project",
    "@id" : 18,
    "exprs" : [ {
      "ref" : "`cust_id`",
      "expr" : "`cust_id`"
    }, {
      "ref" : "`order_total`",
      "expr" : "`order_total`"
    } ],
    "child" : 20,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 6349.0
  }, {
    "pop" : "filter",
    "@id" : 16,
    "child" : 18,
    "expr" : "greater_than(`order_total`, 100) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 3174.5
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 14,
    "child" : 16,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 3174.5
  }, {
    "pop" : "project",
    "@id" : 12,
    "exprs" : [ {
      "ref" : "`$f7`",
      "expr" : "`cust_id`"
    } ],
    "child" : 14,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 3174.5
  }, {
    "pop" : "fs-scan",
    "@id" : 9,
    "files" : [ "maprfs:/mapr/my.cluster.com/demo/clicks/json/clicks.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/mapr/my.cluster.com/demo",
          "writable" : false,
          "storageformat" : null
        },
        "clicks" : {
          "location" : "/mapr/my.cluster.com/demo/clicks",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "views" : {
          "location" : "/mapr/my.cluster.com/demo/views",
          "writable" : true,
          "storageformat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "selectionRoot" : "/mapr/my.cluster.com/demo/clicks/json/clicks.json",
    "cost" : 5097.0
  }, {
    "pop" : "project",
    "@id" : 7,
    "exprs" : [ {
      "ref" : "`T24¦¦*`",
      "expr" : "`*`"
    } ],
    "child" : 9,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 5097.0
  }, {
    "pop" : "fs-scan",
    "@id" : 19,
    "files" : [ "maprfs:/mapr/my.cluster.com/demo/clicks/json/clicks.json" ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/mapr/my.cluster.com/demo",
          "writable" : false,
          "storageformat" : null
        },
        "clicks" : {
          "location" : "/mapr/my.cluster.com/demo/clicks",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "views" : {
          "location" : "/mapr/my.cluster.com/demo/views",
          "writable" : true,
          "storageformat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "selectionRoot" : "/mapr/my.cluster.com/demo/clicks/json/clicks.json",
    "cost" : 5097.0
  }, {
    "pop" : "project",
    "@id" : 17, |
+------------+------------+



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message