sqoop-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Markus Kemper (JIRA)" <j...@apache.org>
Subject [jira] [Created] (SQOOP-3166) Sqoop1 (import + --query + aggregate function + --split-by -m >=2) fails with error (unknown column)
Date Mon, 03 Apr 2017 16:57:41 GMT
Markus Kemper created SQOOP-3166:
------------------------------------

             Summary: Sqoop1 (import + --query + aggregate function + --split-by -m >=2)
fails with error (unknown column)
                 Key: SQOOP-3166
                 URL: https://issues.apache.org/jira/browse/SQOOP-3166
             Project: Sqoop
          Issue Type: Bug
            Reporter: Markus Kemper


This issue appears to be RDBMS generic.

*Test Case*
{noformat}
########################################################################
# Description:
# 
# 1. Sqoop import fails with Unknown Column error with the following conditions
# 1.1. Using --query + sql aggregate function() + --split-by + --num-mappers >1 fails
# 2. The Sqoop documentation does not seem to clarify requirements for "select list" and "--split-by"

#
# Documentation
# http://archive.cloudera.com/cdh5/cdh/5/sqoop-1.4.6-cdh5.10.0/SqoopUserGuide.html#_controlling_parallelism
# 7.2.4. Controlling Parallelism
########################################################################

#################
# STEP 01 - [ORACLE] Create Data
#################

export MYCONN=jdbc:oracle:thin:@oracle1.cloudera.com:1521/db11g;
export MYUSER=sqoop
export MYPSWD=cloudera

sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "drop table t1"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "drop view v1"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "create table t1
(c1 int, c2 date, c3 varchar(10))"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "insert into t1
values (1, current_date, 'some data')"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "create view v1
as select c1 as \"ID\", c2, c3 from t1" 
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(c1)
from t1"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(id)
from v1"

------------------------
| COUNT(C1)            | 
------------------------
| 1                    | 
------------------------
~~~~~
------------------------
| COUNT(ID)            | 
------------------------
| 1                    | 
------------------------

#################
# STEP 02 - [ORACLE] Import from Table/View with --num-mappes 2 (reproduction)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(c1)
from t1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 2
--split-by C1 --verbose
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(id)
from v1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 2
--split-by ID --verbose

Output:
17/04/03 09:09:11 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(C1), MAX(C1)
FROM (select count(c1) from t1 where  (1 = 1) ) t1
17/04/03 09:09:11 INFO mapreduce.JobSubmitter: Cleaning up the staging area /user/root/.staging/job_1490976836761_0069
17/04/03 09:09:11 WARN security.UserGroupInformation: PriviledgedActionException as:root (auth:SIMPLE)
cause:java.io.IOException: java.sql.SQLSyntaxErrorException: ORA-00904: "C1": invalid identifier
~~~~~
17/04/03 09:10:01 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(ID), MAX(ID)
FROM (select count(id) from v1 where  (1 = 1) ) t1
17/04/03 09:10:01 INFO mapreduce.JobSubmitter: Cleaning up the staging area /user/root/.staging/job_1490976836761_0070
17/04/03 09:10:01 WARN security.UserGroupInformation: PriviledgedActionException as:root (auth:SIMPLE)
cause:java.io.IOException: java.sql.SQLSyntaxErrorException: ORA-00904: "ID": invalid identifier

#################
# STEP 03 - [ORACLE] Import from Table/View with --num-mappes 1 (workaround)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(c1)
from t1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 1
--split-by C1 --verbose
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(id)
from v1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 1
--split-by ID --verbose

Output:
17/04/03 09:07:11 INFO mapreduce.ImportJobBase: Transferred 2 bytes in 21.5799 seconds (0.0927
bytes/sec)
17/04/03 09:07:11 INFO mapreduce.ImportJobBase: Retrieved 1 records.
~~~~~
17/04/03 09:08:13 INFO mapreduce.ImportJobBase: Transferred 2 bytes in 20.4732 seconds (0.0977
bytes/sec)
17/04/03 09:08:13 INFO mapreduce.ImportJobBase: Retrieved 1 records.

#################
# STEP 04 - [MYSQL] Create Data
#################

export MYCONN=jdbc:mysql://mysql1.cloudera.com:3306/db_coe
export MYUSER=sqoop
export MYPSWD=cloudera

sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "drop table t1"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "drop view v1"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "create table t1
(c1 int, c2 date, c3 varchar(10))"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "insert into t1
values (1, current_date, 'some data')"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "create view v1
as select c1 as id, c2, c3 from t1" 
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(c1)
from t1"
sqoop eval --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(id)
from v1"

Output:
------------------------
| count(c1)            | 
------------------------
| 1                    | 
------------------------
~~~~~
------------------------
| count(id)            | 
------------------------
| 1                    | 
------------------------

#################
# STEP 05 - [MYSQL] Import from Table/View with --num-mappes 2 (reproduction)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(c1)
from t1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 2
--split-by c1 --verbose
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(id)
from v1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 2
--split-by id --verbose

Output:
17/04/03 09:37:57 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(c1), MAX(c1)
FROM (select count(c1) from t1 where  (1 = 1) ) AS t1
17/04/03 09:37:57 INFO mapreduce.JobSubmitter: Cleaning up the staging area /user/root/.staging/job_1490976836761_0078
17/04/03 09:37:57 WARN security.UserGroupInformation: PriviledgedActionException as:root (auth:SIMPLE)
cause:java.io.IOException: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: Unknown
column 'c1' in 'field list'
~~~~~
17/04/03 09:38:29 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(id), MAX(id)
FROM (select count(id) from v1 where  (1 = 1) ) AS t1
17/04/03 09:38:29 INFO mapreduce.JobSubmitter: Cleaning up the staging area /user/root/.staging/job_1490976836761_0079
17/04/03 09:38:29 WARN security.UserGroupInformation: PriviledgedActionException as:root (auth:SIMPLE)
cause:java.io.IOException: com.mysql.jdbc.exceptions.jdbc4.MySQLSyntaxErrorException: Unknown
column 'id' in 'field list'

#################
# STEP 06 - [MYSQL] Import from Table/View with --num-mappes 1 (workaround)
#################

sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(c1)
from t1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 1
--split-by c1 --verbose
sqoop import --connect $MYCONN --username $MYUSER --password $MYPSWD --query "select count(id)
from v1 where \$CONDITIONS" --target-dir /user/root/t1 --delete-target-dir --num-mappers 1
--split-by id --verbose

Output:
17/04/03 09:40:40 INFO mapreduce.ImportJobBase: Transferred 2 bytes in 36.5269 seconds (0.0548
bytes/sec)
17/04/03 09:40:40 INFO mapreduce.ImportJobBase: Retrieved 1 records.
~~~~~
17/04/03 09:45:19 INFO mapreduce.ImportJobBase: Transferred 2 bytes in 22.4301 seconds (0.0892
bytes/sec)
17/04/03 09:45:19 INFO mapreduce.ImportJobBase: Retrieved 1 records.
{noformat}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Mime
View raw message