sqoop-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Venkat Ranganathan <vranganat...@hortonworks.com>
Subject Fwd: Sqoop import Issue
Date Fri, 14 Mar 2014 19:17:30 GMT
Moving to sqoop user mailing list.

This points to error in your query.   $CONDIITONS evaluates to the per
mapper range.   In your case, your split column is not present in
every place you have $CONDITIONS so you should remove them from places
where it does not make sense to have.   For example,

==

SELECT  T.*

FROM    SI_F_L0_TRADE T

WHERE   T.REVENUE_DATE  >=  '01-JAN-2014'  and \$CONDITIONS

    AND
===

The $CONDITIONS will be subsituted to refer to columns that does not
exist in the subquery context.



In general, youcan enable --verbose option, get the query being
executed, and try to run it with SQL*Plus or some other tool to see if
it is valid SQL


Venkat



---------- Forwarded message ----------
From: Sethuramaswamy, Suresh <suresh.sethuramaswamy@credit-suisse.com>
Date: Fri, Mar 14, 2014 at 9:40 AM
Subject: RE: Sqoop import Issue
To: "vranganathan@hortonworks.com" <vranganathan@hortonworks.com>


Greetings Venkat,



Am trying to run a free form SQL on sqoop to import data from Oracle
10g , everytime  I keep getting  the split-by column mentioned is
invalid . Can you please help.





Sqoop version : 1.4.3

Oracle 10g

Ojdbc6.jar



Error



14/03/14 08:55:16 INFO mapred.JobClient: Task Id :
attempt_201403100830_0118_m_000009_0, Status : FAILED

java.io.IOException: SQLException in nextKeyValue

        at org.apache.sqoop.mapreduce.db.DBRecordReader.nextKeyValue(DBRecordReader.java:265)

        at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.nextKeyValue(MapTask.java:530)

        at org.apache.hadoop.mapreduce.MapContext.nextKeyValue(MapContext.java:67)

        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)

        at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)

        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:763)

        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:363)

        at org.apache.hadoop.mapred.Child$4.run(Child.java:255)

        at java.security.AccessController.doPrivileged(Native Method)

        at javax.security.auth.Subject.doAs(Subject.java:396)

        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1232)

        at org.apache.hadoop.mapred.Child.main(Child.java:249)

Caused by: java.sql.SQLSyntaxErrorException: ORA-00904:
"SI_CUSTOMER_XREF2"."CUSTOMER_ID": invalid identifier



QUERY



  sqoop import  --connect  <<connection parameter>>   -query



"

  SELECT

  SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,

  SI_CUSTOMER_XREF2.GESC_CLIENT_ID,

  SI_F_L0_TRADE.DATA_SRC_SYS_CD,

  SI_F_L0_TRADE.SI_TRADE_ID,

  SI_CUSTOMER_XREF2.CUSTOMER_ID,



****

****



FROM

  (

  SELECT * FROM SI_CUSTOMER_XREF2 WHERE  RUN_ID='209'

  and \$CONDITIONS

  )  SI_CUSTOMER_XREF2 join

  (

  SELECT  T.*

FROM    SI_F_L0_TRADE T

WHERE   T.REVENUE_DATE  >=  '01-JAN-2014'  and \$CONDITIONS

    AND

   T.ORG_DC_SNAPSHOT_ID  = (SELECT D.DC_SNAPSHOT_ID FROM DC_DATE_CARD
D WHERE  D.RUN_ID = '209'  and \$CONDITIONS

   )

  )  SI_F_L0_TRADE  ON (
SI_F_L0_TRADE.CUSTOMER_SGK=SI_CUSTOMER_XREF2.CUSTOMER_SGK  )

  join

  (

  SELECT * FROM SI_F_L0_TRADE_GROUP_ID G where RUN_ID='209'  and \$CONDITIONS

   AND REVENUE_DATE  >=  '01-JAN-2014'

  AND G.ORG_DC_SNAPSHOT_ID = (SELECT D.DC_SNAPSHOT_ID FROM
DC_DATE_CARD D WHERE  D.RUN_ID = '209'  and \$CONDITIONS

  )

  )  SI_F_L0_TRADE_GROUP_ID ON (
SI_F_L0_TRADE.TRADE_SGK=SI_F_L0_TRADE_GROUP_ID.TRADE_SGK  )

   join

  (  SELECT * FROM cpl_trade_instrument_ref WHERE  RUN_ID= '209' --
and \$CONDITIONS

  )  CPL_INSTRUMENT_REF ON  (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=SI_F_L0_TRADE.INSTRUMENT_SGK  )

  JOIN

  (

  SELECT * FROM CPL_INSTRUMENT_REF_EXT WHERE  RUN_ID='209'  and \$CONDITIONS

  )  CPL_INSTRUMENT_REF_EXT ON    (
CPL_INSTRUMENT_REF.INSTRUMENT_SGK=CPL_INSTRUMENT_REF_EXT.INSTRUMENT_SGK)

  join

  (

  SELECT * FROM SI_BUSINESS_REGION WHERE RUN_ID='209'   and \$CONDITIONS

  )  ISSUER_BUSINESS_REGION ON  (
ISSUER_BUSINESS_REGION.ISO_CNTRY_3_CHAR_CD=CPL_INSTRUMENT_REF_EXT.ISSUER_COUNTRY_CD
 )





    join

    (

  SELECT (product_line_cd ||

                    CASE WHEN nvl(new_issue_ind, '0') = '1' THEN 'P'
ELSE 'S' END || '_' ||

                              nvl(business_line_cd, 'N') || '_' ||

                              nvl(trade_type_cd, 'N') || '_' ||

                              nvl(region_cd, 'N'
        )) product_cd, SI_GROUP_ID_XREF.* FROM SI_GROUP_ID_XREF

WHERE RUN_ID  = '209'  and \$CONDITIONS

  )  SI_GROUP_ID_XREF ON   (
SI_F_L0_TRADE_GROUP_ID.GROUP_ID=SI_GROUP_ID_XREF.GROUP_ID  )

  JOIN

  (

  SELECT * FROM SI_PRODUCT_LINE WHERE  RUN_ID= '209'  and \$CONDITIONS

  )  SI_PRODUCT_LINE ON       (
SI_GROUP_ID_XREF.PRODUCT_LINE_CD=SI_PRODUCT_LINE.PRODUCT_LINE_L6_SGK
)





    join

  (

  SELECT * FROM SI_BUSINESS_REGION WHERE  RUN_ID= '209'  and \$CONDITIONS

  )  SI_BUSINESS_REGION ON    (
SI_GROUP_ID_XREF.REGION_CD=SI_BUSINESS_REGION.REGION_L6_SGK  )

  JOIN

  (

  SELECT * FROM si_salesrep_xref WHERE run_id = '209'  and \$CONDITIONS

  )  MARKETER     ON  ( MARKETER.SRC_SALESREP_ID=SI_F_L0_TRADE.SRC_SALES_REP_ID)

   join

  (

  SELECT * FROM CDW_PRESENTATION.CPL_EMPLOYEE_REF WHERE run_id ='209'
and \$CONDITIONS

  )  MARKETER_EMPLOYEE_REF ON  (
MARKETER.CS_EMPLOYEE_ID=MARKETER_EMPLOYEE_REF.EMPLOYEE_ID  )

   join

  (

  SELECT * FROM SI_BOOK WHERE  RUN_ID= '209'  and \$CONDITIONS

  )  SI_BOOK  ON   ( SI_BOOK.BOOK_SGK=SI_F_L0_TRADE.BOOK_SGK  )

   join

  (SELECT * FROM cpl_sfo_country_xref ) DOMICILE_SFO_CNTRY_XREF     ON
 ( SI_CUSTOMER_XREF2.DOMICILE_CNTRY_ISO_CD=DOMICILE_SFO_CNTRY_XREF.CNTRY_ISO_CD)

  join

  (

  SELECT * FROM CPL_CLIENT_REF WHERE  RUN_ID='209'   and \$CONDITIONS

   AND SRC_SYS_CD = 'GESC'   )  CPL_CLIENT_REF

ON       ( SI_CUSTOMER_XREF2.GESC_CLIENT_ID=CPL_CLIENT_REF.CLIENT_CD  )

JOIN

  (

  select * from si_client_type where run_id = '209'  and \$CONDITIONS

  )  CLIENT_TYPE      ON  ( CPL_CLIENT_REF.CLIENT_SGK=CLIENT_TYPE.CLIENT_SGK  )



   join



  (

  select * from si_client_types where run_id = '209'  and \$CONDITIONS

  )  CLIENT_TYPES     ON  (
CLIENT_TYPE.CLIENT_TYPES_SGK=CLIENT_TYPES.CLIENT_TYPES_SGK  )



    WHERE  MARKETER.SRC_SYS_CD=SI_F_L0_TRADE.SREP_SRC_SYS_CD  and \$CONDITIONS

    AND

  (

   ( ( SI_F_L0_TRADE.IGNORE_REASON_CD ) IS NULL

  AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) IS NOT NULL

  AND ( SI_F_L0_TRADE_GROUP_ID.GROUP_ID ) >0

  AND SI_GROUP_ID_XREF.Can_Double_Count = 0

  AND SI_GROUP_ID_XREF.Product_Line_Cd IS NOT NULL

  AND SI_GROUP_ID_XREF.LOSS_IND >=0

  )

  AND

   SI_PRODUCT_LINE.PRODUCT_LINE_L1_NAME  =  'All (Equities)'   )

GROUP BY

  SI_CUSTOMER_XREF2.GESC_CLIENT_NAME,

  SI_CUSTOMER_XREF2.GESC_CLIENT_ID,

  SI_F_L0_TRADE.DATA_SRC_SYS_CD,

  SI_F_L0_TRADE.SI_TRADE_ID,

  SI_CUSTOMER_XREF2.CUSTOMER_ID,

****

***

" --split-by  SI_CUSTOMER_XREF2.customer_id --boundary-query "select
min(CUSTOMER_ID),max(CUSTOMER_ID) from  SI_CUSTOMER_XREF2 WHERE
RUN_ID='209'" -m 48 --fields-terminated-by "|" --target-dir
"/SqoopImports/daily_eqt" --verbose;







Suresh Sethuramaswamy

Cross Div MIS & Power User Tools



From: Venkat Ranganathan [mailto:vranganathan@hortonworks.com]
Sent: Thursday, March 13, 2014 5:30 PM
To: user@sqoop.apache.org
Subject: Re: Sqoop import into a Hive table with 2 columns as partition keys



If your partition columns are dynamic, then using hcatalog import will
be enough.   The Sqoop HCatalog integration section in the Sqoop  user
guide has more information on the HCatalog integration (that I linked
earlier).   The restriction is that you can't have more than one
static partition key.





Venkat



On Wed, Mar 12, 2014 at 11:28 PM, Richa Sharma
<mailtorichasharma@gmail.com> wrote:

Thanks for the inputs.



In my case the partition columns are dynamic.. so I guess it will make
sense to load it into 2 hops.



Venkat, thanks for sharing the ppt on Hcatalog - I couldn't find much
online so it is very helpful :-)



Richa



On Thu, Mar 13, 2014 at 11:33 AM, Venkat Ranganathan
<vranganathan@hortonworks.com> wrote:

In Sqoop 1.4.4 we have Hcatalog Integration.   This allows importing
data into hive partition tables with multiple partition keys (but only
can be a static partitioning key).    See
http://sqoop.apache.org/docs/1.4.4/SqoopUserGuide.html#_sqoop_hcatalog_integration



There is a presentation done at Hadoop World on Hcatalog integration
that you can refer to for more information



https://cwiki.apache.org/confluence/download/attachments/27361435/SqoopHCatIntegration-HadoopWorld2013.pptx



Venkat



On Wed, Mar 12, 2014 at 9:19 PM, bejoy ks <bejoyks@gmail.com> wrote:

AFAIK. Sqoop 1x imports supports hive single column partitions.



If it is multi column partitions you might have to take a two hop approach

1. Load the data into a single/non partitioned hive tabe

2. From the above staging table load to the final one.



On Tue, Mar 11, 2014 at 7:23 AM, Richa Sharma
<mailtorichasharma@gmail.com> wrote:

Hi,



I am trying to run sqoop import into a Hive table partitioned on 2
columns (col1 string, col2 string).



I looked at the documentation for both --hive-import and --hcatalog
and could not find any such option.



It works successfully when table is partitioned on 1 column but not
when i increase number of partition columns.



Has anyone tried it and knows how to achieve it ?



Thanks,

Richa





-- 
Regards
       Bejoy





CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or
entity to which it is addressed and may contain information that is
confidential, privileged and exempt from disclosure under applicable
law. If the reader of this message is not the intended recipient, you
are hereby notified that any printing, copying, dissemination,
distribution, disclosure or forwarding of this communication is
strictly prohibited. If you have received this communication in error,
please contact the sender immediately and delete it from your system.
Thank You.






CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or
entity to which it is addressed and may contain information that is
confidential, privileged and exempt from disclosure under applicable
law. If the reader of this message is not the intended recipient, you
are hereby notified that any printing, copying, dissemination,
distribution, disclosure or forwarding of this communication is
strictly prohibited. If you have received this communication in error,
please contact the sender immediately and delete it from your system.
Thank You.



==============================================================================
Please access the attached hyperlink for an important electronic
communications disclaimer:
http://www.credit-suisse.com/legal/en/disclaimer_email_ib.html
==============================================================================

-- 
CONFIDENTIALITY NOTICE
NOTICE: This message is intended for the use of the individual or entity to 
which it is addressed and may contain information that is confidential, 
privileged and exempt from disclosure under applicable law. If the reader 
of this message is not the intended recipient, you are hereby notified that 
any printing, copying, dissemination, distribution, disclosure or 
forwarding of this communication is strictly prohibited. If you have 
received this communication in error, please contact the sender immediately 
and delete it from your system. Thank You.

Mime
View raw message