sqoop-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jarek Jarcec Cecho <jar...@apache.org>
Subject Re: HSQLDB issue with null strings
Date Fri, 22 Nov 2013 19:24:55 GMT
Hi sir,
it seems that HSQLDB is reporting the column "STRING" as type CLOB. The parameters --(input-)null-(non-)string
are working only for string-based columns (CHAR, VARCHAR, NCHAR, ...) and not for a CLOB.
You might be able to overcome this by using the --map-column-java parameter to force its type
to String (and not a CLOB):

  sqoop import --map-column-java STRING=String ...

You can find more details about type mapping in Sqoop User Guide:

  http://sqoop.apache.org/docs/1.4.4/SqoopUserGuide.html#_controlling_type_mapping

Jarcec

On Thu, Nov 21, 2013 at 09:56:15PM -0800, redshift-etl-user wrote:
> Jarek - class included below. Thanks!
> 
> 
> // ORM class for table 'null'
> // WARNING: This class is AUTO-GENERATED. Modify at your own risk.
> //
> // Debug information:
> // Generated date: Thu Nov 21 21:52:32 PST 2013
> // For connector: org.apache.sqoop.manager.GenericJdbcManager
> import org.apache.hadoop.io.BytesWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.io.Writable;
> import org.apache.hadoop.mapred.lib.db.DBWritable;
> import com.cloudera.sqoop.lib.JdbcWritableBridge;
> import com.cloudera.sqoop.lib.DelimiterSet;
> import com.cloudera.sqoop.lib.FieldFormatter;
> import com.cloudera.sqoop.lib.RecordParser;
> import com.cloudera.sqoop.lib.BooleanParser;
> import com.cloudera.sqoop.lib.BlobRef;
> import com.cloudera.sqoop.lib.ClobRef;
> import com.cloudera.sqoop.lib.LargeObjectLoader;
> import com.cloudera.sqoop.lib.SqoopRecord;
> import java.sql.PreparedStatement;
> import java.sql.ResultSet;
> import java.sql.SQLException;
> import java.io.DataInput;
> import java.io.DataOutput;
> import java.io.IOException;
> import java.nio.ByteBuffer;
> import java.nio.CharBuffer;
> import java.sql.Date;
> import java.sql.Time;
> import java.sql.Timestamp;
> import java.util.Arrays;
> import java.util.Iterator;
> import java.util.List;
> import java.util.Map;
> import java.util.TreeMap;
> 
> public class gXoTNYSfULokaKs extends SqoopRecord  implements DBWritable,
> Writable {
>   private final int PROTOCOL_VERSION = 3;
>   public int getClassFormatVersion() { return PROTOCOL_VERSION; }
>   protected ResultSet __cur_result_set;
>   private Integer ID;
>   public Integer get_ID() {
>     return ID;
>   }
>   public void set_ID(Integer ID) {
>     this.ID = ID;
>   }
>   public gXoTNYSfULokaKs with_ID(Integer ID) {
>     this.ID = ID;
>     return this;
>   }
>   private com.cloudera.sqoop.lib.ClobRef STRING;
>   public com.cloudera.sqoop.lib.ClobRef get_STRING() {
>     return STRING;
>   }
>   public void set_STRING(com.cloudera.sqoop.lib.ClobRef STRING) {
>     this.STRING = STRING;
>   }
>   public gXoTNYSfULokaKs with_STRING(com.cloudera.sqoop.lib.ClobRef STRING)
> {
>     this.STRING = STRING;
>     return this;
>   }
>   public boolean equals(Object o) {
>     if (this == o) {
>       return true;
>     }
>     if (!(o instanceof gXoTNYSfULokaKs)) {
>       return false;
>     }
>     gXoTNYSfULokaKs that = (gXoTNYSfULokaKs) o;
>     boolean equal = true;
>     equal = equal && (this.ID == null ? that.ID == null :
> this.ID.equals(that.ID));
>     equal = equal && (this.STRING == null ? that.STRING == null :
> this.STRING.equals(that.STRING));
>     return equal;
>   }
>   public void readFields(ResultSet __dbResults) throws SQLException {
>     this.__cur_result_set = __dbResults;
>     this.ID = JdbcWritableBridge.readInteger(1, __dbResults);
>     this.STRING = JdbcWritableBridge.readClobRef(2, __dbResults);
>   }
>   public void loadLargeObjects(LargeObjectLoader __loader)
>       throws SQLException, IOException, InterruptedException {
>     this.STRING = __loader.readClobRef(2, this.__cur_result_set);
>   }
>   public void write(PreparedStatement __dbStmt) throws SQLException {
>     write(__dbStmt, 0);
>   }
> 
>   public int write(PreparedStatement __dbStmt, int __off) throws
> SQLException {
>     JdbcWritableBridge.writeInteger(ID, 1 + __off, 4, __dbStmt);
>     JdbcWritableBridge.writeClobRef(STRING, 2 + __off, 2005, __dbStmt);
>     return 2;
>   }
>   public void readFields(DataInput __dataIn) throws IOException {
>     if (__dataIn.readBoolean()) {
>         this.ID = null;
>     } else {
>     this.ID = Integer.valueOf(__dataIn.readInt());
>     }
>     if (__dataIn.readBoolean()) {
>         this.STRING = null;
>     } else {
>     this.STRING =
> com.cloudera.sqoop.lib.LobSerializer.readClobFields(__dataIn);
>     }
>   }
>   public void write(DataOutput __dataOut) throws IOException {
>     if (null == this.ID) {
>         __dataOut.writeBoolean(true);
>     } else {
>         __dataOut.writeBoolean(false);
>     __dataOut.writeInt(this.ID);
>     }
>     if (null == this.STRING) {
>         __dataOut.writeBoolean(true);
>     } else {
>         __dataOut.writeBoolean(false);
>     com.cloudera.sqoop.lib.LobSerializer.writeClob(this.STRING, __dataOut);
>     }
>   }
>   private final DelimiterSet __outputDelimiters = new DelimiterSet((char)
> 44, (char) 10, (char) 34, (char) 92, true);
>   public String toString() {
>     return toString(__outputDelimiters, true);
>   }
>   public String toString(DelimiterSet delimiters) {
>     return toString(delimiters, true);
>   }
>   public String toString(boolean useRecordDelim) {
>     return toString(__outputDelimiters, useRecordDelim);
>   }
>   public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
>     StringBuilder __sb = new StringBuilder();
>     char fieldDelim = delimiters.getFieldsTerminatedBy();
>     __sb.append(FieldFormatter.escapeAndEnclose(ID==null?"":"" + ID,
> delimiters));
>     __sb.append(fieldDelim);
>     __sb.append(FieldFormatter.escapeAndEnclose(STRING==null?"":"" +
> STRING, delimiters));
>     if (useRecordDelim) {
>       __sb.append(delimiters.getLinesTerminatedBy());
>     }
>     return __sb.toString();
>   }
>   private final DelimiterSet __inputDelimiters = new DelimiterSet((char)
> 44, (char) 10, (char) 34, (char) 92, true);
>   private RecordParser __parser;
>   public void parse(Text __record) throws RecordParser.ParseError {
>     if (null == this.__parser) {
>       this.__parser = new RecordParser(__inputDelimiters);
>     }
>     List<String> __fields = this.__parser.parseRecord(__record);
>     __loadFromFields(__fields);
>   }
> 
>   public void parse(CharSequence __record) throws RecordParser.ParseError {
>     if (null == this.__parser) {
>       this.__parser = new RecordParser(__inputDelimiters);
>     }
>     List<String> __fields = this.__parser.parseRecord(__record);
>     __loadFromFields(__fields);
>   }
> 
>   public void parse(byte [] __record) throws RecordParser.ParseError {
>     if (null == this.__parser) {
>       this.__parser = new RecordParser(__inputDelimiters);
>     }
>     List<String> __fields = this.__parser.parseRecord(__record);
>     __loadFromFields(__fields);
>   }
> 
>   public void parse(char [] __record) throws RecordParser.ParseError {
>     if (null == this.__parser) {
>       this.__parser = new RecordParser(__inputDelimiters);
>     }
>     List<String> __fields = this.__parser.parseRecord(__record);
>     __loadFromFields(__fields);
>   }
> 
>   public void parse(ByteBuffer __record) throws RecordParser.ParseError {
>     if (null == this.__parser) {
>       this.__parser = new RecordParser(__inputDelimiters);
>     }
>     List<String> __fields = this.__parser.parseRecord(__record);
>     __loadFromFields(__fields);
>   }
> 
>   public void parse(CharBuffer __record) throws RecordParser.ParseError {
>     if (null == this.__parser) {
>       this.__parser = new RecordParser(__inputDelimiters);
>     }
>     List<String> __fields = this.__parser.parseRecord(__record);
>     __loadFromFields(__fields);
>   }
> 
>   private void __loadFromFields(List<String> fields) {
>     Iterator<String> __it = fields.listIterator();
>     String __cur_str;
>     __cur_str = __it.next();
>     if (__cur_str.equals("null") || __cur_str.length() == 0) { this.ID =
> null; } else {
>       this.ID = Integer.valueOf(__cur_str);
>     }
> 
>     __cur_str = __it.next();
>     if (__cur_str.equals("null") || __cur_str.length() == 0) { this.STRING
> = null; } else {
>       this.STRING = ClobRef.parse(__cur_str);
>     }
> 
>   }
> 
>   public Object clone() throws CloneNotSupportedException {
>     gXoTNYSfULokaKs o = (gXoTNYSfULokaKs) super.clone();
>     o.STRING = (o.STRING != null) ? (com.cloudera.sqoop.lib.ClobRef)
> o.STRING.clone() : null;
>     return o;
>   }
> 
>   public Map<String, Object> getFieldMap() {
>     Map<String, Object> __sqoop$field_map = new TreeMap<String, Object>();
>     __sqoop$field_map.put("ID", this.ID);
>     __sqoop$field_map.put("STRING", this.STRING);
>     return __sqoop$field_map;
>   }
> 
>   public void setField(String __fieldName, Object __fieldVal) {
>     if ("ID".equals(__fieldName)) {
>       this.ID = (Integer) __fieldVal;
>     }
>     else    if ("STRING".equals(__fieldName)) {
>       this.STRING = (com.cloudera.sqoop.lib.ClobRef) __fieldVal;
>     }
>     else {
>       throw new RuntimeException("No such field: " + __fieldName);
>     }
>   }
> }
> 
> 
> On Wed, Nov 20, 2013 at 8:55 AM, Jarek Jarcec Cecho <jarcec@apache.org>wrote:
> 
> > Thank you sir!
> >
> > Would you mind also sharing with the generated class? I do not see
> > anything suspicious, so I would like to explore the generated code.
> >
> > Also please note that direct usage of Sqoop Java API is not recommended as
> > Sqoop at that point expect that entire environment will be properly
> > configured. I would strongly suggest you to use the sqoop binary shipped
> > with Sqoop.
> >
> > Jarcec
> >
> > On Mon, Nov 18, 2013 at 04:48:57PM -0800, redshift-etl-user wrote:
> > > Hi Jarek,
> > >
> > > Sure! Note that I'm running Sqoop through "Sqoop.runTool". Responses
> > inline.
> > >
> > > On Sun, Nov 17, 2013 at 5:47 PM, Jarek Jarcec Cecho <jarcec@apache.org
> > >wrote:
> > >
> > > > Hi sir,
> > > > would you mind sharing with us more details about your use case? Sqoop
> > and
> > > > HSQLDB versions,
> > >
> > >
> > > sqoop-1.4.4-hadoop100.jar
> > > hsqldb-1.8.0.10.jar
> > >
> > >
> > > > command that you're using,
> > >
> > >
> > > import --connect jdbc:h2:mem:play-test-985978706 --username sa --password
> > > sa --verbose --query SELECT id,string FROM test WHERE $CONDITIONS  ORDER
> > BY
> > > id LIMIT 200 -m 1 --target-dir
> > > /var/folders/mm/m69802p900d9pqwxw3l85wd80000gn/T/1384821266308-0/data
> > > --fields-terminated-by , --escaped-by \ --enclosed-by "
> > > --null-non-string  *--null-string
> > > asdf* --outdir
> > > /var/folders/mm/m69802p900d9pqwxw3l85wd80000gn/T/1384821266308-0/classes
> > > --class-name avyhOWkUKUQHvkr --driver org.hsqldb.jdbcDriver --split-by id
> > > --verbose
> > >
> > >
> > > > log generated by Sqoop with parameter --verbose.
> > >
> > >
> > > 16:34:26.380 [ [33mwarn [0m] [pool-3-thread-1]
> > > org.apache.sqoop.tool.SqoopTool - $SQOOP_CONF_DIR has not been set in the
> > > environment. Cannot check for additional configuration.
> > > 16:34:26.433 [ [33mwarn [0m] [pool-3-thread-1]
> > > org.apache.sqoop.tool.BaseSqoopTool - Setting your password on the
> > > command-line is insecure. Consider using -P instead.
> > > 16:34:26.439 [ [33mwarn [0m] [pool-3-thread-1]
> > org.apache.sqoop.ConnFactory
> > > - $SQOOP_CONF_DIR has not been set in the environment. Cannot check for
> > > additional configuration.
> > > 16:34:26.456 [ [33mwarn [0m] [pool-3-thread-1]
> > org.apache.sqoop.ConnFactory
> > > - Parameter --driver is set to an explicit driver however appropriate
> > > connection manager is not being set (via --connection-manager). Sqoop is
> > > going to fall back to org.apache.sqoop.manager.GenericJdbcManager. Please
> > > specify explicitly which connection manager should be used next time.
> > > Note:
> > >
> > /tmp/sqoop-romming/compile/8541deacc9cf2714256c59d89dd9bf0a/avyhOWkUKUQHvkr.java
> > > uses or overrides a deprecated API.
> > > Note: Recompile with -Xlint:deprecation for details.
> > > 2013-11-18 16:34:27.319 java[48163:13c07] Unable to load realm info from
> > > SCDynamicStore
> > > 16:34:27.397 [ [33mwarn [0m] [pool-3-thread-1]
> > > org.apache.sqoop.mapreduce.JobBase - SQOOP_HOME is unset. May not be able
> > > to find all job dependencies.
> > > 16:34:27.423 [ [33mwarn [0m] [pool-3-thread-1]
> > > o.a.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library
> > for
> > > your platform... using builtin-java classes where applicable
> > >
> > >
> > > > Perhaps even a simplified data that will cause this behaviour?
> > >
> > > CREATE TABLE IF NOT EXISTS test (id INTEGER, string TEXT, PRIMARY KEY
> > (id));
> > > INSERT INTO test (id, string) VALUES (1, 'test');
> > > INSERT INTO test (id) VALUES (2);
> > > INSERT INTO test (id, string) VALUES (3, 'test');
> > > This produces a file containing:
> > > 1,test
> > > 2,
> > > 3,test
> > >
> > > When it really ought to be
> > > 1,test
> > > 2,asdf
> > > 3,test
> > >
> > >
> > >
> > > > Jarcec
> > > >
> > > > On Fri, Nov 15, 2013 at 01:29:36PM -0800, redshift-etl-user wrote:
> > > > > Hi,
> > > > >
> > > > > I'm using the "--null-string" option to control the value of null
> > string
> > > > > columns for imports. I've tested this with MySQL and Postgres and
it
> > > > seems
> > > > > to work fine. However, when I try with HSQLDB, it seems to ignore
> > this
> > > > > option and just return an empty string for nulls. In fact, when the
> > > > > "--null-string" option isn't present it's supposed to return the
> > string
> > > > > "null" according to the spec, and it returns an empty string in this
> > case
> > > > > as well.
> > > > >
> > > > > Could someone else confirm this behavior? Seems like a bug.
> > > > >
> > > > > Thanks!
> > > >
> >

Mime
View raw message