sqoop-user mailing list archives

From Jarek Jarcec Cecho <jar...@apache.org>
Subject Re: HSQLDB issue with null strings
Date Fri, 22 Nov 2013 23:53:24 GMT
I'm glad to hear that the problem was solved! Would you mind sharing which compatibility mode
worked for you? It might be useful for other users following this mailing list.

Jarcec

On Fri, Nov 22, 2013 at 03:41:13PM -0800, redshift-etl-user wrote:
> Thanks, Jarek! I was able to fix the problem by upgrading HSQLDB and setting
> the DB compatibility mode in the connection string.
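> 
> (Purely as an illustration of the mechanism, not the exact string used: with an
> H2 in-memory URL like the one in this thread, the compatibility mode is
> typically appended as a parameter on the JDBC URL, e.g.
> 
>   --connect "jdbc:h2:mem:play-test-985978706;MODE=MySQL"
> 
> where MODE=MySQL is only a placeholder for whichever mode matches the source
> database; HSQLDB 2.x similarly accepts sql.syntax_* properties on the URL.)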
> 
> Thanks again.
> 
> 
On Fri, Nov 22, 2013 at 11:24 AM, Jarek Jarcec Cecho <jarcec@apache.org> wrote:
> 
> > Hi sir,
> > it seems that HSQLDB is reporting the column "STRING" as type CLOB. The
> > parameters --(input-)null-(non-)string work only for string-based
> > columns (CHAR, VARCHAR, NCHAR, ...) and not for a CLOB. You might be able
> > to overcome this by using the --map-column-java parameter to force its type
> > to String (and not a CLOB):
> >
> >   sqoop import --map-column-java STRING=String ...
> >
> > You can find more details about type mapping in Sqoop User Guide:
> >
> >
> > http://sqoop.apache.org/docs/1.4.4/SqoopUserGuide.html#_controlling_type_mapping
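> >
> > As a rough sketch only (the --target-dir below is a placeholder, and the
> > other values are simply taken from earlier in this thread), the full import
> > might then look something like:
> >
> >   sqoop import \
> >     --connect jdbc:h2:mem:play-test-985978706 \
> >     --driver org.hsqldb.jdbcDriver \
> >     --username sa --password sa \
> >     --query "SELECT id, string FROM test WHERE \$CONDITIONS ORDER BY id" \
> >     --map-column-java STRING=String \
> >     --null-string asdf --null-non-string asdf \
> >     --target-dir /tmp/test-import \
> >     -m 1
> >
> > With the column mapped to java.lang.String instead of a CLOB, the
> > --null-string substitution should then take effect.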
> >
> > Jarcec
> >
> > On Thu, Nov 21, 2013 at 09:56:15PM -0800, redshift-etl-user wrote:
> > > Jarek - class included below. Thanks!
> > >
> > >
> > > // ORM class for table 'null'
> > > // WARNING: This class is AUTO-GENERATED. Modify at your own risk.
> > > //
> > > // Debug information:
> > > // Generated date: Thu Nov 21 21:52:32 PST 2013
> > > // For connector: org.apache.sqoop.manager.GenericJdbcManager
> > > import org.apache.hadoop.io.BytesWritable;
> > > import org.apache.hadoop.io.Text;
> > > import org.apache.hadoop.io.Writable;
> > > import org.apache.hadoop.mapred.lib.db.DBWritable;
> > > import com.cloudera.sqoop.lib.JdbcWritableBridge;
> > > import com.cloudera.sqoop.lib.DelimiterSet;
> > > import com.cloudera.sqoop.lib.FieldFormatter;
> > > import com.cloudera.sqoop.lib.RecordParser;
> > > import com.cloudera.sqoop.lib.BooleanParser;
> > > import com.cloudera.sqoop.lib.BlobRef;
> > > import com.cloudera.sqoop.lib.ClobRef;
> > > import com.cloudera.sqoop.lib.LargeObjectLoader;
> > > import com.cloudera.sqoop.lib.SqoopRecord;
> > > import java.sql.PreparedStatement;
> > > import java.sql.ResultSet;
> > > import java.sql.SQLException;
> > > import java.io.DataInput;
> > > import java.io.DataOutput;
> > > import java.io.IOException;
> > > import java.nio.ByteBuffer;
> > > import java.nio.CharBuffer;
> > > import java.sql.Date;
> > > import java.sql.Time;
> > > import java.sql.Timestamp;
> > > import java.util.Arrays;
> > > import java.util.Iterator;
> > > import java.util.List;
> > > import java.util.Map;
> > > import java.util.TreeMap;
> > >
> > > public class gXoTNYSfULokaKs extends SqoopRecord implements DBWritable, Writable {
> > >   private final int PROTOCOL_VERSION = 3;
> > >   public int getClassFormatVersion() { return PROTOCOL_VERSION; }
> > >   protected ResultSet __cur_result_set;
> > >   private Integer ID;
> > >   public Integer get_ID() {
> > >     return ID;
> > >   }
> > >   public void set_ID(Integer ID) {
> > >     this.ID = ID;
> > >   }
> > >   public gXoTNYSfULokaKs with_ID(Integer ID) {
> > >     this.ID = ID;
> > >     return this;
> > >   }
> > >   private com.cloudera.sqoop.lib.ClobRef STRING;
> > >   public com.cloudera.sqoop.lib.ClobRef get_STRING() {
> > >     return STRING;
> > >   }
> > >   public void set_STRING(com.cloudera.sqoop.lib.ClobRef STRING) {
> > >     this.STRING = STRING;
> > >   }
> > >   public gXoTNYSfULokaKs with_STRING(com.cloudera.sqoop.lib.ClobRef STRING) {
> > >     this.STRING = STRING;
> > >     return this;
> > >   }
> > >   public boolean equals(Object o) {
> > >     if (this == o) {
> > >       return true;
> > >     }
> > >     if (!(o instanceof gXoTNYSfULokaKs)) {
> > >       return false;
> > >     }
> > >     gXoTNYSfULokaKs that = (gXoTNYSfULokaKs) o;
> > >     boolean equal = true;
> > >     equal = equal && (this.ID == null ? that.ID == null : this.ID.equals(that.ID));
> > >     equal = equal && (this.STRING == null ? that.STRING == null : this.STRING.equals(that.STRING));
> > >     return equal;
> > >   }
> > >   public void readFields(ResultSet __dbResults) throws SQLException {
> > >     this.__cur_result_set = __dbResults;
> > >     this.ID = JdbcWritableBridge.readInteger(1, __dbResults);
> > >     this.STRING = JdbcWritableBridge.readClobRef(2, __dbResults);
> > >   }
> > >   public void loadLargeObjects(LargeObjectLoader __loader)
> > >       throws SQLException, IOException, InterruptedException {
> > >     this.STRING = __loader.readClobRef(2, this.__cur_result_set);
> > >   }
> > >   public void write(PreparedStatement __dbStmt) throws SQLException {
> > >     write(__dbStmt, 0);
> > >   }
> > >
> > >   public int write(PreparedStatement __dbStmt, int __off) throws SQLException {
> > >     JdbcWritableBridge.writeInteger(ID, 1 + __off, 4, __dbStmt);
> > >     JdbcWritableBridge.writeClobRef(STRING, 2 + __off, 2005, __dbStmt);
> > >     return 2;
> > >   }
> > >   public void readFields(DataInput __dataIn) throws IOException {
> > >     if (__dataIn.readBoolean()) {
> > >         this.ID = null;
> > >     } else {
> > >     this.ID = Integer.valueOf(__dataIn.readInt());
> > >     }
> > >     if (__dataIn.readBoolean()) {
> > >         this.STRING = null;
> > >     } else {
> > >     this.STRING = com.cloudera.sqoop.lib.LobSerializer.readClobFields(__dataIn);
> > >     }
> > >   }
> > >   public void write(DataOutput __dataOut) throws IOException {
> > >     if (null == this.ID) {
> > >         __dataOut.writeBoolean(true);
> > >     } else {
> > >         __dataOut.writeBoolean(false);
> > >     __dataOut.writeInt(this.ID);
> > >     }
> > >     if (null == this.STRING) {
> > >         __dataOut.writeBoolean(true);
> > >     } else {
> > >         __dataOut.writeBoolean(false);
> > >     com.cloudera.sqoop.lib.LobSerializer.writeClob(this.STRING, __dataOut);
> > >     }
> > >   }
> > >   private final DelimiterSet __outputDelimiters = new DelimiterSet((char) 44, (char) 10, (char) 34, (char) 92, true);
> > >   public String toString() {
> > >     return toString(__outputDelimiters, true);
> > >   }
> > >   public String toString(DelimiterSet delimiters) {
> > >     return toString(delimiters, true);
> > >   }
> > >   public String toString(boolean useRecordDelim) {
> > >     return toString(__outputDelimiters, useRecordDelim);
> > >   }
> > >   public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
> > >     StringBuilder __sb = new StringBuilder();
> > >     char fieldDelim = delimiters.getFieldsTerminatedBy();
> > >     __sb.append(FieldFormatter.escapeAndEnclose(ID==null?"":"" + ID, delimiters));
> > >     __sb.append(fieldDelim);
> > >     __sb.append(FieldFormatter.escapeAndEnclose(STRING==null?"":"" + STRING, delimiters));
> > >     if (useRecordDelim) {
> > >       __sb.append(delimiters.getLinesTerminatedBy());
> > >     }
> > >     return __sb.toString();
> > >   }
> > >   private final DelimiterSet __inputDelimiters = new DelimiterSet((char) 44, (char) 10, (char) 34, (char) 92, true);
> > >   private RecordParser __parser;
> > >   public void parse(Text __record) throws RecordParser.ParseError {
> > >     if (null == this.__parser) {
> > >       this.__parser = new RecordParser(__inputDelimiters);
> > >     }
> > >     List<String> __fields = this.__parser.parseRecord(__record);
> > >     __loadFromFields(__fields);
> > >   }
> > >
> > >   public void parse(CharSequence __record) throws RecordParser.ParseError {
> > >     if (null == this.__parser) {
> > >       this.__parser = new RecordParser(__inputDelimiters);
> > >     }
> > >     List<String> __fields = this.__parser.parseRecord(__record);
> > >     __loadFromFields(__fields);
> > >   }
> > >
> > >   public void parse(byte [] __record) throws RecordParser.ParseError {
> > >     if (null == this.__parser) {
> > >       this.__parser = new RecordParser(__inputDelimiters);
> > >     }
> > >     List<String> __fields = this.__parser.parseRecord(__record);
> > >     __loadFromFields(__fields);
> > >   }
> > >
> > >   public void parse(char [] __record) throws RecordParser.ParseError {
> > >     if (null == this.__parser) {
> > >       this.__parser = new RecordParser(__inputDelimiters);
> > >     }
> > >     List<String> __fields = this.__parser.parseRecord(__record);
> > >     __loadFromFields(__fields);
> > >   }
> > >
> > >   public void parse(ByteBuffer __record) throws RecordParser.ParseError {
> > >     if (null == this.__parser) {
> > >       this.__parser = new RecordParser(__inputDelimiters);
> > >     }
> > >     List<String> __fields = this.__parser.parseRecord(__record);
> > >     __loadFromFields(__fields);
> > >   }
> > >
> > >   public void parse(CharBuffer __record) throws RecordParser.ParseError {
> > >     if (null == this.__parser) {
> > >       this.__parser = new RecordParser(__inputDelimiters);
> > >     }
> > >     List<String> __fields = this.__parser.parseRecord(__record);
> > >     __loadFromFields(__fields);
> > >   }
> > >
> > >   private void __loadFromFields(List<String> fields) {
> > >     Iterator<String> __it = fields.listIterator();
> > >     String __cur_str;
> > >     __cur_str = __it.next();
> > >     if (__cur_str.equals("null") || __cur_str.length() == 0) { this.ID = null; } else {
> > >       this.ID = Integer.valueOf(__cur_str);
> > >     }
> > >
> > >     __cur_str = __it.next();
> > >     if (__cur_str.equals("null") || __cur_str.length() == 0) { this.STRING = null; } else {
> > >       this.STRING = ClobRef.parse(__cur_str);
> > >     }
> > >
> > >   }
> > >
> > >   public Object clone() throws CloneNotSupportedException {
> > >     gXoTNYSfULokaKs o = (gXoTNYSfULokaKs) super.clone();
> > >     o.STRING = (o.STRING != null) ? (com.cloudera.sqoop.lib.ClobRef) o.STRING.clone() : null;
> > >     return o;
> > >   }
> > >
> > >   public Map<String, Object> getFieldMap() {
> > >     Map<String, Object> __sqoop$field_map = new TreeMap<String, Object>();
> > >     __sqoop$field_map.put("ID", this.ID);
> > >     __sqoop$field_map.put("STRING", this.STRING);
> > >     return __sqoop$field_map;
> > >   }
> > >
> > >   public void setField(String __fieldName, Object __fieldVal) {
> > >     if ("ID".equals(__fieldName)) {
> > >       this.ID = (Integer) __fieldVal;
> > >     }
> > >     else    if ("STRING".equals(__fieldName)) {
> > >       this.STRING = (com.cloudera.sqoop.lib.ClobRef) __fieldVal;
> > >     }
> > >     else {
> > >       throw new RuntimeException("No such field: " + __fieldName);
> > >     }
> > >   }
> > > }
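> > >
> > > (Just guessing at what --map-column-java STRING=String would change here: the
> > > generated field would presumably become a plain String instead of a ClobRef,
> > > roughly
> > >
> > >   private String STRING;
> > >   public String get_STRING() {
> > >     return STRING;
> > >   }
> > >   public void set_STRING(String STRING) {
> > >     this.STRING = STRING;
> > >   }
> > >
> > > so the string-column null handling would then apply to it.)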
> > >
> > >
> > > On Wed, Nov 20, 2013 at 8:55 AM, Jarek Jarcec Cecho <jarcec@apache.org> wrote:
> > >
> > > > Thank you sir!
> > > >
> > > > Would you mind also sharing the generated class? I do not see
> > > > anything suspicious, so I would like to explore the generated code.
> > > >
> > > > Also please note that direct usage of the Sqoop Java API is not recommended,
> > > > as Sqoop then expects that the entire environment will be properly
> > > > configured. I would strongly suggest using the sqoop binary shipped
> > > > with Sqoop.
> > > >
> > > > Jarcec
> > > >
> > > > On Mon, Nov 18, 2013 at 04:48:57PM -0800, redshift-etl-user wrote:
> > > > > Hi Jarek,
> > > > >
> > > > > Sure! Note that I'm running Sqoop through "Sqoop.runTool". Responses inline.
> > > > >
> > > > > On Sun, Nov 17, 2013 at 5:47 PM, Jarek Jarcec Cecho <jarcec@apache.org> wrote:
> > > > >
> > > > > > Hi sir,
> > > > > > would you mind sharing with us more details about your use case? Sqoop
> > > > > > and HSQLDB versions,
> > > > >
> > > > >
> > > > > sqoop-1.4.4-hadoop100.jar
> > > > > hsqldb-1.8.0.10.jar
> > > > >
> > > > >
> > > > > > command that you're using,
> > > > >
> > > > >
> > > > > import --connect jdbc:h2:mem:play-test-985978706 --username sa --password
> > > > > sa --verbose --query SELECT id,string FROM test WHERE $CONDITIONS ORDER BY
> > > > > id LIMIT 200 -m 1 --target-dir
> > > > > /var/folders/mm/m69802p900d9pqwxw3l85wd80000gn/T/1384821266308-0/data
> > > > > --fields-terminated-by , --escaped-by \ --enclosed-by "
> > > > > --null-non-string *--null-string asdf* --outdir
> > > > > /var/folders/mm/m69802p900d9pqwxw3l85wd80000gn/T/1384821266308-0/classes
> > > > > --class-name avyhOWkUKUQHvkr --driver org.hsqldb.jdbcDriver --split-by id
> > > > > --verbose
> > > > >
> > > > >
> > > > > > log generated by Sqoop with parameter --verbose.
> > > > >
> > > > >
> > > > > 16:34:26.380 [warn] [pool-3-thread-1] org.apache.sqoop.tool.SqoopTool -
> > > > > $SQOOP_CONF_DIR has not been set in the environment. Cannot check for
> > > > > additional configuration.
> > > > > 16:34:26.433 [warn] [pool-3-thread-1] org.apache.sqoop.tool.BaseSqoopTool -
> > > > > Setting your password on the command-line is insecure. Consider using -P
> > > > > instead.
> > > > > 16:34:26.439 [warn] [pool-3-thread-1] org.apache.sqoop.ConnFactory -
> > > > > $SQOOP_CONF_DIR has not been set in the environment. Cannot check for
> > > > > additional configuration.
> > > > > 16:34:26.456 [warn] [pool-3-thread-1] org.apache.sqoop.ConnFactory -
> > > > > Parameter --driver is set to an explicit driver however appropriate
> > > > > connection manager is not being set (via --connection-manager). Sqoop is
> > > > > going to fall back to org.apache.sqoop.manager.GenericJdbcManager. Please
> > > > > specify explicitly which connection manager should be used next time.
> > > > > Note: /tmp/sqoop-romming/compile/8541deacc9cf2714256c59d89dd9bf0a/avyhOWkUKUQHvkr.java
> > > > > uses or overrides a deprecated API.
> > > > > Note: Recompile with -Xlint:deprecation for details.
> > > > > 2013-11-18 16:34:27.319 java[48163:13c07] Unable to load realm info from
> > > > > SCDynamicStore
> > > > > 16:34:27.397 [warn] [pool-3-thread-1] org.apache.sqoop.mapreduce.JobBase -
> > > > > SQOOP_HOME is unset. May not be able to find all job dependencies.
> > > > > 16:34:27.423 [warn] [pool-3-thread-1] o.a.hadoop.util.NativeCodeLoader -
> > > > > Unable to load native-hadoop library for your platform... using
> > > > > builtin-java classes where applicable
> > > > >
> > > > >
> > > > > > Perhaps even a simplified data that will cause this behaviour?
> > > > >
> > > > > CREATE TABLE IF NOT EXISTS test (id INTEGER, string TEXT, PRIMARY KEY (id));
> > > > > INSERT INTO test (id, string) VALUES (1, 'test');
> > > > > INSERT INTO test (id) VALUES (2);
> > > > > INSERT INTO test (id, string) VALUES (3, 'test');
> > > > > This produces a file containing:
> > > > > 1,test
> > > > > 2,
> > > > > 3,test
> > > > >
> > > > > When it really ought to be:
> > > > > 1,test
> > > > > 2,asdf
> > > > > 3,test
> > > > >
> > > > >
> > > > >
> > > > > > Jarcec
> > > > > >
> > > > > > On Fri, Nov 15, 2013 at 01:29:36PM -0800, redshift-etl-user wrote:
> > > > > > > Hi,
> > > > > > >
> > > > > > > I'm using the "--null-string" option to control the value of null string
> > > > > > > columns for imports. I've tested this with MySQL and Postgres and it seems
> > > > > > > to work fine. However, when I try with HSQLDB, it seems to ignore this
> > > > > > > option and just return an empty string for nulls. In fact, when the
> > > > > > > "--null-string" option isn't present it's supposed to return the string
> > > > > > > "null" according to the spec, and it returns an empty string in this case
> > > > > > > as well.
> > > > > > >
> > > > > > > Could someone else confirm this behavior? Seems like a bug.
> > > > > > >
> > > > > > > Thanks!
> > > > > >
> > > >
> >
