sqoop-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From fszabo2 <...@git.apache.org>
Subject [GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...
Date Tue, 04 Dec 2018 14:52:27 GMT
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238693735
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    +      Decimal decimal = (Decimal) schema.getLogicalType();
    +
    +      // trimming precision and scale to Hive's maximum values.
    +      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
    +      if (precision < decimal.getPrecision()) {
    +        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
    --- End diff --
    
    Do you think we should remove this warning? (I think, even if it's redundant, it's useful to write this out.)


---

Mime
View raw message