drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [drill] vvysotskyi commented on a change in pull request #2241: DRILL-7938: Convert JDBC Storage Plugin to EVF
Date Wed, 02 Jun 2021 15:13:02 GMT

vvysotskyi commented on a change in pull request #2241:
URL: https://github.com/apache/drill/pull/2241#discussion_r644063373



##########
File path: contrib/storage-jdbc/src/main/java/org/apache/drill/exec/store/jdbc/JdbcBatchReader.java
##########
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.jdbc;
+
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.common.exceptions.CustomErrorContext;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
+import org.apache.drill.exec.physical.impl.scan.framework.SchemaNegotiator;
+import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
+import org.apache.drill.exec.physical.resultSet.RowSetLoader;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.store.jdbc.writers.JdbcBigintWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcBitWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcColumnWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcDateWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcDoubleWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcFloatWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcIntWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcTimeWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcTimestampWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcVarbinaryWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcVarcharWriter;
+import org.apache.drill.exec.store.jdbc.writers.JdbcVardecimalWriter;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.sql.DataSource;
+import java.lang.reflect.Field;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.drill.exec.planner.types.DrillRelDataTypeSystem.DRILL_REL_DATATYPE_SYSTEM;
+
+public class JdbcBatchReader implements ManagedReader<SchemaNegotiator> {
+
+  private static final Logger logger = LoggerFactory.getLogger(JdbcBatchReader.class);
+  private static final ImmutableMap<Integer, MinorType> JDBC_TYPE_MAPPINGS;
+  private final DataSource source;
+  private final String sql;
+  private final List<SchemaPath> columns;
+  private Connection connection;
+  private PreparedStatement statement;
+  private ResultSet resultSet;
+  private SchemaBuilder builder;
+  private RowSetLoader rowWriter;
+  private CustomErrorContext errorContext;
+  private List<JdbcColumnWriter> columnWriters;
+  private List<JdbcColumn> jdbcColumns;
+
+
+  public JdbcBatchReader(DataSource source, String sql, List<SchemaPath> columns) {
+    this.source = source;
+    this.sql = sql;
+    this.columns = columns;
+  }
+
+  static {
+    JDBC_TYPE_MAPPINGS = ImmutableMap.<Integer, MinorType>builder()
+      .put(java.sql.Types.DOUBLE, MinorType.FLOAT8)
+      .put(java.sql.Types.FLOAT, MinorType.FLOAT4)
+      .put(java.sql.Types.TINYINT, MinorType.INT)
+      .put(java.sql.Types.SMALLINT, MinorType.INT)
+      .put(java.sql.Types.INTEGER, MinorType.INT)
+      .put(java.sql.Types.BIGINT, MinorType.BIGINT)
+
+      .put(java.sql.Types.CHAR, MinorType.VARCHAR)
+      .put(java.sql.Types.VARCHAR, MinorType.VARCHAR)
+      .put(java.sql.Types.LONGVARCHAR, MinorType.VARCHAR)
+      .put(java.sql.Types.CLOB, MinorType.VARCHAR)
+
+      .put(java.sql.Types.NCHAR, MinorType.VARCHAR)
+      .put(java.sql.Types.NVARCHAR, MinorType.VARCHAR)
+      .put(java.sql.Types.LONGNVARCHAR, MinorType.VARCHAR)
+
+      .put(java.sql.Types.VARBINARY, MinorType.VARBINARY)
+      .put(java.sql.Types.LONGVARBINARY, MinorType.VARBINARY)
+      .put(java.sql.Types.BLOB, MinorType.VARBINARY)
+
+      .put(java.sql.Types.NUMERIC, MinorType.FLOAT8)
+      .put(java.sql.Types.DECIMAL, MinorType.VARDECIMAL)
+      .put(java.sql.Types.REAL, MinorType.FLOAT8)
+
+      .put(java.sql.Types.DATE, MinorType.DATE)
+      .put(java.sql.Types.TIME, MinorType.TIME)
+      .put(java.sql.Types.TIMESTAMP, MinorType.TIMESTAMP)
+
+      .put(java.sql.Types.BOOLEAN, MinorType.BIT)
+      .put(java.sql.Types.BIT, MinorType.BIT)
+
+      .build();
+  }
+
+  @Override
+  public boolean open(SchemaNegotiator negotiator) {
+
+    this.errorContext = negotiator.parentErrorContext();
+    try {
+      connection = source.getConnection();
+      statement = connection.prepareStatement(sql);
+      resultSet = statement.executeQuery();
+      builder = new SchemaBuilder();
+
+      TupleMetadata drillSchema = buildSchema();
+      negotiator.tableSchema(drillSchema, true);
+      ResultSetLoader resultSetLoader = negotiator.build();
+
+      // Create ScalarWriters
+      rowWriter = resultSetLoader.writer();
+      populateWriterArray();
+
+    } catch (SQLException e) {
+      throw UserException.dataReadError(e)
+        .message("The JDBC storage plugin failed while trying setup the SQL query. ")
+        .addContext("Sql", sql)
+        .addContext(errorContext)
+        .build(logger);
+    }
+
+    return true;
+  }
+
+  @Override
+  public boolean next() {
+    while (!rowWriter.isFull()) {
+      if (!processRow()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  private boolean processRow() {
+    try {
+      if (!resultSet.next()) {
+        return false;
+      }
+      rowWriter.start();
+      // Process results
+      for (JdbcColumnWriter writer : columnWriters) {
+        writer.load(resultSet);
+      }
+      rowWriter.save();
+    } catch (SQLException e) {
+      throw UserException
+        .dataReadError(e)
+        .message("Failure while attempting to read from database.")
+        .addContext("Sql", sql)
+        .addContext(errorContext)
+        .build(logger);
+    }
+
+    return true;
+  }
+
+  @Override
+  public void close() {
+    AutoCloseables.closeSilently(resultSet, statement, connection);
+  }
+
+  private TupleMetadata buildSchema() throws SQLException {
+    ResultSetMetaData meta = resultSet.getMetaData();
+    jdbcColumns = new ArrayList<>();
+
+    int columnsCount = meta.getColumnCount();
+
+    if (columns.size() != columnsCount) {
+      throw UserException
+        .validationError()
+        .message(
+          "Expected columns count differs from the returned one.\n" +
+            "Expected columns: %s\n" +
+            "Returned columns count: %s",
+          columns, columnsCount)
+        .addContext("Sql", sql)
+        .addContext(errorContext)
+        .build(logger);
+    }
+
+    for (int i = 1; i <= columnsCount; i++) {
+      String name = columns.get(i - 1).getRootSegmentPath();
+      // column index in ResultSetMetaData starts from 1
+      int jdbcType = meta.getColumnType(i);
+      int width = Math.min(meta.getPrecision(i), DRILL_REL_DATATYPE_SYSTEM.getMaxNumericPrecision());
+      int scale = Math.min(meta.getScale(i), DRILL_REL_DATATYPE_SYSTEM.getMaxNumericScale());
+
+      MinorType minorType = JDBC_TYPE_MAPPINGS.get(jdbcType);
+      if (minorType == null) {
+        logger.warn("Ignoring column that is unsupported.", UserException
+          .unsupportedError()
+          .message(
+            "A column you queried has a data type that is not currently supported by the
JDBC storage plugin. "
+              + "The column's name was %s and its JDBC data type was %s. ",
+            name,
+            nameFromType(jdbcType))
+          .addContext("Sql", sql)
+          .addContext("Column Name", name)
+          .addContext(errorContext)
+          .build(logger));
+        continue;
+      }
+
+      jdbcColumns.add(new JdbcColumn(name, minorType));
+      if (minorType == MinorType.VARDECIMAL) {

Review comment:
       Precision shows the max possible size for varchar of binary, so Drill will be able
to allocate less memory compared to the default one. Also, this precision may be exposed to
the end-user, and he might use it. 
   Initially, it was set for all types but looks like this change breaks it.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



Mime
View raw message