From 68c91e7a718e01795c798a423f905afc4f5ce9d8 Mon Sep 17 00:00:00 2001 From: Mike Pigott Date: Sat, 8 Dec 2018 16:11:58 -0500 Subject: [PATCH] Modifying the jdbcToArrowSchema and jdbcToArrowVectors methods to receive JdbcToArrowConfig objects. --- .../arrow/adapter/jdbc/JdbcToArrowUtils.java | 60 +++++++++++++++---- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index 3425fa6471e87..2f4ea3ab41ab8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -38,6 +38,7 @@ import java.util.Calendar; import java.util.List; +import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -90,6 +91,21 @@ public class JdbcToArrowUtils { private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024; private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256; + /** + * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. + * + * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. + * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. 
+ * @return {@link Schema} + * @throws SQLException on error + */ + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { + Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); + Preconditions.checkNotNull(calendar, "Calendar object can't be null"); + + return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); + } + /** * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. * @@ -120,14 +136,15 @@ public class JdbcToArrowUtils { * CLOB --> ArrowType.Utf8 * BLOB --> ArrowType.Binary * - * @param rsmd ResultSetMetaData + * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. + * @param config The configuration to use when constructing the schema. * @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { - + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); + Preconditions.checkNotNull(config, "The configuration object must not be null"); + Preconditions.checkArgument(config.isValid(), "The configuration object must be valid"); List fields = new ArrayList<>(); int columnCount = rsmd.getColumnCount(); @@ -179,7 +196,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar break; case Types.TIMESTAMP: fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, - calendar.getTimeZone().getID())), null)); + config.getCalendar().getTimeZone().getID())), null)); break; case Types.BINARY: case Types.VARBINARY: @@ -222,17 +239,38 @@ private static void allocateVectors(VectorSchemaRoot root, int size) { * Iterate the given 
JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param calendar The calendar to use when reading time-based data. * @throws SQLException on error */ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); - Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null"); + Preconditions.checkNotNull(root, "Vector Schema cannot be null"); Preconditions.checkNotNull(calendar, "Calendar object can't be null"); + jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); + } + + /** + * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate + * the given Arrow Vector objects. + * + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param config The configuration to use when reading the data. 
+ * @throws SQLException on error + */ + public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) + throws SQLException, IOException { + + Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); + Preconditions.checkNotNull(root, "Vector Schema cannot be null"); + Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null"); + Preconditions.checkArgument(config.isValid(), "JDBC-to-Arrow configuration must be valid"); + ResultSetMetaData rsmd = rs.getMetaData(); int columnCount = rsmd.getColumnCount(); @@ -289,16 +327,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen break; case Types.DATE: updateVector((DateMilliVector) root.getVector(columnName), - rs.getDate(i, calendar), !rs.wasNull(), rowCount); + rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount); break; case Types.TIME: updateVector((TimeMilliVector) root.getVector(columnName), - rs.getTime(i, calendar), !rs.wasNull(), rowCount); + rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount); break; case Types.TIMESTAMP: // TODO: Need to handle precision such as milli, micro, nano updateVector((TimeStampVector) root.getVector(columnName), - rs.getTimestamp(i, calendar), !rs.wasNull(), rowCount); + rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount); break; case Types.BINARY: case Types.VARBINARY: