From 1cf4cdd40604299b18f2fca3daec6cb77bc63ca1 Mon Sep 17 00:00:00 2001 From: Mike Pigott Date: Wed, 6 Feb 2019 18:10:04 +0100 Subject: [PATCH] ARROW-3966 [Java] JDBC Column Metadata in Arrow Field Metadata https://issues.apache.org/jira/browse/ARROW-3966 This change includes #3133, and supports a new configuration item called "Include Metadata." If true, metadata from the JDBC ResultSetMetaData object is pulled along to the Schema Field Metadata. For now, this includes: * Catalog Name * Table Name * Column Name * Column Type Name Author: Mike Pigott Author: Michael Pigott Closes #3134 from mikepigott/jdbc-column-metadata and squashes the following commits: 02f2f348 ARROW-3966: Picking up lost change to support null calendars. 7049c36a Merge branch 'master' into jdbc-column-metadata e9a9b2b1 Merge pull request #6 from apache/master 65741a9e ARROW-3966: Code review feedback cc6cc88a ARROW-3966: Using a 1:N loop instead of a 0:N-1 loop for fewer index offsets in code. cfb2ba68 ARROW-3966: Using a helper method for building a UTC calendar with root locale. 29285132 ARROW-3966: Moving the metadata flag assignment into the builder. 69022c26 ARROW-3966: Fixing merge. 4a6de863 Merge branch 'master' into jdbc-column-metadata 509a1cc5 Merge pull request #5 from apache/master 789c8c84 Merge pull request #4 from apache/master e5b19eee Merge pull request #3 from apache/master 3b17c297 Merge pull request #2 from apache/master d847ebc4 Fixing file location 1ceac9eb Merge branch 'master' into jdbc-column-metadata 881c6c83 Merge pull request #1 from apache/master 03091a86 Unit tests for including result set metadata. 72d64cc6 Affirming the field metadata is empty when the configuration excludes field metadata. 7b4527c0 Test for the include-metadata flag in the configuration. 7e9ce373 Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata bb3165b9 Updating the function calls to use the JdbcToArrowConfig versions. 
a6fb1be4 Fixing function call 5bfd6a29 Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata 68c91e7a Modifying the jdbcToArrowSchema and jdbcToArrowVectors methods to receive JdbcToArrowConfig objects. b5b0cb11 Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata 8d6cf008 Documentation for public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) 4f1260ce Adding documentation for public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) e34a9e79 Fixing formatting. fe097c88 Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata df632e36 Updating the SQL tests to include JdbcToArrowConfig versions. b2700448 Updated validation & documentation, and unit tests for the new JdbcToArrowConfig. da77cbe8 Creating a configuration class for the JDBC-to-Arrow converter. a78c7704 Updating Javadocs. 523387f3 Updating the API to support an optional 'includeMetadata' field. 5af1b5b2 Separating out the field-type creation from the field creation. 
--- .../apache/arrow/adapter/jdbc/Constants.java | 27 ++++++ .../arrow/adapter/jdbc/JdbcToArrow.java | 20 +++-- .../arrow/adapter/jdbc/JdbcToArrowConfig.java | 18 +++- .../jdbc/JdbcToArrowConfigBuilder.java | 41 ++++++++- .../arrow/adapter/jdbc/JdbcToArrowUtils.java | 88 +++++++++++++------ .../adapter/jdbc/JdbcToArrowConfigTest.java | 26 +++++- .../adapter/jdbc/JdbcToArrowTestHelper.java | 42 +++++++++ .../jdbc/h2/JdbcToArrowCharSetTest.java | 15 ++++ .../jdbc/h2/JdbcToArrowDataTypesTest.java | 15 ++++ .../adapter/jdbc/h2/JdbcToArrowNullTest.java | 14 +++ .../adapter/jdbc/h2/JdbcToArrowTest.java | 15 ++++ .../jdbc/h2/JdbcToArrowTimeZoneTest.java | 16 ++++ 12 files changed, 295 insertions(+), 42 deletions(-) create mode 100644 java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java new file mode 100644 index 0000000000000..c6aa018c831e3 --- /dev/null +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc; + +public class Constants { + + public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; + public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; + public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; + public static final String SQL_TYPE_KEY = "SQL_TYPE"; + +} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index ddf70df9ad2ce..79102043a0f83 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -23,8 +23,6 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.Calendar; -import java.util.Locale; -import java.util.TimeZone; import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.RootAllocator; @@ -90,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT)); + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false); return sqlToArrow(connection, query, config); } @@ -112,12 +110,13 @@ public static VectorSchemaRoot sqlToArrow( String query, BaseAllocator allocator, Calendar calendar) throws SQLException, IOException { + Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); Preconditions.checkNotNull(calendar, "Calendar object can not be null"); - return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar)); 
+ return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false)); } /** @@ -154,7 +153,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); - return sqlToArrow(resultSet, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT)); + return sqlToArrow(resultSet, JdbcToArrowUtils.getUtcCalendar()); } /** @@ -171,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT)); + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false); return sqlToArrow(resultSet, config); } @@ -186,7 +185,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); - return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar)); + return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false)); } /** @@ -198,12 +197,15 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator, Calendar calendar) + public static VectorSchemaRoot sqlToArrow( + ResultSet resultSet, + BaseAllocator allocator, + Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet 
object can not be null"); Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); - return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar)); + return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false)); } /** diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index e9fcffb36b666..8f2a8ef54f839 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -37,20 +37,23 @@ public final class JdbcToArrowConfig { private Calendar calendar; private BaseAllocator allocator; + private boolean includeMetadata; /** * Constructs a new configuration from the provided allocator and calendar. The allocator * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. 
*/ - JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) { + JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) { Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); this.allocator = allocator; this.calendar = calendar; + this.includeMetadata = includeMetadata; } /** @@ -70,4 +73,13 @@ public Calendar getCalendar() { public BaseAllocator getAllocator() { return allocator; } + + /** + * Whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. + * + * @return true to include field metadata, false to exclude it. + */ + public boolean shouldIncludeMetadata() { + return includeMetadata; + } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java index 9ba69639905ce..51327aa2d0f5d 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java @@ -29,6 +29,7 @@ public class JdbcToArrowConfigBuilder { private Calendar calendar; private BaseAllocator allocator; + private boolean includeMetadata; /** * Default constructor for the JdbcToArrowConfigBuilder}. @@ -38,6 +39,7 @@ public class JdbcToArrowConfigBuilder { public JdbcToArrowConfigBuilder() { this.allocator = null; this.calendar = null; + this.includeMetadata = false; } /** @@ -62,6 +64,32 @@ public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar) { this.allocator = allocator; this.calendar = calendar; + this.includeMetadata = false; + } + + /** + * Constructor for the JdbcToArrowConfigBuilder. Both the + * allocator and calendar are required. A {@link NullPointerException} + * will be thrown if either of those arguments is null. + *

+ * The allocator is used to construct Arrow vectors from the JDBC ResultSet. + * The calendar is used to determine the time zone of {@link java.sql.Timestamp} + * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and + * {@link java.sql.Timestamp} fields to a single, common time zone when reading + * from the result set. + *

+ *

+ * The includeMetadata argument, if true, will cause + * various information about each database field to be added to the Vector + * Schema's field metadata. + *

+ * + * @param allocator The Arrow Vector memory allocator. + * @param calendar The calendar to use when constructing timestamp fields. + */ + public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) { + this(allocator, calendar); + this.includeMetadata = includeMetadata; } /** @@ -87,6 +115,17 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { return this; } + /** + * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. + * + * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. + * @return This instance of the JdbcToArrowConfig, for chaining. + */ + public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { + this.includeMetadata = includeMetadata; + return this; + } + /** * This builds the {@link JdbcToArrowConfig} from the provided * {@link BaseAllocator} and {@link Calendar}. @@ -95,6 +134,6 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { * @throws NullPointerException if either the allocator or calendar was not set. 
*/ public JdbcToArrowConfig build() { - return new JdbcToArrowConfig(allocator, calendar); + return new JdbcToArrowConfig(allocator, calendar, includeMetadata); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index b6adbbc7334a4..833ca8410a969 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -36,7 +36,11 @@ import java.sql.Types; import java.util.ArrayList; import java.util.Calendar; +import java.util.HashMap; import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TimeZone; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -103,7 +107,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); + return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); + } + + /** + * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. 
+ */ + public static Calendar getUtcCalendar() { + return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); } /** @@ -145,39 +156,60 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(config, "The configuration object must not be null"); + final String timezone; + if (config.getCalendar() != null) { + timezone = config.getCalendar().getTimeZone().getID(); + } else { + timezone = null; + } + List fields = new ArrayList<>(); int columnCount = rsmd.getColumnCount(); for (int i = 1; i <= columnCount; i++) { - String columnName = rsmd.getColumnName(i); + final String columnName = rsmd.getColumnName(i); + final FieldType fieldType; + + final Map metadata; + if (config.shouldIncludeMetadata()) { + metadata = new HashMap<>(); + metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i)); + metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i)); + metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName); + metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i)); + + } else { + metadata = null; + } + switch (rsmd.getColumnType(i)) { case Types.BOOLEAN: case Types.BIT: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Bool()), null)); + fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata); break; case Types.TINYINT: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(8, true)), null)); + fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata); break; case Types.SMALLINT: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(16, true)), null)); + fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata); break; case Types.INTEGER: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null)); + fieldType = new FieldType(true, new ArrowType.Int(32, true), null, 
metadata); break; case Types.BIGINT: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(64, true)), null)); + fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata); break; case Types.NUMERIC: case Types.DECIMAL: int precision = rsmd.getPrecision(i); int scale = rsmd.getScale(i); - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Decimal(precision, scale)), null)); + fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata); break; case Types.REAL: case Types.FLOAT: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(SINGLE)), null)); + fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata); break; case Types.DOUBLE: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(DOUBLE)), null)); + fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata); break; case Types.CHAR: case Types.NCHAR: @@ -185,38 +217,42 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig case Types.NVARCHAR: case Types.LONGVARCHAR: case Types.LONGNVARCHAR: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null)); + case Types.CLOB: + fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata); break; case Types.DATE: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Date(DateUnit.MILLISECOND)), null)); + fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata); break; case Types.TIME: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32)), null)); + fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata); break; case Types.TIMESTAMP: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, - config.getCalendar().getTimeZone().getID())), null)); + 
fieldType = + new FieldType( + true, + new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone), + null, + metadata); break; case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null)); - break; - case Types.ARRAY: - // TODO Need to handle this type - // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); - break; - case Types.CLOB: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null)); - break; case Types.BLOB: - fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null)); + fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata); break; + case Types.ARRAY: + // TODO Need to handle this type + // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); default: // no-op, shouldn't get here + fieldType = null; break; } + + if (fieldType != null) { + fields.add(new Field(columnName, fieldType, null)); + } } return new Schema(fields, null); @@ -250,7 +286,7 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null"); - jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); + jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); } /** diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java index 1d02c888f8537..bafb2dcdcc341 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -34,7 +34,7 @@ public class JdbcToArrowConfigTest { @Test(expected = 
NullPointerException.class) public void testConfigNullArguments() { - new JdbcToArrowConfig(null, null); + new JdbcToArrowConfig(null, null, false); } @Test(expected = NullPointerException.class) @@ -43,7 +43,7 @@ public void testBuilderNullArguments() { } public void testConfigNullCalendar() { - JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null); + JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null, false); assertNull(config.getCalendar()); } @@ -56,7 +56,7 @@ public void testBuilderNullCalendar() { @Test(expected = NullPointerException.class) public void testConfigNullAllocator() { - new JdbcToArrowConfig(null, calendar); + new JdbcToArrowConfig(null, calendar, false); } @Test(expected = NullPointerException.class) @@ -94,4 +94,24 @@ public void testConfig() { assertTrue(newAllocator == config.getAllocator()); assertTrue(newCalendar == config.getCalendar()); } + + @Test public void testIncludeMetadata() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); + + JdbcToArrowConfig config = builder.build(); + assertFalse(config.shouldIncludeMetadata()); + + builder.setIncludeMetadata(true); + config = builder.build(); + assertTrue(config.shouldIncludeMetadata()); + + config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build(); + assertTrue(config.shouldIncludeMetadata()); + + config = new JdbcToArrowConfig(allocator, calendar, true); + assertTrue(config.shouldIncludeMetadata()); + + config = new JdbcToArrowConfig(allocator, calendar, false); + assertFalse(config.shouldIncludeMetadata()); + } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java index ec8e61bc8ce87..3e1eb09d1f590 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java +++ 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java @@ -24,6 +24,10 @@ import java.math.BigDecimal; import java.nio.charset.Charset; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BigIntVector; @@ -39,6 +43,9 @@ import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; /** * This is a Helper class which has functionalities to read and assert the values from the given FieldVector object. @@ -166,6 +173,41 @@ public static void assertNullValues(BaseValueVector vector, int rowCount) { } } + public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) { + assertNotNull(schema); + assertNotNull(schema.getSchema()); + assertNotNull(schema.getSchema().getFields()); + + for (Field field : schema.getSchema().getFields()) { + assertNotNull(field.getMetadata()); + assertEquals(0, field.getMetadata().size()); + } + } + + public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData rsmd, Schema schema) + throws SQLException { + assertNotNull(schema); + assertNotNull(schema.getFields()); + assertNotNull(rsmd); + + List fields = schema.getFields(); + + assertEquals(rsmd.getColumnCount(), fields.size()); + + // Vector columns are created in the same order as ResultSet columns. 
+ for (int i = 1; i <= rsmd.getColumnCount(); ++i) { + Map metadata = fields.get(i - 1).getMetadata(); + + assertNotNull(metadata); + assertEquals(4, metadata.size()); + + assertEquals(rsmd.getCatalogName(i), metadata.get(Constants.SQL_CATALOG_NAME_KEY)); + assertEquals(rsmd.getTableName(i), metadata.get(Constants.SQL_TABLE_NAME_KEY)); + assertEquals(rsmd.getColumnName(i), metadata.get(Constants.SQL_COLUMN_NAME_KEY)); + assertEquals(rsmd.getColumnTypeName(i), metadata.get(Constants.SQL_TYPE_KEY)); + } + } + public static byte[] hexStringToByteArray(String s) { int len = s.length(); byte[] data = new byte[len / 2]; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java index d33c07a075e81..ff31da436792f 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.sql.DriverManager; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.util.Arrays; @@ -31,11 +32,15 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -126,12 
+131,22 @@ public void testJdbcToArroValues() throws SQLException, IOException { new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } + @Test + public void testJdbcSchemaMetadata() throws SQLException { + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + /** * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test */ public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8)); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java index 5bdb38ff8be9f..a8a1a16dce0da 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -33,6 +33,7 @@ import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; import java.io.IOException; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Arrays; import java.util.Calendar; @@ -40,7 +41,10 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; @@ -57,6 +61,7 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -152,12 +157,22 @@ public void testJdbcToArroValues() throws SQLException, IOException { new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } + @Test + public void testJdbcSchemaMetadata() throws SQLException { + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + /** * This method calls the assert methods for various DataSets. 
* * @param root VectorSchemaRoot for test */ public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + switch (table.getType()) { case BIGINT: assertBigIntVectorValues((BigIntVector) root.getVector(table.getVector()), table.getValues().length, diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java index 629bcfeaed304..7ef8c795fa1d9 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java @@ -20,6 +20,7 @@ import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullValues; import java.io.IOException; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Arrays; import java.util.Calendar; @@ -27,7 +28,10 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; @@ -44,6 +48,7 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -109,6 +114,13 @@ public void testJdbcToArroValues() throws SQLException, IOException { new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } + @Test + public
void testJdbcSchemaMetadata() throws SQLException { /* Derives an Arrow Schema from the query's ResultSetMetaData and passes both to the helper assertion. The builder's third argument is true here, unlike the two-argument builder call in testJdbcToArroValues (presumably the include-metadata flag; confirm). */ + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); /* NOTE(review): the Statement and ResultSet created on this line are never closed in this method. */ + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } /** * This method calls the assert methods for various DataSets. @@ -116,6 +128,8 @@ public void testJdbcToArroValues() throws SQLException, IOException { * @param root VectorSchemaRoot for test */ public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + switch (table.getType()) { case NULL: sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount()); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java index f74e683d7d753..e0011330089e6 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java @@ -41,6 +41,7 @@ import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues; import java.io.IOException; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Arrays; import java.util.Calendar; @@ -48,7 +49,10 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import
org.apache.arrow.vector.BigIntVector; @@ -65,6 +69,7 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -143,12 +148,22 @@ public void testJdbcToArroValues() throws SQLException, IOException { new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); } + @Test + public void testJdbcSchemaMetadata() throws SQLException { /* Derives an Arrow Schema from the query's ResultSetMetaData and passes both to the helper assertion. The builder's third argument is true here, unlike the two-argument builder call in testJdbcToArroValues (presumably the include-metadata flag; confirm). */ + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); /* NOTE(review): the Statement and ResultSet created on this line are never closed in this method. */ + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + /** * This method calls the assert methods for various DataSets.
* * @param root VectorSchemaRoot for test */ public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), getLongValues(table.getValues(), BIGINT)); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java index fee56c7c07e91..8e83d6f7a78e2 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -22,6 +22,7 @@ import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; import java.io.IOException; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Arrays; import java.util.Calendar; @@ -30,13 +31,17 @@ import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; import org.apache.arrow.adapter.jdbc.Table; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.DateMilliVector; import org.apache.arrow.vector.TimeMilliVector; import org.apache.arrow.vector.TimeStampVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -119,12 +124,23 @@ public void testJdbcToArroValues() throws SQLException, IOException { Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build())); } + @Test + public void testJdbcSchemaMetadata()
throws SQLException { /* Derives an Arrow Schema from the query's ResultSetMetaData and passes both to the helper assertion. The config is built with a Calendar in the time zone returned by table.getTimezone() and with true as the builder's third argument (presumably the include-metadata flag; confirm). */ + Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())); + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); /* NOTE(review): the Statement and ResultSet created on this line are never closed in this method. */ + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + /** * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test */ public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + switch (table.getType()) { case EST_DATE: case GMT_DATE: