From d904102e369c7e646c231c561ae23899439b07c6 Mon Sep 17 00:00:00 2001 From: Bart Maertens Date: Tue, 21 Feb 2023 10:13:13 +0100 Subject: [PATCH] initial version for DuckDB database type. fixes #2448 --- .../databases/duckdb-assemblies/pom.xml | 52 ++++ .../src/assembly/assembly.xml | 57 ++++ .../src/main/resources/version.xml | 20 ++ assemblies/plugins/databases/pom.xml | 1 + assemblies/plugins/dist/pom.xml | 13 + docs/hop-user-manual/modules/ROOT/nav.adoc | 3 +- .../ROOT/pages/database/databases/duckdb.adoc | 39 +++ integration-tests/duckdb/dev-env-config.json | 7 + integration-tests/duckdb/duckdb-read.hpl | 204 ++++++++++++ integration-tests/duckdb/duckdb-write.hpl | 156 ++++++++++ integration-tests/duckdb/hop-config.json | 290 ++++++++++++++++++ .../duckdb/main-0001-read-write-duckdb.hwf | 159 ++++++++++ .../pipeline-run-configuration/local.json | 21 ++ .../duckdb/metadata/rdbms/duckdb.json | 26 ++ .../workflow-run-configuration/local.json | 11 + integration-tests/duckdb/project-config.json | 13 + plugins/databases/duckdb/pom.xml | 42 +++ .../databases/duckdb/DuckDBDatabaseMeta.java | 162 ++++++++++ plugins/databases/pom.xml | 1 + 19 files changed, 1276 insertions(+), 1 deletion(-) create mode 100644 assemblies/plugins/databases/duckdb-assemblies/pom.xml create mode 100644 assemblies/plugins/databases/duckdb-assemblies/src/assembly/assembly.xml create mode 100644 assemblies/plugins/databases/duckdb-assemblies/src/main/resources/version.xml create mode 100644 docs/hop-user-manual/modules/ROOT/pages/database/databases/duckdb.adoc create mode 100644 integration-tests/duckdb/dev-env-config.json create mode 100644 integration-tests/duckdb/duckdb-read.hpl create mode 100644 integration-tests/duckdb/duckdb-write.hpl create mode 100644 integration-tests/duckdb/hop-config.json create mode 100644 integration-tests/duckdb/main-0001-read-write-duckdb.hwf create mode 100644 integration-tests/duckdb/metadata/pipeline-run-configuration/local.json create mode 100644 integration-tests/duckdb/metadata/rdbms/duckdb.json create mode 100644 integration-tests/duckdb/metadata/workflow-run-configuration/local.json create mode 100644 integration-tests/duckdb/project-config.json create mode 100644 plugins/databases/duckdb/pom.xml create mode 100644 plugins/databases/duckdb/src/main/java/org/apache/hop/databases/duckdb/DuckDBDatabaseMeta.java diff --git a/assemblies/plugins/databases/duckdb-assemblies/pom.xml b/assemblies/plugins/databases/duckdb-assemblies/pom.xml new file mode 100644 index 00000000000..d23c65ab799 --- /dev/null +++ b/assemblies/plugins/databases/duckdb-assemblies/pom.xml @@ -0,0 +1,52 @@ + + + + + 4.0.0 + + + hop-assemblies-plugins-databases + org.apache.hop + 2.4.0-SNAPSHOT + + + + hop-assemblies-plugins-databases-duckdb + 2.4.0-SNAPSHOT + pom + + Hop Assemblies Plugins Databases DuckDB + + + + + + + org.apache.hop + hop-databases-duckdb + ${project.version} + + + org.duckdb + duckdb_jdbc + 0.7.0 + + + + \ No newline at end of file diff --git a/assemblies/plugins/databases/duckdb-assemblies/src/assembly/assembly.xml b/assemblies/plugins/databases/duckdb-assemblies/src/assembly/assembly.xml new file mode 100644 index 00000000000..90b02f184ac --- /dev/null +++ b/assemblies/plugins/databases/duckdb-assemblies/src/assembly/assembly.xml @@ -0,0 +1,57 @@ + + + + hop-assemblies-plugins-databases-duckdb + + zip + + databases/duckdb + + + ${project.basedir}/src/main/resources/version.xml + . 
+ true + + + + + lib + + **/* + + + + + + false + + org.apache.hop:hop-databases-duckdb:jar + + + + false + lib + + org.duckdb:duckdb_jdbc:jar + + + + diff --git a/assemblies/plugins/databases/duckdb-assemblies/src/main/resources/version.xml b/assemblies/plugins/databases/duckdb-assemblies/src/main/resources/version.xml new file mode 100644 index 00000000000..6be576acae9 --- /dev/null +++ b/assemblies/plugins/databases/duckdb-assemblies/src/main/resources/version.xml @@ -0,0 +1,20 @@ + + + +${project.version} \ No newline at end of file diff --git a/assemblies/plugins/databases/pom.xml b/assemblies/plugins/databases/pom.xml index 68ac86d7f09..285563cbfcb 100644 --- a/assemblies/plugins/databases/pom.xml +++ b/assemblies/plugins/databases/pom.xml @@ -49,6 +49,7 @@ infobright-assemblies infinidb-assemblies derby-assemblies + duckdb-assemblies monetdb-assemblies cache-assemblies exasol4-assemblies diff --git a/assemblies/plugins/dist/pom.xml b/assemblies/plugins/dist/pom.xml index 311a8d3cb9d..7d0fb947003 100644 --- a/assemblies/plugins/dist/pom.xml +++ b/assemblies/plugins/dist/pom.xml @@ -2925,6 +2925,19 @@ + + org.apache.hop + hop-assemblies-plugins-databases-duckdb + ${project.version} + zip + + + * + * + + + + org.apache.hop hop-assemblies-plugins-databases-exasol4 diff --git a/docs/hop-user-manual/modules/ROOT/nav.adoc b/docs/hop-user-manual/modules/ROOT/nav.adoc index eccf11c3a92..26a44008ec7 100644 --- a/docs/hop-user-manual/modules/ROOT/nav.adoc +++ b/docs/hop-user-manual/modules/ROOT/nav.adoc @@ -357,12 +357,13 @@ under the License. * xref:database/databases.adoc[Relational Database Connections] //::=START AUTO GENERATED LINKS DATABASES ** xref:database/databases/derby.adoc[Apache Derby] -** xref:database/databases/doris.adoc[Apache Doris] +** xref:database/databases/duckdb.adoc[DuckDB] ** xref:database/databases/apache-hive.adoc[Apache Hive] ** xref:database/databases/as400.adoc[AS400] ** xref:database/databases/cache.adoc[Cache] ** xref:database/databases/clickhouse.adoc[ClickHouse] ** xref:database/databases/db2.adoc[DB2] +** xref:database/databases/doris.adoc[Apache Doris] ** xref:database/databases/exasol.adoc[Exasol] ** xref:database/databases/firebird.adoc[Firebird] ** xref:database/databases/googlebigquery.adoc[Google BigQuery] diff --git a/docs/hop-user-manual/modules/ROOT/pages/database/databases/duckdb.adoc b/docs/hop-user-manual/modules/ROOT/pages/database/databases/duckdb.adoc new file mode 100644 index 00000000000..467af8c3301 --- /dev/null +++ b/docs/hop-user-manual/modules/ROOT/pages/database/databases/duckdb.adoc @@ -0,0 +1,39 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +//// +[[database-plugins-duckdb]] +:documentationPath: /database/databases/ +:language: en_US + += DuckDB + +DuckDB is an in-process SQL OLAP database management system. 
+ +As an in-process database, DuckDB is easy to configure: specify the path to your DuckDB filename as the database name, e.g. `/duckdb`. + +[cols="2*",options="header"] +|=== +| Option | Info +|Type | Relational +|Driver | https://search.maven.org/artifact/org.duckdb/duckdb_jdbc/0.7.0/jar[Driver Link] +|Version Included | 0.7.0 +|Hop Dependencies | None +|Documentation | https://duckdb.org/docs/api/java.html +|JDBC Url | jdbc:duckdb: (in memory) or jdbc:duckdb: +|Driver folder | Hop Installation/plugins/databases/duckdb/lib +|=== + + diff --git a/integration-tests/duckdb/dev-env-config.json b/integration-tests/duckdb/dev-env-config.json new file mode 100644 index 00000000000..9f86eef0f2d --- /dev/null +++ b/integration-tests/duckdb/dev-env-config.json @@ -0,0 +1,7 @@ +{ + "variables" : [ { + "name" : "DUCKDB_DIR", + "value" : "/tmp/duckdb", + "description" : "" + } ] +} \ No newline at end of file diff --git a/integration-tests/duckdb/duckdb-read.hpl b/integration-tests/duckdb/duckdb-read.hpl new file mode 100644 index 00000000000..ab1d735fd95 --- /dev/null +++ b/integration-tests/duckdb/duckdb-read.hpl @@ -0,0 +1,204 @@ + + + + + duckdb-read + Y + + + + Normal + + + N + 1000 + 100 + - + 2023/02/20 19:36:52.555 + - + 2023/02/20 19:36:52.555 + + + + + + read book_data + get nb_rows + Y + + + get nb_rows + 100k rows? + Y + + + 100k rows? + Dummy (do nothing) + Y + + + 100k rows? + Abort + Y + + + + 100k rows? + FilterRows + + Y + + 1 + + none + + + + + + + = + nb_rows + N + - + + N + -1 + ####0;-####0 + constant + 0 + 100000 + Integer + + + + Abort + Dummy (do nothing) + + + 432 + 64 + + + + get nb_rows + GroupBy + + Y + + 1 + + none + + + N + N + ${java.io.tmpdir} + + + nb_rows + title + COUNT_ANY + + + + Y + + + N + + grp + + + 288 + 64 + + + + read book_data + TableInput + + Y + + 1 + + none + + + duckdb + N + 0 + SELECT + author +, publisher +, genre +, title +FROM main.book_data +order by publisher, title + + N + + + 96 + 64 + + + + Dummy (do nothing) + Dummy + + Y + + 1 + + none + + + + + 608 + 64 + + + + Abort + Abort + + Y + + 1 + + none + + + ABORT_WITH_ERROR + Y + + 0 + + + 432 + 160 + + + + + + diff --git a/integration-tests/duckdb/duckdb-write.hpl b/integration-tests/duckdb/duckdb-write.hpl new file mode 100644 index 00000000000..eb2aadf4ac1 --- /dev/null +++ b/integration-tests/duckdb/duckdb-write.hpl @@ -0,0 +1,156 @@ + + + + + duckdb-write + Y + + + + Normal + + + N + 1000 + 100 + - + 2023/02/20 19:03:40.339 + - + 2023/02/20 19:03:40.339 + + + + + + generate 100k rows + fake book data + Y + + + fake book data + write book data + Y + + + + fake book data + Fake + + Y + + 1 + + none + + + + + author + author + Book + + + publisher + publisher + Book + + + genre + genre + Book + + + title + title + Book + + + en + + + 320 + 80 + + + + generate 100k rows + RowGenerator + + Y + + 1 + + none + + + + + 5000 + FiveSecondsAgo + 100000 + N + now + + + 144 + 80 + + + + write book data + TableOutput + + Y + + 1 + + none + + + 1000 + duckdb + + + N + Y + N + N + + Y + + N + main + N + book_data
+ + N + Y + Y + N + + + 480 + 80 + +
+ + + +
diff --git a/integration-tests/duckdb/hop-config.json b/integration-tests/duckdb/hop-config.json new file mode 100644 index 00000000000..102ac981d58 --- /dev/null +++ b/integration-tests/duckdb/hop-config.json @@ -0,0 +1,290 @@ +{ + "variables": [ + { + "name": "HOP_LENIENT_STRING_TO_NUMBER_CONVERSION", + "value": "N", + "description": "System wide flag to allow lenient string to number conversion for backward compatibility. If this setting is set to \"Y\", an string starting with digits will be converted successfully into a number. (example: 192.168.1.1 will be converted into 192 or 192.168 or 192168 depending on the decimal and grouping symbol). The default (N) will be to throw an error if non-numeric symbols are found in the string." + }, + { + "name": "HOP_COMPATIBILITY_DB_IGNORE_TIMEZONE", + "value": "N", + "description": "System wide flag to ignore timezone while writing date/timestamp value to the database." + }, + { + "name": "HOP_LOG_SIZE_LIMIT", + "value": "0", + "description": "The log size limit for all pipelines and workflows that don't have the \"log size limit\" property set in their respective properties." + }, + { + "name": "HOP_EMPTY_STRING_DIFFERS_FROM_NULL", + "value": "N", + "description": "NULL vs Empty String. If this setting is set to Y, an empty string and null are different. Otherwise they are not." + }, + { + "name": "HOP_MAX_LOG_SIZE_IN_LINES", + "value": "0", + "description": "The maximum number of log lines that are kept internally by Hop. Set to 0 to keep all rows (default)" + }, + { + "name": "HOP_MAX_LOG_TIMEOUT_IN_MINUTES", + "value": "1440", + "description": "The maximum age (in minutes) of a log line while being kept internally by Hop. Set to 0 to keep all rows indefinitely (default)" + }, + { + "name": "HOP_MAX_WORKFLOW_TRACKER_SIZE", + "value": "5000", + "description": "The maximum number of workflow trackers kept in memory" + }, + { + "name": "HOP_MAX_ACTIONS_LOGGED", + "value": "5000", + "description": "The maximum number of action results kept in memory for logging purposes." + }, + { + "name": "HOP_MAX_LOGGING_REGISTRY_SIZE", + "value": "10000", + "description": "The maximum number of logging registry entries kept in memory for logging purposes." + }, + { + "name": "HOP_LOG_TAB_REFRESH_DELAY", + "value": "1000", + "description": "The hop log tab refresh delay." + }, + { + "name": "HOP_LOG_TAB_REFRESH_PERIOD", + "value": "1000", + "description": "The hop log tab refresh period." + }, + { + "name": "HOP_PLUGIN_CLASSES", + "value": null, + "description": "A comma delimited list of classes to scan for plugin annotations" + }, + { + "name": "HOP_PLUGIN_PACKAGES", + "value": null, + "description": "A comma delimited list of packages to scan for plugin annotations (warning: slow!!)" + }, + { + "name": "HOP_TRANSFORM_PERFORMANCE_SNAPSHOT_LIMIT", + "value": "0", + "description": "The maximum number of transform performance snapshots to keep in memory. Set to 0 to keep all snapshots indefinitely (default)" + }, + { + "name": "HOP_ROWSET_GET_TIMEOUT", + "value": "50", + "description": "The name of the variable that optionally contains an alternative rowset get timeout (in ms). This only makes a difference for extremely short lived pipelines." + }, + { + "name": "HOP_ROWSET_PUT_TIMEOUT", + "value": "50", + "description": "The name of the variable that optionally contains an alternative rowset put timeout (in ms). This only makes a difference for extremely short lived pipelines." 
+ }, + { + "name": "HOP_CORE_TRANSFORMS_FILE", + "value": null, + "description": "The name of the project variable that will contain the alternative location of the hop-transforms.xml file. You can use this to customize the list of available internal transforms outside of the codebase." + }, + { + "name": "HOP_CORE_WORKFLOW_ACTIONS_FILE", + "value": null, + "description": "The name of the project variable that will contain the alternative location of the hop-workflow-actions.xml file." + }, + { + "name": "HOP_SERVER_OBJECT_TIMEOUT_MINUTES", + "value": "1440", + "description": "This project variable will set a time-out after which waiting, completed or stopped pipelines and workflows will be automatically cleaned up. The default value is 1440 (one day)." + }, + { + "name": "HOP_PIPELINE_PAN_JVM_EXIT_CODE", + "value": null, + "description": "Set this variable to an integer that will be returned as the Pan JVM exit code." + }, + { + "name": "HOP_DISABLE_CONSOLE_LOGGING", + "value": "N", + "description": "Set this variable to Y to disable standard Hop logging to the console. (stdout)" + }, + { + "name": "HOP_REDIRECT_STDERR", + "value": "N", + "description": "Set this variable to Y to redirect stderr to Hop logging." + }, + { + "name": "HOP_REDIRECT_STDOUT", + "value": "N", + "description": "Set this variable to Y to redirect stdout to Hop logging." + }, + { + "name": "HOP_DEFAULT_NUMBER_FORMAT", + "value": null, + "description": "The name of the variable containing an alternative default number format" + }, + { + "name": "HOP_DEFAULT_BIGNUMBER_FORMAT", + "value": null, + "description": "The name of the variable containing an alternative default bignumber format" + }, + { + "name": "HOP_DEFAULT_INTEGER_FORMAT", + "value": null, + "description": "The name of the variable containing an alternative default integer format" + }, + { + "name": "HOP_DEFAULT_DATE_FORMAT", + "value": null, + "description": "The name of the variable containing an alternative default date format" + }, + { + "name": "HOP_DEFAULT_TIMESTAMP_FORMAT", + "value": null, + "description": "The name of the variable containing an alternative default timestamp format" + }, + { + "name": "HOP_DEFAULT_SERVLET_ENCODING", + "value": null, + "description": "Defines the default encoding for servlets, leave it empty to use Java default encoding" + }, + { + "name": "HOP_FAIL_ON_LOGGING_ERROR", + "value": "N", + "description": "Set this variable to Y when you want the workflow/pipeline fail with an error when the related logging process (e.g. to a database) fails." + }, + { + "name": "HOP_AGGREGATION_MIN_NULL_IS_VALUED", + "value": "N", + "description": "Set this variable to Y to set the minimum to NULL if NULL is within an aggregate. Otherwise by default NULL is ignored by the MIN aggregate and MIN is set to the minimum value that is not NULL. See also the variable HOP_AGGREGATION_ALL_NULLS_ARE_ZERO." + }, + { + "name": "HOP_AGGREGATION_ALL_NULLS_ARE_ZERO", + "value": "N", + "description": "Set this variable to Y to return 0 when all values within an aggregate are NULL. Otherwise by default a NULL is returned when all values are NULL." + }, + { + "name": "HOP_COMPATIBILITY_TEXT_FILE_OUTPUT_APPEND_NO_HEADER", + "value": "N", + "description": "Set this variable to Y for backward compatibility for the Text File Output transform. Setting this to Ywill add no header row at all when the append option is enabled, regardless if the file is existing or not." 
+ }, + { + "name": "HOP_PASSWORD_ENCODER_PLUGIN", + "value": "Hop", + "description": "Specifies the password encoder plugin to use by ID (Hop is the default)." + }, + { + "name": "HOP_SYSTEM_HOSTNAME", + "value": null, + "description": "You can use this variable to speed up hostname lookup. Hostname lookup is performed by Hop so that it is capable of logging the server on which a workflow or pipeline is executed." + }, + { + "name": "HOP_SERVER_JETTY_ACCEPTORS", + "value": null, + "description": "A variable to configure jetty option: acceptors for Carte" + }, + { + "name": "HOP_SERVER_JETTY_ACCEPT_QUEUE_SIZE", + "value": null, + "description": "A variable to configure jetty option: acceptQueueSize for Carte" + }, + { + "name": "HOP_SERVER_JETTY_RES_MAX_IDLE_TIME", + "value": null, + "description": "A variable to configure jetty option: lowResourcesMaxIdleTime for Carte" + }, + { + "name": "HOP_COMPATIBILITY_MERGE_ROWS_USE_REFERENCE_STREAM_WHEN_IDENTICAL", + "value": "N", + "description": "Set this variable to Y for backward compatibility for the Merge Rows (diff) transform. Setting this to Y will use the data from the reference stream (instead of the comparison stream) in case the compared rows are identical." + }, + { + "name": "HOP_SPLIT_FIELDS_REMOVE_ENCLOSURE", + "value": "false", + "description": "Set this variable to false to preserve enclosure symbol after splitting the string in the Split fields transform. Changing it to true will remove first and last enclosure symbol from the resulting string chunks." + }, + { + "name": "HOP_ALLOW_EMPTY_FIELD_NAMES_AND_TYPES", + "value": "false", + "description": "Set this variable to TRUE to allow your pipeline to pass 'null' fields and/or empty types." + }, + { + "name": "HOP_GLOBAL_LOG_VARIABLES_CLEAR_ON_EXPORT", + "value": "false", + "description": "Set this variable to false to preserve global log variables defined in pipeline / workflow Properties -> Log panel. Changing it to true will clear it when export pipeline / workflow." + }, + { + "name": "HOP_FILE_OUTPUT_MAX_STREAM_COUNT", + "value": "1024", + "description": "This project variable is used by the Text File Output transform. It defines the max number of simultaneously open files within the transform. The transform will close/reopen files as necessary to insure the max is not exceeded" + }, + { + "name": "HOP_FILE_OUTPUT_MAX_STREAM_LIFE", + "value": "0", + "description": "This project variable is used by the Text File Output transform. It defines the max number of milliseconds between flushes of files opened by the transform." 
+ }, + { + "name": "HOP_USE_NATIVE_FILE_DIALOG", + "value": "N", + "description": "Set this value to Y if you want to use the system file open/save dialog when browsing files" + }, + { + "name": "HOP_AUTO_CREATE_CONFIG", + "value": "Y", + "description": "Set this value to N if you don't want to automatically create a hop configuration file (hop-config.json) when it's missing" + } + ], + "LocaleDefault": "en_BE", + "guiProperties": { + "FontFixedSize": "13", + "MaxUndo": "100", + "DarkMode": "Y", + "FontNoteSize": "13", + "ShowOSLook": "Y", + "FontFixedStyle": "0", + "FontNoteName": ".AppleSystemUIFont", + "FontFixedName": "Monospaced", + "FontGraphStyle": "0", + "FontDefaultSize": "13", + "GraphColorR": "255", + "FontGraphSize": "13", + "IconSize": "32", + "BackgroundColorB": "255", + "FontNoteStyle": "0", + "FontGraphName": ".AppleSystemUIFont", + "FontDefaultName": ".AppleSystemUIFont", + "GraphColorG": "255", + "UseGlobalFileBookmarks": "Y", + "FontDefaultStyle": "0", + "GraphColorB": "255", + "BackgroundColorR": "255", + "BackgroundColorG": "255", + "WorkflowDialogStyle": "RESIZE,MAX,MIN", + "LineWidth": "1", + "ContextDialogShowCategories": "Y" + }, + "projectsConfig": { + "enabled": true, + "projectMandatory": true, + "environmentMandatory": true, + "defaultProject": "default", + "defaultEnvironment": null, + "standardParentProject": "default", + "standardProjectsFolder": null, + "projectConfigurations": [ + { + "projectName": "default", + "projectHome": "${HOP_CONFIG_FOLDER}", + "configFilename": "project-config.json" + } + ], + "lifecycleEnvironments": [ + { + "name": "dev", + "purpose": "Testing", + "projectName": "default", + "configurationFiles": [ + "${PROJECT_HOME}/dev-env-config.json" + ] + } + ], + "projectLifecycles": [] + } +} \ No newline at end of file diff --git a/integration-tests/duckdb/main-0001-read-write-duckdb.hwf b/integration-tests/duckdb/main-0001-read-write-duckdb.hwf new file mode 100644 index 00000000000..9a672bd6a6b --- /dev/null +++ b/integration-tests/duckdb/main-0001-read-write-duckdb.hwf @@ -0,0 +1,159 @@ + + + + main-0001-read-write-duckdb + Y + + + + - + 2023/02/20 20:24:40.087 + - + 2023/02/20 20:24:40.087 + + + + + Start + + SPECIAL + + 1 + 12 + 60 + 0 + 0 + N + 0 + 1 + N + 50 + 50 + + + + create book_data + + SQL + + DROP TABLE IF EXISTS main.book_data; + +CREATE TABLE main.book_data +( + author VARCHAR(150) +, publisher VARCHAR(150) +, genre VARCHAR(150) +, title VARCHAR(150) +) +; + F + F + + F + duckdb + N + 192 + 48 + + + + duckdb-write.hpl + + PIPELINE + + ${PROJECT_HOME}/duckdb-write.hpl + N + N + N + N + N + + + N + N + Basic + N + Y + N + local + + Y + + N + 384 + 48 + + + + duckdb-read.hpl + + PIPELINE + + ${PROJECT_HOME}/duckdb-read.hpl + N + N + N + N + N + + + N + N + Basic + N + Y + N + local + + Y + + N + 576 + 48 + + + + + + Start + create book_data + Y + Y + Y + + + create book_data + duckdb-write.hpl + Y + Y + N + + + duckdb-write.hpl + duckdb-read.hpl + Y + Y + N + + + + + + diff --git a/integration-tests/duckdb/metadata/pipeline-run-configuration/local.json b/integration-tests/duckdb/metadata/pipeline-run-configuration/local.json new file mode 100644 index 00000000000..11a9ee03d27 --- /dev/null +++ b/integration-tests/duckdb/metadata/pipeline-run-configuration/local.json @@ -0,0 +1,21 @@ +{ + "engineRunConfiguration": { + "Local": { + "feedback_size": "50000", + "sample_size": "100", + "sample_type_in_gui": "Last", + "wait_time": "20", + "rowset_size": "10000", + "safe_mode": false, + "show_feedback": false, + "topo_sort": false, 
+ "gather_metrics": false, + "transactional": false + } + }, + "defaultSelection": true, + "configurationVariables": [], + "name": "local", + "description": "", + "executionInfoLocationName": "Runs your pipelines locally with the standard local Hop pipeline engine" +} \ No newline at end of file diff --git a/integration-tests/duckdb/metadata/rdbms/duckdb.json b/integration-tests/duckdb/metadata/rdbms/duckdb.json new file mode 100644 index 00000000000..8a24a975589 --- /dev/null +++ b/integration-tests/duckdb/metadata/rdbms/duckdb.json @@ -0,0 +1,26 @@ +{ + "rdbms": { + "DuckDB": { + "databaseName": "${DUCKDB_DIR}", + "pluginId": "DuckDB", + "accessType": 0, + "hostname": "", + "password": "Encrypted ", + "pluginName": "DuckDB", + "port": "", + "attributes": { + "SUPPORTS_TIMESTAMP_DATA_TYPE": "N", + "QUOTE_ALL_FIELDS": "N", + "SUPPORTS_BOOLEAN_DATA_TYPE": "Y", + "FORCE_IDENTIFIERS_TO_LOWERCASE": "N", + "PRESERVE_RESERVED_WORD_CASE": "Y", + "SQL_CONNECT": "", + "FORCE_IDENTIFIERS_TO_UPPERCASE": "N", + "PREFERRED_SCHEMA_NAME": "" + }, + "manualUrl": "", + "username": "" + } + }, + "name": "duckdb" +} \ No newline at end of file diff --git a/integration-tests/duckdb/metadata/workflow-run-configuration/local.json b/integration-tests/duckdb/metadata/workflow-run-configuration/local.json new file mode 100644 index 00000000000..1d0cf74baec --- /dev/null +++ b/integration-tests/duckdb/metadata/workflow-run-configuration/local.json @@ -0,0 +1,11 @@ +{ + "engineRunConfiguration": { + "Local": { + "safe_mode": false, + "transactional": false + } + }, + "defaultSelection": true, + "name": "local", + "description": "Runs your workflows locally with the standard local Hop workflow engine" +} \ No newline at end of file diff --git a/integration-tests/duckdb/project-config.json b/integration-tests/duckdb/project-config.json new file mode 100644 index 00000000000..6a91171e1c8 --- /dev/null +++ b/integration-tests/duckdb/project-config.json @@ -0,0 +1,13 @@ +{ + "metadataBaseFolder" : "${PROJECT_HOME}/metadata", + "unitTestsBasePath" : "${PROJECT_HOME}", + "dataSetsCsvFolder" : "${PROJECT_HOME}/datasets", + "enforcingExecutionInHome" : true, + "config" : { + "variables" : [ { + "name" : "HOP_LICENSE_HEADER_FILE", + "value" : "${PROJECT_HOME}/../asf-header.txt", + "description" : "This will automatically serialize the ASF license header into pipelines and workflows in the integration test projects" + } ] + } +} \ No newline at end of file diff --git a/plugins/databases/duckdb/pom.xml b/plugins/databases/duckdb/pom.xml new file mode 100644 index 00000000000..db326431687 --- /dev/null +++ b/plugins/databases/duckdb/pom.xml @@ -0,0 +1,42 @@ + + + + + + hop-plugins-databases + org.apache.hop + 2.4.0-SNAPSHOT + + 4.0.0 + + hop-databases-duckdb + + jar + + Hop Plugins Databases DuckDB + + + + org.duckdb + duckdb_jdbc + 0.7.0 + + + + \ No newline at end of file diff --git a/plugins/databases/duckdb/src/main/java/org/apache/hop/databases/duckdb/DuckDBDatabaseMeta.java b/plugins/databases/duckdb/src/main/java/org/apache/hop/databases/duckdb/DuckDBDatabaseMeta.java new file mode 100644 index 00000000000..b14e889b032 --- /dev/null +++ b/plugins/databases/duckdb/src/main/java/org/apache/hop/databases/duckdb/DuckDBDatabaseMeta.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hop.databases.duckdb;
+
+import org.apache.hop.core.Const;
+import org.apache.hop.core.database.BaseDatabaseMeta;
+import org.apache.hop.core.database.DatabaseMeta;
+import org.apache.hop.core.database.DatabaseMetaPlugin;
+import org.apache.hop.core.database.IDatabase;
+import org.apache.hop.core.exception.HopDatabaseException;
+import org.apache.hop.core.gui.plugin.GuiPlugin;
+import org.apache.hop.core.row.IValueMeta;
+
+@DatabaseMetaPlugin(
+    type = "DuckDB",
+    typeDescription = "DuckDB",
+    documentationUrl = "/database/databases/duckdb.html")
+@GuiPlugin(id = "GUI-DuckDBDatabaseMeta")
+public class DuckDBDatabaseMeta extends BaseDatabaseMeta implements IDatabase {
+
+  private static final Class<?> PKG = DuckDBDatabaseMeta.class; // For Translator
+
+  @Override
+  public String getFieldDefinition(
+      IValueMeta v, String tk, String pk, boolean useAutoIncrement, boolean addFieldName, boolean addCr) {
+    // https://duckdb.org/docs/sql/data_types/overview.html
+    String retval = "";
+
+    String fieldname = v.getName();
+    int length = v.getLength();
+    int precision = v.getPrecision();
+
+    if (addFieldName) {
+      retval += fieldname + " ";
+    }
+
+    int type = v.getType();
+    switch (type) {
+      case IValueMeta.TYPE_TIMESTAMP:
+      case IValueMeta.TYPE_DATE:
+        retval += "TIMESTAMP";
+        break;
+      case IValueMeta.TYPE_BOOLEAN:
+        if (isSupportsBooleanDataType()) {
+          retval += "BOOLEAN";
+        } else {
+          retval += "CHAR(1)";
+        }
+        break;
+      case IValueMeta.TYPE_NUMBER:
+      case IValueMeta.TYPE_INTEGER:
+      case IValueMeta.TYPE_BIGNUMBER:
+        if (fieldname.equalsIgnoreCase(tk) // Technical key
+            || fieldname.equalsIgnoreCase(pk)) { // Primary key
+          retval += "IDENTITY";
+        } else {
+          if (length > 0) {
+            if (precision > 0 || length > 18) {
+              retval += "DECIMAL(" + length + ", " + precision + ")";
+            } else {
+              if (length > 9) {
+                retval += "BIGINT";
+              } else {
+                if (length < 5) {
+                  if (length < 3) {
+                    retval += "TINYINT";
+                  } else {
+                    retval += "SMALLINT";
+                  }
+                } else {
+                  retval += "INTEGER";
+                }
+              }
+            }
+          } else {
+            retval += "DOUBLE";
+          }
+        }
+        break;
+      case IValueMeta.TYPE_STRING:
+        if (length >= DatabaseMeta.CLOB_LENGTH) {
+          retval += "TEXT";
+        } else {
+          retval += "VARCHAR";
+          if (length > 0) {
+            retval += "(" + length;
+          } else {
+            retval += "(" + Integer.MAX_VALUE;
+          }
+          retval += ")";
+        }
+        break;
+      case IValueMeta.TYPE_BINARY:
+        retval += "BLOB";
+        break;
+      default:
+        retval += "UNKNOWN";
+        break;
+    }
+
+    if (addCr) {
+      retval += Const.CR;
+    }
+
+    return retval;
+  }
+
+  @Override
+  public int[] getAccessTypeList() {
+    return new int[] {DatabaseMeta.TYPE_ACCESS_NATIVE};
+  }
+
+  @Override
+  public String getDriverClass() {
+    return "org.duckdb.DuckDBDriver";
+  }
+
+  @Override
+  public String getURL(String hostname, String port, String databaseName) throws HopDatabaseException {
+    return "jdbc:duckdb:" + databaseName;
+  }
+
+  @Override
+  public String getAddColumnStatement(
+      String tableName, IValueMeta v, String tk, boolean useAutoIncrement, String pk, boolean semicolon) {
+    // DuckDB supports standard ALTER TABLE ... ADD COLUMN syntax.
+    return "ALTER TABLE " + tableName + " ADD COLUMN " + getFieldDefinition(v, tk, pk, useAutoIncrement, true, false);
+  }
+
+  @Override
+  public String getModifyColumnStatement(
+      String tableName, IValueMeta v, String tk, boolean useAutoIncrement, String pk, boolean semicolon) {
+    // DuckDB supports standard ALTER TABLE ... ALTER COLUMN syntax.
+    return "ALTER TABLE " + tableName + " ALTER COLUMN " + getFieldDefinition(v, tk, pk, useAutoIncrement, true, false);
+  }
+
+  @Override
+  public boolean isSupportsBooleanDataType() {
+    return true;
+  }
+}
diff --git a/plugins/databases/pom.xml b/plugins/databases/pom.xml
index 568e746d9cd..09855743a79 100644
--- a/plugins/databases/pom.xml
+++ b/plugins/databases/pom.xml
@@ -104,6 +104,7 @@
        clickhouse
        cockroachdb
        hive
+       duckdb
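
For reference, a minimal sketch (not part of the patch) of the JDBC plumbing this plugin relies on: getDriverClass() returns org.duckdb.DuckDBDriver and getURL() simply appends the configured database name to "jdbc:duckdb:", so an empty name opens an in-memory database while a file path opens or creates a persistent one. The file path, table, and rows below are hypothetical stand-ins for what the integration test does against ${DUCKDB_DIR}; the sketch assumes the bundled duckdb_jdbc jar is on the classpath.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class DuckDbConnectionSketch {
  public static void main(String[] args) throws Exception {
    // The driver shipped in plugins/databases/duckdb/lib; Class.forName is optional
    // with JDBC 4 auto-registration but mirrors what Hop resolves via getDriverClass().
    Class.forName("org.duckdb.DuckDBDriver");

    // "jdbc:duckdb:" alone is an in-memory database; appending a file path (hypothetical
    // here, and its parent directory must already exist) persists the database.
    try (Connection conn = DriverManager.getConnection("jdbc:duckdb:/tmp/duckdb/books.duckdb");
        Statement stmt = conn.createStatement()) {

      // Roughly the DDL the "create book_data" SQL action in the workflow issues.
      stmt.execute(
          "CREATE TABLE IF NOT EXISTS main.book_data ("
              + "author VARCHAR(150), publisher VARCHAR(150), genre VARCHAR(150), title VARCHAR(150))");

      // A stand-in for the rows the duckdb-write.hpl pipeline generates.
      stmt.execute(
          "INSERT INTO main.book_data VALUES ('Some Author', 'Some Publisher', 'Fiction', 'A Title')");

      // The same kind of row-count check duckdb-read.hpl performs.
      try (ResultSet rs = stmt.executeQuery("SELECT COUNT(*) FROM main.book_data")) {
        while (rs.next()) {
          System.out.println("rows in main.book_data: " + rs.getLong(1));
        }
      }
    }
  }
}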