Skip to content

Commit

Permalink
Merge pull request #94 from keboola/jirka-kbc-2394-full-load
Browse files Browse the repository at this point in the history
KBC-2394 full load
  • Loading branch information
zajca authored Mar 31, 2022
2 parents cd026ab + 87c705b commit 0b13a8c
Show file tree
Hide file tree
Showing 32 changed files with 2,911 additions and 13 deletions.
5 changes: 5 additions & 0 deletions .env.dist
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ EXASOL_PASSWORD=exasol

# ID of build used in CI only
BUILD_PREFIX=

TERADATA_HOST=
TERADATA_USERNAME=
TERADATA_PASSWORD=
TERADATA_PORT=
31 changes: 31 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Build stage 0: downloads the Teradata ODBC driver package and the Teradata tools archive
# from the keboola-drivers S3 bucket into /tmp/teradata. Later instructions pull these files
# with `COPY --from=0`.
FROM quay.io/keboola/aws-cli
# AWS credentials are supplied as build arguments.
# NOTE(review): presumably the aws CLI picks them up from the build environment — confirm.
ARG AWS_SECRET_ACCESS_KEY
ARG AWS_ACCESS_KEY_ID
RUN /usr/bin/aws s3 cp s3://keboola-drivers/teradata/tdodbc1710-17.10.00.08-1.x86_64.deb /tmp/teradata/tdodbc.deb
RUN /usr/bin/aws s3 cp s3://keboola-drivers/teradata/utils/TeradataToolsAndUtilitiesBase__ubuntu_x8664.17.00.34.00.tar.gz /tmp/teradata/tdutils.tar.gz

FROM php:7.4-cli

ARG COMPOSER_FLAGS="--prefer-dist --no-interaction"
Expand Down Expand Up @@ -89,6 +95,31 @@ RUN set -ex; \
echo "\n[exasol]\nDriver=/opt/exasol/libexaodbc-uo2214lv2.so\n" >> /etc/odbcinst.ini;\
rm -rf /tmp/exasol;

# Teradata ODBC
# Copies the driver package downloaded in build stage 0 together with the repository's
# Teradata ODBC config snippets into a temporary directory.
COPY --from=0 /tmp/teradata/tdodbc.deb /tmp/teradata/tdodbc.deb
COPY docker/teradata/odbc.ini /tmp/teradata/odbc_td.ini
COPY docker/teradata/odbcinst.ini /tmp/teradata/odbcinst_td.ini

# Installs the driver package, appends the Teradata snippets to /etc/odbc.ini and
# /etc/odbcinst.ini, removes the temporary directory and builds the pdo_odbc PHP extension.
# NOTE(review): the same pdo_odbc configure/install pair also appears in the "#php odbc" step
# further down — check whether both are needed.
RUN dpkg -i /tmp/teradata/tdodbc.deb \
&& cat /tmp/teradata/odbc_td.ini >> /etc/odbc.ini \
&& cat /tmp/teradata/odbcinst_td.ini >> /etc/odbcinst.ini \
&& rm -r /tmp/teradata \
&& docker-php-ext-configure pdo_odbc --with-pdo-odbc=unixODBC,/usr \
&& docker-php-ext-install pdo_odbc

# NOTE(review): because of the spaces around "=", Docker presumably parses these lines in the
# `ENV <key> <value>` form, so each value would be the literal text starting with "= "
# (e.g. ODBCHOME would be "= /opt/teradata/client/ODBC_64/"). Confirm what is intended before
# changing the syntax: making these variables effective may alter which ODBC config files are used.
ENV ODBCHOME = /opt/teradata/client/ODBC_64/
ENV ODBCINI = /opt/teradata/client/ODBC_64/odbc.ini
ENV ODBCINST = /opt/teradata/client/ODBC_64/odbcinst.ini
ENV LD_LIBRARY_PATH = /opt/teradata/client/ODBC_64/lib

# Teradata Utils
# Unpacks the tools archive downloaded in build stage 0, runs its setup script with the
# `tptbase` and `s3axsmod` arguments (presumably the components to install — confirm),
# then removes the apt lists and the temporary directory.
COPY --from=0 /tmp/teradata/tdutils.tar.gz /tmp/teradata/tdutils.tar.gz
RUN cd /tmp/teradata \
&& tar -xvaf tdutils.tar.gz \
&& sh /tmp/teradata/TeradataToolsAndUtilitiesBase/.setup.sh tptbase s3axsmod \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/teradata

#php odbc
RUN docker-php-ext-configure pdo_odbc --with-pdo-odbc=unixODBC,/usr \
&& docker-php-ext-install pdo_odbc
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@

## Development

### Docker

Prepare `.env` (a copy of `.env.dist`) and set AWS keys that have access to the `keboola-drivers` bucket; they are required to build this image. Then run `docker-compose --env-file=.env.local build`.

The AWS credentials must also have access to the bucket specified in `AWS_S3_BUCKET`. This bucket has to contain the testing data; run `docker-compose run --rm dev composer loadS3` to load it.


### Preparation

#### Azure
Expand Down Expand Up @@ -96,6 +103,15 @@ EXASOL_PASSWORD=

Obtain the host (with port), username and password from Exasol SaaS for your testing DB and fill them in `.env` as described above. Make sure that your account has network access enabled for your IP.

#### Teradata
```bash
TERADATA_HOST=
TERADATA_USERNAME=
TERADATA_PASSWORD=
TERADATA_PORT=
```


### Tests

Run tests with following command.
Expand Down
12 changes: 10 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ stages:
displayName: Build and test
jobs:
- job: Build
timeoutInMinutes: 100
timeoutInMinutes: 200
displayName: Build
pool:
vmImage: 'ubuntu-latest'
Expand Down Expand Up @@ -71,6 +71,9 @@ stages:
- script: |
docker-compose build --pull production
displayName: 'Build project images'
env:
AWS_ACCESS_KEY_ID: $(DRIVERS_AWS_ACCESS_KEY_ID)
AWS_SECRET_ACCESS_KEY: $(DRIVERS_AWS_SECRET_ACCESS_KEY)
- script: |
docker-compose run production php -v
parallel -j12 --linebuffer docker-compose run production composer ::: \
Expand Down Expand Up @@ -137,7 +140,8 @@ stages:
tests-synapse-heap4000temp-hash \
tests-synapse-heap4000temp-optimized \
tests-synapse-heap4000temp-optimized-hash \
tests-synapse-next
tests-synapse-next \
tests-teradata
PARALLEL_EXIT_CODE=$?
cat /tmp/parallel-joblog
sleep 1
Expand Down Expand Up @@ -166,6 +170,10 @@ stages:
EXASOL_HOST: $(EXASOL_CLUSTER_DNS)
EXASOL_USERNAME: devel
EXASOL_PASSWORD: $(EXA_SAAS_TOKEN)
TERADATA_HOST: $(TERADATA_HOST)
TERADATA_PASSWORD: $(TERADATA_PASSWORD)
TERADATA_PORT: $(TERADATA_PORT)
TERADATA_USERNAME: $(TERADATA_USERNAME)
- script: |
docker-compose stop
php ./provisioning/cli.php app:delete:synapse \
Expand Down
12 changes: 7 additions & 5 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@
"keboola/php-csv-db-import": "^5.0",
"keboola/php-file-storage-utils": ">=0.2",
"keboola/table-backend-utils": "^0.18",
"microsoft/azure-storage-blob": "^1.4"
"keboola/php-temp": "^1.0",
"microsoft/azure-storage-blob": "^1.4",
"symfony/process": "^4.4|^5.0"
},
"require-dev": {
"phpstan/phpstan": "^0.12.54",
"phpstan/phpstan-phpunit": "^0.12.16",
"keboola/coding-standard": "^9.0",
"php-parallel-lint/php-parallel-lint": "^1.2",
"phpstan/extension-installer": "^1.0",
"keboola/datadir-tests": "^2.0",
"keboola/php-temp": "^1.0",
"symfony/process": "^4.4|^5.0"
"keboola/datadir-tests": "^2.0"
},
"autoload": {
"psr-4": {
Expand Down Expand Up @@ -49,6 +49,7 @@
"tests-synapse-clusterdindextemp": "SUITE=tests-synapse-clusterdindextemp CREDENTIALS_IMPORT_TYPE=SAS CREDENTIALS_EXPORT_TYPE=MASTER_KEY TEMP_TABLE_TYPE=CLUSTERED_INDEX DEDUP_TYPE=TMP_TABLE phpunit --colors=always --testsuite tests-synapse-clusterdindextemp",
"tests-synapse-mi": "SUITE=tests-synapse-mi CREDENTIALS_IMPORT_TYPE=MANAGED_IDENTITY CREDENTIALS_EXPORT_TYPE=MANAGED_IDENTITY TEMP_TABLE_TYPE=HEAP DEDUP_TYPE=TMP_TABLE phpunit --colors=always --testsuite synapse-mi",
"tests-exasol": "SUITE=tests-exasol STORAGE_TYPE=S3 phpunit --colors=always --testsuite exasol",
"tests-teradata": "SUITE=tests-teradata STORAGE_TYPE=S3 phpunit --colors=always --testsuite tests-teradata",
"tests-functional": [
"@tests-snowflake-abs",
"@tests-snowflake-s3",
Expand All @@ -58,7 +59,8 @@
"@tests-synapse-clusterdindextemp",
"@tests-synapse-heap4000temp",
"@tests-synapse-heap4000temp-optimized",
"@tests-exasol"
"@tests-exasol",
"@tests-teradata"
],
"tests": [
"@tests-unit",
Expand Down
10 changes: 9 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
version: '3'
services:
production: &prod
build: .
build:
context: .
args:
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
environment:
- ABS_ACCOUNT_NAME
- ABS_ACCOUNT_KEY
Expand All @@ -24,6 +28,10 @@ services:
- EXASOL_USERNAME
- EXASOL_PASSWORD
- BUILD_PREFIX
- TERADATA_HOST
- TERADATA_USERNAME
- TERADATA_PASSWORD
- TERADATA_PORT
dev: &dev
<<: *prod
image: keboola/php-db-import-export
Expand Down
14 changes: 14 additions & 0 deletions docker/teradata/odbc.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[ODBC]
InstallDir=/opt/teradata/client/ODBC_64
Trace=0
TraceDll=/opt/teradata/client/ODBC_64/lib/odbctrac.so
TraceFile=/usr/odbcusr/trace.log
TraceAutoStop=0

[Teradata]
Driver = /opt/teradata/client/ODBC_64/lib/tdataodbc_sb64.so
UsageCount = 2
APILevel = CORE
ConnectFunctions = YYY
DriverODBCVer = 3.51
SQLLevel = 1
9 changes: 9 additions & 0 deletions docker/teradata/odbcinst.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[ODBC DRIVERS]
Teradata=Installed

[Teradata]
Driver=/opt/teradata/client/ODBC_64/lib/tdataodbc_sb64.so
APILevel=CORE
ConnectFunctions=YYY
DriverODBCVer=3.51
SQLLevel=1
4 changes: 4 additions & 0 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
<exclude>tests/functional/Synapse/SqlCommandBuilderTest.php</exclude>
</testsuite>

<testsuite name="tests-teradata">
<directory>tests/functional/Teradata</directory>
</testsuite>

<testsuite name="unit">
<directory>tests/unit</directory>
</testsuite>
Expand Down
67 changes: 67 additions & 0 deletions src/Backend/Teradata/Helper/BackendHelper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?php

declare(strict_types=1);

namespace Keboola\Db\ImportExport\Backend\Teradata\Helper;

use Keboola\Db\ImportExport\Storage\S3\SourceFile;

final class BackendHelper
{
    /**
     * Generates a unique name for a temporary import table.
     *
     * The name is "__temp_" followed by uniqid('csvimport', true) with dots replaced by underscores.
     */
    public static function generateTempTableName(): string
    {
        return '__temp_' . str_replace('.', '_', uniqid('csvimport', true));
    }

    /**
     * Generates a unique name for a temporary deduplication table.
     *
     * Same scheme as generateTempTableName(), but with the "__temp_DEDUP_" prefix.
     */
    public static function generateTempDedupTableName(): string
    {
        return '__temp_DEDUP_' . str_replace('.', '_', uniqid('csvimport', true));
    }

    /**
     * Creates a wildcard string which should match all files in the manifest.
     *
     * The shortest entry is compared byte by byte with all other entries; every position where
     * the entries differ is replaced by "*":
     * [file01.csv, file02.csv] => file0*.csv
     *
     * When the resulting mask would miss an entry (the shortest name is a literal prefix of
     * a longer one, e.g. [part1, part10]), a trailing "*" is appended so that every manifest
     * entry is matched.
     *
     * TODO
     * - not all the files matched by the wildcard have to be listed in the manifest, so files which
     *   are on S3 but not in the manifest can be matched as well
     * - masks are built by position, e.g. [1_file.csv, 2_file.csv] gives a mask with a leading wildcard
     *
     * @param SourceFile $source
     * @return string empty string when the manifest has no entries
     * @throws \Keboola\Db\Import\Exception
     */
    public static function getMask(SourceFile $source): string
    {
        $entries = $source->getManifestEntries();
        if (count($entries) === 0) {
            // no entries -> no data to load
            return '';
        }
        // SourceDirectory returns fileName as directory/file.csv but SourceFile returns s3://bucket/directory/file.csv
        $toRemove = $source->getS3Prefix() . '/';

        // array_values() re-indexes the entries so the result never depends on the manifest array keys
        $names = array_map(
            static function (string $entry) use ($toRemove): string {
                return str_replace($toRemove, '', $entry);
            },
            array_values($entries)
        );

        // the first of the shortest names is the template for the mask
        $shortest = $names[0];
        foreach ($names as $name) {
            if (strlen($name) < strlen($shortest)) {
                $shortest = $name;
            }
        }

        $mask = '';
        $length = strlen($shortest);
        for ($index = 0; $index < $length; $index++) {
            $letter = $shortest[$index];
            $match = true;
            foreach ($names as $name) {
                // every name is at least as long as the shortest one, so $index is always valid
                if ($name[$index] !== $letter) {
                    $match = false;
                    break;
                }
            }
            $mask .= $match ? $letter : '*';
        }

        // a mask built from the shortest name cannot cover the tail of longer names on its own
        if (!self::maskMatchesAll($mask, $names)) {
            $mask .= '*';
        }
        return $mask;
    }

    /**
     * Checks that the mask (with "*" standing for any sequence of bytes) matches every given name.
     *
     * @param string[] $names
     */
    private static function maskMatchesAll(string $mask, array $names): bool
    {
        // preg_quote() escapes "*" as "\*", which is then turned into the regex wildcard
        $pattern = '/^' . str_replace('\*', '.*', preg_quote($mask, '/')) . '$/sD';
        foreach ($names as $name) {
            if (preg_match($pattern, $name) !== 1) {
                return false;
            }
        }
        return true;
    }
}
17 changes: 17 additions & 0 deletions src/Backend/Teradata/TeradataException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

declare(strict_types=1);

namespace Keboola\Db\ImportExport\Backend\Teradata;

use Keboola\Db\Import\Exception;

class TeradataException extends Exception
{
    /**
     * Converts a Doctrine DBAL exception into the exception which should be rethrown.
     *
     * The conversion is not implemented yet (see TODO), so the given exception is returned unchanged.
     */
    public static function convertException(\Doctrine\DBAL\Exception $e): \Throwable
    {
        // TODO
        return $e;
    }

    /**
     * Misspelled alias of convertException(), kept so that existing callers keep working.
     *
     * @deprecated use convertException()
     */
    public static function covertException(\Doctrine\DBAL\Exception $e): \Throwable
    {
        return self::convertException($e);
    }
}
63 changes: 63 additions & 0 deletions src/Backend/Teradata/TeradataImportOptions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

declare(strict_types=1);

namespace Keboola\Db\ImportExport\Backend\Teradata;

use Keboola\Db\ImportExport\ImportOptions;

class TeradataImportOptions extends ImportOptions
{
    /** Host of the Teradata server. */
    private string $teradataHost;

    /** User name used to connect to Teradata. */
    private string $teradataUser;

    /** Password used to connect to Teradata. */
    private string $teradataPassword;

    /** Port of the Teradata server. */
    private int $teradataPort;

    /**
     * Stores the Teradata connection parameters and passes the remaining
     * arguments to the parent ImportOptions constructor.
     *
     * @param string[] $convertEmptyValuesToNull
     */
    public function __construct(
        string $teradataHost,
        string $teradataUser,
        string $teradataPassword,
        int $teradataPort,
        array $convertEmptyValuesToNull = [],
        bool $isIncremental = false,
        bool $useTimestamp = false,
        int $numberOfIgnoredLines = 0
    ) {
        // connection parameters are kept on this class only
        $this->teradataHost = $teradataHost;
        $this->teradataPort = $teradataPort;
        $this->teradataUser = $teradataUser;
        $this->teradataPassword = $teradataPassword;

        parent::__construct($convertEmptyValuesToNull, $isIncremental, $useTimestamp, $numberOfIgnoredLines);
    }

    /**
     * @return string host given to the constructor
     */
    public function getTeradataHost(): string
    {
        return $this->teradataHost;
    }

    /**
     * @return int port given to the constructor
     */
    public function getTeradataPort(): int
    {
        return $this->teradataPort;
    }

    /**
     * @return string user name given to the constructor
     */
    public function getTeradataUser(): string
    {
        return $this->teradataUser;
    }

    /**
     * @return string password given to the constructor
     */
    public function getTeradataPassword(): string
    {
        return $this->teradataPassword;
    }
}
Loading

0 comments on commit 0b13a8c

Please sign in to comment.