stan, CS, docs
jirkasemmler committed Apr 11, 2022
1 parent fce4fa7 commit e1471b9
Showing 4 changed files with 118 additions and 98 deletions.
8 changes: 1 addition & 7 deletions src/Backend/Teradata/ToStage/FromS3TPTAdapter.php
@@ -152,11 +152,7 @@ public function runCopyCommand(
private function getLogData(Temp $temp): string
{
if (file_exists($temp->getTmpFolder() . '/import-1.out')) {

$data = file_get_contents($temp->getTmpFolder() . '/import-1.out') ?: 'unable to get error';
file_put_contents(__DIR__ . 'out.txt', $data);
file_put_contents('/code/out.txt', $data);
return $data;
return file_get_contents($temp->getTmpFolder() . '/import-1.out') ?: 'unable to get error';
}

return 'unable to get error';
@@ -286,8 +282,6 @@ private function generateTPTScript(
EOD;

file_put_contents($folder . '/import_vars.txt', $jobVariableFile);
file_put_contents('/code/import_vars.txt', $jobVariableFile);
file_put_contents('/code/import_script.tpt', $jobVariableFile);

return [
$temp,
18 changes: 8 additions & 10 deletions src/Storage/S3/TeradataExportTPTAdapter.php
@@ -59,16 +59,15 @@ public function runCopyCommand(
$process->wait();

// debug stuff
foreach ($process as $type => $data) {
if ($process::OUT === $type) {
echo "\nRead from stdout: " . $data;
} else { // $process::ERR === $type
echo "\nRead from stderr: " . $data;
}
}
// foreach ($process as $type => $data) {
// if ($process::OUT === $type) {
// echo "\nRead from stdout: " . $data;
// } else { // $process::ERR === $type
// echo "\nRead from stderr: " . $data;
// }
// }

if ($process->getExitCode() !== 0) {

throw new FailedTPTLoadException(
$process->getErrorOutput(),
$process->getOutput(),
@@ -83,7 +82,7 @@ public function runCopyCommand(
/**
* generates params to run TPT script
*
* @return array
* @return array{Temp, array<int, string>}
*/
private function generateTPTExportScript(
Storage\SourceInterface $source,
@@ -161,7 +160,6 @@ private function generateTPTExportScript(
EOD,
...$exportOptions->getTeradataCredentials(),
...$exportOptions->getTeradataCredentials(),

);
$temp = new Temp();
$temp->initRunFolder();
14 changes: 8 additions & 6 deletions src/Storage/Teradata/TeradataExportOptions.php
@@ -21,6 +21,7 @@ class TeradataExportOptions extends ExportOptions

private int $teradataPort;

// TD settings for sliced files
private string $bufferSize;

private string $maxObjectSize;
@@ -54,17 +55,11 @@ public function __construct(
$this->singlePartFile = $singlePartFile;
}

/**
* @return string
*/
public function getBufferSize(): string
{
return $this->bufferSize;
}

/**
* @return string
*/
public function getMaxObjectSize(): string
{
return $this->maxObjectSize;
@@ -90,6 +85,9 @@ public function getTeradataPort(): int
return $this->teradataPort;
}

/**
* @return array<int, string>
*/
public function getTeradataCredentials(): array
{
return [
@@ -99,6 +97,10 @@ public function getTeradataCredentials(): array
];
}

/**
* generates the part of the TPT script that defines settings for sliced files
* @return string
*/
public function generateS3SizeOptions(): string
{
return sprintf(
176 changes: 101 additions & 75 deletions tests/functional/Teradata/ExportTest.php
@@ -15,6 +15,7 @@
use Keboola\Db\ImportExport\ImportOptions;
use Keboola\Db\ImportExport\Storage;
use Keboola\Db\ImportExport\Storage\S3;
use Keboola\Db\ImportExport\Storage\Teradata\Table;
use Keboola\TableBackendUtils\Escaping\Teradata\TeradataQuote;
use Keboola\TableBackendUtils\Table\Teradata\TeradataTableDefinition;
use Keboola\TableBackendUtils\Table\Teradata\TeradataTableQueryBuilder;
@@ -24,71 +25,6 @@ class ExportTest extends TeradataBaseTestCase
{
private const EXPORT_DIR = 'teradata_test_export';

public function exportOptionsProvider(): array
{
/* MOS = MaxObjectSize ; BS = BufferSize
* MOS sets the max size of the target slice
* BUT!!!
* - BS can be MIN 5M
* - At least 1 Buffer has to be written to each object -> when MOS < BS then MOS is ignored and each slice is of BS size
* - BS doesn't have to fill the whole MOS; the size of the file is BS * n where n={1,2,3,4...}. So when MOS=12, BS=5 => the file will have 10M
*/
return
[
// buffer can fit just once in the MOS
'buffer 6M, max 8M, split, not single' => [
[
'6M',
'8M',
], // options
[
['fileName' => 'F00000', 'size' => 6],
['fileName' => 'F00001', 'size' => 6],
['fileName' => 'F00002', 'size' => 0],
], // expected files
],
// buffer can fit twice in MOS -> object has 10M
'buffer 5, max 11m, split, not single' => [
[
'5M',
'11M',
], // options
[
['fileName' => 'F00000', 'size' => 10],
['fileName' => 'F00001', 'size' => 0],
], // expected files
],
// MOS is smaller than min buffer size -> MOS is ignored and parts are of buffer size
'buffer 5M, max 33k, split, not single' => [
[
'5M',
'33k',
], // options
[
['fileName' => 'F00000', 'size' => 5],
['fileName' => 'F00001', 'size' => 5],
['fileName' => 'F00002', 'size' => 5],
['fileName' => 'F00003', 'size' => 0],
], // expected files
],
// whole file can fit in the object
'buffer 5M, max 100M, split, single' => [
[
'5M',
'100M',
], // options
[
['fileName' => 'F00000', 'size' => 0],
], // expected files
],
'default' => [
[], // options, default is 8M and 8G
[
['fileName' => 'F00000', 'size' => 0],
], // expected files
],
];
}

public function setUp(): void
{
@@ -129,8 +65,9 @@ public function testExportGzip(): void
$schema = $this->getDestinationDbName();
$this->initTable(self::BIGGER_TABLE);
$file = new CsvFile(self::DATA_DIR . 'big_table.csv');
/** @var S3\SourceFile $source */
$source = $this->getSourceInstance('big_table.csv', $file->getHeader());
$destination = new Storage\Teradata\Table(
$destination = new Table(
$schema,
self::BIGGER_TABLE
);
@@ -150,6 +87,7 @@ public function testExportGzip(): void
$exportOptions
);

/** @var array<int, array> $files */
$files = $this->listFiles($this->getExportDir());
self::assertNotNull($files);
self::assertCount(1, $files);
@@ -159,15 +97,20 @@

/**
* @dataProvider exportOptionsProvider
* @param string[] $providedExportOptions
* @param array[] $expectedFiles
* @throws \Doctrine\DBAL\Exception
* @throws \Keboola\Csv\InvalidArgumentException
*/
public function testExportOptionsForSlicing($providedExportOptions, $expectedFiles): void
public function testExportOptionsForSlicing(array $providedExportOptions, array $expectedFiles): void
{
// import
$schema = $this->getDestinationDbName();
$this->initTable(self::BIGGER_TABLE);
$file = new CsvFile(self::DATA_DIR . 'big_table.csv');
/** @var S3\SourceFile $source */
$source = $this->getSourceInstance('big_table.csv', $file->getHeader());
$destination = new Storage\Teradata\Table(
$destination = new Table(
$schema,
self::BIGGER_TABLE
);
@@ -186,28 +129,37 @@ public function testExportOptionsForSlicing($providedExportOptions, $expectedFiles
$exportOptions
);

/** @var array<int, array> $files */
$files = $this->listFiles($this->getExportDir());
self::assertFilesMatch($expectedFiles, $files);
}

public static function assertFilesMatch($expectedFiles, $files): void
/**
* @param array<int, array> $expectedFiles
* @param array<int, array> $files
*/
public static function assertFilesMatch(array $expectedFiles, array $files): void
{
self::assertCount(count($expectedFiles), $files);
foreach ($expectedFiles as $i => $expectedFile) {
$actualFile = $files[$i];
self::assertContains($expectedFile['fileName'], $actualFile['Key']);
$fileSize = (int) $actualFile['Size'];
$expectedFileSize = ((int) $expectedFile['size']) * 1024 * 1024;
// check that the file size is in range xMB +- 10 000B (because I cannot really say what the exact size in bytes should be)
// check that the file size is in range xMB +- 10 000B
// - (because I cannot really say what the exact size in bytes should be)
// the size of the last file is ignored
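// e.g. an expected 6M file: 6 * 1024 * 1024 = 6 291 456 B,
// so any actual size in (6 281 456, 6 301 456) passes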
if ($expectedFileSize !== 0) {
self::assertTrue(($expectedFileSize - 10000) < $fileSize && $fileSize < ($expectedFileSize + 10000), sprintf("Actual size is %s but expected is %s", $fileSize, $expectedFileSize));
self::assertTrue(
($expectedFileSize - 10000) < $fileSize && $fileSize < ($expectedFileSize + 10000),
sprintf('Actual size is %s but expected is %s', $fileSize, $expectedFileSize)
);
}
}
}

/**
* @param Storage\Teradata\Table $destination
* @param Table $destinationTable
* @param S3\SourceFile|S3\SourceDirectory $source
* @param TeradataImportOptions $options
* @param int $repeatImport - duplicates data in the staging table -> able to create a big table
@@ -220,6 +172,7 @@ private function importTable(
int $repeatImport = 0
): void {
$importer = new ToStageImporter($this->connection);
/** @var Table $destinationTable */
$destinationRef = new TeradataTableReflection(
$this->connection,
$destinationTable->getSchema(),
@@ -242,6 +195,7 @@ private function importTable(
$options
);

// re-insert inserted data -> make the table BIIIG
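// note: each pass re-inserts the whole table into itself, so the row count
// doubles every iteration -> $repeatImport = n yields 2^n times the original rows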
for ($i = 0; $i < $repeatImport; $i++) {
$this->connection->executeStatement(sprintf(
'INSERT INTO %s.%s SELECT * FROM %s.%s',
@@ -266,8 +220,9 @@ public function testExportSimple(): void
// import
$this->initTable(self::TABLE_OUT_CSV_2COLS);
$file = new CsvFile(self::DATA_DIR . 'with-ts.csv');
/** @var S3\SourceFile $source */
$source = $this->getSourceInstance('with-ts.csv', $file->getHeader());
$destination = new Storage\Teradata\Table(
$destination = new Table(
$this->getDestinationDbName(),
'out_csv_2Cols'
);
@@ -297,16 +252,16 @@ public function testExportSimple(): void
1 // skip header
);
$this->assertCsvFilesSame($expected, $actual);

}

public function testExportSimpleWithQuery(): void
{
// import
$this->initTable(self::TABLE_ACCOUNTS_3);
$file = new CsvFile(self::DATA_DIR . 'tw_accounts.csv');
/** @var S3\SourceFile $source */
$source = $this->getSourceInstance('tw_accounts.csv', $file->getHeader());
$destination = new Storage\Teradata\Table(
$destination = new Table(
$this->getDestinationDbName(),
'accounts-3'
);
@@ -344,4 +299,75 @@ public function testExportSimpleWithQuery(): void
);
$this->assertCsvFilesSame($expected, $actual);
}

/**
* @return array[]
*/
public function exportOptionsProvider(): array
{
/* MOS = MaxObjectSize ; BS = BufferSize
* MOS sets the max size of the target slice
* BUT!!!
* - BS can be MIN 5M
* - At least 1 Buffer has to be written to each object
* -> when MOS < BS then MOS is ignored and each slice is of BS size
* - BS doesn't have to fill the whole MOS; the size of the file is BS * n where n={1,2,3,4...}.
* So when MOS=12, BS=5 => the file will have 10M (see the worked example below)
*/
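// Worked example of the sizing rule above (assumed arithmetic, inferred
// from the provider cases below rather than from the Teradata TPT docs):
// sliceSize = BS * max(1, floor(MOS / BS))
// BS=5M, MOS=11M -> 5M * floor(11/5) = 10M per slice
// BS=6M, MOS=8M -> 6M * floor(8/6) = 6M per slice
// BS=5M, MOS=33k -> MOS < BS -> MOS is ignored, slices are 5M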
return
[
// buffer can fit just once in the MOS
'buffer 6M, max 8M, split, not single' => [
[
'6M',
'8M',
], // options
[
['fileName' => 'F00000', 'size' => 6],
['fileName' => 'F00001', 'size' => 6],
['fileName' => 'F00002', 'size' => 0],
], // expected files
],
// buffer can fit twice in MOS -> object has 10M
'buffer 5, max 11m, split, not single' => [
[
'5M',
'11M',
], // options
[
['fileName' => 'F00000', 'size' => 10],
['fileName' => 'F00001', 'size' => 0],
], // expected files
],
// MOS is smaller than min buffer size -> MOS is ignored and parts are of buffer size
'buffer 5M, max 33k, split, not single' => [
[
'5M',
'33k',
], // options
[
['fileName' => 'F00000', 'size' => 5],
['fileName' => 'F00001', 'size' => 5],
['fileName' => 'F00002', 'size' => 5],
['fileName' => 'F00003', 'size' => 0],
], // expected files
],
// whole file can fit in the object
'buffer 5M, max 100M, split, single' => [
[
'5M',
'100M',
], // options
[
['fileName' => 'F00000', 'size' => 0],
], // expected files
],
'default' => [
[], // options, default is 8M and 8G
[
['fileName' => 'F00000', 'size' => 0],
], // expected files
],
];
}
}
