Skip to content

Commit

Permalink
fix ABS sliced file name
Browse files Browse the repository at this point in the history
  • Loading branch information
martinjunger committed Jan 23, 2023
1 parent 44cfa96 commit 2e36539
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 33 deletions.
22 changes: 12 additions & 10 deletions src/Backend/Teradata/Helper/StorageABSHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Keboola\Db\ImportExport\Backend\Teradata\Helper;

use Keboola\Db\ImportExport\Backend\Helper\BackendHelper as BaseHelper;
use Keboola\Db\ImportExport\Storage\ABS\BaseFile;
use Keboola\Db\ImportExport\Storage\ABS\SourceFile;
use LogicException;

Expand All @@ -19,14 +20,14 @@ final class StorageABSHelper extends BaseHelper
*/
public static function getMask(SourceFile $source): string
{
throw new LogicException('TODO Not implemented yet');
/*$entries = $source->getManifestEntries();
$entries = $source->getManifestEntries();
if (count($entries) === 0) {
// no entries -> no data to load
return '';
}
// SourceDirectory returns fileName as directory/file.csv but SourceFile returns s3://bucket/directory/file.csv
$toRemove = $source->getS3Prefix() . '/';
// SourceDirectory returns fileName as directory/file.csv
// but SourceFile returns azure://myaccount...windows.net/bucket/directory/file.csv
$toRemove = $source->getContainerUrl(BaseFile::PROTOCOL_AZURE);
$entriesAsArrays = [];
$min = 99999;
$minIndex = 0;
Expand All @@ -53,7 +54,7 @@ public static function getMask(SourceFile $source): string
}
$out[$index] = $match ? $letter : '*';
}
return implode('', $out);*/
return implode('', $out);
}

public static function isMultipartFile(SourceFile $source): bool
Expand All @@ -69,25 +70,26 @@ public static function isMultipartFile(SourceFile $source): bool
}

/**
* extracts filename and prefix from s3 url - removing bucket, protocol and Fxxx suffix
* extracts filename and prefix from ABS url - removing bucket, protocol and Fxxx suffix
* @return string[]
*/
public static function buildPrefixAndObject(SourceFile $source): array
{
throw new LogicException('TODO Not implemented yet');
/*// docs say 6, but my files are created with 5
// docs say 6, but my files are created with 5
$entries = $source->getManifestEntries();
preg_match('/(?<filePath>.*)\/F(?<fileNumber>[0-9]{5,6})/', $entries[0], $out);

$filePath = $out['filePath'] ?? '';
$filePath = str_replace(($source->getS3Prefix() . '/'), '', $filePath);
// SourceDirectory returns fileName as directory/file.csv
// but SourceFile returns azure://myaccount...windows.net/bucket/directory/file.csv
$filePath = str_replace(($source->getContainerUrl(BaseFile::PROTOCOL_AZURE)), '', $filePath);

$exploded = explode('/', $filePath);
$object = end($exploded);
// get all the parts of exploded path but without the last thing - the filename
$prefix = implode('/', array_slice($exploded, 0, -1));
// prefix should end with / but only if it exists
$prefix = $prefix ? ($prefix . '/') : '';
return [$prefix, $object];*/
return [$prefix, $object];
}
}
30 changes: 13 additions & 17 deletions src/Backend/Teradata/ToStage/FromABSTPTAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use Keboola\Db\ImportExport\ImportOptions;
use Keboola\Db\ImportExport\ImportOptionsInterface;
use Keboola\Db\ImportExport\Storage;
use Keboola\FileStorage\Abs\AbsProvider;
use Keboola\FileStorage\Path\RelativePath;
use Keboola\FileStorage\S3\S3Provider;
use Keboola\TableBackendUtils\Escaping\Teradata\TeradataQuote;
Expand Down Expand Up @@ -205,35 +206,31 @@ private function generateTPTScript(
// file is abs://container/path/.../file.csv.gz/F00000
// extract real filepath from path -> remove F00000 and do just one load
// load with SPF = false
throw new LogicException('Unsupported import type - multipart');

/*[$prefix, $object] = StorageS3Helper::buildPrefixAndObject($source);
[$prefix, $object] = StorageABSHelper::buildPrefixAndObject($source);
$moduleStr = sprintf(
// phpcs:ignore
'AccessModuleInitStr = \'S3Region="%s" S3Bucket="%s" S3Prefix="%s" S3Object="%s" S3SinglePartFile=False S3ConfigDir=%s\'',
$source->getRegion(),
$source->getBucket(),
'AccessModuleInitStr = \'-ConfigDir "%s" -Container "%s" -Prefix "%s" -Object "%s" -SinglePartFile False\'',
$absConfigDir,
$source->getContainer(),
$prefix,
$object,
$absConfigDir
);*/
);
} else {
if ($source->isSliced()) {
// load with wildcard
// scenario b
throw new LogicException('Unsupported import type - sliced');

/*$mask = StorageABSHelper::getMask($source);
$path = RelativePath::createFromRootAndPath(new S3Provider(), $source->getBucket(), $mask);
$mask = StorageABSHelper::getMask($source);
$path = RelativePath::createFromRootAndPath(new AbsProvider(), $source->getContainer(), $mask);
$moduleStr = sprintf(
// phpcs:ignore
'AccessModuleInitStr = \'S3Region="%s" S3Bucket="%s" S3Prefix="%s" S3Object="%s" S3SinglePartFile=True S3ConfigDir=%s\'',
$source->getRegion(),
'AccessModuleInitStr = \'-ConfigDir "%s" -Container "%s" -Prefix "%s" -Object "%s" -SinglePartFile True\'',
$absConfigDir,
$path->getRoot(),
$path->getPathWithoutRoot() . '/',
$path->getFileName(),
$absConfigDir
);*/
);
} else {
// direct load with
// scenario a
Expand All @@ -243,9 +240,8 @@ private function generateTPTScript(
'AccessModuleInitStr = \'-ConfigDir "%s" -Container "%s" -Prefix "%s" -Object "%s" -SinglePartFile True\'',
$absConfigDir,
$source->getContainer(),
// TODO use parser
dirname($source->getFilePath()) === '.' ? '' : dirname($source->getFilePath()),
basename($source->getFilePath()),
$source->getPrefix(),
$source->getFileName(),
);
}
}
Expand Down
39 changes: 39 additions & 0 deletions src/Storage/ABS/SourceFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Keboola\Db\ImportExport\Storage\ABS;

use Exception as InternalException;
use Keboola\CsvOptions\CsvOptions;
use Keboola\Db\Import\Exception;
use Keboola\Db\ImportExport\Storage\FileNotFoundException;
Expand Down Expand Up @@ -204,4 +205,42 @@ public function isSliced(): bool
{
return $this->isSliced;
}

/**
 * Returns the last segment of the file path,
 * e.g. data/shared/file.csv yields file.csv.
 *
 * @throws InternalException when the source is flagged as sliced
 */
public function getFileName(): string
{
    if ($this->isSliced) {
        throw new InternalException('Not supported getFileName for sliced files.');
    }

    $path = $this->filePath;
    $lastSlash = strrpos($path, '/');

    // no directory separator at all: the whole path is the file name
    if ($lastSlash === false) {
        return $path;
    }

    // keep only what follows the last directory separator
    return substr($path, $lastSlash + 1);
}

/**
 * Returns the directory part of the file path including a trailing slash,
 * e.g. data/shared/file.csv yields data/shared/.
 * Returns an empty string when the path has no directory part.
 *
 * NOTE(review): nothing in this method throws directly; the declared exception
 * presumably originates in getContainerUrl() - confirm.
 *
 * @throws InternalException
 */
public function getPrefix(): string
{
    // The file path may carry the full container URL
    // (azure://myaccount...windows.net/bucket/directory/file.csv) instead of
    // the bare directory/file.csv form, so the container URL is removed first.
    $filePath = str_replace(
        $this->getContainerUrl(BaseFile::PROTOCOL_AZURE),
        '',
        $this->getFilePath()
    );

    $segments = explode('/', $filePath);
    // every segment except the last one (the file name) forms the prefix
    $prefix = implode('/', array_slice($segments, 0, -1));

    // Compare strictly against the empty string: a truthiness check would treat
    // a directory literally named "0" as "no prefix" and drop it.
    return $prefix !== '' ? ($prefix . '/') : '';
}
}
7 changes: 4 additions & 3 deletions src/Storage/ABS/TeradataExportTPTAdapter.php
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ private function generateTPTExportScript(
);
file_put_contents($absConfigDir . '/credentials', $credentials);

$path = $destination->getRelativePath();

$tptScript = sprintf(
/** @lang SQL */<<<EOD
USING CHARACTER SET UTF8
Expand All @@ -140,9 +142,8 @@ private function generateTPTExportScript(
EOD,
$absConfigDir,
$destination->getContainer(),
// TODO use parser
dirname($destination->getFilePath()),
basename($destination->getFilePath()) . ($exportOptions->isCompressed() ? '.gz' : ''),
$path->getPathWithoutRoot(),
$path->getFileName() . ($exportOptions->isCompressed() ? '.gz' : ''),
$exportOptions->generateABSSizeOptions(),
$source->getFromStatement()
);
Expand Down
6 changes: 3 additions & 3 deletions tests/functional/Teradata/ExportTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -430,9 +430,9 @@ public function exportOptionsProvider(): array
public function pipelineOptions(): array
{
if (getenv('STORAGE_TYPE') === 'S3') {
$generatedSliceName = '.gz/F000000';
$generatedSliceName = '/F000000';
} elseif (getenv('STORAGE_TYPE') === 'ABS') {
$generatedSliceName = '.gz/F00000';
$generatedSliceName = '/F00000';
} else {
$this->fail('Unsupported file storage in this test!');
}
Expand All @@ -441,7 +441,7 @@ public function pipelineOptions(): array
'compressed singleFile=false' => [
true, // gz
false, // use SinglePartFile
$generatedSliceName, // generated file name based on ^^
'.gz' . $generatedSliceName, // generated file name based on ^^
],
'compressed singleFile=true' => [
true,
Expand Down

0 comments on commit 2e36539

Please sign in to comment.