Skip to content

Commit

Permalink
Added a way to pass configuration to flow cli
Browse files Browse the repository at this point in the history
  • Loading branch information
norberttech committed Oct 13, 2024
1 parent 634e2f4 commit 208bb8c
Show file tree
Hide file tree
Showing 12 changed files with 240 additions and 60 deletions.
25 changes: 24 additions & 1 deletion docs/components/cli/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
composer require flow-php/cli
```

In some cases it might make sense to install the CLI globally:
In some cases, it might make sense to install the CLI globally:

```
composer global require flow-php/cli
Expand All @@ -17,6 +17,25 @@ Now you can run the CLI using the `flow` command.

## Commands

### Config

All Flow CLI commands can be configured using the `--config` option. The option accepts a path to a PHP configuration file that returns a `Config` or `ConfigBuilder` instance.

`.flow.php`

```php
<?php

use function Flow\ETL\DSL\config_builder;

return config_builder()
->id('execution-id');
```

`flow read --config .flow.php orders.csv`

One of the most common use cases is to mount a custom filesystem into the Flow fstab to access remote files through the CLI.

```shell
$ flow
Flow PHP - Data processing framework
Expand Down Expand Up @@ -64,6 +83,7 @@ Arguments:
Options:
--file-format=FILE-FORMAT Source file format. When not set file format is guessed from source file path extension
--file-limit=FILE-LIMIT Limit number of rows that are going to be used to infer file schema, when not set whole file is analyzed
--config=CONFIG Path to a local php file that MUST return instance of: Flow\ETL\Config
--output-pretty Pretty print schema
--output-table Pretty schema as ascii table
--schema-auto-cast[=SCHEMA-AUTO-CAST] When set Flow will try to automatically cast values to more precise data types, for example datetime strings will be casted to datetime type [default: false]
Expand Down Expand Up @@ -122,6 +142,7 @@ Options:
--file-format=FILE-FORMAT File format. When not set file format is guessed from source file path extension
--file-batch-size=FILE-BATCH-SIZE Number of rows that are going to be read and displayed in one batch, when set to -1 whole dataset will be displayed at once [default: 100]
--file-limit=FILE-LIMIT Limit number of rows that are going to be used to infer file schema, when not set whole file is analyzed
--config=CONFIG Path to a local php file that MUST return instance of: Flow\ETL\Config
--output-truncate=OUTPUT-TRUNCATE Truncate output to given number of characters, when set to -1 output is not truncated at all [default: 20]
--schema-auto-cast[=SCHEMA-AUTO-CAST] When set Flow will try to automatically cast values to more precise data types, for example datetime strings will be casted to datetime type [default: false]
--json-pointer=JSON-POINTER JSON Pointer to a subtree from which schema should be extracted
Expand Down Expand Up @@ -160,6 +181,7 @@ Arguments:
Options:
--file-format=FILE-FORMAT Source file format. When not set file format is guessed from source file path extension
--file-limit=FILE-LIMIT Limit number of rows that are going to be used to infer file schema, when not set whole file is analyzed
--config=CONFIG Path to a local php file that MUST return instance of: Flow\ETL\Config
--json-pointer=JSON-POINTER JSON Pointer to a subtree from which schema should be extracted
--json-pointer-entry-name When set, JSON Pointer will be used as an entry name in the schema
--csv-header[=CSV-HEADER] When set, CSV header will be used as a schema
Expand Down Expand Up @@ -222,6 +244,7 @@ Arguments:

Options:
--analyze=ANALYZE Collect processing statistics and print them. [default: false]
--config=CONFIG Path to a local php file that MUST return instance of: Flow\ETL\Config
-h, --help Display help for the given command. When no command is given display help for the list command
-q, --quiet Do not output any message
-V, --version Display this application version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ final class ParquetExtractor implements Extractor, FileExtractor, LimitableExtra
private ByteOrder $byteOrder = ByteOrder::LITTLE_ENDIAN;

/**
* @param array<string> $columns
* @var array<string>
*/
private array $columns = [];

Expand Down Expand Up @@ -48,6 +48,10 @@ public function extract(FlowContext $context) : \Generator
$fileRows = $fileData['file']->metadata()->rowsNumber();
$flowSchema = $this->schemaConverter->fromParquet($fileData['file']->schema());

if (count($this->columns)) {
$flowSchema = $flowSchema->keep(...$this->columns);
}

if ($fileOffset > $fileRows) {
$fileData['stream']->close();
$fileOffset -= $fileRows;
Expand Down
17 changes: 12 additions & 5 deletions src/cli/src/Flow/CLI/Command/FileReadCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
namespace Flow\CLI\Command;

use function Flow\CLI\{option_bool, option_int, option_int_nullable};
use function Flow\ETL\DSL\{config_builder, df};
use function Flow\ETL\DSL\{df};
use Flow\CLI\Arguments\{FilePathArgument};
use Flow\CLI\Command\Traits\{CSVExtractorOptions, JSONExtractorOptions, ParquetExtractorOptions, XMLExtractorOptions};
use Flow\CLI\Command\Traits\{
CSVExtractorOptions,
ConfigOptions,
JSONExtractorOptions,
ParquetExtractorOptions,
XMLExtractorOptions
};
use Flow\CLI\Factory\ExtractorFactory;
use Flow\CLI\Options\{FileFormat, FileFormatOption};
use Flow\CLI\Options\{ConfigOption, FileFormat, FileFormatOption};
use Flow\ETL\Formatter\AsciiTableFormatter;
use Flow\ETL\{Config, Rows};
use Flow\Filesystem\Path;
Expand All @@ -20,6 +26,7 @@

final class FileReadCommand extends Command
{
use ConfigOptions;
use CSVExtractorOptions;
use JSONExtractorOptions;
use ParquetExtractorOptions;
Expand All @@ -45,6 +52,7 @@ public function configure() : void
->addOption('output-truncate', null, InputOption::VALUE_REQUIRED, 'Truncate output to given number of characters, when set to -1 output is not truncated at all', 20)
->addOption('schema-auto-cast', null, InputOption::VALUE_OPTIONAL, 'When set Flow will try to automatically cast values to more precise data types, for example datetime strings will be casted to datetime type', false);

$this->addConfigOptions($this);
$this->addJSONOptions($this);
$this->addCSVOptions($this);
$this->addXMLOptions($this);
Expand Down Expand Up @@ -89,8 +97,7 @@ protected function execute(InputInterface $input, OutputInterface $output) : int

protected function initialize(InputInterface $input, OutputInterface $output) : void
{
$this->flowConfig = config_builder()->build();

$this->flowConfig = (new ConfigOption('config'))->get($input);
$this->sourcePath = (new FilePathArgument('file'))->getExisting($input, $this->flowConfig);
$this->fileFormat = (new FileFormatOption($this->sourcePath, 'file-format'))->get($input);
}
Expand Down
17 changes: 12 additions & 5 deletions src/cli/src/Flow/CLI/Command/FileRowsCountCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
namespace Flow\CLI\Command;

use function Flow\CLI\{option_int_nullable};
use function Flow\ETL\DSL\{config_builder, df};
use function Flow\ETL\DSL\{df};
use Flow\CLI\Arguments\{FilePathArgument};
use Flow\CLI\Command\Traits\{CSVExtractorOptions, JSONExtractorOptions, ParquetExtractorOptions, XMLExtractorOptions};
use Flow\CLI\Command\Traits\{
CSVExtractorOptions,
ConfigOptions,
JSONExtractorOptions,
ParquetExtractorOptions,
XMLExtractorOptions
};
use Flow\CLI\Factory\ExtractorFactory;
use Flow\CLI\Options\{FileFormat, FileFormatOption};
use Flow\CLI\Options\{ConfigOption, FileFormat, FileFormatOption};
use Flow\ETL\Config;
use Flow\Filesystem\Path;
use Symfony\Component\Console\Command\Command;
Expand All @@ -19,6 +25,7 @@

final class FileRowsCountCommand extends Command
{
use ConfigOptions;
use CSVExtractorOptions;
use JSONExtractorOptions;
use ParquetExtractorOptions;
Expand All @@ -39,6 +46,7 @@ public function configure() : void
->addOption('file-format', null, InputArgument::OPTIONAL, 'Source file format. When not set file format is guessed from source file path extension', null)
->addOption('file-limit', null, InputOption::VALUE_REQUIRED, 'Limit number of rows that are going to be used to infer file schema, when not set whole file is analyzed', null);

$this->addConfigOptions($this);
$this->addJSONOptions($this);
$this->addCSVOptions($this);
$this->addXMLOptions($this);
Expand All @@ -64,8 +72,7 @@ protected function execute(InputInterface $input, OutputInterface $output) : int

protected function initialize(InputInterface $input, OutputInterface $output) : void
{
$this->flowConfig = config_builder()->build();

$this->flowConfig = (new ConfigOption('config'))->get($input);
$this->sourcePath = (new FilePathArgument('file'))->getExisting($input, $this->flowConfig);
$this->fileFormat = (new FileFormatOption($this->sourcePath, 'file-format'))->get($input);
}
Expand Down
17 changes: 12 additions & 5 deletions src/cli/src/Flow/CLI/Command/FileSchemaCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
namespace Flow\CLI\Command;

use function Flow\CLI\{option_bool, option_int_nullable};
use function Flow\ETL\DSL\{config_builder, df, from_array, ref, schema_to_json, to_output};
use function Flow\ETL\DSL\{df, from_array, ref, schema_to_json, to_output};
use Flow\CLI\Arguments\{FilePathArgument};
use Flow\CLI\Command\Traits\{CSVExtractorOptions, JSONExtractorOptions, ParquetExtractorOptions, XMLExtractorOptions};
use Flow\CLI\Command\Traits\{
CSVExtractorOptions,
ConfigOptions,
JSONExtractorOptions,
ParquetExtractorOptions,
XMLExtractorOptions
};
use Flow\CLI\Factory\ExtractorFactory;
use Flow\CLI\Options\{FileFormat, FileFormatOption};
use Flow\CLI\Options\{ConfigOption, FileFormat, FileFormatOption};
use Flow\ETL\Config;
use Flow\Filesystem\Path;
use Symfony\Component\Console\Command\Command;
Expand All @@ -19,6 +25,7 @@

final class FileSchemaCommand extends Command
{
use ConfigOptions;
use CSVExtractorOptions;
use JSONExtractorOptions;
use ParquetExtractorOptions;
Expand All @@ -42,6 +49,7 @@ public function configure() : void
->addOption('output-table', null, InputOption::VALUE_NONE, 'Pretty schema as ascii table')
->addOption('schema-auto-cast', null, InputOption::VALUE_OPTIONAL, 'When set Flow will try to automatically cast values to more precise data types, for example datetime strings will be casted to datetime type', false);

$this->addConfigOptions($this);
$this->addJSONOptions($this);
$this->addCSVOptions($this);
$this->addXMLOptions($this);
Expand Down Expand Up @@ -90,8 +98,7 @@ protected function execute(InputInterface $input, OutputInterface $output) : int

protected function initialize(InputInterface $input, OutputInterface $output) : void
{
$this->flowConfig = config_builder()->build();

$this->flowConfig = (new ConfigOption('config'))->get($input);
$this->sourcePath = (new FilePathArgument('file'))->getExisting($input, $this->flowConfig);
$this->fileFormat = (new FileFormatOption($this->sourcePath, 'file-format'))->get($input);
}
Expand Down
9 changes: 7 additions & 2 deletions src/cli/src/Flow/CLI/Command/PipelineRunCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
namespace Flow\CLI\Command;

use function Flow\CLI\option_bool;
use function Flow\ETL\DSL\config_builder;
use Flow\CLI\Arguments\FilePathArgument;
use Flow\CLI\Command\Traits\ConfigOptions;
use Flow\CLI\Options\ConfigOption;
use Flow\CLI\PipelineFactory;
use Flow\ETL\Exception\{Exception};
use Flow\ETL\{Config};
Expand All @@ -18,6 +19,8 @@

final class PipelineRunCommand extends Command
{
use ConfigOptions;

private ?Config $flowConfig = null;

private ?Path $pipelinePath = null;
Expand Down Expand Up @@ -46,6 +49,8 @@ public function configure() : void
)
->addArgument('pipeline-file', InputArgument::REQUIRED, 'Path to a php/json with DataFrame definition.')
->addOption('analyze', null, InputArgument::OPTIONAL, 'Collect processing statistics and print them.', false);

$this->addConfigOptions($this);
}

public function execute(InputInterface $input, OutputInterface $output) : int
Expand Down Expand Up @@ -77,7 +82,7 @@ public function execute(InputInterface $input, OutputInterface $output) : int

protected function initialize(InputInterface $input, OutputInterface $output) : void
{
$this->flowConfig = config_builder()->build();
$this->flowConfig = (new ConfigOption('config'))->get($input);
$this->pipelinePath = (new FilePathArgument('pipeline-file'))->getExisting($input, $this->flowConfig);
}
}
18 changes: 18 additions & 0 deletions src/cli/src/Flow/CLI/Command/Traits/ConfigOptions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

declare(strict_types=1);

namespace Flow\CLI\Command\Traits;

use Flow\ETL\Config;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputOption;

/**
 * Adds the shared `config` option to CLI commands that use this trait.
 */
trait ConfigOptions
{
    /**
     * Registers the `config` option (a required-value option) on the given command.
     *
     * The help text names both Config and Config\ConfigBuilder because
     * Flow\CLI\Options\ConfigOption accepts a file returning either of them.
     */
    private function addConfigOptions(Command $command) : void
    {
        $command->addOption(
            'config',
            null,
            InputOption::VALUE_REQUIRED,
            \sprintf(
                'Path to a local php file that MUST return instance of: %s or %s',
                Config::class,
                Config\ConfigBuilder::class
            )
        );
    }
}
49 changes: 49 additions & 0 deletions src/cli/src/Flow/CLI/Options/ConfigOption.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?php

declare(strict_types=1);

namespace Flow\CLI\Options;

use function Flow\CLI\option_string_nullable;
use function Flow\Filesystem\DSL\path_real;
use Flow\ETL\Config;
use Flow\Filesystem\Local\NativeLocalFilesystem;
use Symfony\Component\Console\Exception\InvalidArgumentException;
use Symfony\Component\Console\Input\InputInterface;

/**
 * Resolves the Flow Config for a CLI command from a console input option.
 *
 * The option value is a path to a local PHP file that returns either a Config
 * or a Config\ConfigBuilder instance.
 */
final class ConfigOption
{
    /**
     * @param string $optionName name of the console option holding the path to the config file
     */
    public function __construct(private readonly string $optionName)
    {
    }

    /**
     * Loads the configuration referenced by the option.
     *
     * When the option is not provided, the result of \Flow\ETL\DSL\config() is returned.
     * When the file returns a ConfigBuilder, it is built into a Config before returning.
     *
     * @throws InvalidArgumentException when the file does not exist or does not return a Config / ConfigBuilder
     */
    public function get(InputInterface $input) : Config
    {
        $configPath = option_string_nullable($this->optionName, $input);

        if ($configPath === null) {
            return \Flow\ETL\DSL\config();
        }

        $path = path_real($configPath);

        $fs = new NativeLocalFilesystem();

        if ($fs->status($path) === null) {
            throw new InvalidArgumentException("File '{$path->path()}' does not exist.");
        }

        // NOTE: the file is executed as PHP code, the path comes straight from the command line on purpose.
        /** @psalm-suppress UnresolvableInclude */
        $config = require $path->path();

        if ($config instanceof Config\ConfigBuilder) {
            $config = $config->build();
        }

        if (!$config instanceof Config) {
            // sprintf instead of a single-quoted string, so the file path is actually rendered in the message.
            throw new InvalidArgumentException(\sprintf(
                'File "%s" does not return instance of "%s" or "%s".',
                $path->path(),
                Config::class,
                Config\ConfigBuilder::class
            ));
        }

        return $config;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

declare(strict_types=1);

namespace Flow\CLI\Tests\Integration\Options;

use Flow\CLI\Options\ConfigOption;
use Flow\ETL\Adapter\Elasticsearch\Tests\Integration\TestCase;
use Flow\ETL\Config;
use Symfony\Component\Console\Exception\InvalidArgumentException;
use Symfony\Component\Console\Input\{ArrayInput, InputDefinition, InputOption};

/**
 * Covers ConfigOption: loading a config from a file, falling back to the default config
 * and failing on a missing file.
 *
 * Extends PHPUnit's TestCase directly (by fully qualified name) so the CLI test does not
 * depend on the Elasticsearch adapter's integration TestCase.
 */
final class ConfigOptionTest extends \PHPUnit\Framework\TestCase
{
    public function test_getting_config_from_option() : void
    {
        $input = new ArrayInput(
            ['--config' => __DIR__ . '/Fixtures/.flow.config.php'],
            $this->definition()
        );

        $config = (new ConfigOption('config'))->get($input);

        self::assertInstanceOf(Config::class, $config);
        self::assertSame('execution-id', $config->id());
    }

    public function test_getting_default_config() : void
    {
        // No --config passed: the option resolves to null and the default config is used.
        $config = (new ConfigOption('config'))->get(new ArrayInput([], $this->definition()));

        self::assertInstanceOf(Config::class, $config);
        self::assertNotSame('execution-id', $config->id());
    }

    public function test_throwing_exception_when_config_file_does_not_exist() : void
    {
        $this->expectException(InvalidArgumentException::class);

        (new ConfigOption('config'))->get(
            new ArrayInput(['--config' => 'non-existing-file.php'], $this->definition())
        );
    }

    /**
     * Input definition with the `config` option declared in the same mode (VALUE_REQUIRED)
     * that the ConfigOptions trait uses when registering it on commands.
     */
    private function definition() : InputDefinition
    {
        return new InputDefinition([
            new InputOption('config', null, InputOption::VALUE_REQUIRED),
        ]);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php

use function Flow\ETL\DSL\config_builder;

// Fixture returning a ConfigBuilder with a fixed id, so tests can recognise the loaded config.
return config_builder()->id('execution-id');
1 change: 1 addition & 0 deletions src/lib/filesystem/src/Flow/Filesystem/Path.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ public static function realpath(string $path, array|Options $options = []) : sel
if (\is_array($urlParts) && \array_key_exists('scheme', $urlParts) && $urlParts['scheme'] !== 'file') {
return new self($path, $options);
}

$realPath = $path;

if ($realPath[0] === '~') {
Expand Down
Loading

0 comments on commit 208bb8c

Please sign in to comment.