From 3dd07cda3f14c2a1131d18a004370c6aeb5d842f Mon Sep 17 00:00:00 2001 From: Willi Date: Tue, 10 Sep 2024 17:26:58 +0530 Subject: [PATCH 1/9] Masks secrets in traces. --- dlt/pipeline/trace.py | 10 +++++++--- tests/pipeline/test_pipeline_trace.py | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dlt/pipeline/trace.py b/dlt/pipeline/trace.py index 2f857e5fd5..8f4a79303c 100644 --- a/dlt/pipeline/trace.py +++ b/dlt/pipeline/trace.py @@ -35,6 +35,7 @@ TRACE_ENGINE_VERSION = 1 TRACE_FILE_NAME = "trace.pickle" +MASKED_SECRET = "*****" # @dataclasses.dataclass(init=True) @@ -54,7 +55,10 @@ def asdict(self) -> StrAny: return {k: v for k, v in self._asdict().items() if k not in ("value", "default_value")} def asstr(self, verbosity: int = 0) -> str: - return f"{self.key}->{self.value} in {'.'.join(self.sections)} by {self.provider_name}" + return ( + f"{self.key}->{MASKED_SECRET if self.is_secret_hint else self.value } in" + f" {'.'.join(self.sections)} by {self.provider_name}" + ) def __str__(self) -> str: return self.asstr(verbosity=0) @@ -280,8 +284,8 @@ def end_trace_step( resolved_values = map( lambda v: SerializableResolvedValueTrace( v.key, - v.value, - v.default_value, + MASKED_SECRET if is_secret_hint(v.hint) else v.value, + MASKED_SECRET if is_secret_hint(v.hint) else v.default_value, is_secret_hint(v.hint), v.sections, v.provider_name, diff --git a/tests/pipeline/test_pipeline_trace.py b/tests/pipeline/test_pipeline_trace.py index 9c12519a89..039cdfc1f3 100644 --- a/tests/pipeline/test_pipeline_trace.py +++ b/tests/pipeline/test_pipeline_trace.py @@ -28,6 +28,7 @@ PipelineTrace, SerializableResolvedValueTrace, load_trace, + MASKED_SECRET, ) from dlt.pipeline.track import slack_notify_load_success from dlt.extract import DltResource, DltSource @@ -115,16 +116,19 @@ def data(): assert resolved.is_secret_hint is False assert resolved.default_value is None assert resolved.provider_name == "config.toml" - # dictionaries are not returned anymore + # dictionaries are not returned anymore, secrets are masked resolved = _find_resolved_value(trace.resolved_config_values, "credentials", []) assert resolved is None or isinstance(resolved.value, str) resolved = _find_resolved_value(trace.resolved_config_values, "secret_value", []) assert resolved.is_secret_hint is True - assert resolved.value == "2137" - assert resolved.default_value == "123" + assert resolved.value != "2137" + assert resolved.value == MASKED_SECRET, "credential is not masked" + assert resolved.default_value != "123" + assert resolved.default_value == MASKED_SECRET, "credential is not masked" resolved = _find_resolved_value(trace.resolved_config_values, "credentials", ["databricks"]) assert resolved.is_secret_hint is True - assert resolved.value == databricks_creds + assert resolved.value != databricks_creds + assert resolved.value == MASKED_SECRET, "credential is not masked" assert_trace_serializable(trace) # activate pipeline because other was running in assert trace p.activate() From 317c8105a77948d92849e82e40958992a82192eb Mon Sep 17 00:00:00 2001 From: Willi Date: Tue, 10 Sep 2024 18:53:01 +0530 Subject: [PATCH 2/9] tests that secrets are masked in stringified trace --- tests/pipeline/test_pipeline_trace.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/pipeline/test_pipeline_trace.py b/tests/pipeline/test_pipeline_trace.py index 039cdfc1f3..b7e1a5ef46 100644 --- a/tests/pipeline/test_pipeline_trace.py +++ b/tests/pipeline/test_pipeline_trace.py @@ -130,6 +130,13 @@ def data(): assert resolved.value != databricks_creds assert resolved.value == MASKED_SECRET, "credential is not masked" assert_trace_serializable(trace) + + trace_string = trace.asstr(verbosity=1) + assert "2137" not in trace_string + assert databricks_creds not in trace_string + assert f"credentials->{MASKED_SECRET}" in trace_string + assert f"secret_value->{MASKED_SECRET}" in trace_string + # activate pipeline because other was running in assert trace p.activate() From 57aa97cd30258dcfbd71c97cc6666a6e8ac792e6 Mon Sep 17 00:00:00 2001 From: Willi Date: Tue, 10 Sep 2024 19:36:50 +0530 Subject: [PATCH 3/9] generates secrets in deployments from dlt.secrets provider instead of pipeline trace --- dlt/cli/deploy_command_helpers.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index 5fe46415dd..86c91e93af 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -14,7 +14,7 @@ import dlt from dlt.common import git -from dlt.common.configuration.exceptions import LookupTrace +from dlt.common.configuration.exceptions import LookupTrace, ConfigFieldMissingException from dlt.common.configuration.providers import ConfigTomlProvider, EnvironProvider from dlt.common.git import get_origin, get_repo, Repo from dlt.common.configuration.specs.run_configuration import get_default_pipeline_name @@ -201,8 +201,14 @@ def _echo_secrets(self) -> None: for s_v in self.secret_envs: fmt.secho("Name:", fg="green") fmt.echo(fmt.bold(self.env_prov.get_key_name(s_v.key, *s_v.sections))) - fmt.secho("Secret:", fg="green") - fmt.echo(s_v.value) + try: + secret_value = dlt.secrets[self.env_prov.get_key_name(s_v.key, *s_v.sections)] + fmt.secho("Secret:", fg="green") + fmt.echo(secret_value) + except ConfigFieldMissingException: + fmt.secho( + "Not found. See https://dlthub.com/docs/general-usage/credentials", fg="red" + ) fmt.echo() def _echo_envs(self) -> None: From 6ae763c2188a569cf83e3b1b9526050940760708 Mon Sep 17 00:00:00 2001 From: Willi Date: Wed, 11 Sep 2024 15:55:58 +0530 Subject: [PATCH 4/9] corrects masking and looks up secret value in dlt.secrets --- dlt/cli/deploy_command_helpers.py | 6 ++++-- dlt/common/configuration/providers/environ.py | 4 ++++ dlt/pipeline/trace.py | 17 ++++++++++++----- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index 86c91e93af..e8d56f4fac 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -202,15 +202,17 @@ def _echo_secrets(self) -> None: fmt.secho("Name:", fg="green") fmt.echo(fmt.bold(self.env_prov.get_key_name(s_v.key, *s_v.sections))) try: - secret_value = dlt.secrets[self.env_prov.get_key_name(s_v.key, *s_v.sections)] fmt.secho("Secret:", fg="green") - fmt.echo(secret_value) + fmt.echo(self._lookup_secret_value(s_v)) except ConfigFieldMissingException: fmt.secho( "Not found. See https://dlthub.com/docs/general-usage/credentials", fg="red" ) fmt.echo() + def _lookup_secret_value(self, trace: LookupTrace) -> Any: + return dlt.secrets[self.env_prov.get_secret_key_name(trace.key, *trace.sections)] + def _echo_envs(self) -> None: for v in self.envs: fmt.secho("Name:", fg="green") diff --git a/dlt/common/configuration/providers/environ.py b/dlt/common/configuration/providers/environ.py index f83ea9a24d..c9580ab33b 100644 --- a/dlt/common/configuration/providers/environ.py +++ b/dlt/common/configuration/providers/environ.py @@ -14,6 +14,10 @@ class EnvironProvider(ConfigProvider): def get_key_name(key: str, *sections: str) -> str: return get_key_name(key, "__", *sections).upper() + @staticmethod + def get_secret_key_name(key: str, *sections: str) -> str: + return get_key_name(key, ".", *sections) + @property def name(self) -> str: return "Environment Variables" diff --git a/dlt/pipeline/trace.py b/dlt/pipeline/trace.py index 8f4a79303c..d81af8bbdb 100644 --- a/dlt/pipeline/trace.py +++ b/dlt/pipeline/trace.py @@ -3,14 +3,14 @@ import os import pickle import datetime # noqa: 251 -from typing import Any, List, NamedTuple, Optional, Protocol, Sequence +from typing import Any, List, NamedTuple, Optional, Protocol, Sequence, Union import humanize from dlt.common.pendulum import pendulum from dlt.common.configuration import is_secret_hint from dlt.common.configuration.exceptions import ContextDefaultCannotBeCreated from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.configuration.utils import _RESOLVED_TRACES +from dlt.common.configuration.utils import _RESOLVED_TRACES, ResolvedValueTrace from dlt.common.configuration.container import Container from dlt.common.exceptions import ExceptionTrace, ResourceNameNotAvailable from dlt.common.logger import suppress_and_warn @@ -56,7 +56,7 @@ def asdict(self) -> StrAny: def asstr(self, verbosity: int = 0) -> str: return ( - f"{self.key}->{MASKED_SECRET if self.is_secret_hint else self.value } in" + f"{self.key}->{_mask_secret(self.value) if self.is_secret_hint else self.value } in" f" {'.'.join(self.sections)} by {self.provider_name}" ) @@ -284,8 +284,8 @@ def end_trace_step( resolved_values = map( lambda v: SerializableResolvedValueTrace( v.key, - MASKED_SECRET if is_secret_hint(v.hint) else v.value, - MASKED_SECRET if is_secret_hint(v.hint) else v.default_value, + _mask_secret(v.value) if is_secret_hint(v.hint) else v.value, + _mask_secret(v.default_value) if is_secret_hint(v.hint) else v.default_value, is_secret_hint(v.hint), v.sections, v.provider_name, @@ -302,6 +302,13 @@ def end_trace_step( return trace +def _mask_secret(trace_item_value: Any) -> Any: + if isinstance(trace_item_value, dict): + return {k: MASKED_SECRET for k in trace_item_value} + else: + return MASKED_SECRET + + def end_trace( trace: PipelineTrace, pipeline: SupportsPipeline, trace_path: str, send_state: bool ) -> PipelineTrace: From acf64042152ede5b7f6877cd4cb22dbcfe987722 Mon Sep 17 00:00:00 2001 From: Willi Date: Wed, 11 Sep 2024 19:22:38 +0530 Subject: [PATCH 5/9] removes secret masking and replaces credentials with None. --- dlt/cli/deploy_command_helpers.py | 5 +++-- dlt/common/configuration/providers/environ.py | 4 ---- dlt/pipeline/trace.py | 17 +++-------------- tests/pipeline/test_pipeline_trace.py | 16 +++------------- 4 files changed, 9 insertions(+), 33 deletions(-) diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index e8d56f4fac..66df243af1 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -16,6 +16,7 @@ from dlt.common import git from dlt.common.configuration.exceptions import LookupTrace, ConfigFieldMissingException from dlt.common.configuration.providers import ConfigTomlProvider, EnvironProvider +from dlt.common.configuration.providers.toml import BaseDocProvider from dlt.common.git import get_origin, get_repo, Repo from dlt.common.configuration.specs.run_configuration import get_default_pipeline_name from dlt.common.typing import StrAny @@ -200,7 +201,7 @@ def _update_envs(self, trace: PipelineTrace) -> None: def _echo_secrets(self) -> None: for s_v in self.secret_envs: fmt.secho("Name:", fg="green") - fmt.echo(fmt.bold(self.env_prov.get_key_name(s_v.key, *s_v.sections))) + fmt.echo(fmt.bold(BaseDocProvider.get_key_name(s_v.key, *s_v.sections))) try: fmt.secho("Secret:", fg="green") fmt.echo(self._lookup_secret_value(s_v)) @@ -211,7 +212,7 @@ def _echo_secrets(self) -> None: fmt.echo() def _lookup_secret_value(self, trace: LookupTrace) -> Any: - return dlt.secrets[self.env_prov.get_secret_key_name(trace.key, *trace.sections)] + return dlt.secrets[BaseDocProvider.get_key_name(trace.key, *trace.sections)] def _echo_envs(self) -> None: for v in self.envs: diff --git a/dlt/common/configuration/providers/environ.py b/dlt/common/configuration/providers/environ.py index c9580ab33b..f83ea9a24d 100644 --- a/dlt/common/configuration/providers/environ.py +++ b/dlt/common/configuration/providers/environ.py @@ -14,10 +14,6 @@ class EnvironProvider(ConfigProvider): def get_key_name(key: str, *sections: str) -> str: return get_key_name(key, "__", *sections).upper() - @staticmethod - def get_secret_key_name(key: str, *sections: str) -> str: - return get_key_name(key, ".", *sections) - @property def name(self) -> str: return "Environment Variables" diff --git a/dlt/pipeline/trace.py b/dlt/pipeline/trace.py index d81af8bbdb..c47926e5f4 100644 --- a/dlt/pipeline/trace.py +++ b/dlt/pipeline/trace.py @@ -35,7 +35,6 @@ TRACE_ENGINE_VERSION = 1 TRACE_FILE_NAME = "trace.pickle" -MASKED_SECRET = "*****" # @dataclasses.dataclass(init=True) @@ -55,10 +54,7 @@ def asdict(self) -> StrAny: return {k: v for k, v in self._asdict().items() if k not in ("value", "default_value")} def asstr(self, verbosity: int = 0) -> str: - return ( - f"{self.key}->{_mask_secret(self.value) if self.is_secret_hint else self.value } in" - f" {'.'.join(self.sections)} by {self.provider_name}" - ) + return f"{self.key}->{self.value} in {'.'.join(self.sections)} by {self.provider_name}" def __str__(self) -> str: return self.asstr(verbosity=0) @@ -284,8 +280,8 @@ def end_trace_step( resolved_values = map( lambda v: SerializableResolvedValueTrace( v.key, - _mask_secret(v.value) if is_secret_hint(v.hint) else v.value, - _mask_secret(v.default_value) if is_secret_hint(v.hint) else v.default_value, + None if is_secret_hint(v.hint) else v.value, + None if is_secret_hint(v.hint) else v.default_value, is_secret_hint(v.hint), v.sections, v.provider_name, @@ -302,13 +298,6 @@ def end_trace_step( return trace -def _mask_secret(trace_item_value: Any) -> Any: - if isinstance(trace_item_value, dict): - return {k: MASKED_SECRET for k in trace_item_value} - else: - return MASKED_SECRET - - def end_trace( trace: PipelineTrace, pipeline: SupportsPipeline, trace_path: str, send_state: bool ) -> PipelineTrace: diff --git a/tests/pipeline/test_pipeline_trace.py b/tests/pipeline/test_pipeline_trace.py index b7e1a5ef46..433913851f 100644 --- a/tests/pipeline/test_pipeline_trace.py +++ b/tests/pipeline/test_pipeline_trace.py @@ -28,7 +28,6 @@ PipelineTrace, SerializableResolvedValueTrace, load_trace, - MASKED_SECRET, ) from dlt.pipeline.track import slack_notify_load_success from dlt.extract import DltResource, DltSource @@ -121,22 +120,13 @@ def data(): assert resolved is None or isinstance(resolved.value, str) resolved = _find_resolved_value(trace.resolved_config_values, "secret_value", []) assert resolved.is_secret_hint is True - assert resolved.value != "2137" - assert resolved.value == MASKED_SECRET, "credential is not masked" - assert resolved.default_value != "123" - assert resolved.default_value == MASKED_SECRET, "credential is not masked" + assert resolved.value is None, "Credential is not masked" + assert resolved.default_value is None, "Credential is not masked" resolved = _find_resolved_value(trace.resolved_config_values, "credentials", ["databricks"]) assert resolved.is_secret_hint is True - assert resolved.value != databricks_creds - assert resolved.value == MASKED_SECRET, "credential is not masked" + assert resolved.value is None, "Credential is not masked" assert_trace_serializable(trace) - trace_string = trace.asstr(verbosity=1) - assert "2137" not in trace_string - assert databricks_creds not in trace_string - assert f"credentials->{MASKED_SECRET}" in trace_string - assert f"secret_value->{MASKED_SECRET}" in trace_string - # activate pipeline because other was running in assert trace p.activate() From f2f22e06568db2ca67be27b1e2c78b4cb7bfc2b9 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 14 Sep 2024 13:06:26 +0200 Subject: [PATCH 6/9] fixes deploy help when deploy type missing --- dlt/cli/_dlt.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dlt/cli/_dlt.py b/dlt/cli/_dlt.py index 7513ce7c64..0a4a86b9de 100644 --- a/dlt/cli/_dlt.py +++ b/dlt/cli/_dlt.py @@ -617,13 +617,17 @@ def main() -> int: elif args.command == "deploy": try: deploy_args = vars(args) - return deploy_command_wrapper( - pipeline_script_path=deploy_args.pop("pipeline_script_path"), - deployment_method=deploy_args.pop("deployment_method"), - repo_location=deploy_args.pop("location"), - branch=deploy_args.pop("branch"), - **deploy_args, - ) + if deploy_args.get("deployment_method") is None: + print_help(deploy_cmd) + return -1 + else: + return deploy_command_wrapper( + pipeline_script_path=deploy_args.pop("pipeline_script_path"), + deployment_method=deploy_args.pop("deployment_method"), + repo_location=deploy_args.pop("location"), + branch=deploy_args.pop("branch"), + **deploy_args, + ) except (NameError, KeyError): fmt.warning( "Please install additional command line dependencies to use deploy command:" From ccf7c2c7d11ceca688c6fd41368c264ecfcd95bc Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 14 Sep 2024 13:06:53 +0200 Subject: [PATCH 7/9] fixes always_choose restore defaults in echo --- dlt/cli/echo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dlt/cli/echo.py b/dlt/cli/echo.py index bd9cf24f64..302b74b076 100644 --- a/dlt/cli/echo.py +++ b/dlt/cli/echo.py @@ -15,9 +15,11 @@ def always_choose(always_choose_default: bool, always_choose_value: Any) -> Iter _always_choose_value = ALWAYS_CHOOSE_VALUE ALWAYS_CHOOSE_DEFAULT = always_choose_default ALWAYS_CHOOSE_VALUE = always_choose_value - yield - ALWAYS_CHOOSE_DEFAULT = _always_choose_default - ALWAYS_CHOOSE_VALUE = _always_choose_value + try: + yield + finally: + ALWAYS_CHOOSE_DEFAULT = _always_choose_default + ALWAYS_CHOOSE_VALUE = _always_choose_value echo = click.echo From fc056b551f661939090d0e65ea25613faac51129 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 14 Sep 2024 13:07:14 +0200 Subject: [PATCH 8/9] tests deploy command with and without secrets --- dlt/cli/deploy_command_helpers.py | 18 +++++++--- tests/cli/test_deploy_command.py | 52 ++++++++++++++++------------- tests/common/configuration/utils.py | 17 ++-------- tests/utils.py | 24 ++++++++++++- 4 files changed, 69 insertions(+), 42 deletions(-) diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index 66df243af1..e931353153 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -199,16 +199,26 @@ def _update_envs(self, trace: PipelineTrace) -> None: # fmt.echo(f"{resolved_value.key} in {resolved_value.sections} moved to CONFIG") def _echo_secrets(self) -> None: + display_info = False for s_v in self.secret_envs: fmt.secho("Name:", fg="green") - fmt.echo(fmt.bold(BaseDocProvider.get_key_name(s_v.key, *s_v.sections))) + fmt.echo(fmt.bold(self.env_prov.get_key_name(s_v.key, *s_v.sections))) try: fmt.secho("Secret:", fg="green") fmt.echo(self._lookup_secret_value(s_v)) except ConfigFieldMissingException: - fmt.secho( - "Not found. See https://dlthub.com/docs/general-usage/credentials", fg="red" - ) + fmt.secho("Not found in the available secrets.", fg="red") + display_info = True + fmt.echo() + if display_info: + fmt.warning( + "We could not read and display some secrets. Starting from 1.0 version of dlt," + " those are not stored in the traces. Instead we are trying to read them from the" + " available configuration ie. secrets.toml file. Please run the deploy command from" + " the same working directory you ran your pipeline script. If you pass the" + " credentials in code we will not be able to display them here. See" + " https://dlthub.com/docs/general-usage/credentials" + ) fmt.echo() def _lookup_secret_value(self, trace: LookupTrace) -> Any: diff --git a/tests/cli/test_deploy_command.py b/tests/cli/test_deploy_command.py index 685921ca6e..1a4ad3fe74 100644 --- a/tests/cli/test_deploy_command.py +++ b/tests/cli/test_deploy_command.py @@ -19,7 +19,7 @@ from dlt.pipeline.exceptions import CannotRestorePipelineException from dlt.cli.deploy_command_helpers import get_schedule_description -from tests.utils import TEST_STORAGE_ROOT, test_storage +from tests.utils import TEST_STORAGE_ROOT, reset_providers, test_storage DEPLOY_PARAMS = [ @@ -134,28 +134,34 @@ def test_deploy_command( test_storage.delete(".dlt/secrets.toml") test_storage.atomic_rename(".dlt/secrets.toml.ci", ".dlt/secrets.toml") - # this time script will run - venv.run_script("debug_pipeline.py") - with echo.always_choose(False, always_choose_value=True): - with io.StringIO() as buf, contextlib.redirect_stdout(buf): - deploy_command.deploy_command( - "debug_pipeline.py", - deployment_method, - deploy_command.COMMAND_DEPLOY_REPO_LOCATION, - **deployment_args - ) - _out = buf.getvalue() - print(_out) - # make sure our secret and config values are all present - assert "api_key_9x3ehash" in _out - assert "dlt_data" in _out - if "schedule" in deployment_args: - assert get_schedule_description(deployment_args["schedule"]) - secrets_format = deployment_args.get("secrets_format", "env") - if secrets_format == "env": - assert "API_KEY" in _out - else: - assert "api_key = " in _out + # reset toml providers to (1) CWD (2) non existing dir so API_KEY is not found + for project_dir, api_key in [ + (None, "api_key_9x3ehash"), + (".", "Not found in the available secrets"), + ]: + with reset_providers(project_dir=project_dir): + # this time script will run + venv.run_script("debug_pipeline.py") + with echo.always_choose(False, always_choose_value=True): + with io.StringIO() as buf, contextlib.redirect_stdout(buf): + deploy_command.deploy_command( + "debug_pipeline.py", + deployment_method, + deploy_command.COMMAND_DEPLOY_REPO_LOCATION, + **deployment_args + ) + _out = buf.getvalue() + print(_out) + # make sure our secret and config values are all present + assert api_key in _out + assert "dlt_data" in _out + if "schedule" in deployment_args: + assert get_schedule_description(deployment_args["schedule"]) + secrets_format = deployment_args.get("secrets_format", "env") + if secrets_format == "env": + assert "API_KEY" in _out + else: + assert "api_key = " in _out # non existing script name with pytest.raises(NoSuchPathError): diff --git a/tests/common/configuration/utils.py b/tests/common/configuration/utils.py index d7a46e9084..677ec3d329 100644 --- a/tests/common/configuration/utils.py +++ b/tests/common/configuration/utils.py @@ -20,15 +20,11 @@ from dlt.common.configuration import configspec from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration from dlt.common.configuration.container import Container -from dlt.common.configuration.providers import ( - ConfigProvider, - EnvironProvider, - ConfigTomlProvider, - SecretsTomlProvider, -) +from dlt.common.configuration.providers import ConfigProvider, EnvironProvider from dlt.common.configuration.utils import get_resolved_traces from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext from dlt.common.typing import TSecretValue, StrAny +from tests.utils import _reset_providers @configspec @@ -118,14 +114,7 @@ def env_provider() -> Iterator[ConfigProvider]: @pytest.fixture def toml_providers() -> Iterator[ConfigProvidersContext]: - pipeline_root = "./tests/common/cases/configuration/.dlt" - ctx = ConfigProvidersContext() - ctx.providers.clear() - ctx.add_provider(EnvironProvider()) - ctx.add_provider(SecretsTomlProvider(project_dir=pipeline_root)) - ctx.add_provider(ConfigTomlProvider(project_dir=pipeline_root)) - with Container().injectable_context(ctx): - yield ctx + yield from _reset_providers("./tests/common/cases/configuration/.dlt") class MockProvider(ConfigProvider): diff --git a/tests/utils.py b/tests/utils.py index e90ac5a626..813deea69f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,3 +1,4 @@ +import contextlib import multiprocessing import os import platform @@ -12,7 +13,12 @@ import dlt from dlt.common.configuration.container import Container -from dlt.common.configuration.providers import DictionaryProvider +from dlt.common.configuration.providers import ( + DictionaryProvider, + EnvironProvider, + SecretsTomlProvider, + ConfigTomlProvider, +) from dlt.common.configuration.resolve import resolve_configuration from dlt.common.configuration.specs import RunConfiguration from dlt.common.configuration.specs.config_providers_context import ( @@ -382,3 +388,19 @@ def assert_query_data( # the second is load id if info: assert row[1] in info.loads_ids + + +@contextlib.contextmanager +def reset_providers(project_dir: str) -> Iterator[ConfigProvidersContext]: + """Context manager injecting standard set of providers where toml providers are initialized from `project_dir`""" + return _reset_providers(project_dir) + + +def _reset_providers(project_dir: str) -> Iterator[ConfigProvidersContext]: + ctx = ConfigProvidersContext() + ctx.providers.clear() + ctx.add_provider(EnvironProvider()) + ctx.add_provider(SecretsTomlProvider(project_dir=project_dir)) + ctx.add_provider(ConfigTomlProvider(project_dir=project_dir)) + with Container().injectable_context(ctx): + yield ctx From 9d34a76196ea7749f0b11aedab1160ed44ca6c82 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Sat, 14 Sep 2024 14:39:14 +0200 Subject: [PATCH 9/9] fixes dumping secret vals for toml in deploy --- dlt/cli/deploy_command.py | 9 ++----- dlt/cli/deploy_command_helpers.py | 40 +++++++++++++++++++++++-------- tests/cli/test_deploy_command.py | 2 +- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/dlt/cli/deploy_command.py b/dlt/cli/deploy_command.py index 5a25752a6d..b48dffa881 100644 --- a/dlt/cli/deploy_command.py +++ b/dlt/cli/deploy_command.py @@ -4,7 +4,7 @@ from enum import Enum from importlib.metadata import version as pkg_version -from dlt.common.configuration.providers import SECRETS_TOML, SECRETS_TOML_KEY, StringTomlProvider +from dlt.common.configuration.providers import SECRETS_TOML, SECRETS_TOML_KEY from dlt.common.configuration.paths import make_dlt_settings_path from dlt.common.configuration.utils import serialize_value from dlt.common.git import is_dirty @@ -393,12 +393,7 @@ def _echo_instructions(self, *args: Optional[Any]) -> None: f" {SECRETS_TOML_KEY} variable." ) fmt.echo() - toml_provider = StringTomlProvider("") - for s_v in self.secret_envs: - toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections) - for s_v in self.envs: - toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections) - fmt.echo(toml_provider.dumps()) + self._echo_secrets_toml() else: raise ValueError(self.secrets_format) diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index e931353153..2afbfbf46e 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -16,7 +16,7 @@ from dlt.common import git from dlt.common.configuration.exceptions import LookupTrace, ConfigFieldMissingException from dlt.common.configuration.providers import ConfigTomlProvider, EnvironProvider -from dlt.common.configuration.providers.toml import BaseDocProvider +from dlt.common.configuration.providers.toml import BaseDocProvider, StringTomlProvider from dlt.common.git import get_origin, get_repo, Repo from dlt.common.configuration.specs.run_configuration import get_default_pipeline_name from dlt.common.typing import StrAny @@ -207,20 +207,40 @@ def _echo_secrets(self) -> None: fmt.secho("Secret:", fg="green") fmt.echo(self._lookup_secret_value(s_v)) except ConfigFieldMissingException: - fmt.secho("Not found in the available secrets.", fg="red") + fmt.secho("please set me up!", fg="red") display_info = True fmt.echo() if display_info: - fmt.warning( - "We could not read and display some secrets. Starting from 1.0 version of dlt," - " those are not stored in the traces. Instead we are trying to read them from the" - " available configuration ie. secrets.toml file. Please run the deploy command from" - " the same working directory you ran your pipeline script. If you pass the" - " credentials in code we will not be able to display them here. See" - " https://dlthub.com/docs/general-usage/credentials" - ) + self._display_missing_secret_info() + fmt.echo() + + def _echo_secrets_toml(self) -> None: + display_info = False + toml_provider = StringTomlProvider("") + for s_v in self.secret_envs: + try: + secret_value = self._lookup_secret_value(s_v) + except ConfigFieldMissingException: + secret_value = "please set me up!" + display_info = True + toml_provider.set_value(s_v.key, secret_value, None, *s_v.sections) + for s_v in self.envs: + toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections) + fmt.echo(toml_provider.dumps()) + if display_info: + self._display_missing_secret_info() fmt.echo() + def _display_missing_secret_info(self) -> None: + fmt.warning( + "We could not read and display some secrets. Starting from 1.0 version of dlt," + " those are not stored in the traces. Instead we are trying to read them from the" + " available configuration ie. secrets.toml file. Please run the deploy command from" + " the same working directory you ran your pipeline script. If you pass the" + " credentials in code we will not be able to display them here. See" + " https://dlthub.com/docs/general-usage/credentials" + ) + def _lookup_secret_value(self, trace: LookupTrace) -> Any: return dlt.secrets[BaseDocProvider.get_key_name(trace.key, *trace.sections)] diff --git a/tests/cli/test_deploy_command.py b/tests/cli/test_deploy_command.py index 1a4ad3fe74..78a14ee914 100644 --- a/tests/cli/test_deploy_command.py +++ b/tests/cli/test_deploy_command.py @@ -137,7 +137,7 @@ def test_deploy_command( # reset toml providers to (1) CWD (2) non existing dir so API_KEY is not found for project_dir, api_key in [ (None, "api_key_9x3ehash"), - (".", "Not found in the available secrets"), + (".", "please set me up!"), ]: with reset_providers(project_dir=project_dir): # this time script will run