From f81876b7e4433ab35475b68d23b12facc4702e98 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 08:17:43 -0700 Subject: [PATCH 01/13] Update core.py --- src/fidesctl/cli/commands/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 4911b05f0f..86dc77f3e2 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -146,6 +146,7 @@ def sync(ctx: click.Context, manifests_dir: str) -> None: config = ctx.obj["CONFIG"] # Do this to validate the manifests since they won't get parsed during the sync process _parse.parse(manifests_dir) + # Check for any uncomitted files _sync.sync( url=config.cli.server_url, manifests_dir=manifests_dir, From 7ed80f2a731ac5a316da398722cd5eccc699bf21 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 23:14:46 +0700 Subject: [PATCH 02/13] move gitpython from a dev requirement to a full requirement --- dev-requirements.txt | 1 - requirements.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 2c1c4a4905..231fb36607 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,6 @@ black==22.3.0 ipython isort -GitPython==3.1 mypy==0.910 nox>=2022 packaging==21.3 diff --git a/requirements.txt b/requirements.txt index eacc68689e..f93c57083a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ cryptography>=36 deepdiff==5.8.0 fideslang==0.9.0 fideslog==1.1.5 +GitPython==3.1 loguru>=0.5,<0.6 openpyxl==3.0.9 pandas==1.4 From 9dac684e92503a3aa338d5ea01b138fce1ac4042 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 23:15:36 +0700 Subject: [PATCH 03/13] re-add GitPython as a dev-requirement since it is required by nox for builds --- dev-requirements.txt | 1 + src/fidesapi/routes/crud.py | 3 ++- src/fidesctl/cli/commands/core.py | 3 ++- src/fidesctl/core/utils.py | 16 ++++++++++++++++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 231fb36607..2c1c4a4905 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,7 @@ black==22.3.0 ipython isort +GitPython==3.1 mypy==0.910 nox>=2022 packaging==21.3 diff --git a/src/fidesapi/routes/crud.py b/src/fidesapi/routes/crud.py index 5dfa499417..0dd229208b 100644 --- a/src/fidesapi/routes/crud.py +++ b/src/fidesapi/routes/crud.py @@ -43,6 +43,7 @@ async def create( return await create_resource(sql_model, resource.dict()) @router.get("/", response_model=List[resource_model], name="List") + @router.get("", response_model=List[resource_model], name="List") async def ls( # pylint: disable=invalid-name resource_type: str = get_resource_type(router), ) -> List: @@ -50,7 +51,7 @@ async def ls( # pylint: disable=invalid-name sql_model = sql_model_map[resource_type] return await list_resource(sql_model) - @router.get("/{fides_key}", response_model=resource_model) + @router.get("/{fides_key}/", response_model=resource_model) async def get( fides_key: str, resource_type: str = get_resource_type(router) ) -> Dict: diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 86dc77f3e2..2e553dbbd6 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -13,6 +13,7 @@ from fidesctl.core import evaluate as _evaluate from fidesctl.core import parse as _parse from fidesctl.core import sync as _sync +from fidesctl.core.utils import check_if_git_is_dirty @click.command() @@ -146,7 +147,7 @@ def sync(ctx: click.Context, manifests_dir: str) -> None: config = ctx.obj["CONFIG"] # Do this to validate the manifests since they won't get parsed during the sync process _parse.parse(manifests_dir) - # Check for any uncomitted files + check_if_git_is_dirty(manifests_dir) _sync.sync( url=config.cli.server_url, manifests_dir=manifests_dir, diff --git a/src/fidesctl/core/utils.py b/src/fidesctl/core/utils.py index 21c35908cb..f5ebbaca3a 100644 --- a/src/fidesctl/core/utils.py +++ b/src/fidesctl/core/utils.py @@ -12,6 +12,7 @@ import sqlalchemy from fideslang.models import DatasetField, FidesModel from fideslang.validation import FidesValidationError +from git.repo import Repo from sqlalchemy.engine import Engine logger = logging.getLogger("server_api") @@ -119,3 +120,18 @@ def sanitize_fides_key(proposed_fides_key: str) -> str: """ sanitized_fides_key = re.sub(r"[^a-zA-Z0-9_.-]", "_", proposed_fides_key) return sanitized_fides_key + + +def check_if_git_is_dirty(dir_to_check: str = ".") -> bool: + """ + Checks to see if the local git repo is dirty. + Can also specify a directory to check for dirtiness. + """ + repo = Repo() + git_session = repo.git() + + dirty_phrases = ["Changes not staged for commit"] + git_status = git_session.status(dir_to_check).split("\n") + comparison = [phrase in git_status for phrase in dirty_phrases] + is_dirty = True in comparison + return is_dirty From 46531fb08d143d3701c979993435ea1357476386 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 23:16:45 +0700 Subject: [PATCH 04/13] remove accidental change --- src/fidesapi/routes/crud.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fidesapi/routes/crud.py b/src/fidesapi/routes/crud.py index 0dd229208b..5dfa499417 100644 --- a/src/fidesapi/routes/crud.py +++ b/src/fidesapi/routes/crud.py @@ -43,7 +43,6 @@ async def create( return await create_resource(sql_model, resource.dict()) @router.get("/", response_model=List[resource_model], name="List") - @router.get("", response_model=List[resource_model], name="List") async def ls( # pylint: disable=invalid-name resource_type: str = get_resource_type(router), ) -> List: @@ -51,7 +50,7 @@ async def ls( # pylint: disable=invalid-name sql_model = sql_model_map[resource_type] return await list_resource(sql_model) - @router.get("/{fides_key}/", response_model=resource_model) + @router.get("/{fides_key}", response_model=resource_model) async def get( fides_key: str, resource_type: str = get_resource_type(router) ) -> Dict: From e156cc7377a3a1f840642080dc818ae84f9cdab8 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 23:25:49 +0700 Subject: [PATCH 05/13] only throw an error for unstaged commits, not all uncommitted --- src/fidesctl/cli/commands/core.py | 8 +++++--- src/fidesctl/core/utils.py | 11 +++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 2e553dbbd6..70accc0d08 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -7,13 +7,13 @@ manifests_dir_argument, verbose_flag, ) -from fidesctl.cli.utils import pretty_echo, print_divider, with_analytics +from fidesctl.cli.utils import echo_red, pretty_echo, print_divider, with_analytics from fidesctl.core import apply as _apply from fidesctl.core import audit as _audit from fidesctl.core import evaluate as _evaluate from fidesctl.core import parse as _parse from fidesctl.core import sync as _sync -from fidesctl.core.utils import check_if_git_is_dirty +from fidesctl.core.utils import git_is_dirty @click.command() @@ -147,7 +147,9 @@ def sync(ctx: click.Context, manifests_dir: str) -> None: config = ctx.obj["CONFIG"] # Do this to validate the manifests since they won't get parsed during the sync process _parse.parse(manifests_dir) - check_if_git_is_dirty(manifests_dir) + if git_is_dirty(manifests_dir): + echo_red("There are unstaged changes in git, aborting sync!") + raise SystemExit(1) _sync.sync( url=config.cli.server_url, manifests_dir=manifests_dir, diff --git a/src/fidesctl/core/utils.py b/src/fidesctl/core/utils.py index f5ebbaca3a..d90f68744b 100644 --- a/src/fidesctl/core/utils.py +++ b/src/fidesctl/core/utils.py @@ -122,16 +122,15 @@ def sanitize_fides_key(proposed_fides_key: str) -> str: return sanitized_fides_key -def check_if_git_is_dirty(dir_to_check: str = ".") -> bool: +def git_is_dirty(dir_to_check: str = ".") -> bool: """ - Checks to see if the local git repo is dirty. - Can also specify a directory to check for dirtiness. + Checks to see if the local repo has unstaged changes. + Can also specify a directory to check. """ repo = Repo() git_session = repo.git() - dirty_phrases = ["Changes not staged for commit"] + dirty_phrase = "Changes not staged for commit" git_status = git_session.status(dir_to_check).split("\n") - comparison = [phrase in git_status for phrase in dirty_phrases] - is_dirty = True in comparison + is_dirty = dirty_phrase in git_status return is_dirty From b8971772d39476ae44dc18dfe4234f726b3d6f1a Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 23:32:52 +0700 Subject: [PATCH 06/13] add a more helpful error message --- src/fidesctl/cli/commands/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 70accc0d08..2b834bb185 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -148,7 +148,9 @@ def sync(ctx: click.Context, manifests_dir: str) -> None: # Do this to validate the manifests since they won't get parsed during the sync process _parse.parse(manifests_dir) if git_is_dirty(manifests_dir): - echo_red("There are unstaged changes in git, aborting sync!") + echo_red( + f"There are unstaged changes in your manifest directory: '{manifests_dir}' \nAborting sync!" + ) raise SystemExit(1) _sync.sync( url=config.cli.server_url, From 4901bab18bfc66e251f72f79ef5645324e615c58 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 7 Jul 2022 23:33:10 +0700 Subject: [PATCH 07/13] fix the dirty phrase to detect properly --- src/fidesctl/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fidesctl/core/utils.py b/src/fidesctl/core/utils.py index d90f68744b..bbf6f57104 100644 --- a/src/fidesctl/core/utils.py +++ b/src/fidesctl/core/utils.py @@ -130,7 +130,7 @@ def git_is_dirty(dir_to_check: str = ".") -> bool: repo = Repo() git_session = repo.git() - dirty_phrase = "Changes not staged for commit" + dirty_phrase = "Changes not staged for commit:" git_status = git_session.status(dir_to_check).split("\n") is_dirty = dirty_phrase in git_status return is_dirty From 9b2beab94f0b89e25e6af4b8dfcdb9b2a049c4df Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Jul 2022 00:13:49 +0700 Subject: [PATCH 08/13] inject the analytics_id env var into the container, don't write the analytics_id to the toml file if it is set as an env var --- docker-compose.yml | 1 + src/fidesctl/cli/utils.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3741e3f89d..dbb7a16173 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,7 @@ services: FIDESCTL_TEST_MODE: "True" FIDESCTL__CLI__SERVER_HOST: "fidesctl" FIDESCTL__CLI__SERVER_PORT: "8080" + FIDESCTL__CLI__ANALYTICS_ID: ${FIDESCTL__CLI__ANALYTICS_ID} FIDESCTL__API__DATABASE_HOST: "fidesctl-db" fidesctl-ui: diff --git a/src/fidesctl/cli/utils.py b/src/fidesctl/cli/utils.py index ca5332e0a5..2d6a9ad57e 100644 --- a/src/fidesctl/cli/utils.py +++ b/src/fidesctl/cli/utils.py @@ -101,11 +101,18 @@ def check_and_update_analytics_config(ctx: click.Context, config_path: str) -> N user={"analytics_opt_out": ctx.obj["CONFIG"].user.analytics_opt_out} ) - if ctx.obj["CONFIG"].user.analytics_opt_out is False and get_config_from_file( + is_analytics_opt_out = ctx.obj["CONFIG"].user.analytics_opt_out is False + is_analytics_opt_out_file_empty = get_config_from_file( config_path, "cli", "analytics_id", - ) in ("", None): + ) in ("", None) + is_analytics_opt_out_env_var_empty = getenv("FIDESCTL__CLI__ANALYTICS_ID") + if ( + is_analytics_opt_out is False + and is_analytics_opt_out_file_empty + and not is_analytics_opt_out_env_var_empty + ): config_updates.update(cli={"analytics_id": ctx.obj["CONFIG"].cli.analytics_id}) if len(config_updates) > 0: From c5c8e2e5038fa9c76172d759f2b7960252fc2dd4 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Jul 2022 00:19:09 +0700 Subject: [PATCH 09/13] clarify naming for opt_out vars --- src/fidesctl/cli/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fidesctl/cli/utils.py b/src/fidesctl/cli/utils.py index 2d6a9ad57e..bbd3bb6614 100644 --- a/src/fidesctl/cli/utils.py +++ b/src/fidesctl/cli/utils.py @@ -101,17 +101,17 @@ def check_and_update_analytics_config(ctx: click.Context, config_path: str) -> N user={"analytics_opt_out": ctx.obj["CONFIG"].user.analytics_opt_out} ) - is_analytics_opt_out = ctx.obj["CONFIG"].user.analytics_opt_out is False - is_analytics_opt_out_file_empty = get_config_from_file( + is_analytics_opt_out = ctx.obj["CONFIG"].user.analytics_opt_out + is_analytics_opt_out_config_empty = get_config_from_file( config_path, "cli", "analytics_id", ) in ("", None) - is_analytics_opt_out_env_var_empty = getenv("FIDESCTL__CLI__ANALYTICS_ID") + is_analytics_opt_out_env_var_set = getenv("FIDESCTL__CLI__ANALYTICS_ID") if ( - is_analytics_opt_out is False - and is_analytics_opt_out_file_empty - and not is_analytics_opt_out_env_var_empty + not is_analytics_opt_out + and is_analytics_opt_out_config_empty + and not is_analytics_opt_out_env_var_set ): config_updates.update(cli={"analytics_id": ctx.obj["CONFIG"].cli.analytics_id}) From ab2cc896798d5ec603f8848a56a240e4eedf5b9d Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Jul 2022 15:54:20 +0700 Subject: [PATCH 10/13] add tests for the new git_is_dirty function --- tests/core/test_utils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 3934897519..877c7f4a48 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -1,4 +1,5 @@ # pylint: disable=missing-docstring, redefined-outer-name +import os from pathlib import PosixPath from typing import Generator @@ -90,3 +91,25 @@ def test_sanitize_fides_key(fides_key: str, sanitized_fides_key: str) -> None: ) def test_check_fides_key(fides_key: str, sanitized_fides_key: str) -> None: assert sanitized_fides_key == utils.check_fides_key(fides_key) + + +@pytest.mark.unit +class TestGitIsDirty: + """ + These tests can't use the standard pytest tmpdir + because the files need to be within the git repo + to be properly tested. + + They will therefore also break if the real dir + used for testing is deleted. + """ + + def test_not_dirty(self) -> None: + assert not utils.git_is_dirty("tests/data/example_sql/") + + def test_new_file_is_dirty(self) -> None: + test_file = "tests/data/example_sql/new_file.txt" + with open(test_file, "w") as file: + file.write("test file") + assert utils.git_is_dirty() + os.remove(test_file) From aa696fd454f3efedc22439f584c1182b18509db2 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Jul 2022 15:54:46 +0700 Subject: [PATCH 11/13] update the git_is_dirty to also catch new (untracked) files --- src/fidesctl/core/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fidesctl/core/utils.py b/src/fidesctl/core/utils.py index bbf6f57104..65be74f578 100644 --- a/src/fidesctl/core/utils.py +++ b/src/fidesctl/core/utils.py @@ -130,7 +130,7 @@ def git_is_dirty(dir_to_check: str = ".") -> bool: repo = Repo() git_session = repo.git() - dirty_phrase = "Changes not staged for commit:" + dirty_phrases = ["Changes not staged for commit:", "Untracked files:"] git_status = git_session.status(dir_to_check).split("\n") - is_dirty = dirty_phrase in git_status + is_dirty = any(phrase in git_status for phrase in dirty_phrases) return is_dirty From a3da24f3a63485eeab81a0eed67fc9778c38a1b5 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Jul 2022 16:19:39 +0700 Subject: [PATCH 12/13] update the changelog, fix the cli sync test --- CHANGELOG.md | 6 +++++- tests/cli/test_cli.py | 16 +++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85be55c645..2143c483bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The types of changes are: ## [Unreleased](https://github.com/ethyca/fides/compare/1.7.0...main) ### Added + * Add datasets via YAML in the UI [#708](https://github.com/ethyca/fides/issues/708) * Add delete confirmation when deleting a field or collection from a dataset [808](https://github.com/ethyca/fides/issues/808) * Add ability to delete datasets from the UI @@ -25,9 +26,11 @@ The types of changes are: * System scanning step: AWS credentials form and initial `generate` API usage. * Added Cypress for testing [713](https://github.com/ethyca/fides/pull/833) * CustomInput type "password" with show/hide icon. +* Sync CLI command now checks for untracked/unstaged files in the manifests dir [#869](https://github.com/ethyca/fides/pull/869) * Add Okta support to the `/generate` endpoint [#842](https://github.com/ethyca/fides/pull/842) ### Changed + * Updated the `datamap` endpoint to return human-readable column names as the first response item [#779](https://github.com/ethyca/fides/pull/779) * Initial configuration wizard UI view * Refactored step & form results management to use Redux Toolkit slice. @@ -39,6 +42,7 @@ The types of changes are: * Fixed a build issue causing an `unknown` version of `fidesctl` to be installed in published Docker images [#836](https://github.com/ethyca/fides/pull/836) ### Changed + * Remove the `obscure` requirement from the `generate` endpoint [#819](https://github.com/ethyca/fides/pull/819) ## [1.7.0](https://github.com/ethyca/fides/compare/1.6.1...1.7.0) - 2022-06-23 @@ -97,7 +101,7 @@ The types of changes are: * Replaced all references to `make` with `nox` [#547](https://github.com/ethyca/fides/pull/547) * Removed config/schemas page [#613](https://github.com/ethyca/fides/issues/613) -* Dataset UI and config wizard docs added (https://github.com/ethyca/fides/pull/697) +* Dataset UI and config wizard docs added () * The fides README now walks through generating a datamap [#746](https://github.com/ethyca/fides/pull/746) ### Fixed diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 75ef7e8b00..312955e363 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -1,11 +1,11 @@ # pylint: disable=missing-docstring, redefined-outer-name import os from pathlib import PosixPath -from shutil import copytree from typing import Generator import pytest from click.testing import CliRunner +from git.repo import Repo from py._path.local import LocalPath from fidesctl.cli import cli @@ -102,13 +102,19 @@ def test_dry_diff_apply(test_config_path: str, test_cli_runner: CliRunner) -> No def test_sync( test_config_path: str, test_cli_runner: CliRunner, tmp_path: PosixPath ) -> None: - copytree("demo_resources", tmp_path, dirs_exist_ok=True) - result = test_cli_runner.invoke( - cli, ["-f", test_config_path, "sync", str(tmp_path)] - ) + """ + Due to the fact that this command checks the real git status, a pytest + tmp_dir can't be used. Consequently a real directory must be tested against + and then reset. + """ + test_dir = "demo_resources/" + result = test_cli_runner.invoke(cli, ["-f", test_config_path, "sync", test_dir]) print(result.output) assert result.exit_code == 0 + git_session = Repo().git() + git_session.checkout("HEAD", test_dir) + @pytest.mark.integration def test_audit(test_config_path: str, test_cli_runner: CliRunner) -> None: From cb02f9d0d5d918e42e52353a7eea65e814abf573 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 8 Jul 2022 16:23:10 +0700 Subject: [PATCH 13/13] update sync docstring --- src/fidesctl/cli/commands/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fidesctl/cli/commands/core.py b/src/fidesctl/cli/commands/core.py index 2b834bb185..1ba68988dd 100644 --- a/src/fidesctl/cli/commands/core.py +++ b/src/fidesctl/cli/commands/core.py @@ -142,6 +142,8 @@ def parse(ctx: click.Context, manifests_dir: str, verbose: bool = False) -> None def sync(ctx: click.Context, manifests_dir: str) -> None: """ Update local resource files by their fides_key to match their server versions. + + Aborts the sync if there are unstaged or untracked files in the manifests dir. """ config = ctx.obj["CONFIG"]