diff --git a/CHANGELOG.md b/CHANGELOG.md index 102ad029ee..fc2485d17c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ The types of changes are: * `fides` is now an alias for `fidesctl` as a CLI entrypoint [#926](https://github.com/ethyca/fides/pull/926) * Add user auth routes [929](https://github.com/ethyca/fides/pull/929) * Bump fideslib to 3.0.1 and remove patch code[931](https://github.com/ethyca/fides/pull/931) +* Add resource groups tagging api as a source of system generation [939](https://github.com/ethyca/fides/pull/939) ### Changed diff --git a/docs/fides/docs/guides/generate_resources.md b/docs/fides/docs/guides/generate_resources.md index 06b96a9830..2c85167918 100644 --- a/docs/fides/docs/guides/generate_resources.md +++ b/docs/fides/docs/guides/generate_resources.md @@ -201,7 +201,8 @@ These permissions can be supplied in an IAM policy: "Action": [ "redshift:DescribeClusters", "rds:DescribeDBInstances", - "rds:DescribeDBClusters" + "rds:DescribeDBClusters", + "tag:GetResources" ], "Resource": "*" } diff --git a/src/fidesctl/cli/commands/generate.py b/src/fidesctl/cli/commands/generate.py index 5341dad533..36604389a5 100644 --- a/src/fidesctl/cli/commands/generate.py +++ b/src/fidesctl/cli/commands/generate.py @@ -201,7 +201,7 @@ def generate_system_aws( tracked resource. Credentials can be supplied as options, a credentials reference to fidesctl config, or boto3 environment configuration. - Tracked resources: [Redshift, RDS] + Tracked resources: [Redshift, RDS, DynamoDb, S3] This is a one-time operation that does not track the state of the aws resources. It will need to be run again if the tracked resources change. diff --git a/src/fidesctl/cli/commands/scan.py b/src/fidesctl/cli/commands/scan.py index 1315b62ff5..9e6cc2fcdd 100644 --- a/src/fidesctl/cli/commands/scan.py +++ b/src/fidesctl/cli/commands/scan.py @@ -157,7 +157,7 @@ def scan_system_aws( Connect to an aws account and compares tracked resources to existing systems. 
Credentials can be supplied as options, a credentials reference to fidesctl config, or boto3 environment configuration. - Tracked resources: [Redshift, RDS] + Tracked resources: [Redshift, RDS, DynamoDb, S3] Outputs missing resources and has a non-zero exit if coverage is under the stated threshold. diff --git a/src/fidesctl/ctl/connectors/aws.py b/src/fidesctl/ctl/connectors/aws.py index 115f3f4234..77aa157987 100644 --- a/src/fidesctl/ctl/connectors/aws.py +++ b/src/fidesctl/ctl/connectors/aws.py @@ -58,7 +58,7 @@ def validate_credentials(aws_config: Optional[AWSConfig]) -> None: @handle_common_aws_errors def describe_redshift_clusters(client: Any) -> Dict[str, List[Dict]]: # type: ignore """ - Returns describe_clusters response given a redshift boto3 client. + Returns describe_clusters response given a 'redshift' boto3 client. """ describe_clusters = client.describe_clusters() return describe_clusters @@ -76,12 +76,26 @@ def describe_rds_clusters(client: Any) -> Dict[str, List[Dict]]: # type: ignore @handle_common_aws_errors def describe_rds_instances(client: Any) -> Dict[str, List[Dict]]: # type: ignore """ - Returns describe_db_instances response given a rds boto3 client. + Returns describe_db_instances response given a 'rds' boto3 client. """ describe_instances = client.describe_db_instances() return describe_instances +@handle_common_aws_errors +def get_tagging_resources(client: Any) -> List[str]: # type: ignore + """ + Returns a list of resource arns given a 'resourcegroupstaggingapi' boto3 client. 
def _split_arn(arn: str) -> Optional[List[str]]:
    """
    Splits an AWS arn into its six fields, returning None when it is malformed.

    The expected layout is `arn:partition:service:region:account-id:resource`.
    """
    # Cap the split at five separators so that a resource which itself
    # contains ':' stays intact in the final field.
    arn_fields = arn.split(":", 5)
    if len(arn_fields) < 6:
        return None
    return arn_fields


def create_resource_tagging_systems(
    resource_arns: List[str],
    organization_key: str,
) -> List[System]:
    """
    Given a list of resource arns, builds a list of System objects which
    represent each supported resource.

    Only arns whose service field is `dynamodb` or `s3` are converted. Arns
    for any other service, arns that do not have the six arn fields, and arns
    for which the service specific generator returns None are skipped.
    """
    resource_generators = {
        "dynamodb": create_tagging_dynamodb_system,
        "s3": create_tagging_s3_system,
    }
    systems = []
    for arn in resource_arns:
        arn_fields = _split_arn(arn)
        if arn_fields is None:
            # Malformed arn: nothing can be derived from it, so skip it the
            # same way an unsupported service is skipped.
            continue
        resource_generator = resource_generators.get(arn_fields[2])
        if resource_generator is None:
            continue
        generated_system = resource_generator(arn, organization_key)
        if generated_system is not None:
            systems.append(generated_system)
    return systems


def create_tagging_dynamodb_system(
    arn: str,
    organization_key: str,
) -> Optional[System]:
    """
    Given an AWS arn for a dynamodb resource, returns a System representation
    for dynamodb tables.

    Returns None when the arn is malformed or when its resource field is not
    exactly `table/<table name>`.
    """
    arn_fields = _split_arn(arn)
    if arn_fields is None:
        return None

    table_prefix = "table/"
    resource_name = arn_fields[5]
    if not resource_name.startswith(table_prefix):
        return None

    table_name = resource_name[len(table_prefix) :]
    # A remaining '/' means the arn points below the table (for example
    # `table/<name>/stream/<label>`), which is not itself a table.
    if not table_name or "/" in table_name:
        return None

    return System(
        fides_key=table_name,
        name=table_name,
        description=f"Fides Generated Description for DynamoDb table: {table_name}",
        system_type="dynamodb_table",
        organization_fides_key=organization_key,
        fidesctl_meta=SystemMetadata(
            resource_id=arn,
        ),
        privacy_declarations=[],
    )


def create_tagging_s3_system(
    arn: str,
    organization_key: str,
) -> Optional[System]:
    """
    Given an AWS arn for a s3 resource, returns a System representation
    for s3 buckets.

    The bucket name is the part of the resource field before the first '/'.
    Returns None when the arn is malformed or the bucket name is empty.
    """
    arn_fields = _split_arn(arn)
    if arn_fields is None:
        return None

    # Anything after the first '/' is a path inside the bucket.
    bucket_name = arn_fields[5].partition("/")[0]
    if not bucket_name:
        return None

    return System(
        fides_key=bucket_name,
        name=bucket_name,
        description=f"Fides Generated Description for S3 bucket: {bucket_name}",
        system_type="s3_bucket",
        organization_fides_key=organization_key,
        fidesctl_meta=SystemMetadata(
            resource_id=arn,
        ),
        privacy_declarations=[],
    )
def generate_resource_tagging_systems(
    organization_key: str, aws_config: Optional[AWSConfig]
) -> List[System]:
    """
    Builds System representations from the arns reported by the AWS resource tagging api.

    A 'resourcegroupstaggingapi' client is created from `aws_config`, the
    resource arns are fetched through it, and the arns are handed to the aws
    connector to be converted into systems for `organization_key`.
    """
    # Imported lazily, as the original block does, rather than at module level.
    import fidesctl.ctl.connectors.aws as aws_connector

    tagging_client = aws_connector.get_aws_client(
        service="resourcegroupstaggingapi", aws_config=aws_config
    )
    found_arns = aws_connector.get_tagging_resources(client=tagging_client)
    return aws_connector.create_resource_tagging_systems(
        resource_arns=found_arns, organization_key=organization_key
    )
- Tracked resources: [Redshift, RDS] """ _check_aws_connector_import() diff --git a/tests/ctl/core/test_system.py b/tests/ctl/core/test_system.py index e5c9a86608..c52f72bca6 100644 --- a/tests/ctl/core/test_system.py +++ b/tests/ctl/core/test_system.py @@ -45,12 +45,19 @@ def create_test_server_systems( @pytest.fixture(scope="function") def create_external_server_systems(test_config: FidesctlConfig) -> Generator: - systems = _system.generate_redshift_systems( - organization_key="default_organization", - aws_config={}, - ) + _system.generate_rds_systems( - organization_key="default_organization", - aws_config={}, + systems = ( + _system.generate_redshift_systems( + organization_key="default_organization", + aws_config={}, + ) + + _system.generate_rds_systems( + organization_key="default_organization", + aws_config={}, + ) + + _system.generate_resource_tagging_systems( + organization_key="default_organization", + aws_config={}, + ) ) delete_server_systems(test_config, systems) create_server_systems(test_config, systems)