diff --git a/meta/runtime.yml b/meta/runtime.yml index bea339f5826..10b917dcf9c 100644 --- a/meta/runtime.yml +++ b/meta/runtime.yml @@ -29,6 +29,7 @@ action_groups: - aws_eks_cluster - aws_elasticbeanstalk_app - aws_glue_connection + - aws_glue_crawler - aws_glue_job - aws_inspector_target - aws_kms diff --git a/plugins/modules/aws_glue_crawler.py b/plugins/modules/aws_glue_crawler.py new file mode 100644 index 00000000000..11ff1b63304 --- /dev/null +++ b/plugins/modules/aws_glue_crawler.py @@ -0,0 +1,428 @@ +#!/usr/bin/python +# Copyright: (c) 2018, Rob White (@wimnat) +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + + +DOCUMENTATION = r''' +--- +module: aws_glue_crawler +version_added: 4.1.0 +short_description: Manage an AWS Glue crawler +description: + - Manage an AWS Glue crawler. See U(https://aws.amazon.com/glue/) for details. +author: + - 'Ivan Chekaldin (@ichekaldin)' +options: + database_name: + description: + - The name of the database where results are written. + type: str + description: + description: + - Description of the crawler being defined. + type: str + name: + description: + - The name you assign to this crawler definition. It must be unique in your account. + required: true + type: str + recrawl_policy: + description: + - A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. + suboptions: + recrawl_behavior: + description: + - Specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run. + - Supported options are C(CRAWL_EVERYTHING) and C(CRAWL_NEW_FOLDERS_ONLY). + type: str + type: dict + role: + description: + - The name or ARN of the IAM role associated with this crawler. + - Required when I(state=present). 
+ type: str + schema_change_policy: + description: + - The policy for the crawler's update and deletion behavior. + suboptions: + delete_behavior: + description: + - Defines the deletion behavior when the crawler finds a deleted object. + - Supported options are C(LOG), C(DELETE_FROM_DATABASE), and C(DEPRECATE_IN_DATABASE). + type: str + update_behavior: + description: + - Defines the update behavior when the crawler finds a changed schema. + - Supported options are C(LOG) and C(UPDATE_IN_DATABASE). + type: str + type: dict + state: + description: + - Create or delete the AWS Glue crawler. + required: true + choices: [ 'present', 'absent' ] + type: str + table_prefix: + description: + - The table prefix used for catalog tables that are created. + type: str + targets: + description: + - A list of targets to crawl. See example below. + - Required when I(state=present). + type: dict +extends_documentation_fragment: +- amazon.aws.aws +- amazon.aws.ec2 +- amazon.aws.tags +''' + +EXAMPLES = r''' +# Note: These examples do not set authentication details, see the AWS Guide for details. + +# Create an AWS Glue crawler +- community.aws.aws_glue_crawler: + name: my-glue-crawler + database_name: my_database + role: my-iam-role + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + targets: + S3Targets: + - Path: "s3://my-bucket/prefix/folder/" + ConnectionName: my-connection + Exclusions: + - "**.json" + - "**.yml" + state: present + +# Delete an AWS Glue crawler +- community.aws.aws_glue_crawler: + name: my-glue-crawler + state: absent + +''' + +RETURN = r''' +creation_time: + description: The time and date that this crawler definition was created. + returned: when state is present + type: str + sample: '2021-04-01T05:19:58.326000+00:00' +database_name: + description: The name of the database where results are written. 
+ returned: when state is present + type: str + sample: my_table +description: + description: Description of the crawler. + returned: when state is present + type: str + sample: My crawler +last_updated: + description: The time and date that this crawler definition was last updated. + returned: when state is present + type: str + sample: '2021-04-01T05:19:58.326000+00:00' +name: + description: The name of the AWS Glue crawler. + returned: always + type: str + sample: my-glue-crawler +recrawl_policy: + description: A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. + returned: when state is present + type: complex + contains: + RecrawlBehavior: + description: Whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run. + returned: when state is present + type: str + sample: CRAWL_EVERYTHING +role: + description: The name or ARN of the IAM role associated with this crawler. + returned: when state is present + type: str + sample: my-iam-role +schema_change_policy: + description: The policy for the crawler's update and deletion behavior. + returned: when state is present + type: complex + contains: + DeleteBehavior: + description: The deletion behavior when the crawler finds a deleted object. + returned: when state is present + type: str + sample: DELETE_FROM_DATABASE + UpdateBehavior: + description: The update behavior when the crawler finds a changed schema. + returned: when state is present + type: str + sample: UPDATE_IN_DATABASE + +table_prefix: + description: The table prefix used for catalog tables that are created. + returned: when state is present + type: str + sample: my_prefix +targets: + description: A list of targets to crawl. + returned: when state is present + type: complex + contains: + S3Targets: + description: List of S3 targets. 
+ returned: when state is present + type: list + JdbcTargets: + description: List of JDBC targets. + returned: when state is present + type: list + MongoDBTargets: + description: List of Mongo DB targets. + returned: when state is present + type: list + DynamoDBTargets: + description: List of DynamoDB targets. + returned: when state is present + type: list + CatalogTargets: + description: List of catalog targets. + returned: when state is present + type: list +''' + +# Non-ansible imports +import copy +try: + import botocore +except ImportError: + pass # Handled by AnsibleAWSModule + +from ansible.module_utils.common.dict_transformations import camel_dict_to_snake_dict +from ansible.module_utils.common.dict_transformations import snake_dict_to_camel_dict + +from ansible_collections.amazon.aws.plugins.module_utils.core import AnsibleAWSModule +from ansible_collections.amazon.aws.plugins.module_utils.core import is_boto3_error_code +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import ansible_dict_to_boto3_tag_list +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import AWSRetry +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import boto3_tag_list_to_ansible_dict +from ansible_collections.amazon.aws.plugins.module_utils.ec2 import compare_aws_tags +from ansible_collections.amazon.aws.plugins.module_utils.iam import get_aws_account_info + + +def _get_glue_crawler(connection, module, glue_crawler_name): + ''' + Get an AWS Glue crawler based on name. If not found, return None. 
+ ''' + try: + return connection.get_crawler(aws_retry=True, Name=glue_crawler_name)['Crawler'] + except is_boto3_error_code('EntityNotFoundException'): + return None + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: # pylint: disable=duplicate-except + module.fail_json_aws(e) + + +def _trim_targets(targets): + return [_trim_target(t) for t in targets] + + +def _trim_target(target): + """ + Some target types have optional parameters which AWS will fill in and return + To compare the desired targets and the current targets we need to ignore the defaults + """ + if not target: + return None + retval = target.copy() + if not retval.get('Exclusions', None): + retval.pop('Exclusions', None) + return retval + + +def _compare_glue_crawler_params(user_params, current_params): + ''' + Compare Glue crawler params. If there is a difference, return True immediately else return False + ''' + if 'DatabaseName' in user_params and user_params['DatabaseName'] != current_params['DatabaseName']: + return True + if 'Description' in user_params and user_params['Description'] != current_params['Description']: + return True + if 'RecrawlPolicy' in user_params and user_params['RecrawlPolicy'] != current_params['RecrawlPolicy']: + return True + if 'Role' in user_params and user_params['Role'] != current_params['Role']: + return True + if 'SchemaChangePolicy' in user_params and user_params['SchemaChangePolicy'] != current_params['SchemaChangePolicy']: + return True + if 'TablePrefix' in user_params and user_params['TablePrefix'] != current_params['TablePrefix']: + return True + if 'Targets' in user_params: + if 'S3Targets' in user_params['Targets']: + if _trim_targets(user_params['Targets']['S3Targets']) != _trim_targets(current_params['Targets']['S3Targets']): + return True + if 'JdbcTargets' in user_params['Targets'] and user_params['Targets']['JdbcTargets'] != current_params['Targets']['JdbcTargets']: + if 
_trim_targets(user_params['Targets']['JdbcTargets']) != _trim_targets(current_params['Targets']['JdbcTargets']): + return True + if 'MongoDBTargets' in user_params['Targets'] and user_params['Targets']['MongoDBTargets'] != current_params['Targets']['MongoDBTargets']: + return True + if 'DynamoDBTargets' in user_params['Targets'] and user_params['Targets']['DynamoDBTargets'] != current_params['Targets']['DynamoDBTargets']: + return True + if 'CatalogTargets' in user_params['Targets'] and user_params['Targets']['CatalogTargets'] != current_params['Targets']['CatalogTargets']: + return True + + return False + + +def ensure_tags(connection, module, glue_crawler): + changed = False + + if module.params.get('tags') is None: + return False + + account_id, partition = get_aws_account_info(module) + arn = 'arn:{0}:glue:{1}:{2}:crawler/{3}'.format(partition, module.region, account_id, module.params.get('name')) + + try: + existing_tags = connection.get_tags(aws_retry=True, ResourceArn=arn).get('Tags', {}) + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + if module.check_mode: + existing_tags = {} + else: + module.fail_json_aws(e, msg='Unable to get tags for Glue crawler %s' % module.params.get('name')) + + tags_to_add, tags_to_remove = compare_aws_tags(existing_tags, module.params.get('tags'), module.params.get('purge_tags')) + + if tags_to_remove: + changed = True + if not module.check_mode: + try: + connection.untag_resource(aws_retry=True, ResourceArn=arn, TagsToRemove=tags_to_remove) + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e, msg='Unable to set tags for Glue crawler %s' % module.params.get('name')) + + if tags_to_add: + changed = True + if not module.check_mode: + try: + connection.tag_resource(aws_retry=True, ResourceArn=arn, TagsToAdd=tags_to_add) + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e, msg='Unable 
to set tags for Glue crawler %s' % module.params.get('name')) + + return changed + + +def create_or_update_glue_crawler(connection, module, glue_crawler): + ''' + Create or update an AWS Glue crawler + ''' + + changed = False + params = dict() + params['Name'] = module.params.get('name') + params['Role'] = module.params.get('role') + params['Targets'] = module.params.get('targets') + if module.params.get('database_name') is not None: + params['DatabaseName'] = module.params.get('database_name') + if module.params.get('description') is not None: + params['Description'] = module.params.get('description') + if module.params.get('recrawl_policy') is not None: + params['RecrawlPolicy'] = snake_dict_to_camel_dict(module.params.get('recrawl_policy'), capitalize_first=True) + if module.params.get('role') is not None: + params['Role'] = module.params.get('role') + if module.params.get('schema_change_policy') is not None: + params['SchemaChangePolicy'] = snake_dict_to_camel_dict(module.params.get('schema_change_policy'), capitalize_first=True) + if module.params.get('table_prefix') is not None: + params['TablePrefix'] = module.params.get('table_prefix') + if module.params.get('targets') is not None: + params['Targets'] = module.params.get('targets') + + if glue_crawler: + if _compare_glue_crawler_params(params, glue_crawler): + try: + if not module.check_mode: + connection.update_crawler(aws_retry=True, **params) + changed = True + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e) + else: + try: + if not module.check_mode: + connection.create_crawler(aws_retry=True, **params) + changed = True + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e) + + glue_crawler = _get_glue_crawler(connection, module, params['Name']) + + changed |= ensure_tags(connection, module, glue_crawler) + + module.exit_json(changed=changed, **camel_dict_to_snake_dict(glue_crawler or {}, 
ignore_list=['SchemaChangePolicy', 'RecrawlPolicy', 'Targets'])) + + +def delete_glue_crawler(connection, module, glue_crawler): + ''' + Delete an AWS Glue crawler + ''' + changed = False + + if glue_crawler: + try: + if not module.check_mode: + connection.delete_crawler(aws_retry=True, Name=glue_crawler['Name']) + changed = True + except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e: + module.fail_json_aws(e) + + module.exit_json(changed=changed) + + +def main(): + + argument_spec = ( + dict( + database_name=dict(type='str'), + description=dict(type='str'), + name=dict(required=True, type='str'), + purge_tags=dict(type='bool', default=True), + recrawl_policy=dict(type='dict', options=dict( + recrawl_behavior=dict(type='str') + )), + role=dict(type='str'), + schema_change_policy=dict(type='dict', options=dict( + delete_behavior=dict(type='str'), + update_behavior=dict(type='str') + )), + state=dict(required=True, choices=['present', 'absent'], type='str'), + table_prefix=dict(type='str'), + tags=dict(type='dict', aliases=['resource_tags']), + targets=dict(type='dict') + ) + ) + + module = AnsibleAWSModule(argument_spec=argument_spec, + required_if=[ + ('state', 'present', ['role', 'targets']) + ], + supports_check_mode=True + ) + + connection = module.client('glue', retry_decorator=AWSRetry.jittered_backoff(retries=10)) + + state = module.params.get('state') + + glue_crawler = _get_glue_crawler(connection, module, module.params.get('name')) + + if state == 'present': + create_or_update_glue_crawler(connection, module, glue_crawler) + else: + delete_glue_crawler(connection, module, glue_crawler) + + +if __name__ == '__main__': + main() diff --git a/tests/integration/targets/aws_glue_crawler/aliases b/tests/integration/targets/aws_glue_crawler/aliases new file mode 100644 index 00000000000..4ef4b2067d0 --- /dev/null +++ b/tests/integration/targets/aws_glue_crawler/aliases @@ -0,0 +1 @@ +cloud/aws diff --git 
a/tests/integration/targets/aws_glue_crawler/defaults/main.yml b/tests/integration/targets/aws_glue_crawler/defaults/main.yml new file mode 100644 index 00000000000..e5d95891f3b --- /dev/null +++ b/tests/integration/targets/aws_glue_crawler/defaults/main.yml @@ -0,0 +1,11 @@ +--- +glue_crawler_name: "{{ resource_prefix }}" +glue_crawler_description: Test crawler +glue_crawler_s3_path: "s3://test-s3-bucket-glue/prefix/folder/" +# IAM role names have to be less than 64 characters +# The 8 digit identifier at the end of resource_prefix helps determine during +# which test something was created and allows tests to be run in parallel +# Shippable resource_prefixes are in the format shippable-123456-123, so in those cases +# we need both sets of digits to keep the resource name unique +unique_id: "{{ resource_prefix | regex_search('(\\d+-?)(\\d+)$') }}" +glue_crawler_role_name: "ansible-test-{{ unique_id }}-glue-crawler" diff --git a/tests/integration/targets/aws_glue_crawler/meta/main.yml b/tests/integration/targets/aws_glue_crawler/meta/main.yml new file mode 100644 index 00000000000..1810d4bec98 --- /dev/null +++ b/tests/integration/targets/aws_glue_crawler/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - setup_remote_tmp_dir diff --git a/tests/integration/targets/aws_glue_crawler/tasks/main.yml b/tests/integration/targets/aws_glue_crawler/tasks/main.yml new file mode 100644 index 00000000000..b96968195c3 --- /dev/null +++ b/tests/integration/targets/aws_glue_crawler/tasks/main.yml @@ -0,0 +1,339 @@ +--- +- name: aws_glue_crawler integration tests + collections: + - amazon.aws + module_defaults: + group/aws: + aws_access_key: "{{ aws_access_key }}" + aws_secret_key: "{{ aws_secret_key }}" + security_token: "{{ security_token | default(omit) }}" + region: "{{ aws_region }}" + block: + + - name: Create minimal Glue crawler role + iam_role: + name: "{{ glue_crawler_role_name }}" + assume_role_policy_document: + Version: "2008-10-17" + Statement: + - Action: 
"sts:AssumeRole" + Effect: Allow + Principal: + Service: glue.amazonaws.com + create_instance_profile: false + managed_policies: + - "arn:aws:iam::aws:policy/AWSXrayWriteOnlyAccess" + + - name: Pause for role creation to finish + pause: + seconds: 10 + + - name: Create Glue crawler (check mode) + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + database_name: my_database + description: "{{ glue_crawler_description }}" + role: "{{ glue_crawler_role_name }}" + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + targets: + S3Targets: + - Path: "{{ glue_crawler_s3_path }}" + tags: + Environment: Test + Product: Glue + state: present + check_mode: true + register: glue_crawler_check + + - name: Verity that Glue crawler was not created in check mode + assert: + that: + - glue_crawler_check.changed + - glue_crawler_check.description is not defined + + - name: Create Glue crawler + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + database_name: my_database + description: "{{ glue_crawler_description }}" + role: "{{ glue_crawler_role_name }}" + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + targets: + S3Targets: + - Path: "{{ glue_crawler_s3_path }}" + tags: + Environment: Test + Product: Glue + state: present + register: glue_crawler + + - name: Get info on Glue crawler + command: "aws glue get-crawler --name {{ glue_crawler_name }}" + environment: + AWS_ACCESS_KEY_ID: "{{ aws_access_key }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_key }}" + AWS_SESSION_TOKEN: "{{ security_token | default('') }}" + AWS_DEFAULT_REGION: "{{ aws_region }}" + register: crawler_info_query + no_log: True + + - name: Convert it to an object + set_fact: + crawler_info: "{{ crawler_info_query.stdout | from_json }}" + + - name: Verity that Glue crawler was created + assert: + 
that: + - glue_crawler.changed + - glue_crawler.name == crawler_info["Crawler"]["Name"] + - glue_crawler.database_name == crawler_info["Crawler"]["DatabaseName"] + - glue_crawler.description == crawler_info["Crawler"]["Description"] + - glue_crawler.recrawl_policy == crawler_info["Crawler"]["RecrawlPolicy"] + - glue_crawler.role == crawler_info["Crawler"]["Role"] + - glue_crawler.schema_change_policy == crawler_info["Crawler"]["SchemaChangePolicy"] + - glue_crawler.targets.S3Targets == crawler_info["Crawler"]["Targets"]["S3Targets"] + + - name: Create Glue crawler (idempotent) (check mode) + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + database_name: my_database + description: "{{ glue_crawler_description }}" + role: "{{ glue_crawler_role_name }}" + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + targets: + S3Targets: + - Path: "{{ glue_crawler_s3_path }}" + tags: + Environment: Test + Product: Glue + state: present + check_mode: true + register: glue_crawler_idempotent_check + + - name: Get info on Glue crawler + command: "aws glue get-crawler --name {{ glue_crawler_name }}" + environment: + AWS_ACCESS_KEY_ID: "{{ aws_access_key }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_key }}" + AWS_SESSION_TOKEN: "{{ security_token | default('') }}" + AWS_DEFAULT_REGION: "{{ aws_region }}" + register: crawler_info_query_idempotent_check + no_log: True + + - name: Convert it to an object + set_fact: + crawler_info_idempotent_check: "{{ crawler_info_query_idempotent_check.stdout | from_json }}" + + - name: Verity that Glue crawler was not modified in check mode + assert: + that: + - glue_crawler_idempotent_check is not changed + - crawler_info["Crawler"]["Name"] == crawler_info_idempotent_check["Crawler"]["Name"] + - crawler_info["Crawler"]["DatabaseName"] == crawler_info_idempotent_check["Crawler"]["DatabaseName"] + - crawler_info["Crawler"]["Description"] == 
crawler_info_idempotent_check["Crawler"]["Description"] + - crawler_info["Crawler"]["RecrawlPolicy"] == crawler_info_idempotent_check["Crawler"]["RecrawlPolicy"] + - crawler_info["Crawler"]["Role"] == crawler_info_idempotent_check["Crawler"]["Role"] + - crawler_info["Crawler"]["SchemaChangePolicy"] == crawler_info_idempotent_check["Crawler"]["SchemaChangePolicy"] + - crawler_info["Crawler"]["Targets"]["S3Targets"] == crawler_info_idempotent_check["Crawler"]["Targets"]["S3Targets"] + + - name: Create Glue crawler (idempotent) + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + database_name: my_database + description: "{{ glue_crawler_description }}" + role: "{{ glue_crawler_role_name }}" + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + targets: + S3Targets: + - Path: "{{ glue_crawler_s3_path }}" + tags: + Environment: Test + Product: Glue + state: present + register: glue_crawler_idempotent + + - name: Get info on Glue crawler + command: "aws glue get-crawler --name {{ glue_crawler_name }}" + environment: + AWS_ACCESS_KEY_ID: "{{ aws_access_key }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_key }}" + AWS_SESSION_TOKEN: "{{ security_token | default('') }}" + AWS_DEFAULT_REGION: "{{ aws_region }}" + register: crawler_info_query_idempotent + no_log: True + + - name: Convert it to an object + set_fact: + crawler_info_idempotent: "{{ crawler_info_query_idempotent.stdout | from_json }}" + + - name: Verity that Glue crawler was not modified + assert: + that: + - glue_crawler_idempotent is not changed + - crawler_info["Crawler"]["Name"] == crawler_info_idempotent["Crawler"]["Name"] + - crawler_info["Crawler"]["DatabaseName"] == crawler_info_idempotent["Crawler"]["DatabaseName"] + - crawler_info["Crawler"]["Description"] == crawler_info_idempotent["Crawler"]["Description"] + - crawler_info["Crawler"]["RecrawlPolicy"] == 
crawler_info_idempotent["Crawler"]["RecrawlPolicy"] + - crawler_info["Crawler"]["Role"] == crawler_info_idempotent["Crawler"]["Role"] + - crawler_info["Crawler"]["SchemaChangePolicy"] == crawler_info_idempotent["Crawler"]["SchemaChangePolicy"] + - crawler_info["Crawler"]["Targets"]["S3Targets"] == crawler_info_idempotent["Crawler"]["Targets"]["S3Targets"] + + - name: Update Glue crawler (check mode) + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + database_name: my_database_2 + description: "{{ glue_crawler_description }}" + role: "{{ glue_crawler_role_name }}" + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + targets: + S3Targets: + - Path: "{{ glue_crawler_s3_path }}" + tags: + Environment: Test + Product: Glue + state: present + check_mode: true + register: glue_crawler_update_check + + - name: Get info on Glue crawler + command: "aws glue get-crawler --name {{ glue_crawler_name }}" + environment: + AWS_ACCESS_KEY_ID: "{{ aws_access_key }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_key }}" + AWS_SESSION_TOKEN: "{{ security_token | default('') }}" + AWS_DEFAULT_REGION: "{{ aws_region }}" + register: crawler_info_query_update_check + no_log: True + + - name: Convert it to an object + set_fact: + crawler_info_update_check: "{{ crawler_info_query_update_check.stdout | from_json }}" + + - name: Verity that Glue crawler was not modified in check mode + assert: + that: + - glue_crawler_update_check is changed + - glue_crawler_update_check.name == crawler_info_update_check["Crawler"]["Name"] + - glue_crawler_update_check.database_name == crawler_info_update_check["Crawler"]["DatabaseName"] + - glue_crawler_update_check.description == crawler_info_update_check["Crawler"]["Description"] + - glue_crawler_update_check.recrawl_policy == crawler_info_update_check["Crawler"]["RecrawlPolicy"] + - glue_crawler_update_check.role == 
crawler_info_update_check["Crawler"]["Role"] + - glue_crawler_update_check.schema_change_policy == crawler_info_update_check["Crawler"]["SchemaChangePolicy"] + - glue_crawler_update_check.targets.S3Targets == crawler_info_update_check["Crawler"]["Targets"]["S3Targets"] + + - name: Update Glue crawler + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + database_name: my_database_2 + description: "{{ glue_crawler_description }}" + role: "{{ glue_crawler_role_name }}" + recrawl_policy: + recrawl_behavior: CRAWL_EVERYTHING + schema_change_policy: + delete_behavior: DELETE_FROM_DATABASE + update_behavior: UPDATE_IN_DATABASE + targets: + S3Targets: + - Path: "{{ glue_crawler_s3_path }}" + tags: + Environment: Test + Product: Glue + state: present + register: glue_crawler_update + + - name: Get info on Glue crawler + command: "aws glue get-crawler --name {{ glue_crawler_name }}" + environment: + AWS_ACCESS_KEY_ID: "{{ aws_access_key }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_key }}" + AWS_SESSION_TOKEN: "{{ security_token | default('') }}" + AWS_DEFAULT_REGION: "{{ aws_region }}" + register: crawler_info_query_update + no_log: True + + - name: Convert it to an object + set_fact: + crawler_info_update: "{{ crawler_info_query_update.stdout | from_json }}" + + - name: Verity that Glue crawler was modified + assert: + that: + - glue_crawler_update.changed + - glue_crawler_update.name == crawler_info_update["Crawler"]["Name"] + - glue_crawler_update.database_name == crawler_info_update["Crawler"]["DatabaseName"] + - glue_crawler_update.description == crawler_info_update["Crawler"]["Description"] + - glue_crawler_update.recrawl_policy == crawler_info_update["Crawler"]["RecrawlPolicy"] + - glue_crawler_update.role == crawler_info_update["Crawler"]["Role"] + - glue_crawler_update.schema_change_policy == crawler_info_update["Crawler"]["SchemaChangePolicy"] + - glue_crawler_update.targets.S3Targets == crawler_info_update["Crawler"]["Targets"]["S3Targets"] + + - name: Delete 
Glue crawler (check mode) + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + state: absent + check_mode: true + register: glue_crawler_delete_check + + - name: Get info on Glue crawler + command: "aws glue get-crawler --name {{ glue_crawler_name }}" + environment: + AWS_ACCESS_KEY_ID: "{{ aws_access_key }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_key }}" + AWS_SESSION_TOKEN: "{{ security_token | default('') }}" + AWS_DEFAULT_REGION: "{{ aws_region }}" + register: crawler_info_query_delete_check + no_log: True + + - name: Convert it to an object + set_fact: + crawler_info_delete_check: "{{ crawler_info_query_delete_check.stdout | from_json }}" + + - name: Verify that Glue crawler was not deleted in check mode + assert: + that: + - glue_crawler_delete_check.changed + - crawler_info["Crawler"]["Name"] == crawler_info_delete_check["Crawler"]["Name"] + + - name: Delete Glue crawler + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + state: absent + register: glue_crawler_delete + + - name: Verify that Glue crawler was deleted + assert: + that: + - glue_crawler_delete.changed + + always: + - name: Delete Glue crawler + aws_glue_crawler: + name: "{{ glue_crawler_name }}" + state: absent + ignore_errors: true + + - name: Delete Glue crawler role + iam_role: + name: "{{ glue_crawler_role_name }}" + state: absent + ignore_errors: true