From 938a8457fa083e64e22253b6c338d644d365f848 Mon Sep 17 00:00:00 2001 From: Dominik Lekse Date: Sun, 24 Oct 2021 16:17:13 +0200 Subject: [PATCH 1/3] r/aws_glue_crawler Add support for S3 event notifications --- internal/service/glue/crawler.go | 31 ++++ internal/service/glue/crawler_test.go | 205 ++++++++++++++++++++++ website/docs/r/glue_crawler.html.markdown | 4 +- 3 files changed, 239 insertions(+), 1 deletion(-) diff --git a/internal/service/glue/crawler.go b/internal/service/glue/crawler.go index 06561756516..85d77a0ac9e 100644 --- a/internal/service/glue/crawler.go +++ b/internal/service/glue/crawler.go @@ -133,6 +133,16 @@ func ResourceCrawler() *schema.Resource { Optional: true, ValidateFunc: validation.IntBetween(1, 249), }, + "event_queue_arn": { + Type: schema.TypeString, + Optional: true, + ValidateFunc: verify.ValidARN, + }, + "dlq_event_queue_arn": { + Type: schema.TypeString, + Optional: true, + ValidateFunc: verify.ValidARN, + }, }, }, }, @@ -304,6 +314,11 @@ func resourceCrawlerCreate(d *schema.ResourceData, meta interface{}) error { return resource.RetryableError(err) } + // InvalidInputException: SQS queue arn:aws:sqs:us-west-2:*******:tf-acc-test-4317277351691904203 does not exist or the role provided does not have access to it. + if tfawserr.ErrMessageContains(err, glue.ErrCodeInvalidInputException, "SQS queue") && tfawserr.ErrMessageContains(err, glue.ErrCodeInvalidInputException, "does not exist or the role provided does not have access to it") { + return resource.RetryableError(err) + } + return resource.NonRetryableError(err) } return nil @@ -524,6 +539,14 @@ func expandGlueS3Target(cfg map[string]interface{}) *glue.S3Target { target.SampleSize = aws.Int64(int64(v.(int))) } + if v, ok := cfg["event_queue_arn"]; ok { + target.EventQueueArn = aws.String(v.(string)) + } + + if v, ok := cfg["dlq_event_queue_arn"]; ok { + target.DlqEventQueueArn = aws.String(v.(string)) + } + return target } @@ -625,6 +648,11 @@ func resourceCrawlerUpdate(d *schema.ResourceData, meta interface{}) error { return resource.RetryableError(err) } + // InvalidInputException: SQS queue arn:aws:sqs:us-west-2:*******:tf-acc-test-4317277351691904203 does not exist or the role provided does not have access to it. + if tfawserr.ErrMessageContains(err, glue.ErrCodeInvalidInputException, "SQS queue") && tfawserr.ErrMessageContains(err, glue.ErrCodeInvalidInputException, "does not exist or the role provided does not have access to it") { + return resource.RetryableError(err) + } + return resource.NonRetryableError(err) } return nil @@ -768,6 +796,9 @@ func flattenGlueS3Targets(s3Targets []*glue.S3Target) []map[string]interface{} { attrs["sample_size"] = aws.Int64Value(s3Target.SampleSize) } + attrs["event_queue_arn"] = aws.StringValue(s3Target.EventQueueArn) + attrs["dlq_event_queue_arn"] = aws.StringValue(s3Target.DlqEventQueueArn) + result = append(result, attrs) } return result diff --git a/internal/service/glue/crawler_test.go b/internal/service/glue/crawler_test.go index 24b7975e75c..90ef841121c 100644 --- a/internal/service/glue/crawler_test.go +++ b/internal/service/glue/crawler_test.go @@ -678,6 +678,67 @@ func TestAccGlueCrawler_S3Target_exclusions(t *testing.T) { }) } +func TestAccGlueCrawler_S3Target_eventqueue(t *testing.T) { + var crawler glue.Crawler + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_glue_crawler.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(t) }, + ErrorCheck: acctest.ErrorCheck(t, glue.EndpointsID), + Providers: acctest.Providers, + CheckDestroy: testAccCheckCrawlerDestroy, + Steps: []resource.TestStep{ + { + Config: testAccGlueCrawlerConfig_S3Target_EventQueue(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckCrawlerExists(resourceName, &crawler), + acctest.CheckResourceAttrRegionalARN(resourceName, "arn", "glue", fmt.Sprintf("crawler/%s", rName)), + resource.TestCheckResourceAttr(resourceName, "s3_target.#", "1"), + acctest.CheckResourceAttrRegionalARN(resourceName, "s3_target.0.event_queue_arn", "sqs", rName), + resource.TestCheckResourceAttr(resourceName, "recrawl_policy.0.recrawl_behavior", "CRAWL_EVENT_MODE"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func TestAccGlueCrawler_S3Target_dlqeventqueue(t *testing.T) { + var crawler glue.Crawler + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_glue_crawler.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(t) }, + ErrorCheck: acctest.ErrorCheck(t, glue.EndpointsID), + Providers: acctest.Providers, + CheckDestroy: testAccCheckCrawlerDestroy, + Steps: []resource.TestStep{ + { + Config: testAccGlueCrawlerConfig_S3Target_DlqEventQueue(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckCrawlerExists(resourceName, &crawler), + acctest.CheckResourceAttrRegionalARN(resourceName, "arn", "glue", fmt.Sprintf("crawler/%s", rName)), + resource.TestCheckResourceAttr(resourceName, "s3_target.#", "1"), + acctest.CheckResourceAttrRegionalARN(resourceName, "s3_target.0.event_queue_arn", "sqs", rName), + acctest.CheckResourceAttrRegionalARN(resourceName, "s3_target.0.dlq_event_queue_arn", "sqs", fmt.Sprintf("%sdlq", rName)), + resource.TestCheckResourceAttr(resourceName, "recrawl_policy.0.recrawl_behavior", "CRAWL_EVENT_MODE"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + func TestAccGlueCrawler_S3Target_multiple(t *testing.T) { var crawler glue.Crawler rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) @@ -2099,6 +2160,150 @@ resource "aws_glue_crawler" "test" { `, rName, exclusion1, exclusion2) } +func testAccGlueCrawlerConfig_S3Target_EventQueue(rName string) string { + return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` +resource "aws_glue_catalog_database" "test" { + name = %[1]q +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_sqs_queue" "test" { + name = %[1]q + + visibility_timeout_seconds = 3600 +} + +resource "aws_iam_role_policy" "test_sqs" { + role = aws_iam_role.test.name + + policy = data.aws_iam_policy_document.role_test_sqs.json +} + +data "aws_iam_policy_document" "role_test_sqs" { + statement { + effect = "Allow" + + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueUrl", + "sqs:ListDeadLetterSourceQueues", + "sqs:DeleteMessageBatch", + "sqs:ReceiveMessage", + "sqs:GetQueueAttributes", + "sqs:ListQueueTags", + "sqs:SetQueueAttributes", + "sqs:PurgeQueue", + ] + + resources = [ + aws_sqs_queue.test.arn, + ] + } +} + +resource "aws_glue_crawler" "test" { + depends_on = [ + aws_iam_role_policy_attachment.test-AWSGlueServiceRole, + aws_iam_role_policy.test_sqs, + ] + + database_name = aws_glue_catalog_database.test.name + name = %[1]q + role = aws_iam_role.test.name + + s3_target { + path = "s3://${aws_s3_bucket.test.bucket}" + + event_queue_arn = aws_sqs_queue.test.arn + } + + recrawl_policy { + recrawl_behavior = "CRAWL_EVENT_MODE" + } +} +`, rName) +} + +func testAccGlueCrawlerConfig_S3Target_DlqEventQueue(rName string) string { + return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` +resource "aws_glue_catalog_database" "test" { + name = %[1]q +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_sqs_queue" "test" { + name = %[1]q + + visibility_timeout_seconds = 3600 +} + +resource "aws_sqs_queue" "test_dlq" { + name = "%[1]sdlq" + + visibility_timeout_seconds = 3600 +} + +resource "aws_iam_role_policy" "test_sqs" { + role = aws_iam_role.test.name + + policy = data.aws_iam_policy_document.role_test_sqs.json +} + +data "aws_iam_policy_document" "role_test_sqs" { + statement { + effect = "Allow" + + actions = [ + "sqs:DeleteMessage", + "sqs:GetQueueUrl", + "sqs:ListDeadLetterSourceQueues", + "sqs:DeleteMessageBatch", + "sqs:ReceiveMessage", + "sqs:GetQueueAttributes", + "sqs:ListQueueTags", + "sqs:SetQueueAttributes", + "sqs:PurgeQueue", + ] + + resources = [ + aws_sqs_queue.test_dlq.arn, + aws_sqs_queue.test.arn, + ] + } +} + +resource "aws_glue_crawler" "test" { + depends_on = [ + aws_iam_role_policy_attachment.test-AWSGlueServiceRole, + aws_iam_role_policy.test_sqs, + ] + + database_name = aws_glue_catalog_database.test.name + name = %[1]q + role = aws_iam_role.test.name + + s3_target { + path = "s3://${aws_s3_bucket.test.bucket}" + + event_queue_arn = aws_sqs_queue.test.arn + dlq_event_queue_arn = aws_sqs_queue.test_dlq.arn + } + + recrawl_policy { + recrawl_behavior = "CRAWL_EVENT_MODE" + } +} +`, rName) +} + func testAccGlueCrawlerConfig_S3Target_Multiple(rName, path1, path2 string) string { return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` resource "aws_glue_catalog_database" "test" { diff --git a/website/docs/r/glue_crawler.html.markdown b/website/docs/r/glue_crawler.html.markdown index 6a8e1f67d4a..dbbf41c92c3 100644 --- a/website/docs/r/glue_crawler.html.markdown +++ b/website/docs/r/glue_crawler.html.markdown @@ -168,7 +168,9 @@ The following arguments are supported: * `path` - (Required) The path to the Amazon S3 target. * `connection_name` - (Optional) The name of a connection which allows crawler to access data in S3 within a VPC. * `exclusions` - (Optional) A list of glob patterns used to exclude from the crawl. -* `sample_size` - (Optional) Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249. +* `sample_size` - (Optional) Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249. +* `event_queue_arn` - (Optional) The ARN of the SQS queue to receive S3 notifications from. +* `dlq_event_queue_arn` - (Optional) The ARN of the dead-letter SQS queue. ### Catalog Target From 925d8fa3fb4be90a7b1965073b5e144e4b9b2ed2 Mon Sep 17 00:00:00 2001 From: Dominik Lekse Date: Sun, 24 Oct 2021 16:24:41 +0200 Subject: [PATCH 2/3] Add changelog for pull request #21467 --- .changelog/21467.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/21467.txt diff --git a/.changelog/21467.txt b/.changelog/21467.txt new file mode 100644 index 00000000000..61125380167 --- /dev/null +++ b/.changelog/21467.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_glue_crawler: Add support for S3 event notifications +``` \ No newline at end of file From 8d89f6754ce6a56b7474748f1a563e97dd8e1659 Mon Sep 17 00:00:00 2001 From: Kit Ewbank Date: Mon, 25 Oct 2021 15:51:03 -0400 Subject: [PATCH 3/3] Tweak CHANGELOG entry. --- .changelog/21467.txt | 2 +- internal/service/glue/crawler.go | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.changelog/21467.txt b/.changelog/21467.txt index 61125380167..e33b8062db2 100644 --- a/.changelog/21467.txt +++ b/.changelog/21467.txt @@ -1,3 +1,3 @@ ```release-note:enhancement -resource/aws_glue_crawler: Add support for S3 event notifications +resource/aws_glue_crawler: Add `dlq_event_queue_arn` and `event_queue_arn` arguments to the `s3_target` configuration block ``` \ No newline at end of file diff --git a/internal/service/glue/crawler.go b/internal/service/glue/crawler.go index 85d77a0ac9e..798234080ae 100644 --- a/internal/service/glue/crawler.go +++ b/internal/service/glue/crawler.go @@ -119,30 +119,30 @@ func ResourceCrawler() *schema.Resource { Type: schema.TypeString, Optional: true, }, - "path": { - Type: schema.TypeString, - Required: true, + "dlq_event_queue_arn": { + Type: schema.TypeString, + Optional: true, + ValidateFunc: verify.ValidARN, + }, + "event_queue_arn": { + Type: schema.TypeString, + Optional: true, + ValidateFunc: verify.ValidARN, }, "exclusions": { Type: schema.TypeList, Optional: true, Elem: &schema.Schema{Type: schema.TypeString}, }, + "path": { + Type: schema.TypeString, + Required: true, + }, "sample_size": { Type: schema.TypeInt, Optional: true, ValidateFunc: validation.IntBetween(1, 249), }, - "event_queue_arn": { - Type: schema.TypeString, - Optional: true, - ValidateFunc: verify.ValidARN, - }, - "dlq_event_queue_arn": { - Type: schema.TypeString, - Optional: true, - ValidateFunc: verify.ValidARN, - }, }, }, },