From 498e6962ec767a366d92e505d3dbc9ae5ba13e1c Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Wed, 14 Jul 2021 15:28:12 +0300 Subject: [PATCH 1/8] sample size --- aws/resource_aws_glue_crawler.go | 22 +++++++-- aws/resource_aws_glue_crawler_test.go | 66 +++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/aws/resource_aws_glue_crawler.go b/aws/resource_aws_glue_crawler.go index e6b05fc32cb..32c9229b150 100644 --- a/aws/resource_aws_glue_crawler.go +++ b/aws/resource_aws_glue_crawler.go @@ -130,6 +130,11 @@ func resourceAwsGlueCrawler() *schema.Resource { Optional: true, Elem: &schema.Schema{Type: schema.TypeString}, }, + "sample_size": { + Type: schema.TypeInt, + Optional: true, + ValidateFunc: validation.IntBetween(1, 249), + }, }, }, }, @@ -521,13 +526,18 @@ func expandGlueS3Target(cfg map[string]interface{}) *glue.S3Target { Path: aws.String(cfg["path"].(string)), } - if connection, ok := cfg["connection_name"]; ok { - target.ConnectionName = aws.String(connection.(string)) + if v, ok := cfg["connection_name"]; ok { + target.ConnectionName = aws.String(v.(string)) } - if exclusions, ok := cfg["exclusions"]; ok { - target.Exclusions = expandStringList(exclusions.([]interface{})) + if v, ok := cfg["exclusions"]; ok { + target.Exclusions = expandStringList(v.([]interface{})) } + + if v, ok := cfg["sample_size"]; ok { + target.SampleSize = aws.Int64(int64(v.(int))) + } + return target } @@ -768,6 +778,10 @@ func flattenGlueS3Targets(s3Targets []*glue.S3Target) []map[string]interface{} { attrs["path"] = aws.StringValue(s3Target.Path) attrs["connection_name"] = aws.StringValue(s3Target.ConnectionName) + if s3Target.SampleSize != nil { + attrs["sample_size"] = aws.Int64Value(s3Target.SampleSize) + } + result = append(result, attrs) } return result diff --git a/aws/resource_aws_glue_crawler_test.go b/aws/resource_aws_glue_crawler_test.go index bc59b615e49..3e8a1b6a63d 100644 --- a/aws/resource_aws_glue_crawler_test.go +++ b/aws/resource_aws_glue_crawler_test.go @@ -38,10 +38,11 @@ func testSweepGlueCrawlers(region string) error { for _, crawler := range page.Crawlers { name := aws.StringValue(crawler.Name) - log.Printf("[INFO] Deleting Glue Crawler: %s", name) - _, err := conn.DeleteCrawler(&glue.DeleteCrawlerInput{ - Name: aws.String(name), - }) + r := resourceAwsGlueCrawler() + d := r.Data(nil) + d.SetId(name) + + err := r.Delete(d, client) if err != nil { log.Printf("[ERROR] Failed to delete Glue Crawler %s: %s", name, err) } @@ -629,6 +630,42 @@ func TestAccAWSGlueCrawler_S3Target_ConnectionName(t *testing.T) { }) } +func TestAccAWSGlueCrawler_S3Target_SampleSize(t *testing.T) { + var crawler glue.Crawler + rName := acctest.RandomWithPrefix("tf-acc-test") + resourceName := "aws_glue_crawler.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + ErrorCheck: testAccErrorCheck(t, glue.EndpointsID), + Providers: testAccProviders, + CheckDestroy: testAccCheckAWSGlueCrawlerDestroy, + Steps: []resource.TestStep{ + { + Config: testAccGlueCrawlerConfig_S3TargetSampleSize(rName, 1), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "s3_target.#", "1"), + resource.TestCheckResourceAttr(resourceName, "s3_target.0.sample_size", "1"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + { + Config: testAccGlueCrawlerConfig_S3TargetSampleSize(rName, 2), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "s3_target.#", "1"), + resource.TestCheckResourceAttr(resourceName, "s3_target.0.sample_size", "2"), + ), + }, + }, + }) +} + func TestAccAWSGlueCrawler_S3Target_Exclusions(t *testing.T) { var crawler glue.Crawler rName := acctest.RandomWithPrefix("tf-acc-test") @@ -2550,3 +2587,24 @@ resource "aws_glue_crawler" "test" { } `, rName, policy) } + +func testAccGlueCrawlerConfig_S3TargetSampleSize(rName string, size int) string { + return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` +resource "aws_glue_catalog_database" "test" { + name = %[1]q +} + +resource "aws_glue_crawler" "test" { + depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] + + database_name = aws_glue_catalog_database.test.name + name = %[1]q + role = aws_iam_role.test.name + + s3_target { + sample_size = %[2]d + path = "s3://bucket1" + } +} +`, rName, size) +} From d928dfea5370782f248dd166833f8ef61fa98b1f Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Thu, 15 Jul 2021 09:49:06 +0300 Subject: [PATCH 2/8] test --- aws/resource_aws_glue_crawler.go | 47 ++++++++++---------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/aws/resource_aws_glue_crawler.go b/aws/resource_aws_glue_crawler.go index 32c9229b150..b780131b7ee 100644 --- a/aws/resource_aws_glue_crawler.go +++ b/aws/resource_aws_glue_crawler.go @@ -79,15 +79,10 @@ func resourceAwsGlueCrawler() *schema.Resource { Elem: &schema.Schema{Type: schema.TypeString}, }, "schema_change_policy": { - Type: schema.TypeList, - Optional: true, - DiffSuppressFunc: func(k, old, new string, d *schema.ResourceData) bool { - if old == "1" && new == "0" { - return true - } - return false - }, - MaxItems: 1, + Type: schema.TypeList, + Optional: true, + DiffSuppressFunc: suppressMissingOptionalConfigurationBlock, + MaxItems: 1, Elem: &schema.Resource{ Schema: map[string]*schema.Schema{ "delete_behavior": { @@ -238,15 +233,10 @@ func resourceAwsGlueCrawler() *schema.Resource { ValidateFunc: validation.StringIsJSON, }, "lineage_configuration": { - Type: schema.TypeList, - Optional: true, - MaxItems: 1, - DiffSuppressFunc: func(k, old, new string, d *schema.ResourceData) bool { - if old == "1" && new == "0" { - return true - } - return false - }, + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + DiffSuppressFunc: suppressMissingOptionalConfigurationBlock, Elem: &schema.Resource{ Schema: map[string]*schema.Schema{ "crawler_lineage_settings": { @@ -259,15 +249,10 @@ func resourceAwsGlueCrawler() *schema.Resource { }, }, "recrawl_policy": { - Type: schema.TypeList, - Optional: true, - MaxItems: 1, - DiffSuppressFunc: func(k, old, new string, d *schema.ResourceData) bool { - if old == "1" && new == "0" { - return true - } - return false - }, + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + DiffSuppressFunc: suppressMissingOptionalConfigurationBlock, Elem: &schema.Resource{ Schema: map[string]*schema.Schema{ "recrawl_behavior": { @@ -408,16 +393,14 @@ func updateCrawlerInput(d *schema.ResourceData, crawlerName string) (*glue.Updat crawlerInput.TablePrefix = aws.String(d.Get("table_prefix").(string)) - if configuration, ok := d.GetOk("configuration"); ok { - crawlerInput.Configuration = aws.String(configuration.(string)) - } - if v, ok := d.GetOk("configuration"); ok { configuration, err := structure.NormalizeJsonString(v) if err != nil { return nil, fmt.Errorf("Configuration contains an invalid JSON: %v", err) } crawlerInput.Configuration = aws.String(configuration) + } else { + crawlerInput.Configuration = aws.String("") } if securityConfiguration, ok := d.GetOk("security_configuration"); ok { @@ -534,7 +517,7 @@ func expandGlueS3Target(cfg map[string]interface{}) *glue.S3Target { target.Exclusions = expandStringList(v.([]interface{})) } - if v, ok := cfg["sample_size"]; ok { + if v, ok := cfg["sample_size"]; ok && v.(int) > 0 { target.SampleSize = aws.Int64(int64(v.(int))) } From b0616ab51bd9ba9ff3a532ac5ebaff5129d82d0f Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Thu, 15 Jul 2021 21:07:38 +0300 Subject: [PATCH 3/8] docs --- website/docs/r/glue_crawler.html.markdown | 1 + 1 file changed, 1 insertion(+) diff --git a/website/docs/r/glue_crawler.html.markdown b/website/docs/r/glue_crawler.html.markdown index 658cdd8a727..cca557c0905 100644 --- a/website/docs/r/glue_crawler.html.markdown +++ b/website/docs/r/glue_crawler.html.markdown @@ -168,6 +168,7 @@ The following arguments are supported: * `path` - (Required) The path to the Amazon S3 target. * `connection_name` - (Optional) The name of a connection which allows crawler to access data in S3 within a VPC. * `exclusions` - (Optional) A list of glob patterns used to exclude from the crawl. +* `sample_size` - (Optional) Sets the number of files in each leaf folder to be crawled when crawling ample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249. ### Catalog Target From 004c539f2864b208b62548c098f89dba656c438c Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Thu, 15 Jul 2021 21:08:21 +0300 Subject: [PATCH 4/8] changelog --- .changelog/20203.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/20203.txt diff --git a/.changelog/20203.txt b/.changelog/20203.txt new file mode 100644 index 00000000000..2875cfb524e --- /dev/null +++ b/.changelog/20203.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_glue_crawler: Add `sample_size` argument in `s3_target` block. +``` \ No newline at end of file From 4489d94c051c39f8e6d94d18708f0219d1fcf04a Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Thu, 15 Jul 2021 21:19:50 +0300 Subject: [PATCH 5/8] config --- aws/resource_aws_glue_crawler_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/aws/resource_aws_glue_crawler_test.go b/aws/resource_aws_glue_crawler_test.go index 3e8a1b6a63d..e640d1f11ea 100644 --- a/aws/resource_aws_glue_crawler_test.go +++ b/aws/resource_aws_glue_crawler_test.go @@ -990,6 +990,13 @@ func TestAccAWSGlueCrawler_Configuration(t *testing.T) { ImportState: true, ImportStateVerify: true, }, + { + Config: testAccGlueCrawlerConfig_Configuration(rName, ""), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "configuration", ""), + ), + }, }, }) } @@ -2598,8 +2605,8 @@ resource "aws_glue_crawler" "test" { depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] database_name = aws_glue_catalog_database.test.name - name = %[1]q - role = aws_iam_role.test.name + name = %[1]q + role = aws_iam_role.test.name s3_target { sample_size = %[2]d From 6907e07fb7234aea2afd026f072279d009cea3ec Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Thu, 15 Jul 2021 21:23:27 +0300 Subject: [PATCH 6/8] test fmt --- aws/resource_aws_glue_crawler_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/resource_aws_glue_crawler_test.go b/aws/resource_aws_glue_crawler_test.go index e640d1f11ea..a131dc751af 100644 --- a/aws/resource_aws_glue_crawler_test.go +++ b/aws/resource_aws_glue_crawler_test.go @@ -2605,7 +2605,7 @@ resource "aws_glue_crawler" "test" { depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] database_name = aws_glue_catalog_database.test.name - name = %[1]q + name = %[1]q role = aws_iam_role.test.name s3_target { From 050ca5d0508f5ce1ab77476c58ce838813e8f040 Mon Sep 17 00:00:00 2001 From: drfaust92 Date: Thu, 15 Jul 2021 21:26:48 +0300 Subject: [PATCH 7/8] test fmt --- aws/resource_aws_glue_crawler_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws/resource_aws_glue_crawler_test.go b/aws/resource_aws_glue_crawler_test.go index a131dc751af..7cd5987beaf 100644 --- a/aws/resource_aws_glue_crawler_test.go +++ b/aws/resource_aws_glue_crawler_test.go @@ -2605,8 +2605,8 @@ resource "aws_glue_crawler" "test" { depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] database_name = aws_glue_catalog_database.test.name - name = %[1]q - role = aws_iam_role.test.name + name = %[1]q + role = aws_iam_role.test.name s3_target { sample_size = %[2]d From 45ac5294a6c1b9b781de568f0affb8a8ed90b08f Mon Sep 17 00:00:00 2001 From: Ilia Lazebnik Date: Mon, 19 Jul 2021 15:42:14 +0300 Subject: [PATCH 8/8] Update website/docs/r/glue_crawler.html.markdown Co-authored-by: Kit Ewbank --- website/docs/r/glue_crawler.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/r/glue_crawler.html.markdown b/website/docs/r/glue_crawler.html.markdown index cca557c0905..89c47f9413f 100644 --- a/website/docs/r/glue_crawler.html.markdown +++ b/website/docs/r/glue_crawler.html.markdown @@ -168,7 +168,7 @@ The following arguments are supported: * `path` - (Required) The path to the Amazon S3 target. * `connection_name` - (Optional) The name of a connection which allows crawler to access data in S3 within a VPC. * `exclusions` - (Optional) A list of glob patterns used to exclude from the crawl. -* `sample_size` - (Optional) Sets the number of files in each leaf folder to be crawled when crawling ample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249. +* `sample_size` - (Optional) Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249. ### Catalog Target