From dc98682e7e5817eb726cdf0a4452c4e89de1b64d Mon Sep 17 00:00:00 2001
From: Alper Rifat Ulucinar
Date: Thu, 9 Mar 2023 21:29:48 +0300
Subject: [PATCH] [scraper]: Add support for deducing resource name from file path if scraper cannot extract it from the markdown file.

Signed-off-by: Alper Rifat Ulucinar
---
 cmd/scraper/main.go  |  2 ++
 pkg/registry/meta.go | 21 +++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/cmd/scraper/main.go b/cmd/scraper/main.go
index 5c33bd68..0ec2af2b 100644
--- a/cmd/scraper/main.go
+++ b/cmd/scraper/main.go
@@ -18,6 +18,7 @@ func main() {
 		app = kingpin.New(filepath.Base(os.Args[0]), "Terraform Registry provider metadata scraper.").DefaultEnvars()
 		outFile = app.Flag("out", "Provider metadata output file path").Short('o').Default("provider-metadata.yaml").OpenFile(os.O_CREATE, 0644)
 		providerName = app.Flag("name", "Provider name").Short('n').Required().String()
+		resourcePrefix = app.Flag("resource-prefix", `Terraform resource name prefix for the Terraform provider. For example, this is "google" for the google Terraform provider.`).String()
 		codeXPath = app.Flag("code-xpath", "Code XPath expression").Default(`//code[@class="language-terraform" or @class="language-hcl"]/text()`).String()
 		preludeXPath = app.Flag("prelude-xpath", "Prelude XPath expression").Default(`//text()[contains(., "description") and contains(., "page_title")]`).String()
 		fieldXPath = app.Flag("field-xpath", "Field documentation XPath expression").Default(`//ul/li//code[1]/text()`).String()
@@ -37,6 +38,7 @@ func main() {
 		FieldDocXPath: *fieldXPath,
 		ImportXPath: *importXPath,
 		FileExtensions: *fileExtensions,
+		ResourcePrefix: *resourcePrefix,
 	}), "Failed to scrape Terraform provider metadata")
 	kingpin.FatalIfError(pm.Store((*outFile).Name()), "Failed to store Terraform provider metadata to file: %s", (*outFile).Name())
 }
diff --git a/pkg/registry/meta.go b/pkg/registry/meta.go
index 56389121..0be317ed 100644
--- a/pkg/registry/meta.go
+++ b/pkg/registry/meta.go
@@ -59,7 +59,19 @@ func (r *Resource) addExampleManifest(file *hcl.File, body *hclsyntax.Block) err
 	return nil
 }
 
-func (r *Resource) scrapeExamples(doc *html.Node, codeElXPath string, debug bool) error { // nolint: gocyclo
+func getResourceNameFromPath(path, resourcePrefix string) string {
+	tokens := strings.Split(filepath.Base(path), ".")
+	if len(tokens) < 2 {
+		return ""
+	}
+	prefix := ""
+	if len(resourcePrefix) != 0 {
+		prefix = resourcePrefix + "_"
+	}
+	return fmt.Sprintf("%s%s", prefix, tokens[0])
+}
+
+func (r *Resource) scrapeExamples(doc *html.Node, codeElXPath string, path string, resourcePrefix string, debug bool) error { // nolint: gocyclo
 	resourceName := r.Title
 	nodes := htmlquery.Find(doc, codeElXPath)
 	for _, n := range nodes {
@@ -83,6 +95,9 @@ func (r *Resource) scrapeExamples(doc *html.Node, codeElXPath string, debug bool
 		}
 		body.Blocks = trimmed
 		// first try an exact match to find the example
+		if len(resourceName) == 0 {
+			resourceName = getResourceNameFromPath(path, resourcePrefix)
+		}
 		if err := r.findExampleBlock(f, body.Blocks, &resourceName, true); err != nil {
 			return err
 		}
@@ -441,7 +456,7 @@ func (r *Resource) scrape(path string, config *ScrapeConfiguration) error {
 	r.scrapeFieldDocs(doc, config.FieldDocXPath)
 	r.scrapeImportStatements(doc, config.ImportXPath)
 
-	return r.scrapeExamples(doc, config.CodeXPath, config.Debug)
+	return r.scrapeExamples(doc, config.CodeXPath, path, config.ResourcePrefix, config.Debug)
 }
 
 // ScrapeConfiguration is a configurator for the scraper
@@ -460,6 +475,8 @@ type ScrapeConfiguration struct {
 	ImportXPath string
 	// FileExtensions extensions of the files to be scraped
 	FileExtensions []string
+	// ResourcePrefix Terraform resource name prefix for the Terraform provider
+	ResourcePrefix string
 }
 
 func (sc *ScrapeConfiguration) hasExpectedExtension(fileName string) bool {