diff --git a/cmd/app/exec.go b/cmd/app/exec.go index 0dff6109..2af4afa1 100644 --- a/cmd/app/exec.go +++ b/cmd/app/exec.go @@ -67,10 +67,7 @@ func exec(ctx context.Context) error { if err != nil { return err } - if !config.ValidateLinks { - v = nil - } - docProcessor, docTasks, err := document.New(config.DocumentWorkersCount, config.FailFast, reactorWG, documentNodes, config.ResourcesPath, dScheduler, v, rhRegistry, config.Hugo, config.Writer) + docProcessor, docTasks, err := document.New(config.DocumentWorkersCount, config.FailFast, reactorWG, documentNodes, config.ResourcesPath, dScheduler, v, rhRegistry, config.Hugo, config.Writer, config.SkipLinkValidation) if err != nil { return err } diff --git a/cmd/app/flags.go b/cmd/app/flags.go index 95ede279..326963f2 100644 --- a/cmd/app/flags.go +++ b/cmd/app/flags.go @@ -76,9 +76,9 @@ func configureFlags(command *cobra.Command) { "Supported content format extensions (exampel: .md)") _ = vip.BindPFlag("extracted-files-formats", command.Flags().Lookup("extracted-files-formats")) - command.Flags().Bool("validate-links", true, - "Links should be validated") - _ = vip.BindPFlag("validate-links", command.Flags().Lookup("validate-links")) + command.Flags().Bool("skip-link-validation", false, + "Links validation will be skipped") + _ = vip.BindPFlag("skip-link-validation", command.Flags().Lookup("skip-link-validation")) command.Flags().StringSlice("hosts-to-report", []string{}, "When a link has a host from the given array it will get reported") diff --git a/cmd/app/types.go b/cmd/app/types.go index afa20862..cd37c05d 100644 --- a/cmd/app/types.go +++ b/cmd/app/types.go @@ -24,8 +24,8 @@ type Options struct { DryRun bool `mapstructure:"dry-run"` Resolve bool `mapstructure:"resolve"` ExtractedFilesFormats []string `mapstructure:"extracted-files-formats"` - ValidateLinks bool `mapstructure:"validate-links"` HostsToReport []string `mapstructure:"hosts-to-report"` + SkipLinkValidation bool `mapstructure:"skip-link-validation"` } // Writers struct that collects all the writesr diff --git a/pkg/manifest/manifest.go b/pkg/manifest/manifest.go index 70c793c1..cf99058e 100644 --- a/pkg/manifest/manifest.go +++ b/pkg/manifest/manifest.go @@ -348,7 +348,9 @@ func propagateFrontmatter(node *Node, parent *Node, manifest *Node, _ registry.I if parent != nil { newFM := map[string]interface{}{} for k, v := range parent.Frontmatter { - newFM[k] = v + if k != "aliases" { + newFM[k] = v + } } for k, v := range node.Frontmatter { newFM[k] = v @@ -358,41 +360,52 @@ func propagateFrontmatter(node *Node, parent *Node, manifest *Node, _ registry.I return nil } +func propagateSkipValidation(node *Node, parent *Node, manifest *Node, _ registry.Interface) error { + if parent != nil && parent.SkipValidation { + node.SkipValidation = parent.SkipValidation + } + return nil +} + func setParent(node *Node, parent *Node, _ *Node, _ registry.Interface) error { node.parent = parent return nil } -// func calculateAliases(node *Node, parent *Node, _ *Node, _ registry.Interface) error { -// var ( -// nodeAliases []interface{} -// childAliases []interface{} -// formatted bool -// ) -// if nodeAliases, formatted = node.Frontmatter["aliases"].([]interface{}); node.Frontmatter != nil && node.Frontmatter["aliases"] != nil && !formatted { -// return fmt.Errorf("node X \n\n%s\n has invalid alias format", node) -// } -// for _, nodeAlias := range nodeAliases { -// for _, child := range node.Structure { -// if child.Frontmatter == nil { -// child.Frontmatter = map[string]interface{}{} -// } -// if child.Frontmatter["aliases"] == nil { -// child.Frontmatter["aliases"] = []interface{}{} -// } -// if childAliases, formatted = child.Frontmatter["aliases"].([]interface{}); !formatted { -// return fmt.Errorf("node \n\n%s\n has invalid alias format", child) -// } -// finalAlias := strings.TrimSuffix(child.Name(), ".md") + "/" -// if child.Name() == "_index.md" { -// finalAlias = "" -// } -// childAliases = append(childAliases, fmt.Sprintf("%s", nodeAlias)+"/"+finalAlias) -// child.Frontmatter["aliases"] = childAliases -// } -// } -// return nil -// } +func calculateAliases(node *Node, parent *Node, _ *Node, _ registry.Interface) error { + var ( + nodeAliases []interface{} + childAliases []interface{} + formatted bool + ) + if nodeAliases, formatted = node.Frontmatter["aliases"].([]interface{}); node.Frontmatter != nil && node.Frontmatter["aliases"] != nil && !formatted { + return fmt.Errorf("node X \n\n%s\n has invalid alias format", node) + } + for _, nodeAliasI := range nodeAliases { + for _, child := range node.Structure { + if child.Frontmatter == nil { + child.Frontmatter = map[string]interface{}{} + } + if child.Frontmatter["aliases"] == nil { + child.Frontmatter["aliases"] = []interface{}{} + } + if childAliases, formatted = child.Frontmatter["aliases"].([]interface{}); !formatted { + return fmt.Errorf("node \n\n%s\n has invalid alias format", child) + } + childAliasSuffix := strings.TrimSuffix(child.Name(), ".md") + if child.Name() == "_index.md" { + childAliasSuffix = "" + } + nodeAlias := fmt.Sprintf("%s", nodeAliasI) + if !strings.HasPrefix(nodeAlias, "/") { + return fmt.Errorf("there is a node with name %s that has an relative alias %s", node.Name(), nodeAlias) + } + childAliases = append(childAliases, path.Join(nodeAlias, childAliasSuffix)+"/") + child.Frontmatter["aliases"] = childAliases + } + } + return nil +} // ResolveManifest collects files in FileCollector from a given url and resourcehandlers.FileSource func ResolveManifest(url string, r registry.Interface) ([]*Node, error) { @@ -417,6 +430,8 @@ func ResolveManifest(url string, r registry.Interface) ([]*Node, error) { calculatePath, setParent, propagateFrontmatter, + propagateSkipValidation, + calculateAliases, ) if err != nil { return nil, err diff --git a/pkg/manifest/manifest_test.go b/pkg/manifest/manifest_test.go index c7d16863..2288485f 100644 --- a/pkg/manifest/manifest_test.go +++ b/pkg/manifest/manifest_test.go @@ -66,6 +66,7 @@ var _ = Describe("Manifest test", func() { Entry("covering directory merges", "merging"), Entry("covering manifest use cases", "manifest"), Entry("covering multisource", "multisource"), + Entry("covering aliases", "aliases"), ) DescribeTable("Errors", diff --git a/pkg/manifest/node.go b/pkg/manifest/node.go index bc3b2624..2279be05 100755 --- a/pkg/manifest/node.go +++ b/pkg/manifest/node.go @@ -22,7 +22,7 @@ type Node struct { FilesTreeType `yaml:",inline"` // Properties of the node - Properties map[string]interface{} `yaml:"properties,omitempty"` + SkipValidation bool `yaml:"skipValidation,omitempty"` // Frontmatter of the node Frontmatter map[string]interface{} `yaml:"frontmatter,omitempty"` // Type of node diff --git a/pkg/manifest/tests/contents/foo/bar.md b/pkg/manifest/tests/contents/docs/architecture/_index.md similarity index 100% rename from pkg/manifest/tests/contents/foo/bar.md rename to pkg/manifest/tests/contents/docs/architecture/_index.md diff --git a/pkg/manifest/tests/contents/docs/architecture/concept.md b/pkg/manifest/tests/contents/docs/architecture/concept.md new file mode 100644 index 00000000..e69de29b diff --git a/pkg/manifest/tests/manifests/aliases.yaml b/pkg/manifest/tests/manifests/aliases.yaml new file mode 100644 index 00000000..1b8c2470 --- /dev/null +++ b/pkg/manifest/tests/manifests/aliases.yaml @@ -0,0 +1,29 @@ +structure: +- dir: root + structure: + - dir: topic + structure: + - dir: new_section_1 + frontmatter: + aliases: + - "/root2/rebase" + - "/root3/rebase_slash/" + structure: + - file: /contents/README.md + - dir: subsection + structure: + - file: /contents/website/blog/2024/_index.md + - file: /contents/blogs/2024/foo.md + frontmatter: + aliases: + - "/root4/alias/" + - file: /contents/blogs/2024/two.md + frontmatter: + aliases: + - "/root4/normal/alias/" + - dir: architecture + frontmatter: + aliases: + - "/root" + structure: + - fileTree: /contents/docs \ No newline at end of file diff --git a/pkg/manifest/tests/results/aliases.yaml b/pkg/manifest/tests/results/aliases.yaml new file mode 100644 index 00000000..62904d2d --- /dev/null +++ b/pkg/manifest/tests/results/aliases.yaml @@ -0,0 +1,46 @@ +- file: README.md + type: file + source: https://github.com/gardener/docforge/blob/master/contents/README.md + path: root/topic/new_section_1 + frontmatter: + aliases: + - "/root2/rebase/README/" + - "/root3/rebase_slash/README/" +- file: _index.md + type: file + source: https://github.com/gardener/docforge/blob/master/contents/website/blog/2024/_index.md + path: root/topic/new_section_1/subsection + frontmatter: + aliases: + - "/root2/rebase/subsection/" + - "/root3/rebase_slash/subsection/" +- file: foo.md + type: file + source: https://github.com/gardener/docforge/blob/master/contents/blogs/2024/foo.md + path: root/topic/new_section_1/subsection + frontmatter: + aliases: + - "/root4/alias/" + - "/root2/rebase/subsection/foo/" + - "/root3/rebase_slash/subsection/foo/" +- file: two.md + type: file + source: https://github.com/gardener/docforge/blob/master/contents/blogs/2024/two.md + path: root + frontmatter: + aliases: + - "/root4/normal/alias/" +- file: _index.md + type: file + source: https://github.com/gardener/docforge/blob/master/contents/docs/architecture/_index.md + path: root/architecture + frontmatter: + aliases: + - "/root/" +- file: concept.md + type: file + source: https://github.com/gardener/docforge/blob/master/contents/docs/architecture/concept.md + path: root/architecture + frontmatter: + aliases: + - "/root/concept/" \ No newline at end of file diff --git a/pkg/registry/registry.go b/pkg/registry/registry.go index 7cc95c9d..c3747ce1 100644 --- a/pkg/registry/registry.go +++ b/pkg/registry/registry.go @@ -49,7 +49,7 @@ func NewRegistry(resourcerepoHosts ...repositoryhost.Interface) Interface { } func (r *registry) Client(url string) httpclient.Client { - rh, _, err := r.repositoryHost(url) + rh, _, err := r.anyRepositoryHost(url) if err != nil { return http.DefaultClient } @@ -57,7 +57,7 @@ func (r *registry) Client(url string) httpclient.Client { } func (r *registry) Tree(resourceURL string) ([]string, error) { - rh, url, err := r.repositoryHost(resourceURL) + rh, url, err := r.anyRepositoryHost(resourceURL) if err != nil { return []string{}, err } @@ -65,7 +65,7 @@ func (r *registry) Tree(resourceURL string) ([]string, error) { } func (r *registry) Read(ctx context.Context, resourceURL string) ([]byte, error) { - rh, url, err := r.repositoryHost(resourceURL) + rh, url, err := r.anyRepositoryHost(resourceURL) if err != nil { return []byte{}, err } @@ -73,7 +73,7 @@ func (r *registry) Read(ctx context.Context, resourceURL string) ([]byte, error) } func (r *registry) ResolveRelativeLink(source string, relativeLink string) (string, error) { - rh, url, err := r.repositoryHost(source) + rh, url, err := r.anyRepositoryHost(source) if err != nil { return "", err } @@ -81,7 +81,7 @@ func (r *registry) ResolveRelativeLink(source string, relativeLink string) (stri } func (r *registry) ReadGitInfo(ctx context.Context, resourceURL string) ([]byte, error) { - rh, url, err := r.gitInfoRepositoryHost(resourceURL) + rh, url, err := r.githubRepositoryHost(resourceURL) if err != nil { return []byte{}, err } @@ -89,15 +89,18 @@ func (r *registry) ReadGitInfo(ctx context.Context, resourceURL string) ([]byte, } func (r *registry) LoadRepository(ctx context.Context, resourceURL string) error { - rh, err := r.get(resourceURL) + rh, err := r.acceptGithubRH(resourceURL) if err != nil { + if err.Error() == fmt.Sprintf("no sutiable repository host for %s", resourceURL) { + return nil + } return err } return rh.LoadRepository(ctx, resourceURL) } -func (r *registry) repositoryHost(resourceURL string) (repositoryhost.Interface, *repositoryhost.URL, error) { - rh, err := r.get(resourceURL) +func (r *registry) anyRepositoryHost(resourceURL string) (repositoryhost.Interface, *repositoryhost.URL, error) { + rh, err := r.acceptAnyRH(resourceURL) if err != nil { return nil, nil, err } @@ -109,12 +112,12 @@ func (r *registry) repositoryHost(resourceURL string) (repositoryhost.Interface, } func (r *registry) ResourceURL(resourceURL string) (*repositoryhost.URL, error) { - _, url, err := r.repositoryHost(resourceURL) + _, url, err := r.anyRepositoryHost(resourceURL) return url, err } -func (r *registry) gitInfoRepositoryHost(resourceURL string) (repositoryhost.Interface, *repositoryhost.URL, error) { - rh, err := r.getGitInfo(resourceURL) +func (r *registry) githubRepositoryHost(resourceURL string) (repositoryhost.Interface, *repositoryhost.URL, error) { + rh, err := r.acceptGithubRH(resourceURL) if err != nil { return nil, nil, err } @@ -126,7 +129,7 @@ func (r *registry) gitInfoRepositoryHost(resourceURL string) (repositoryhost.Int return rh, url, nil } -func (r *registry) get(uri string) (repositoryhost.Interface, error) { +func (r *registry) acceptAnyRH(uri string) (repositoryhost.Interface, error) { for _, h := range r.repoHosts { if h.Accept(uri) { return h, nil @@ -135,7 +138,7 @@ func (r *registry) get(uri string) (repositoryhost.Interface, error) { return nil, fmt.Errorf("no sutiable repository host for %s", uri) } -func (r *registry) getGitInfo(uri string) (repositoryhost.Interface, error) { +func (r *registry) acceptGithubRH(uri string) (repositoryhost.Interface, error) { for _, h := range r.repoHosts { if h.Repositories() != nil && h.Accept(uri) { return h, nil diff --git a/pkg/registry/repositoryhost/github_http_cache_test.go b/pkg/registry/repositoryhost/github_http_cache_test.go index ed22d97b..17171c31 100644 --- a/pkg/registry/repositoryhost/github_http_cache_test.go +++ b/pkg/registry/repositoryhost/github_http_cache_test.go @@ -98,18 +98,6 @@ var _ = Describe("Github cache test", func() { git.GetTreeReturns(&tree, nil, nil) ghc.LoadRepository(context.TODO(), "https://github.com/gardener/docforge/blob/master/README.md") - Describe("#GetRateLimit", func() { - BeforeEach(func() { - rls.RateLimitsReturns(nil, nil, errors.New("yataa error")) - }) - - It("return correct rate limit", func() { - _, _, _, err := ghc.GetRateLimit(context.TODO()) - Expect(err).To(Equal(errors.New("yataa error"))) - - }) - }) - testRepositoryHost(ghc) It("repository updated after loading", func() { diff --git a/pkg/workers/document/document_worker.go b/pkg/workers/document/document_worker.go index bb684dd8..e40097fc 100644 --- a/pkg/workers/document/document_worker.go +++ b/pkg/workers/document/document_worker.go @@ -39,8 +39,9 @@ type Worker struct { resourcesRoot string - repositoryhosts registry.Interface - hugo hugo.Hugo + repositoryhosts registry.Interface + hugo hugo.Hugo + skipLinkValidation bool } // docContent defines a document content @@ -51,7 +52,7 @@ type docContent struct { } // NewDocumentWorker creates Worker objects -func NewDocumentWorker(resourcesRoot string, downloader downloader.Interface, validator linkvalidator.Interface, linkResolver linkresolver.Interface, rh registry.Interface, hugo hugo.Hugo, writer writers.Writer) *Worker { +func NewDocumentWorker(resourcesRoot string, downloader downloader.Interface, validator linkvalidator.Interface, linkResolver linkresolver.Interface, rh registry.Interface, hugo hugo.Hugo, writer writers.Writer, skipLinkValidation bool) *Worker { return &Worker{ linkResolver, downloader, @@ -60,6 +61,7 @@ func NewDocumentWorker(resourcesRoot string, downloader downloader.Interface, va resourcesRoot, rh, hugo, + skipLinkValidation, } } @@ -195,7 +197,9 @@ func (d *linkResolverTask) resolveLink(dest string, isEmbeddable bool) (string, if url.IsAbs() { if _, err = d.repositoryhosts.ResourceURL(dest); err != nil { // absolute link that is not referencing any documentation page - d.validator.ValidateLink(dest, d.source) + if !d.node.SkipValidation && !d.skipLinkValidation { + d.validator.ValidateLink(dest, d.source) + } return dest, nil } } diff --git a/pkg/workers/document/document_worker_test.go b/pkg/workers/document/document_worker_test.go index 1258f3a0..130d0e37 100644 --- a/pkg/workers/document/document_worker_test.go +++ b/pkg/workers/document/document_worker_test.go @@ -53,7 +53,7 @@ var _ = Describe("Document resolving", func() { return s1, nil }) w = &writersfakes.FakeWriter{} - dw = document.NewDocumentWorker("__resources", df, vf, lrf, registry, hugo, w) + dw = document.NewDocumentWorker("__resources", df, vf, lrf, registry, hugo, w, false) }) Context("#ProcessNode", func() { diff --git a/pkg/workers/document/job.go b/pkg/workers/document/job.go index e8382233..1c4e210e 100644 --- a/pkg/workers/document/job.go +++ b/pkg/workers/document/job.go @@ -32,7 +32,7 @@ type Processor interface { } // New creates a new Worker -func New(workerCount int, failFast bool, wg *sync.WaitGroup, structure []*manifest.Node, resourcesRoot string, downloadJob downloader.Interface, validator linkvalidator.Interface, rhs registry.Interface, hugo hugo.Hugo, writer writers.Writer) (Processor, taskqueue.QueueController, error) { +func New(workerCount int, failFast bool, wg *sync.WaitGroup, structure []*manifest.Node, resourcesRoot string, downloadJob downloader.Interface, validator linkvalidator.Interface, rhs registry.Interface, hugo hugo.Hugo, writer writers.Writer, skipLinkValidation bool) (Processor, taskqueue.QueueController, error) { lr := &linkresolver.LinkResolver{ Repositoryhosts: rhs, Hugo: hugo, @@ -47,7 +47,7 @@ func New(workerCount int, failFast bool, wg *sync.WaitGroup, structure []*manife } } } - worker := NewDocumentWorker(resourcesRoot, downloadJob, validator, lr, rhs, hugo, writer) + worker := NewDocumentWorker(resourcesRoot, downloadJob, validator, lr, rhs, hugo, writer, skipLinkValidation) queue, err := taskqueue.New("Document", workerCount, worker.execute, failFast, wg) if err != nil { return nil, nil, err diff --git a/pkg/workers/linkvalidator/validator.go b/pkg/workers/linkvalidator/validator.go index 37a523c3..b68cce04 100644 --- a/pkg/workers/linkvalidator/validator.go +++ b/pkg/workers/linkvalidator/validator.go @@ -83,9 +83,9 @@ func (v *ValidatorWorker) Validate(ctx context.Context, LinkDestination string, if req, err = http.NewRequestWithContext(ctx, http.MethodHead, absLinkDestination, nil); err != nil { return fmt.Errorf("failed to prepare HEAD validation request: %v", err) } - if resp, err = doValidation(req, client); err != nil { + if resp, err = doValidation(req, client); err != nil && !errors.Is(err, context.DeadlineExceeded) { klog.Warningf("failed to validate absolute link for %s from source %s: %v\n", LinkDestination, ContentSourcePath, err) - } else if resp.StatusCode >= 400 && resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusUnauthorized { + } else if errors.Is(err, context.DeadlineExceeded) || (resp.StatusCode >= 400 && resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusUnauthorized) { // on error status code different from authorization errors // retry GET ctx, cancel = context.WithTimeout(ctx, 30*time.Second) // reset the context for the GET request