diff --git a/cmd/app/cmd.go b/cmd/app/cmd.go index c09b10b5..fd47e53f 100644 --- a/cmd/app/cmd.go +++ b/cmd/app/cmd.go @@ -3,6 +3,8 @@ package app import ( "context" "flag" + "io" + "os" "github.com/gardener/docforge/pkg/hugo" "github.com/spf13/cobra" @@ -20,6 +22,7 @@ type cmdFlags struct { markdownFmt bool ghOAuthToken string dryRun bool + resolve bool clientMetering bool hugo bool hugoPrettyUrls bool @@ -71,7 +74,9 @@ func (flags *cmdFlags) Configure(command *cobra.Command) { command.Flags().BoolVar(&flags.markdownFmt, "markdownfmt", true, "Applies formatting rules to source markdown.") command.Flags().BoolVar(&flags.dryRun, "dry-run", false, - "Resolves and prints the resolved documentation structure without downloading anything.") + "Runs the command end-to-end but instead of writing files, it will output the projected file/folder hierarchy to the standard output and statistics for the processing of each file.") + command.Flags().BoolVar(&flags.resolve, "resolve", false, + "Resolves the documentation structure and prints it to the standard output. 
The resolution expands nodeSelector constructs into node hierarchies.") command.Flags().IntVar(&flags.minWorkersCount, "min-workers", 10, "Minimum number of parallel workers.") command.Flags().IntVar(&flags.maxWorkersCount, "max-workers", 25, @@ -92,9 +97,10 @@ func (flags *cmdFlags) Configure(command *cobra.Command) { // NewOptions creates an options object from flags func NewOptions(f *cmdFlags) *Options { var ( - tokens map[string]string - metering *Metering - hugoOptions *hugo.Options + tokens map[string]string + metering *Metering + hugoOptions *hugo.Options + dryRunWriter io.Writer ) if len(f.ghOAuthToken) > 0 { tokens = map[string]string{ @@ -115,6 +121,10 @@ func NewOptions(f *cmdFlags) *Options { } } + if f.dryRun { + dryRunWriter = os.Stdout + } + return &Options{ DestinationPath: f.destinationPath, FailFast: f.failFast, @@ -125,6 +135,8 @@ func NewOptions(f *cmdFlags) *Options { MarkdownFmt: f.markdownFmt, GitHubTokens: tokens, Metering: metering, + DryRunWriter: dryRunWriter, + Resolve: f.resolve, Hugo: hugoOptions, } } diff --git a/cmd/app/factory.go b/cmd/app/factory.go index e0ff88cf..1b441cf4 100644 --- a/cmd/app/factory.go +++ b/cmd/app/factory.go @@ -2,6 +2,7 @@ package app import ( "context" + "io" "path/filepath" "github.com/gardener/docforge/pkg/hugo" @@ -30,6 +31,8 @@ type Options struct { MarkdownFmt bool GitHubTokens map[string]string Metering *Metering + DryRunWriter io.Writer + Resolve bool Hugo *hugo.Options } @@ -41,6 +44,7 @@ type Metering struct { // NewReactor creates a Reactor from Options func NewReactor(ctx context.Context, options *Options) *reactor.Reactor { + dryRunWriters := writers.NewDryRunWritersFactory(options.DryRunWriter) o := &reactor.Options{ MaxWorkersCount: options.MaxWorkersCount, MinWorkersCount: options.MinWorkersCount, @@ -50,13 +54,20 @@ func NewReactor(ctx context.Context, options *Options) *reactor.Reactor { ResourceDownloadWorkersCount: options.ResourceDownloadWorkersCount, MarkdownFmt: options.MarkdownFmt, 
Processor: nil, - Writer: &writers.FSWriter{ + ResourceHandlers: initResourceHanlders(ctx, options), + DryRunWriter: dryRunWriters, + Resolve: options.Resolve, + } + if options.DryRunWriter != nil { + o.Writer = dryRunWriters.GetWriter(options.DestinationPath) + o.ResourceDownloadWriter = dryRunWriters.GetWriter(filepath.Join(options.DestinationPath, options.ResourcesPath)) + } else { + o.Writer = &writers.FSWriter{ Root: options.DestinationPath, - }, - ResourceDownloadWriter: &writers.FSWriter{ + } + o.ResourceDownloadWriter = &writers.FSWriter{ Root: filepath.Join(options.DestinationPath, options.ResourcesPath), - }, - ResourceHandlers: initResourceHanlders(ctx, options), + } } if options.Hugo != nil { @@ -76,8 +87,12 @@ func WithHugo(reactorOptions *reactor.Options, o *Options) { hugo.NewProcessor(hugoOptions), }, } - hugoOptions.Writer = &writers.FSWriter{ - Root: filepath.Join(o.DestinationPath), + if o.DryRunWriter != nil { + hugoOptions.Writer = reactorOptions.Writer + } else { + hugoOptions.Writer = &writers.FSWriter{ + Root: filepath.Join(o.DestinationPath), + } } reactorOptions.Writer = hugo.NewWriter(hugoOptions) } diff --git a/pkg/api/nodes.go b/pkg/api/nodes.go index e741c103..ee898f41 100755 --- a/pkg/api/nodes.go +++ b/pkg/api/nodes.go @@ -130,6 +130,18 @@ func (n *Node) Peers() []*Node { return peers } +// GetStats returns statistics for this node +func (n *Node) GetStats() []*Stat { + return n.stats +} + +// AddStats appends Stats +func (n *Node) AddStats(s ...*Stat) { + for _, stat := range s { + n.stats = append(n.stats, stat) + } +} + // FindNodeByContentSource traverses up and then all around the // tree paths in the node's documentation strcuture, looking for // a node that has contentSource path nodeContentSource diff --git a/pkg/api/stats.go b/pkg/api/stats.go new file mode 100644 index 00000000..45dbe764 --- /dev/null +++ b/pkg/api/stats.go @@ -0,0 +1,8 @@ +package api + +// Stat represents a category recorded by StatsRecorder +type Stat 
struct { + Title string + Figures string + Details []string +} diff --git a/pkg/api/types.go b/pkg/api/types.go index ce7dceb7..ded605b1 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -105,6 +105,8 @@ type Node struct { // their absolute form for the match // TODO: update this doc LinksSubstitutes LinkSubstitutes `yaml:"linksSubstitutes,omitempty"` + + stats []*Stat } // NodeSelector is an specification for selecting subnodes (children) for a node. @@ -195,6 +197,8 @@ type LocalityDomain struct { DownloadSubstitutes map[string]string `yaml:"downloadSubstitutes,omitempty"` } +// LocalityDomainMap maps domains such as github.com/gardener/gardener +// to LocalityDomainValues type LocalityDomainMap map[string]*LocalityDomainValue // LocalityDomainValue encapsulates the memebers of a @@ -211,8 +215,12 @@ type LocalityDomainValue struct { LinksMatchers `yaml:",inline"` } +// LinkSubstitutes is the mapping between absolute links +// and substitutions for them type LinkSubstitutes map[string]*LinkSubstitute +// LinkSubstitute comprises substitutes for various link details +// commonly found in markup type LinkSubstitute struct { Text *string `yaml:"text,omitempty"` Destination *string `yaml:"destination,omitempty"` diff --git a/pkg/jobs/controller_test.go b/pkg/jobs/controller_test.go index f7e12590..c458fc82 100644 --- a/pkg/jobs/controller_test.go +++ b/pkg/jobs/controller_test.go @@ -82,13 +82,13 @@ func TestController(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), tc.timeout) defer cancel() job := &Job{ - FailFast: tc.failFast, - ID: "Test", - MaxWorkers: tc.workersCount, - MinWorkers: tc.workersCount, + FailFast: tc.failFast, + ID: "Test", + MaxWorkers: tc.workersCount, + MinWorkers: tc.workersCount, IsWorkerExitsOnEmptyQueue: true, - Worker: WorkerFunc(tc.worker.work), - Queue: NewWorkQueue(tc.tasksCount), + Worker: WorkerFunc(tc.worker.work), + Queue: NewWorkQueue(tc.tasksCount), } c := NewController(job) diff --git 
a/pkg/reactor/content_processor.go b/pkg/reactor/content_processor.go index c8f10f63..3d2661c3 100644 --- a/pkg/reactor/content_processor.go +++ b/pkg/reactor/content_processor.go @@ -109,6 +109,13 @@ func (c *NodeContentProcessor) reconcileMDLinks(ctx context.Context, docNode *ap return destination, text, title, err } } + if docNode != nil { + if _destination != string(destination) { + recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> %s", string(destination), _destination)) + } else { + recordLinkStats(docNode, "Links", "") + } + } if download != nil { c.schedule(ctx, download, contentSourcePath) } @@ -144,6 +151,13 @@ func (c *NodeContentProcessor) reconcileHTMLLinks(ctx context.Context, docNode * } destination, _, _, download, err := c.resolveLink(ctx, docNode, url, contentSourcePath) klog.V(6).Infof("[%s] %s -> %s\n", contentSourcePath, url, destination) + if docNode != nil { + if url != destination { + recordLinkStats(docNode, "Links", fmt.Sprintf("%s -> %s", url, destination)) + } else { + recordLinkStats(docNode, "Links", "") + } + } if download != nil { c.schedule(ctx, download, contentSourcePath) } @@ -157,6 +171,7 @@ func (c *NodeContentProcessor) reconcileHTMLLinks(ctx context.Context, docNode * return documentBytes, errors.ErrorOrNil() } +// Download represents a resource that can be downloaded type Download struct { url string resourceName string @@ -167,6 +182,8 @@ func (c *NodeContentProcessor) resolveLink(ctx context.Context, node *api.Node, var ( text, title, substituteDestination *string hasSubstition bool + inLD bool + absLink string ) if strings.HasPrefix(destination, "#") || strings.HasPrefix(destination, "mailto:") { return destination, nil, nil, nil, nil @@ -196,11 +213,13 @@ func (c *NodeContentProcessor) resolveLink(ctx context.Context, node *api.Node, } _a := absLink - recolvedLD := c.localityDomain + resolvedLD := c.localityDomain if node != nil { - recolvedLD = resolveLocalityDomain(node, c.localityDomain) + resolvedLD = 
resolveLocalityDomain(node, c.localityDomain) + } + if resolvedLD != nil { + absLink, inLD = resolvedLD.MatchPathInLocality(absLink, c.ResourceHandlers) } - absLink, inLD := recolvedLD.MatchPathInLocality(absLink, c.ResourceHandlers) if _a != absLink { klog.V(6).Infof("[%s] Link converted %s -> %s\n", contentSourcePath, _a, absLink) } @@ -227,7 +246,7 @@ func (c *NodeContentProcessor) resolveLink(ctx context.Context, node *api.Node, // and if applicable their destination is updated as relative // path to predefined location for resources if absLink != "" && inLD { - resourceName := c.generateResourceName(absLink, recolvedLD) + resourceName := c.generateResourceName(absLink, resolvedLD) _d := destination destination = buildDestination(node, resourceName, c.resourcesRoot) if _d != destination { @@ -296,3 +315,36 @@ func substitute(absLink string, node *api.Node) (ok bool, destination *string, t } return false, nil, nil, nil } + +// recordLinkStats records link stats for a node +func recordLinkStats(node *api.Node, title, details string) { + var ( + stat *api.Stat + ) + nodeStats := node.GetStats() + if nodeStats != nil { + for _, _stat := range nodeStats { + if _stat.Title == title { + stat = _stat + break + } + } + } + if stat == nil { + stat = &api.Stat{ + Title: title, + } + if len(details) > 0 { + stat.Details = []string{details} + } else { + stat.Details = []string{} + } + stat.Figures = fmt.Sprintf("%d link rewrites", len(stat.Details)) + node.AddStats(stat) + return + } + if len(details) > 0 { + stat.Details = append(stat.Details, details) + } + stat.Figures = fmt.Sprintf("%d link rewrites", len(stat.Details)) +} diff --git a/pkg/reactor/reactor.go b/pkg/reactor/reactor.go index 4a88311a..c97d8bee 100644 --- a/pkg/reactor/reactor.go +++ b/pkg/reactor/reactor.go @@ -3,6 +3,7 @@ package reactor import ( "context" "fmt" + "os" "github.com/gardener/docforge/pkg/processors" "k8s.io/klog/v2" @@ -26,6 +27,8 @@ type Options struct { ResourceDownloadWriter 
writers.Writer Writer writers.Writer ResourceHandlers []resourcehandlers.ResourceHandler + DryRunWriter writers.DryRunWriter + Resolve bool } // NewReactor creates a Reactor from Options @@ -44,6 +47,8 @@ func NewReactor(o *Options) *Reactor { ResourceHandlers: rhRegistry, DocController: docController, DownloadController: downloadController, + DryRunWriter: o.DryRunWriter, + Resolve: o.Resolve, } return r } @@ -55,30 +60,37 @@ type Reactor struct { localityDomain *localityDomain DocController DocumentController DownloadController DownloadController + DryRunWriter writers.DryRunWriter + Resolve bool } // Run starts build operation on docStruct func (r *Reactor) Run(ctx context.Context, docStruct *api.Documentation, dryRun bool) error { - var err error - if err := r.Resolve(ctx, docStruct.Root); err != nil { + var ( + err error + ld *localityDomain + ) + if err := r.ResolveStructure(ctx, docStruct.Root); err != nil { return err } - ld := copyLocalityDomain(docStruct.LocalityDomain) - if ld == nil || len(ld.mapping) == 0 { - if ld, err = localityDomainFromNode(docStruct.Root, r.ResourceHandlers); err != nil { - return err + if docStruct.LocalityDomain != nil { + ld = copyLocalityDomain(docStruct.LocalityDomain) + if ld == nil || len(ld.mapping) == 0 { + if ld, err = localityDomainFromNode(docStruct.Root, r.ResourceHandlers); err != nil { + return err + } + r.localityDomain = ld } - r.localityDomain = ld } - if dryRun { + if r.Resolve { s, err := api.Serialize(docStruct) if err != nil { return err } - fmt.Println(s) - return nil + os.Stdout.Write([]byte(s)) + os.Stdout.Write([]byte("\n\n")) } ctx, cancel := context.WithCancel(ctx) @@ -89,15 +101,19 @@ func (r *Reactor) Run(ctx context.Context, docStruct *api.Documentation, dryRun return err } + if dryRun { + r.DryRunWriter.Flush() + } + return nil } -// Resolve builds the subnodes hierarchy of a node based on the natural nodes +// ResolveStructure builds the subnodes hierarchy of a node based on the natural nodes // 
hierarchy and on rules such as those in NodeSelector. // The node hierarchy is resolved by an appropriate handler selected based // on the NodeSelector path URI // The resulting model is the actual flight plan for replicating resources. -func (r *Reactor) Resolve(ctx context.Context, node *api.Node) error { +func (r *Reactor) ResolveStructure(ctx context.Context, node *api.Node) error { node.SetParentsDownwards() if node.NodeSelector != nil { var handler resourcehandlers.ResourceHandler @@ -107,10 +123,12 @@ func (r *Reactor) Resolve(ctx context.Context, node *api.Node) error { if err := handler.ResolveNodeSelector(ctx, node); err != nil { return err } + // remove node selectors after resolution + node.NodeSelector = nil } if len(node.Nodes) > 0 { for _, n := range node.Nodes { - if err := r.Resolve(ctx, n); err != nil { + if err := r.ResolveStructure(ctx, n); err != nil { return err } } diff --git a/pkg/writers/dryRunWriter.go b/pkg/writers/dryRunWriter.go new file mode 100644 index 00000000..265bc58b --- /dev/null +++ b/pkg/writers/dryRunWriter.go @@ -0,0 +1,156 @@ +package writers + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/gardener/docforge/pkg/api" +) + +// DryRunWriter is the functional interface for working +// with dry run writers +type DryRunWriter interface { + // GetWriter creates DryRunWriters writing to the + // same backend but for different roots (e.g. for + // resources and docs) + GetWriter(root string) Writer + // Flush wraps up dryrun writing and flushes + // results to the underlying writer (e.g. os.Stdout) + Flush() bool +} + +type dryRunWriter struct { + Writer io.Writer + writers []*writer + files []*file + t1 time.Time +} + +type file struct { + path string + stats []*api.Stat +} + +type writer struct { + root string + files *[]*file +} + +// NewDryRunWritersFactory creates factory for DryRunWriters +// writing to the same backend but for different roots (e.g. 
for +// resources and docs) +func NewDryRunWritersFactory(w io.Writer) DryRunWriter { + return &dryRunWriter{ + Writer: w, + writers: []*writer{}, + files: []*file{}, + t1: time.Now(), + } +} + +func (d *dryRunWriter) GetWriter(root string) Writer { + _w := &writer{ + root: root, + files: &d.files, + } + if d.writers == nil { + d.writers = []*writer{_w} + return _w + } + d.writers = append(d.writers, _w) + return _w +} + +func (w *writer) Write(name, path string, docBlob []byte, node *api.Node) error { + var stats []*api.Stat + if len(docBlob) > 0 && node != nil { + if !strings.HasSuffix(name, ".md") { + name = fmt.Sprintf("%s.md", name) + } + stats = node.GetStats() + } + root := filepath.Clean(w.root) + path = filepath.Clean(path) + filePath := fmt.Sprintf("%s/%s/%s", root, path, name) + filePath = filepath.Clean(filePath) + f := &file{ + path: filePath, + stats: stats, + } + *w.files = append(*w.files, f) + return nil +} + +// Flush formats and writes the dry-run result to the +// underlying writer +func (d *dryRunWriter) Flush() bool { + var ( + b bytes.Buffer + bytes []byte + err error + ) + + sort.Slice(d.files, func(i, j int) bool { return d.files[i].path < d.files[j].path }) + format(d.files, &b) + + elapsedTime := time.Since(d.t1) + b.WriteString(fmt.Sprintf("\nBuild finished in %f seconds\n", elapsedTime.Seconds())) + + if bytes, err = ioutil.ReadAll(&b); err != nil { + fmt.Println(err.Error()) + return false + } + if _, err := d.Writer.Write(bytes); err != nil { + fmt.Println(err.Error()) + } + return true +} + +func format(files []*file, b *bytes.Buffer) { + all := []string{} + for _, f := range files { + p := f.path + p = filepath.Clean(p) + dd := strings.Split(p, "/") + indent := 0 + for i, s := range dd { + if i > 0 { + b.Write([]byte(" ")) + indent++ + } + idx := i + 1 + _p := strings.Join(dd[:idx], "/") + if !any(all, _p) { + all = append(all, _p) + b.WriteString(fmt.Sprintf("%s\n", s)) + if i < len(dd)-1 { + b.Write(bytes.Repeat([]byte(" "), i)) + 
} + for _, st := range f.stats { + b.Write([]byte(" ")) + b.Write(bytes.Repeat([]byte(" "), indent)) + b.WriteString(fmt.Sprintf("%s stats: %s\n", st.Title, st.Figures)) + for _, detail := range st.Details { + b.Write(bytes.Repeat([]byte(" "), indent+2)) + b.WriteString(fmt.Sprintf("%s\n", detail)) + } + } + } + } + } +} + +func any(s []string, str string) bool { + for _, s := range s { + if s == str { + return true + } + } + return false +} diff --git a/pkg/writers/dryRunWriter_test.go b/pkg/writers/dryRunWriter_test.go new file mode 100644 index 00000000..ae16a5a4 --- /dev/null +++ b/pkg/writers/dryRunWriter_test.go @@ -0,0 +1,79 @@ +package writers + +import ( + "bytes" + "fmt" + "io/ioutil" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFormat(t *testing.T) { + var ( + b bytes.Buffer + bytes []byte + err error + ) + in := []string{ + "dev/__resources/015ec383-3c1b-487b-acff-4d7f4f8a1b14.png", + "dev/__resources/173a7246-e1d5-40d5-b981-8cff293e177a.png", + "dev/doc/aws_provider.md", + "dev/doc/gardener", + "dev/doc/gardener/_index.md", + "dev/doc/gardener/concepts", + "dev/doc/gardener/concepts/apiserver.md", + "dev/doc/gardener/deployment", + "dev/doc/gardener/deployment/aks.md", + "dev/doc/gardener/deployment/deploy_gardenlet.md", + "dev/doc/gardener/deployment/feature_gates.md", + "dev/doc/gardener/proposals", + "dev/doc/gardener/proposals/00-template.md", + "dev/doc/gardener/proposals/01-extensibility.md", + "dev/doc/gardener/proposals/_index.md", + "dev/doc/gardener/testing", + "dev/doc/gardener/testing/integration_tests.md", + "dev/doc/gardener/usage", + "dev/doc/gardener/usage/configuration.md", + "dev/doc/gardener/usage/control_plane_migration.md", + } + out := `dev + __resources + 015ec383-3c1b-487b-acff-4d7f4f8a1b14.png + 173a7246-e1d5-40d5-b981-8cff293e177a.png + doc + aws_provider.md + gardener + _index.md + concepts + apiserver.md + deployment + aks.md + deploy_gardenlet.md + feature_gates.md + proposals + 00-template.md + 
01-extensibility.md + _index.md + testing + integration_tests.md + usage + configuration.md + control_plane_migration.md +` + + files := []*file{} + for _, p := range in { + files = append(files, &file{ + path: p, + }) + } + + format(files, &b) + + if bytes, err = ioutil.ReadAll(&b); err != nil { + t.Error(err.Error()) + } + assert.Equal(t, out, string(bytes)) + fmt.Printf("%s\n", string(bytes)) +}