Skip to content

Commit

Permalink
refactor(injector): new API with better performance (#234)
Browse files Browse the repository at this point in the history
Create a new `injector.Injector` API that does not rely on
`decorator.Load` (internally using `packages.Load`), instead using the
basic `go/types` API to type-check the AST nodes in order to obtain the
`Uses` map that is sufficient to build an import-managing
`decorator.Decorator` instance.

The package name resolution is done by parsing type information from the
archives mentioned in the `importcfg` file.

This change removes unnecessary compilation of un-instrumented archives
which are not useful, and hence saves time and disk space.

---

The new API also does not consider `PreserveLineInfo` to be optional
(this was never exposed to end-users anyway), so a bunch of test
reference files have changed to now include line directives.

---------

Co-authored-by: Eliott Bouhana <[email protected]>
  • Loading branch information
RomainMuller and eliottness authored Aug 27, 2024
1 parent b4bdb4f commit 62d7333
Show file tree
Hide file tree
Showing 28 changed files with 824 additions and 470 deletions.
25 changes: 0 additions & 25 deletions internal/injector/builtin/generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 0 additions & 38 deletions internal/injector/builtin/generator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"strings"

"github.com/dave/jennifer/jen"
"golang.org/x/tools/go/packages"
"gopkg.in/yaml.v3"
)

Expand Down Expand Up @@ -162,29 +161,6 @@ func main() {
g.Empty().Line()
})

file.Comment("RestorerMap is a set of import path to name mappings for packages that would be incorrectly named by restorer.Guess")
file.Var().Id("RestorerMap").Op("=").Map(jen.String()).String().ValuesFunc(func(g *jen.Group) {
pkgs, err := packages.Load(&packages.Config{}, "gopkg.in/DataDog/dd-trace-go.v1/...")
if err != nil {
log.Fatalf("Failed to load packages: %v\n", err)
}
sort.Sort(sortable(pkgs))

for _, pkg := range pkgs {
if strings.HasSuffix(pkg.PkgPath, "/internal") || strings.Contains(pkg.PkgPath, "/internal/") {
// We don't care about internal packages here (at least for now)
continue
}
if strings.HasSuffix(pkg.PkgPath, "/"+pkg.Name) {
// We don't care about packages for which `restorer.Guess` would infer the right name.
continue
}
g.Line().Lit(pkg.PkgPath).Op(":").Lit(pkg.Name)
}

g.Line().Empty()
})

file.Comment("InjectedPaths is a set of import paths that may be injected by built-in aspects. This list is used to ensure proper")
file.Comment("invalidation of cached artifacts when injected dependencies change.")
file.Var().Id("InjectedPaths").Op("=").Index(jen.Op("...")).String().ValuesFunc(func(g *jen.Group) {
Expand Down Expand Up @@ -213,20 +189,6 @@ func main() {
}
}

type sortable []*packages.Package

func (s sortable) Len() int {
return len(s)
}

func (s sortable) Less(i, j int) bool {
return s[i].PkgPath < s[j].PkgPath
}

func (s sortable) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}

func writeAll(w io.Writer, data []byte) {
for len(data) > 0 {
n, err := w.Write(data)
Expand Down
88 changes: 55 additions & 33 deletions internal/injector/builtin/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/datadog/orchestrion/internal/version"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/tools/go/packages"
)

var (
Expand All @@ -44,51 +46,71 @@ func Test(t *testing.T) {

pkgDir := filepath.Join(samplesDir, dir)

testLookup := func(path string) (io.ReadCloser, error) {
pkgs, err := packages.Load(
&packages.Config{
Mode: packages.NeedExportFile,
Dir: pkgDir,
Logf: t.Logf,
},
path,
)
if err != nil {
return nil, err
}
file := pkgs[0].ExportFile
if file == "" {
return nil, fmt.Errorf("no export data for %s", path)
}
return os.Open(file)
}

tmp := t.TempDir()
inj, err := injector.New(pkgDir, injector.Options{
inj := injector.Injector{
Aspects: builtin.Aspects[:],
Dir: pkgDir,
ModifiedFile: func(filename string) string {
return filepath.Join(tmp, filepath.Base(filename))
},
PreserveLineInfo: true,
})
require.NoError(t, err)
RootConfig: map[string]string{"httpmode": "wrap"},
ImportPath: fmt.Sprintf("github.com/datadog/orchestrion/samples/%s", dir),
Lookup: testLookup,
}

files, err := filepath.Glob(filepath.Join(pkgDir, "*.go"))
require.NoError(t, err)
for _, filename := range files {
t.Run(filepath.Base(filename), func(t *testing.T) {
res, err := inj.InjectFile(filename, map[string]string{"httpmode": "wrap"})
require.NoError(t, err)

referenceFile := filepath.Join(referenceDir, dir, filepath.Base(filename)) + ".snap"
if !res.Modified {
_, err := os.Stat(referenceFile)
if updateSnapshots && err == nil {
require.NoError(t, os.Remove(referenceFile))
}
require.ErrorIs(t, err, os.ErrNotExist)
return
}

data, err := os.ReadFile(res.Filename)
require.NoError(t, err)

data = bytes.ReplaceAll(data, []byte(samplesDir), []byte("samples"))
data = bytes.ReplaceAll(data, []byte(fmt.Sprintf("%q", version.Tag)), []byte("\"<version.Tag>\""))
results, err := inj.InjectFiles(files)
require.NoError(t, err)

reference, err := os.ReadFile(referenceFile)
if updateSnapshots && errors.Is(err, os.ErrNotExist) {
require.NoError(t, os.MkdirAll(filepath.Dir(referenceFile), 0o755))
require.NoError(t, os.WriteFile(referenceFile, data, 0o644))
}
require.NoError(t, err)
for _, filename := range files {
referenceFile := filepath.Join(referenceDir, dir, filepath.Base(filename)) + ".snap"

if !assert.Equal(t, string(reference), string(data)) && updateSnapshots {
require.NoError(t, os.WriteFile(referenceFile, data, 0o644))
res, modified := results[filename]
if !modified {
_, err := os.Stat(referenceFile)
if updateSnapshots && err == nil {
require.NoError(t, os.Remove(referenceFile))
}
})
require.ErrorIs(t, err, os.ErrNotExist)
return
}

data, err := os.ReadFile(res.Filename)
require.NoError(t, err)

data = bytes.ReplaceAll(data, []byte(samplesDir), []byte("samples"))
data = bytes.ReplaceAll(data, []byte(fmt.Sprintf("%q", version.Tag)), []byte("\"<version.Tag>\""))

reference, err := os.ReadFile(referenceFile)
if updateSnapshots && errors.Is(err, os.ErrNotExist) {
require.NoError(t, os.MkdirAll(filepath.Dir(referenceFile), 0o755))
require.NoError(t, os.WriteFile(referenceFile, data, 0o644))
}
require.NoError(t, err)

if !assert.Equal(t, string(reference), string(data)) && updateSnapshots {
require.NoError(t, os.WriteFile(referenceFile, data, 0o644))
}
}
})
}
Expand Down
34 changes: 34 additions & 0 deletions internal/injector/check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2023-present Datadog, Inc.

package injector

import (
"fmt"
"go/ast"
"go/importer"
"go/token"
"go/types"
"runtime"
)

// typeCheck runs the Go type checker on the provided files, and returns the
// Uses type information map that is built in the process.
func (i *Injector) typeCheck(fset *token.FileSet, files []*ast.File) (map[*ast.Ident]types.Object, error) {
pkg := types.NewPackage(i.ImportPath, i.Name)
typeInfo := types.Info{Uses: make(map[*ast.Ident]types.Object)}

checkerCfg := types.Config{
GoVersion: i.GoVersion,
Importer: importer.ForCompiler(fset, runtime.Compiler, i.Lookup),
}
checker := types.NewChecker(&checkerCfg, fset, pkg, &typeInfo)

if err := checker.Files(files); err != nil {
return nil, fmt.Errorf("type-checking files: %w", err)
}

return typeInfo.Uses, nil
}
115 changes: 115 additions & 0 deletions internal/injector/imports.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2023-present Datadog, Inc.

package injector

import (
"go/token"
"strconv"

"github.com/dave/dst"
"github.com/dave/dst/dstutil"
)

// canonicalizeImports works around the issue detailed in https://github.com/dave/dst/issues/45
// where dave/dst improperly handles multiple imports of the same package with different aliases,
// resulting in invalid output source code.
//
// To do so, it modifies the AST file so that it only includes a single import per path, using the
// first non-empty alias found.
func canonicalizeImports(file *dst.File) {
specsByPath := importSpecsByImportPath(file)

retain := filterExtraneousImports(specsByPath)

file.Imports = file.Imports[:0] // Re-use the backing store, we'll keep <= what was there.
for spec := range retain {
file.Imports = append(file.Imports, spec)
}

filterDecls(file, retain)
}

func importSpecsByImportPath(file *dst.File) map[string][]*dst.ImportSpec {
byPath := make(map[string][]*dst.ImportSpec, len(file.Imports))

for _, imp := range file.Imports {
path, err := strconv.Unquote(imp.Path.Value)
if err != nil {
continue
}
list := append(byPath[path], imp)
byPath[path] = list
}

return byPath
}

func filterExtraneousImports(byPath map[string][]*dst.ImportSpec) map[*dst.ImportSpec]struct{} {
result := make(map[*dst.ImportSpec]struct{}, len(byPath))

for _, specs := range byPath {
retain := specs[0]
for _, spec := range specs[1:] {
if (spec.Name == nil && (retain.Name == nil || retain.Name.Name != "_")) || spec.Name.Name == "_" {
continue
}
retain = spec
break
}
result[retain] = struct{}{}
}

return result
}

func filterDecls(file *dst.File, retain map[*dst.ImportSpec]struct{}) {
dstutil.Apply(
file,
func(csor *dstutil.Cursor) bool {
switch node := csor.Node().(type) {
case *dst.GenDecl:
// Only visit the children of `import` declarations.
return node.Tok == token.IMPORT
case *dst.ImportSpec:
// Filter out ImportSpec entries to keep only those in retain
if _, ret := retain[node]; !ret {
csor.Delete()
}
// No need to traverse children.
return false
case dst.Decl:
// No need to visit any other kind of declaration
return false
default:
// Visit other node types (e.g, the *ast.File)
return true
}
},
func(csor *dstutil.Cursor) bool {
switch node := csor.Node().(type) {
case *dst.GenDecl:
if node.Tok != token.IMPORT {
// Imports are before any other kind of declaration, we can abort traversal as soon as we
// find a declaration that is not an `import` declaration.
return false
}

if len(node.Specs) == 0 {
csor.Delete()
}
// Proceed with the rest of the nodes (there may be more imports).
return true
case dst.Decl:
// Imports are before any other kind of declaration, we can abort traversal as soon as we
// find a declaration that is not an `import` declaration.
return false
default:
// Proceed with the rest of the nodes (there may be imports down there).
return true
}
},
)
}
Loading

0 comments on commit 62d7333

Please sign in to comment.