Skip to content

Commit

Permalink
cmd/go: make module index loading O(1)
Browse files Browse the repository at this point in the history
For a large module, opening the index was populating tables with
entries for every package in the module. If we are only using a small
number of those packages, this is wasted work that can dwarf the
benefit from the index.

This CL changes the index reader to avoid loading all packages
at module index open time. It also refactors the code somewhat
for clarity.

It also removes some duplication by defining that a per-package
index is a per-module index containing a single package, rather
than having two different formats and two different decoders.

It also changes the string table to use uvarint-prefixed data
instead of having to scan for a NUL byte. This makes random access
to long strings more efficient - O(1) instead of O(n) - and can significantly
speed up the strings.Compare operation in the binary search looking
for a given package.

Also add a direct test of the indexing code.

For #53577.

Change-Id: I7428d28133e4e7fe2d2993fa014896cd15af48af
Reviewed-on: https://go-review.googlesource.com/c/go/+/416178
Reviewed-by: Bryan Mills <[email protected]>
  • Loading branch information
rsc committed Jul 11, 2022
1 parent b8bf820 commit 7510e59
Show file tree
Hide file tree
Showing 4 changed files with 327 additions and 275 deletions.
87 changes: 87 additions & 0 deletions src/cmd/go/internal/modindex/index_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package modindex

import (
"encoding/hex"
"encoding/json"
"go/build"
"internal/diff"
"path/filepath"
"reflect"
"runtime"
"testing"
)

func init() {
isTest = true
enabled = true // to allow GODEBUG=goindex=0 go test, when things are very broken
}

func TestIndex(t *testing.T) {
src := filepath.Join(runtime.GOROOT(), "src")
checkPkg := func(t *testing.T, m *Module, pkg string, data []byte) {
p := m.Package(pkg)
bp, err := p.Import(build.Default, build.ImportComment)
if err != nil {
t.Fatal(err)
}
bp1, err := build.Default.Import(pkg, filepath.Join(src, pkg), build.ImportComment)
if err != nil {
t.Fatal(err)
}

if !reflect.DeepEqual(bp, bp1) {
t.Errorf("mismatch")
t.Logf("index:\n%s", hex.Dump(data))

js, err := json.MarshalIndent(bp, "", "\t")
if err != nil {
t.Fatal(err)
}
js1, err := json.MarshalIndent(bp1, "", "\t")
if err != nil {
t.Fatal(err)
}
t.Logf("diff:\n%s", diff.Diff("index", js, "correct", js1))
t.FailNow()
}
}

// Check packages in increasing complexity, one at a time.
pkgs := []string{
"crypto",
"encoding",
"unsafe",
"encoding/json",
"runtime",
"net",
}
var raws []*rawPackage
for _, pkg := range pkgs {
raw := importRaw(src, pkg)
raws = append(raws, raw)
t.Run(pkg, func(t *testing.T) {
data := encodeModuleBytes([]*rawPackage{raw})
m, err := fromBytes(src, data)
if err != nil {
t.Fatal(err)
}
checkPkg(t, m, pkg, data)
})
}

// Check that a multi-package index works too.
t.Run("all", func(t *testing.T) {
data := encodeModuleBytes(raws)
m, err := fromBytes(src, data)
if err != nil {
t.Fatal(err)
}
for _, pkg := range pkgs {
checkPkg(t, m, pkg, data)
}
})
}
Loading

0 comments on commit 7510e59

Please sign in to comment.