Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
Completely resolve indexed search performance regression in v3.2.0 (#…
Browse files Browse the repository at this point in the history
…4048)

* search: completely resolve indexed search performance regression in v3.2.0

In #3685 I resolved a major indexed search performance regression that was
introduced in v3.2.0.

For a search returning 100 repositories and 100 results per repository:

1. After the regression: we would do 100*100 == 10,000 Git OID resolutions (which would overload Gitserver and cause searches to timeout)

2. After my previous resolution, we would no longer do duplicate work and thus only did 100 Git OID resolutions (1 per repository).
    - This was shown in benchmarks and load tests on customer instances to be a substantial improvement.
    - On one customer instance, we've found this still hasn't been enough of an improvement.
    - On another customer instance, we've found we could be doing better.

3. After *this* resolution, we are 100% at the performance level we were at prior to v3.2.0 because we are no longer doing *any* additional Git OID resolutions. Zoekt already has this information and we already aquire this information prior to executing searches, we just weren't correctly reusing this information.

Should fully resolve an outstanding issue https://app.hubspot.com/contacts/2762526/company/464956351 is facing.
Helps https://app.hubspot.com/contacts/2762526/company/407948923

* remove now unnecessary Git OID resolution & caching logic

* CHANGELOG

* fixup reversion

* fixup

* update tests
  • Loading branch information
slimsag authored May 16, 2019
1 parent 7d5f551 commit c893ed3
Show file tree
Hide file tree
Showing 13 changed files with 113 additions and 468 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ All notable changes to Sourcegraph are documented in this file.
- Fixed an issue where in some rare cases PostgreSQL starting up slowly could incorrectly trigger a panic in the `frontend` service.
- Fixed an issue where the management console password would incorrectly reset to a new secure one after a user account was created.
- Substantially improved the performance of updating external service configurations on instances with thousands of repositories, going from e.g. several minutes to about a minute for ~20k repositories.
- Fully resolved the search performance regression in v3.2.0, restoring performance of search back to the same levels it was before changes made in v3.2.0.

## 3.3.7

Expand Down
12 changes: 2 additions & 10 deletions cmd/frontend/graphqlbackend/discussion_threads.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,23 +590,15 @@ func (r *discussionThreadTargetRepoResolver) RelativeSelection(ctx context.Conte
endLine: *r.t.EndLine,
endCharacter: *r.t.EndCharacter,
}
if oid, _ := commit.OID(); r.t.Revision != nil && *r.t.Revision == string(oid) {
if r.t.Revision != nil && *r.t.Revision == string(commit.OID()) {
return oldSel, nil // nothing to do (requested relative revision is identical to the stored revision)
}
if r.t.Branch != nil {
branchCommit, err := repo.Commit(ctx, &repositoryCommitArgs{Rev: *r.t.Branch})
if err != nil {
return nil, err
}
bOid, err := branchCommit.OID()
if err != nil {
return nil, err
}
oid, err := commit.OID()
if err != nil {
return nil, err
}
if bOid == oid {
if branchCommit.OID() == commit.OID() {
return oldSel, nil // nothing to do (requested relative revision is identical to the stored branch revision)
}
}
Expand Down
14 changes: 2 additions & 12 deletions cmd/frontend/graphqlbackend/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,7 @@ func (r *gitTreeEntryResolver) Content(ctx context.Context) (string, error) {
return "", err
}

oid, err := r.commit.OID()
if err != nil {
return "", err
}

contents, err := git.ReadFile(ctx, *cachedRepo, api.CommitID(oid), r.path)
contents, err := git.ReadFile(ctx, *cachedRepo, api.CommitID(r.commit.OID()), r.path)
if err != nil {
return "", err
}
Expand Down Expand Up @@ -105,12 +100,7 @@ func (r *gitTreeEntryResolver) Highlight(ctx context.Context, args *struct {
return nil, err
}

oid, err := r.commit.OID()
if err != nil {
return nil, err
}

content, err := git.ReadFile(ctx, *cachedRepo, api.CommitID(oid), r.path)
content, err := git.ReadFile(ctx, *cachedRepo, api.CommitID(r.commit.OID()), r.path)
if err != nil {
return nil, err
}
Expand Down
6 changes: 1 addition & 5 deletions cmd/frontend/graphqlbackend/git_blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,8 @@ func (r *gitTreeEntryResolver) Blame(ctx context.Context,
StartLine int32
EndLine int32
}) ([]*hunkResolver, error) {
oid, err := r.commit.OID()
if err != nil {
return nil, err
}
hunks, err := git.BlameFile(ctx, gitserver.Repo{Name: r.commit.repo.repo.Name}, r.path, &git.BlameOptions{
NewestCommit: api.CommitID(oid),
NewestCommit: api.CommitID(r.commit.OID()),
StartLine: int(args.StartLine),
EndLine: int(args.EndLine),
})
Expand Down
153 changes: 19 additions & 134 deletions cmd/frontend/graphqlbackend/git_commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ import (
"context"
"fmt"
"strings"
"sync"
"time"

"github.com/sourcegraph/sourcegraph/cmd/frontend/backend"
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/externallink"
Expand Down Expand Up @@ -36,9 +34,8 @@ type gitCommitResolver struct {
// to avoid redirecting a user browsing a revision "mybranch" to the absolute commit ID as they follow links in the UI.
inputRev *string

oid gitObjectID
oidErr error
once sync.Once
// oid MUST be specified and a 40-character Git SHA.
oid gitObjectID

author signatureResolver
committer *signatureResolver
Expand Down Expand Up @@ -78,86 +75,15 @@ func unmarshalGitCommitID(id graphql.ID) (repoID graphql.ID, commitID gitObjectI
}

func (r *gitCommitResolver) ID() graphql.ID {
oid, _ := r.OID()
return marshalGitCommitID(r.repo.ID(), oid)
return marshalGitCommitID(r.repo.ID(), r.oid)
}

func (r *gitCommitResolver) Repository() *repositoryResolver { return r.repo }

func (r *gitCommitResolver) OID() (gitObjectID, error) {
r.once.Do(func() {
if r.oid != "" {
return // We already have an oid because the creator of this *gitCommitResolver specified it.
}

// Try fetching it from the Redis cache to avoid doing lots of work
// previously done (as this method is called very often, e.g. multiple
// times per search result).
start := time.Now()
result, ok := oidResolutionCache.Get(string(r.repo.repo.ID))
oidResolutionCacheLookupDuration.Observe(time.Since(start).Seconds())
if ok {
oidResolutionCounter.WithLabelValues("hit").Inc()
r.oid = gitObjectID(result)
return
}

// The cache doesn't have it, so compute it and update the cache if we
// resolved it successfully.
start = time.Now()
r.resolveCommitIODUncached()
oidResolutionDuration.Observe(time.Since(start).Seconds())
if r.oidErr == nil {
oidResolutionCounter.WithLabelValues("miss").Inc()
oidResolutionCache.Set(string(r.repo.repo.ID), string(r.oid))
} else {
oidResolutionCounter.WithLabelValues("miss_error").Inc()
}
})
return r.oid, r.oidErr
}

func (r *gitCommitResolver) resolveCommitIODUncached() {
if r.oid != "" || r.oidErr != nil {
// Possible scenarios for this case:
//
// - We already have an r.oid because the creator of this *gitCommitResolver specified it.
// - We already have an r.oid because we were called before.
// - We don't have an r.oid but have an r.oidErr from being called before.
//
// In any case, there is no point in doing the work again, so we return
// now.
return
}

// Commit OID is the empty string denoting the default branch. Find out
// what is the latest commit indexed by zoekt.

indexInfo := r.repo.TextSearchIndex()

ctx := context.Background()
func (r *gitCommitResolver) OID() gitObjectID { return r.oid }

var refs []*repositoryTextSearchIndexedRef
refs, r.oidErr = indexInfo.Refs(ctx)
if r.oidErr != nil {
return
}

for _, ref := range refs {
current, _ := ref.Current(ctx)
if current {
r.oid = ref.indexedCommit
break
}
}
}

func (r *gitCommitResolver) AbbreviatedOID() (string, error) {
commit, err := r.OID()
if err != nil {
return "", err
}
return string(commit)[:7], err
func (r *gitCommitResolver) AbbreviatedOID() string {
return string(r.oid)[:7]
}
func (r *gitCommitResolver) Author() *signatureResolver { return &r.author }
func (r *gitCommitResolver) Committer() *signatureResolver { return r.committer }
Expand All @@ -184,27 +110,15 @@ func (r *gitCommitResolver) Parents(ctx context.Context) ([]*gitCommitResolver,
}

func (r *gitCommitResolver) URL() (string, error) {
rev, err := r.inputRevOrImmutableRev()
if err != nil {
return "", err
}
return r.repo.URL() + "/-/commit/" + string(rev), nil
return r.repo.URL() + "/-/commit/" + string(r.inputRevOrImmutableRev()), nil
}

func (r *gitCommitResolver) CanonicalURL() (string, error) {
oid, err := r.OID()
if err != nil {
return "", err
}
return r.repo.URL() + "/-/commit/" + string(oid), nil
return r.repo.URL() + "/-/commit/" + string(r.oid), nil
}

func (r *gitCommitResolver) ExternalURLs(ctx context.Context) ([]*externallink.Resolver, error) {
oid, err := r.OID()
if err != nil {
return nil, err
}
return externallink.Commit(ctx, r.repo.repo, api.CommitID(oid))
return externallink.Commit(ctx, r.repo.repo, api.CommitID(r.oid))
}

func (r *gitCommitResolver) Tree(ctx context.Context, args *struct {
Expand All @@ -215,11 +129,7 @@ func (r *gitCommitResolver) Tree(ctx context.Context, args *struct {
if err != nil {
return nil, err
}
oid, err := r.OID()
if err != nil {
return nil, err
}
stat, err := git.Stat(ctx, *cachedRepo, api.CommitID(oid), args.Path)
stat, err := git.Stat(ctx, *cachedRepo, api.CommitID(r.oid), args.Path)
if err != nil {
return nil, err
}
Expand All @@ -241,11 +151,7 @@ func (r *gitCommitResolver) Blob(ctx context.Context, args *struct {
if err != nil {
return nil, err
}
oid, err := r.OID()
if err != nil {
return nil, err
}
stat, err := git.Stat(ctx, *cachedRepo, api.CommitID(oid), args.Path)
stat, err := git.Stat(ctx, *cachedRepo, api.CommitID(r.oid), args.Path)
if err != nil {
return nil, err
}
Expand All @@ -266,11 +172,7 @@ func (r *gitCommitResolver) File(ctx context.Context, args *struct {
}

func (r *gitCommitResolver) Languages(ctx context.Context) ([]string, error) {
oid, err := r.OID()
if err != nil {
return nil, err
}
inventory, err := backend.Repos.GetInventory(ctx, r.repo.repo, api.CommitID(oid))
inventory, err := backend.Repos.GetInventory(ctx, r.repo.repo, api.CommitID(r.oid))
if err != nil {
return nil, err
}
Expand All @@ -287,12 +189,8 @@ func (r *gitCommitResolver) Ancestors(ctx context.Context, args *struct {
Query *string
Path *string
}) (*gitCommitConnectionResolver, error) {
oid, err := r.OID()
if err != nil {
return nil, nil
}
return &gitCommitConnectionResolver{
revisionRange: string(oid),
revisionRange: string(r.oid),
first: args.ConnectionArgs.First,
query: args.Query,
path: args.Path,
Expand All @@ -307,11 +205,7 @@ func (r *gitCommitResolver) BehindAhead(ctx context.Context, args *struct {
if err != nil {
return nil, err
}
oid, err := r.OID()
if err != nil {
return nil, err
}
counts, err := git.GetBehindAhead(ctx, *cachedRepo, args.Revspec, string(oid))
counts, err := git.GetBehindAhead(ctx, *cachedRepo, args.Revspec, string(r.oid))
if err != nil {
return nil, err
}
Expand All @@ -328,12 +222,11 @@ func (r *behindAheadCountsResolver) Ahead() int32 { return r.ahead }

// inputRevOrImmutableRev returns the input revspec, if it is provided and nonempty. Otherwise it returns the
// canonical OID for the revision.
func (r *gitCommitResolver) inputRevOrImmutableRev() (string, error) {
func (r *gitCommitResolver) inputRevOrImmutableRev() string {
if r.inputRev != nil && *r.inputRev != "" {
return escapeRevspecForURL(*r.inputRev), nil
return escapeRevspecForURL(*r.inputRev)
}
oid, err := r.OID()
return string(oid), err
return string(r.oid)
}

// repoRevURL returns the URL path prefix to use when constructing URLs to resources at this
Expand All @@ -347,11 +240,7 @@ func (r *gitCommitResolver) repoRevURL() (string, error) {
if r.inputRev != nil {
rev = *r.inputRev // use the original input rev from the user
} else {
oid, err := r.OID()
if err != nil {
return "", err
}
rev = string(oid)
rev = string(r.oid)
}
if rev != "" {
return url + "@" + escapeRevspecForURL(rev), nil
Expand All @@ -360,11 +249,7 @@ func (r *gitCommitResolver) repoRevURL() (string, error) {
}

func (r *gitCommitResolver) canonicalRepoRevURL() (string, error) {
oid, err := r.OID()
if err != nil {
return "", err
}
return r.repo.URL() + "@" + string(oid), nil
return r.repo.URL() + "@" + string(r.oid), nil
}

// gitCommitBody returns the first line of the Git commit message.
Expand Down
Loading

0 comments on commit c893ed3

Please sign in to comment.