diff --git a/CHANGELOG.md b/CHANGELOG.md index 45f533a88a56..9a00f4b72a27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ All notable changes to Sourcegraph are documented in this file. ## Changed - All 24 language extensions are enabled by default. +- Fixed a major indexed search performance regression that occurred in v3.2.0. (#3685) ## 3.3.5 diff --git a/cmd/frontend/graphqlbackend/git_commit.go b/cmd/frontend/graphqlbackend/git_commit.go index b61d1830de39..9cf430ceb539 100644 --- a/cmd/frontend/graphqlbackend/git_commit.go +++ b/cmd/frontend/graphqlbackend/git_commit.go @@ -5,6 +5,7 @@ import ( "fmt" "strings" "sync" + "time" "github.com/sourcegraph/sourcegraph/cmd/frontend/backend" "github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/externallink" @@ -35,9 +36,9 @@ type gitCommitResolver struct { // to avoid redirecting a user browsing a revision "mybranch" to the absolute commit ID as they follow links in the UI. inputRev *string - oid gitObjectID - once sync.Once - onceErr error + oid gitObjectID + oidErr error + once sync.Once author signatureResolver committer *signatureResolver @@ -84,40 +85,71 @@ func (r *gitCommitResolver) ID() graphql.ID { func (r *gitCommitResolver) Repository() *repositoryResolver { return r.repo } func (r *gitCommitResolver) OID() (gitObjectID, error) { - return r.getCommitOID() -} - -func (r *gitCommitResolver) getCommitOID() (gitObjectID, error) { r.once.Do(func() { - // If we already have the commit, no need to try to compute it. if r.oid != "" { + return // We already have an oid because the creator of this *gitCommitResolver specified it. + } + + // Try fetching it from the Redis cache to avoid doing lots of work + // previously done (as this method is called very often, e.g. multiple + // times per search result). + start := time.Now() + result, ok := oidResolutionCache.Get(string(r.repo.repo.ID)) + oidResolutionCacheLookupDuration.Observe(time.Since(start).Seconds()) + if ok { + oidResolutionCounter.WithLabelValues("hit").Inc() + r.oid = gitObjectID(result) return } - // Commit OID is the empty string denoting the default branch. Find out - // what is the latest commit indexed by zoekt. + // The cache doesn't have it, so compute it and update the cache if we + // resolved it successfully. + start = time.Now() + r.resolveCommitIODUncached() + oidResolutionDuration.Observe(time.Since(start).Seconds()) + if r.oidErr == nil { + oidResolutionCounter.WithLabelValues("miss").Inc() + oidResolutionCache.Set(string(r.repo.repo.ID), string(r.oid)) + } else { + oidResolutionCounter.WithLabelValues("miss_error").Inc() + } + }) + return r.oid, r.oidErr +} - indexInfo := r.repo.TextSearchIndex() +func (r *gitCommitResolver) resolveCommitIODUncached() { + if r.oid != "" || r.oidErr != nil { + // Possible scenarios for this case: + // + // - We already have an r.oid because the creator of this *gitCommitResolver specified it. + // - We already have an r.oid because we were called before. + // - We don't have an r.oid but have an r.oidErr from being called before. + // + // In any case, there is no point in doing the work again, so we return + // now. + return + } - ctx := context.Background() + // Commit OID is the empty string denoting the default branch. Find out + // what is the latest commit indexed by zoekt. - var refs []*repositoryTextSearchIndexedRef - refs, r.onceErr = indexInfo.Refs(ctx) - if r.onceErr != nil { - return - } + indexInfo := r.repo.TextSearchIndex() - for _, ref := range refs { - current, _ := ref.Current(ctx) - if current { - r.oid = ref.indexedCommit + ctx := context.Background() - break - } - } - }) + var refs []*repositoryTextSearchIndexedRef + refs, r.oidErr = indexInfo.Refs(ctx) + if r.oidErr != nil { + return + } - return r.oid, r.onceErr + for _, ref := range refs { + current, _ := ref.Current(ctx) + if current { + r.oid = ref.indexedCommit + break + } + } } func (r *gitCommitResolver) AbbreviatedOID() (string, error) { diff --git a/cmd/frontend/graphqlbackend/git_commit_resolution_cache.go b/cmd/frontend/graphqlbackend/git_commit_resolution_cache.go new file mode 100644 index 000000000000..6f1dce69dac5 --- /dev/null +++ b/cmd/frontend/graphqlbackend/git_commit_resolution_cache.go @@ -0,0 +1,123 @@ +package graphqlbackend + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +type resolutionCacheEntry struct { + t time.Time + value string +} + +type resolutionCache struct { + // ttl indicates how long before cache entries expire. There is no limit on + // the size of the cache except the effective # of repositories on the + // Sourcegraph instance. + ttl time.Duration + + // cacheEntries, if non-nil, is used to record the number of entries in the cache. + cacheEntries prometheus.Histogram + + // workerInterval, if non-zero, specifies the interval at which the worker + // checks for entries to evict. Defaults ttl / 2. + workerInterval time.Duration + + // mockSleep, if non-nil can be used to mock time.Sleep for testing purposes. + mockSleep func(d time.Duration) + + m sync.Map +} + +func (r *resolutionCache) Set(k, v string) { + r.m.Store(k, resolutionCacheEntry{ + t: time.Now(), + value: v, + }) +} + +func (r *resolutionCache) Get(k string) (string, bool) { + v, ok := r.m.Load(k) + if !ok { + return "", false + } + e := v.(resolutionCacheEntry) + if time.Since(e.t) >= r.ttl { + // entry has expired + r.m.Delete(k) + return "", false + } + return e.value, true +} + +func (r *resolutionCache) startWorker() *resolutionCache { + if r.workerInterval == 0 { + r.workerInterval = r.ttl / 2 + } + sleep := time.Sleep + if r.mockSleep != nil { + sleep = r.mockSleep + } + go func() { + for { + sleep(r.workerInterval) + size := 0 + r.m.Range(func(key, value interface{}) bool { + size++ + e := value.(resolutionCacheEntry) + if time.Since(e.t) >= r.ttl { + // entry has expired + r.m.Delete(key) + } + return true + }) + r.cacheEntries.Observe(float64(size)) + } + }() + return r +} + +var ( + // oidResolutionCache is used to cache Git commit OID resolution. This is + // used because OID resolution happens extremely often (e.g. multiple times + // per search result). + oidResolutionCache = (&resolutionCache{ + ttl: 60 * time.Second, + cacheEntries: prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "src", + Subsystem: "graphql", + Name: "git_commit_oid_resolution_cache_entries", + Help: "Total number of entries in the in-memory Git commit OID resolution cache.", + }), + }).startWorker() + + oidResolutionCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "src", + Subsystem: "graphql", + Name: "git_commit_oid_resolution_cache_hit", + Help: "Counts cache hits and misses for Git commit OID resolution.", + }, []string{"type"}) + + oidResolutionDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "src", + Subsystem: "graphql", + Name: "git_commit_oid_resolution_duration_seconds", + Help: "Total time spent performing uncached Git commit OID resolution.", + }) + + oidResolutionCacheLookupDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "src", + Subsystem: "graphql", + Name: "git_commit_oid_resolution_cache_lookup_duration_seconds", + Help: "Total time spent performing cache lookups for Git commit OID resolution.", + }) +) + +func init() { + prometheus.MustRegister(oidResolutionCache.cacheEntries) + prometheus.MustRegister(oidResolutionCounter) + prometheus.MustRegister(oidResolutionDuration) + prometheus.MustRegister(oidResolutionCacheLookupDuration) +} diff --git a/cmd/frontend/graphqlbackend/git_commit_resolution_cache_test.go b/cmd/frontend/graphqlbackend/git_commit_resolution_cache_test.go new file mode 100644 index 000000000000..e61056409ceb --- /dev/null +++ b/cmd/frontend/graphqlbackend/git_commit_resolution_cache_test.go @@ -0,0 +1,96 @@ +package graphqlbackend + +import ( + "fmt" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +func TestGitCommitResolutionCache(t *testing.T) { + doneSleep := make(chan struct{}) + cache := (&resolutionCache{ + ttl: 15 * time.Millisecond, + cacheEntries: prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "src", + Subsystem: "graphql", + Name: "git_commit_oid_resolution_cache_entries", + Help: "Total number of entries in the in-memory Git commit OID resolution cache.", + }), + mockSleep: func(d time.Duration) { + <-doneSleep + }, + }).startWorker() + + cache.Set("repo-1", "commit-1") + cache.Set("repo-2", "commit-2") + if v, ok := cache.Get("repo-1"); !ok || v != "commit-1" { + t.Fatal("expected cache get to succeed") + } + if v, ok := cache.Get("repo-2"); !ok || v != "commit-2" { + t.Fatal("expected cache get to succeed") + } + doneSleep <- struct{}{} // cause worker to begin a single iteration + time.Sleep(30 * time.Millisecond) // wait long enough for TTL + if _, ok := cache.Get("repo-1"); ok { + t.Fatal("expected cache entry to have expired") + } + if _, ok := cache.Get("repo-2"); ok { + t.Fatal("expected cache entry to have expired") + } +} + +// Merely shows that the cache can support a very high concurrent read rate +// with a low write rate. Run it like: +// +// $ go test -bench BenchmarkGitCommitResolutionCache -benchtime=30s ./cmd/frontend/graphqlbackend/ +// BenchmarkGitCommitResolutionCache/8-8 200000000 202 ns/op +// BenchmarkGitCommitResolutionCache/16-8 100000000 410 ns/op +// BenchmarkGitCommitResolutionCache/32-8 50000000 879 ns/op +// BenchmarkGitCommitResolutionCache/64-8 30000000 1709 ns/op +// BenchmarkGitCommitResolutionCache/128-8 20000000 3345 ns/op +// BenchmarkGitCommitResolutionCache/256-8 20000000 6177 ns/op +// +// The last one shows that while 256 concurrent cache reads are occurring we +// can perform 8 cache reads in 6177ns. +func BenchmarkGitCommitResolutionCache(b *testing.B) { + cache := (&resolutionCache{ + ttl: 10 * time.Minute, + cacheEntries: prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "src", + Subsystem: "graphql", + Name: "git_commit_oid_resolution_cache_entries", + Help: "Total number of entries in the in-memory Git commit OID resolution cache.", + }), + }).startWorker() + + cache.Set("repo-1", "commit-1") + + for _, concurrentReads := range []int{8, 16, 32, 64, 128, 256} { + b.Run(fmt.Sprint(concurrentReads), func(b *testing.B) { + done := make(chan bool) + defer close(done) + for i := 0; i < concurrentReads; i++ { + go func() { + for { + select { + case <-done: + return + default: + cache.Get("repo-1") + } + } + }() + } + time.Sleep(1 * time.Second) // Time for goroutines to start running. + b.ResetTimer() + + for n := 0; n < b.N; n++ { + if v, ok := cache.Get("repo-1"); !ok || v != "commit-1" { + b.Fatal("expected cache get to succeed") + } + } + }) + } +} diff --git a/enterprise/dev/Procfile b/enterprise/dev/Procfile index ab89f4e06691..990a7985c927 100644 --- a/enterprise/dev/Procfile +++ b/enterprise/dev/Procfile @@ -11,7 +11,7 @@ nginx: nginx -p . -g 'daemon off;' -c $PWD/dev/nginx.conf 2>&1 | grep -v 'could web: ENTERPRISE=1 ./node_modules/.bin/gulp --color watch syntect_server: ./dev/syntect_server zoekt-indexserver: ./dev/zoekt-wrapper zoekt-sourcegraph-indexserver -sourcegraph_url http://localhost:3090 -index $HOME/.sourcegraph/zoekt/index -interval 1m -listen :6072 -zoekt-webserver: ./dev/zoekt-wrapper zoekt-webserver -index $HOME/.sourcegraph/zoekt/index -pprof -rpc +zoekt-webserver: ./dev/zoekt-wrapper zoekt-webserver -index $HOME/.sourcegraph/zoekt/index -pprof -rpc -listen :3070 management-console: PROJECT_ROOT=./cmd/management-console TLS_CERT=$HOME/.sourcegraph/management/cert.pem TLS_KEY=$HOME/.sourcegraph/management/key.pem management-console management-console-web: cd ./cmd/management-console/web && ./serve.sh keycloak: ./enterprise/dev/auth-provider/keycloak.sh