Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

resolve major search perf regression in v3.2.0 (add redis cache to git commit OID resolution) #3685

Merged
merged 10 commits into from
May 1, 2019
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ All notable changes to Sourcegraph are documented in this file.
## Changed

- All 24 language extensions are enabled by default.
- Fixed a major indexed search performance regression that occurred in v3.2.0. (#3685)

## 3.3.5

Expand Down
84 changes: 58 additions & 26 deletions cmd/frontend/graphqlbackend/git_commit.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"strings"
"sync"
"time"

"github.com/sourcegraph/sourcegraph/cmd/frontend/backend"
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/externallink"
Expand Down Expand Up @@ -35,9 +36,9 @@ type gitCommitResolver struct {
// to avoid redirecting a user browsing a revision "mybranch" to the absolute commit ID as they follow links in the UI.
inputRev *string

oid gitObjectID
once sync.Once
onceErr error
oid gitObjectID
oidErr error
once sync.Once

author signatureResolver
committer *signatureResolver
Expand Down Expand Up @@ -84,40 +85,71 @@ func (r *gitCommitResolver) ID() graphql.ID {
func (r *gitCommitResolver) Repository() *repositoryResolver { return r.repo }

func (r *gitCommitResolver) OID() (gitObjectID, error) {
return r.getCommitOID()
}

func (r *gitCommitResolver) getCommitOID() (gitObjectID, error) {
r.once.Do(func() {
// If we already have the commit, no need to try to compute it.
if r.oid != "" {
return // We already have an oid because the creator of this *gitCommitResolver specified it.
}

// Try fetching it from the Redis cache to avoid doing lots of work
// previously done (as this method is called very often, e.g. multiple
// times per search result).
start := time.Now()
result, ok := oidResolutionCache.Get(string(r.repo.repo.ID))
oidResolutionCacheLookupDuration.Observe(time.Since(start).Seconds())
if ok {
oidResolutionCounter.WithLabelValues("hit").Inc()
r.oid = gitObjectID(result)
return
}

// Commit OID is the empty string denoting the default branch. Find out
// what is the latest commit indexed by zoekt.
// The cache doesn't have it, so compute it and update the cache if we
// resolved it successfully.
start = time.Now()
r.resolveCommitIODUncached()
oidResolutionDuration.Observe(time.Since(start).Seconds())
if r.oidErr == nil {
oidResolutionCounter.WithLabelValues("miss").Inc()
oidResolutionCache.Set(string(r.repo.repo.ID), string(r.oid))
} else {
oidResolutionCounter.WithLabelValues("miss_error").Inc()
}
})
return r.oid, r.oidErr
}

indexInfo := r.repo.TextSearchIndex()
func (r *gitCommitResolver) resolveCommitIODUncached() {
if r.oid != "" || r.oidErr != nil {
// Possible scenarios for this case:
//
// - We already have an r.oid because the creator of this *gitCommitResolver specified it.
// - We already have an r.oid because we were called before.
// - We don't have an r.oid but have an r.oidErr from being called before.
//
// In any case, there is no point in doing the work again, so we return
// now.
return
}

ctx := context.Background()
// Commit OID is the empty string denoting the default branch. Find out
// what is the latest commit indexed by zoekt.

var refs []*repositoryTextSearchIndexedRef
refs, r.onceErr = indexInfo.Refs(ctx)
if r.onceErr != nil {
return
}
indexInfo := r.repo.TextSearchIndex()

for _, ref := range refs {
current, _ := ref.Current(ctx)
if current {
r.oid = ref.indexedCommit
ctx := context.Background()

break
}
}
})
var refs []*repositoryTextSearchIndexedRef
refs, r.oidErr = indexInfo.Refs(ctx)
if r.oidErr != nil {
return
}

return r.oid, r.onceErr
for _, ref := range refs {
current, _ := ref.Current(ctx)
if current {
r.oid = ref.indexedCommit
break
}
}
}

func (r *gitCommitResolver) AbbreviatedOID() (string, error) {
Expand Down
123 changes: 123 additions & 0 deletions cmd/frontend/graphqlbackend/git_commit_resolution_cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package graphqlbackend

import (
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
)

type resolutionCacheEntry struct {
t time.Time
value string
}

type resolutionCache struct {
// ttl indicates how long before cache entries expire. There is no limit on
// the size of the cache except the effective # of repositories on the
// Sourcegraph instance.
ttl time.Duration

// cacheEntries, if non-nil, is used to record the number of entries in the cache.
cacheEntries prometheus.Histogram

// workerInterval, if non-zero, specifies the interval at which the worker
// checks for entries to evict. Defaults ttl / 2.
workerInterval time.Duration

// mockSleep, if non-nil can be used to mock time.Sleep for testing purposes.
mockSleep func(d time.Duration)

m sync.Map
}

func (r *resolutionCache) Set(k, v string) {
r.m.Store(k, resolutionCacheEntry{
t: time.Now(),
value: v,
})
}

func (r *resolutionCache) Get(k string) (string, bool) {
v, ok := r.m.Load(k)
if !ok {
return "", false
}
e := v.(resolutionCacheEntry)
if time.Since(e.t) >= r.ttl {
// entry has expired
r.m.Delete(k)
return "", false
}
return e.value, true
}

func (r *resolutionCache) startWorker() *resolutionCache {
if r.workerInterval == 0 {
r.workerInterval = r.ttl / 2
}
sleep := time.Sleep
if r.mockSleep != nil {
sleep = r.mockSleep
}
go func() {
for {
sleep(r.workerInterval)
size := 0
r.m.Range(func(key, value interface{}) bool {
size++
e := value.(resolutionCacheEntry)
if time.Since(e.t) >= r.ttl {
// entry has expired
r.m.Delete(key)
}
return true
})
r.cacheEntries.Observe(float64(size))
}
}()
return r
}

var (
// oidResolutionCache is used to cache Git commit OID resolution. This is
// used because OID resolution happens extremely often (e.g. multiple times
// per search result).
oidResolutionCache = (&resolutionCache{
ttl: 60 * time.Second,
cacheEntries: prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "src",
Subsystem: "graphql",
Name: "git_commit_oid_resolution_cache_entries",
Help: "Total number of entries in the in-memory Git commit OID resolution cache.",
}),
}).startWorker()

oidResolutionCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: "src",
Subsystem: "graphql",
Name: "git_commit_oid_resolution_cache_hit",
Help: "Counts cache hits and misses for Git commit OID resolution.",
}, []string{"type"})

oidResolutionDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "src",
Subsystem: "graphql",
Name: "git_commit_oid_resolution_duration_seconds",
Help: "Total time spent performing uncached Git commit OID resolution.",
})

oidResolutionCacheLookupDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "src",
Subsystem: "graphql",
Name: "git_commit_oid_resolution_cache_lookup_duration_seconds",
Help: "Total time spent performing cache lookups for Git commit OID resolution.",
})
)

func init() {
prometheus.MustRegister(oidResolutionCache.cacheEntries)
prometheus.MustRegister(oidResolutionCounter)
prometheus.MustRegister(oidResolutionDuration)
prometheus.MustRegister(oidResolutionCacheLookupDuration)
}
96 changes: 96 additions & 0 deletions cmd/frontend/graphqlbackend/git_commit_resolution_cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package graphqlbackend

import (
"fmt"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
)

func TestGitCommitResolutionCache(t *testing.T) {
doneSleep := make(chan struct{})
cache := (&resolutionCache{
ttl: 15 * time.Millisecond,
cacheEntries: prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "src",
Subsystem: "graphql",
Name: "git_commit_oid_resolution_cache_entries",
Help: "Total number of entries in the in-memory Git commit OID resolution cache.",
}),
mockSleep: func(d time.Duration) {
<-doneSleep
},
}).startWorker()

cache.Set("repo-1", "commit-1")
cache.Set("repo-2", "commit-2")
if v, ok := cache.Get("repo-1"); !ok || v != "commit-1" {
t.Fatal("expected cache get to succeed")
}
if v, ok := cache.Get("repo-2"); !ok || v != "commit-2" {
t.Fatal("expected cache get to succeed")
}
doneSleep <- struct{}{} // cause worker to begin a single iteration
time.Sleep(30 * time.Millisecond) // wait long enough for TTL
if _, ok := cache.Get("repo-1"); ok {
t.Fatal("expected cache entry to have expired")
}
if _, ok := cache.Get("repo-2"); ok {
t.Fatal("expected cache entry to have expired")
}
}

// Merely shows that the cache can support a very high concurrent read rate
// with a low write rate. Run it like:
//
// $ go test -bench BenchmarkGitCommitResolutionCache -benchtime=30s ./cmd/frontend/graphqlbackend/
// BenchmarkGitCommitResolutionCache/8-8 200000000 202 ns/op
// BenchmarkGitCommitResolutionCache/16-8 100000000 410 ns/op
// BenchmarkGitCommitResolutionCache/32-8 50000000 879 ns/op
// BenchmarkGitCommitResolutionCache/64-8 30000000 1709 ns/op
// BenchmarkGitCommitResolutionCache/128-8 20000000 3345 ns/op
// BenchmarkGitCommitResolutionCache/256-8 20000000 6177 ns/op
//
// The last one shows that while 256 concurrent cache reads are occurring we
// can perform 8 cache reads in 6177ns.
func BenchmarkGitCommitResolutionCache(b *testing.B) {
cache := (&resolutionCache{
ttl: 10 * time.Minute,
cacheEntries: prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "src",
Subsystem: "graphql",
Name: "git_commit_oid_resolution_cache_entries",
Help: "Total number of entries in the in-memory Git commit OID resolution cache.",
}),
}).startWorker()

cache.Set("repo-1", "commit-1")

for _, concurrentReads := range []int{8, 16, 32, 64, 128, 256} {
b.Run(fmt.Sprint(concurrentReads), func(b *testing.B) {
done := make(chan bool)
defer close(done)
for i := 0; i < concurrentReads; i++ {
go func() {
for {
select {
case <-done:
return
default:
cache.Get("repo-1")
}
}
}()
}
time.Sleep(1 * time.Second) // Time for goroutines to start running.
b.ResetTimer()

for n := 0; n < b.N; n++ {
if v, ok := cache.Get("repo-1"); !ok || v != "commit-1" {
b.Fatal("expected cache get to succeed")
}
}
})
}
}
2 changes: 1 addition & 1 deletion enterprise/dev/Procfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ nginx: nginx -p . -g 'daemon off;' -c $PWD/dev/nginx.conf 2>&1 | grep -v 'could
web: ENTERPRISE=1 ./node_modules/.bin/gulp --color watch
syntect_server: ./dev/syntect_server
zoekt-indexserver: ./dev/zoekt-wrapper zoekt-sourcegraph-indexserver -sourcegraph_url http://localhost:3090 -index $HOME/.sourcegraph/zoekt/index -interval 1m -listen :6072
zoekt-webserver: ./dev/zoekt-wrapper zoekt-webserver -index $HOME/.sourcegraph/zoekt/index -pprof -rpc
zoekt-webserver: ./dev/zoekt-wrapper zoekt-webserver -index $HOME/.sourcegraph/zoekt/index -pprof -rpc -listen :3070
slimsag marked this conversation as resolved.
Show resolved Hide resolved
management-console: PROJECT_ROOT=./cmd/management-console TLS_CERT=$HOME/.sourcegraph/management/cert.pem TLS_KEY=$HOME/.sourcegraph/management/key.pem management-console
management-console-web: cd ./cmd/management-console/web && ./serve.sh
keycloak: ./enterprise/dev/auth-provider/keycloak.sh
Expand Down