Skip to content

Commit

Permalink
maintner: flesh out godata package, add caching network mutation source
Browse files Browse the repository at this point in the history
Also, the maintner.Corpus constructor is now gone. An API is added for
maintnerd to become the leader and specify the cache dir.

Updates golang/go#19866

Change-Id: Ia726aa00ca1337b6c130cfee040ff9a1f935d0c2
Reviewed-on: https://go-review.googlesource.com/42148
Reviewed-by: Kevin Burke <[email protected]>
  • Loading branch information
bradfitz committed Apr 29, 2017
1 parent a5b059a commit 04f8c52
Show file tree
Hide file tree
Showing 8 changed files with 310 additions and 50 deletions.
34 changes: 17 additions & 17 deletions maintner/gerrit.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
// Gerrit holds information about a number of Gerrit projects.
type Gerrit struct {
c *Corpus
dataDir string // the root Corpus data directory
projects map[string]*GerritProject // keyed by "go.googlesource.com/build"

clsReferencingGithubIssue map[GitHubIssueRef][]*GerritCL
Expand All @@ -46,7 +45,6 @@ func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject {
proj = &GerritProject{
gerrit: g,
proj: gerritProj,
gitDir: filepath.Join(g.dataDir, url.PathEscape(gerritProj)),
cls: map[int32]*GerritCL{},
remote: map[gerritCLVersion]GitHash{},
}
Expand All @@ -69,15 +67,15 @@ func (g *Gerrit) ForeachProjectUnsorted(fn func(*GerritProject) error) error {
type GerritProject struct {
gerrit *Gerrit
proj string // "go.googlesource.com/net"
// TODO: Many different Git remotes can share the same Gerrit instance, e.g.
// the Go Gerrit instance supports build, gddo, go. For the moment these are
// all treated separately, since the remotes are separate.
gitDir string
cls map[int32]*GerritCL
remote map[gerritCLVersion]GitHash
need map[GitHash]bool
}

func (gp *GerritProject) gitDir() string {
return filepath.Join(gp.gerrit.c.getDataDir(), url.PathEscape(gp.proj))
}

func (gp *GerritProject) ServerSlashProject() string { return gp.proj }

// Server returns the Gerrit server, such as "go.googlesource.com".
Expand Down Expand Up @@ -215,7 +213,6 @@ func (c *Corpus) initGerrit() {
}
c.gerrit = &Gerrit{
c: c,
dataDir: c.dataDir,
projects: map[string]*GerritProject{},
clsReferencingGithubIssue: map[GitHubIssueRef][]*GerritCL{},
}
Expand All @@ -231,6 +228,7 @@ type watchedGerritRepo struct {
func (c *Corpus) AddGerrit(gerritProj string) {
c.mu.Lock()
defer c.mu.Unlock()

if strings.Count(gerritProj, "/") != 1 {
panic(fmt.Sprintf("gerrit project argument %q expected to contain exactly 1 slash", gerritProj))
}
Expand Down Expand Up @@ -463,18 +461,19 @@ func (gp *GerritProject) sync(ctx context.Context, loop bool) error {

func (gp *GerritProject) syncOnce(ctx context.Context) error {
c := gp.gerrit.c
gitDir := gp.gitDir()

fetchCtx, cancel := context.WithTimeout(ctx, time.Minute)
cmd := exec.CommandContext(fetchCtx, "git", "fetch", "origin")
cmd.Dir = gp.gitDir
cmd.Dir = gitDir
out, err := cmd.CombinedOutput()
cancel()
if err != nil {
return fmt.Errorf("git fetch origin: %v, %s", err, out)
}

cmd = exec.CommandContext(ctx, "git", "ls-remote")
cmd.Dir = gp.gitDir
cmd.Dir = gitDir
out, err = cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("git ls-remote: %v, %s", err, out)
Expand Down Expand Up @@ -567,7 +566,7 @@ func (gp *GerritProject) syncCommits(ctx context.Context) (n int, err error) {
lastLog = now
gp.logf("parsing commits (%v done)", n)
}
commit, err := parseCommitFromGit(gp.gitDir, hash)
commit, err := parseCommitFromGit(gp.gitDir(), hash)
if err != nil {
return n, err
}
Expand Down Expand Up @@ -603,7 +602,7 @@ func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []GitHash) erro
}
gp.logf("fetching %v hashes...", len(hashes))
cmd := exec.CommandContext(ctx, "git", args...)
cmd.Dir = gp.gitDir
cmd.Dir = gp.gitDir()
if out, err := cmd.CombinedOutput(); err != nil {
log.Printf("error fetching %d hashes from gerrit project %s: %s", len(hashes), gp.proj, out)
return err
Expand All @@ -620,7 +619,8 @@ func formatExecError(err error) string {
}

func (gp *GerritProject) init(ctx context.Context) error {
if err := os.MkdirAll(gp.gitDir, 0755); err != nil {
gitDir := gp.gitDir()
if err := os.MkdirAll(gitDir, 0755); err != nil {
return err
}
// try to short circuit a git init error, since the init error matching is
Expand All @@ -629,12 +629,12 @@ func (gp *GerritProject) init(ctx context.Context) error {
return fmt.Errorf("looking for git binary: %v", err)
}

if _, err := os.Stat(filepath.Join(gp.gitDir, ".git", "config")); err == nil {
if _, err := os.Stat(filepath.Join(gitDir, ".git", "config")); err == nil {
cmd := exec.CommandContext(ctx, "git", "remote", "-v")
cmd.Dir = gp.gitDir
cmd.Dir = gitDir
remoteBytes, err := cmd.Output()
if err != nil {
return fmt.Errorf("running git remote -v in %v: %v", gp.gitDir, formatExecError(err))
return fmt.Errorf("running git remote -v in %v: %v", gitDir, formatExecError(err))
}
if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) {
return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes))
Expand All @@ -647,7 +647,7 @@ func (gp *GerritProject) init(ctx context.Context) error {
buf := new(bytes.Buffer)
cmd.Stdout = buf
cmd.Stderr = buf
cmd.Dir = gp.gitDir
cmd.Dir = gitDir
if err := cmd.Run(); err != nil {
log.Printf(`Error running "git init": %s`, buf.String())
return err
Expand All @@ -656,7 +656,7 @@ func (gp *GerritProject) init(ctx context.Context) error {
cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj)
cmd.Stdout = buf
cmd.Stderr = buf
cmd.Dir = gp.gitDir
cmd.Dir = gitDir
if err := cmd.Run(); err != nil {
log.Printf(`Error running "git remote add origin": %s`, buf.String())
return err
Expand Down
2 changes: 1 addition & 1 deletion maintner/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ func (c *Corpus) processGitCommit(commit *maintpb.GitCommit) (*GitCommit, error)
if c.gitCommitTodo != nil {
delete(c.gitCommitTodo, hash)
}
if c.Verbose {
if c.verbose {
now := time.Now()
if now.After(c.lastGitCount.Add(time.Second)) {
c.lastGitCount = now
Expand Down
2 changes: 1 addition & 1 deletion maintner/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ type githubIssueDiffer struct {
}

func (d githubIssueDiffer) verbose() bool {
return d.gr.github != nil && d.gr.github.c != nil && d.gr.github.c.Verbose
return d.gr.github != nil && d.gr.github.c != nil && d.gr.github.c.verbose
}

// returns nil if no changes.
Expand Down
48 changes: 40 additions & 8 deletions maintner/godata/godata.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,55 @@ package godata

import (
"context"
"log"
"os"
"os/user"
"path/filepath"
"runtime"

"golang.org/x/build/maintner"
)

// Get returns the Go project's corpus.
func Get(ctx context.Context) (*maintner.Corpus, error) {
// TODO: this is a dummy implementation for now. It should
// really create a cache dir, and slurp as-needed from the
// network (once we run a server), and then load it. For now
// we assume it's already on disk.
dir := filepath.Join(os.Getenv("HOME"), "var", "maintnerd")
logger := maintner.NewDiskMutationLogger(dir)
corpus := maintner.NewCorpus(logger, dir)
if err := corpus.Initialize(ctx, logger); err != nil {
targetDir := filepath.Join(xdgCacheDir(), "golang-maintner")
if err := os.MkdirAll(targetDir, 0700); err != nil {
return nil, err
}
mutSrc := maintner.NewNetworkMutationSource("https://maintner.golang.org/logs", targetDir)
corpus := new(maintner.Corpus)
if err := corpus.Initialize(ctx, mutSrc); err != nil {
return nil, err
}
return corpus, nil
}

// xdgCacheDir returns the XDG Base Directory Specification cache
// directory.
func xdgCacheDir() string {
cache := os.Getenv("XDG_CACHE_HOME")
if cache != "" {
return cache
}
home := homeDir()
// Not XDG but standard for OS X.
if runtime.GOOS == "darwin" {
return filepath.Join(home, "Library/Caches")
}
return filepath.Join(home, ".cache")
}

func homeDir() string {
if runtime.GOOS == "windows" {
return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
}
home := os.Getenv("HOME")
if home != "" {
return home
}
u, err := user.Current()
if err != nil {
log.Fatalf("failed to get home directory or current user: %v", err)
}
return u.HomeDir
}
29 changes: 20 additions & 9 deletions maintner/maintner.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ import (
// is populated from a MutationSource (disk, database), and the polling phase,
// when the Corpus polls for new events and stores/writes them to disk.
type Corpus struct {
MutationLogger MutationLogger
Verbose bool
mutationLogger MutationLogger // non-nil when this is a self-updating corpus
verbose bool
dataDir string

mu sync.RWMutex // guards all following fields
// corpus state:
Expand All @@ -56,17 +57,27 @@ type Corpus struct {
gitCommitTodo map[GitHash]bool // -> true
gitOfHg map[string]GitHash // hg hex hash -> git hash
zoneCache map[string]*time.Location // "+0530" => location
dataDir string
}

type polledGitCommits struct {
repo *maintpb.GitRepo
dir string
}

// NewCorpus creates a new Corpus.
func NewCorpus(logger MutationLogger, dataDir string) *Corpus {
return &Corpus{MutationLogger: logger, dataDir: dataDir}
// EnableLeaderMode prepares c to be the leader.
// The provided scratchDir will store git checkouts.
func (c *Corpus) EnableLeaderMode(logger MutationLogger, scratchDir string) {
c.mutationLogger = logger
c.dataDir = scratchDir
}

func (c *Corpus) SetVerbose(v bool) { c.verbose = v }

func (c *Corpus) getDataDir() string {
if c.dataDir == "" {
panic("getDataDir called before Corpus.EnableLeaderMode")
}
return c.dataDir
}

// GitHub returns the corpus's github data.
Expand Down Expand Up @@ -214,17 +225,17 @@ func (c *Corpus) Update(ctx context.Context) error {

// addMutation adds a mutation to the log and immediately processes it.
func (c *Corpus) addMutation(m *maintpb.Mutation) {
if c.Verbose {
if c.verbose {
log.Printf("mutation: %v", m)
}
c.mu.Lock()
c.processMutationLocked(m)
c.mu.Unlock()

if c.MutationLogger == nil {
if c.mutationLogger == nil {
return
}
err := c.MutationLogger.Log(m)
err := c.mutationLogger.Log(m)
if err != nil {
// TODO: handle errors better? failing is only safe option.
log.Fatalf("could not log mutation %v: %v\n", m, err)
Expand Down
16 changes: 4 additions & 12 deletions maintner/maintnerd/gcslog.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,6 @@ func (gl *gcsLog) initLoad(ctx context.Context) error {
return nil
}

type LogSegmentJSON struct {
Number int `json:"number"`
Size int64 `json:"size"`
SHA224 string `json:"sha224"`
URL string `json:"url"` // TODO ....
// TODO ....
}

func (gl *gcsLog) serveLogFile(w http.ResponseWriter, r *http.Request) {
if r.Method != "GET" && r.Method != "HEAD" {
http.Error(w, "bad method", http.StatusBadRequest)
Expand Down Expand Up @@ -210,24 +202,24 @@ func (gl *gcsLog) serveJSONLogsIndex(w http.ResponseWriter, r *http.Request) {
w.Write(body)
}

func (gl *gcsLog) getJSONLogs(startSeg int) (segs []LogSegmentJSON) {
func (gl *gcsLog) getJSONLogs(startSeg int) (segs []maintner.LogSegmentJSON) {
gl.mu.Lock()
defer gl.mu.Unlock()
if startSeg > gl.curNum || startSeg < 0 {
startSeg = 0
}
segs = make([]LogSegmentJSON, 0, gl.curNum-startSeg)
segs = make([]maintner.LogSegmentJSON, 0, gl.curNum-startSeg)
for i := startSeg; i < gl.curNum; i++ {
seg := gl.seg[i]
segs = append(segs, LogSegmentJSON{
segs = append(segs, maintner.LogSegmentJSON{
Number: i,
Size: seg.size,
SHA224: seg.sha224,
URL: fmt.Sprintf("https://storage.googleapis.com/%s/%s", gl.bucketName, seg.ObjectName()),
})
}
if gl.logBuf.Len() > 0 {
segs = append(segs, LogSegmentJSON{
segs = append(segs, maintner.LogSegmentJSON{
Number: gl.curNum,
Size: int64(gl.logBuf.Len()),
SHA224: fmt.Sprintf("%x", gl.logSHA224.Sum(nil)),
Expand Down
5 changes: 3 additions & 2 deletions maintner/maintnerd/maintnerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,12 @@ func main() {
logger = maintner.NewDiskMutationLogger(*dataDir)
}

corpus := maintner.NewCorpus(logger, *dataDir)
corpus := new(maintner.Corpus)
corpus.EnableLeaderMode(logger, *dataDir)
if *debug {
corpus.SetDebug()
}
corpus.Verbose = *verbose
corpus.SetVerbose(*verbose)
switch *config {
case "":
// Nothing
Expand Down
Loading

0 comments on commit 04f8c52

Please sign in to comment.