From ac39ba8218d531cae453b1ae2e2ab5bd066cc343 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Thu, 14 May 2015 13:39:58 -0700 Subject: [PATCH] dashboard, cmd/coordinator: sharded ARM trybot-only builders This is a quick & hacky version of our eventual sharding plan. Later we want to run make.bash once, snapshot it, and then intelligently schedule all the tests out over N machines (starting with the built snapshot), giving each the next test to run as they become idle. And then we want to stitch together all of their output. But this CL just makes 6 ARM builders, each of which runs make.bash (4 minutes) and then runs a statically-configured subset of the tests. Based on measurements, each should run between 8.2 and 10.6 minutes. We can break up some of the larger test cases later into more shardable pieces (in particular: "test"). Update golang/go#10029 Change-Id: Ib3e008b61ce6f28d799ae298c8467bf62d89e5d4 Reviewed-on: https://go-review.googlesource.com/10056 Reviewed-by: Andrew Gerrand --- cmd/coordinator/coordinator.go | 18 +++++++- cmd/coordinator/dash.go | 4 ++ dashboard/builders.go | 83 ++++++++++++++++++++++++++++++++-- 3 files changed, 99 insertions(+), 6 deletions(-) diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go index 52f4f56d30..bcc6361fe4 100644 --- a/cmd/coordinator/coordinator.go +++ b/cmd/coordinator/coordinator.go @@ -92,6 +92,12 @@ func init() { "plan9-386-gcepartial", "nacl-386", "nacl-amd64p32", + "linux-arm-shard_test", + "linux-arm-shard_std_am", + "linux-arm-shard_std_nz", + "linux-arm-shard_runtimecpu", + "linux-arm-shard_cgotest", + "linux-arm-shard_misc", } for _, bname := range tryList { conf, ok := dashboard.Builders[bname] @@ -935,12 +941,20 @@ func (st *buildStatus) start() { } func (st *buildStatus) build() (retErr error) { - pool, err := poolForConf(st.conf) + buildletType := st.conf.BuildletType + if buildletType == "" { + buildletType = st.conf.Name + } + bconf, ok := 
dashboard.Builders[buildletType] + if !ok { + return fmt.Errorf("invalid BuildletType %q for %q", buildletType, st.conf.Name) + } + pool, err := poolForConf(bconf) if err != nil { return err } st.logEventTime("get_buildlet") - bc, err := pool.GetBuildlet(st.conf.Name, st.rev, st) + bc, err := pool.GetBuildlet(buildletType, st.rev, st) if err != nil { return fmt.Errorf("failed to get a buildlet: %v", err) } diff --git a/cmd/coordinator/dash.go b/cmd/coordinator/dash.go index 70e01ebb16..50a759cddf 100644 --- a/cmd/coordinator/dash.go +++ b/cmd/coordinator/dash.go @@ -120,6 +120,10 @@ func recordResult(builderName string, ok bool, hash, buildLog string, runTime ti // pingDashboard runs in its own goroutine, created periodically to // POST to build.golang.org/building to let it know that we're still working on a build. func (st *buildStatus) pingDashboard() { + if st.conf.TryOnly { + // Builders that are trybot-only don't appear on the dashboard. + return + } if *mode == "dev" { log.Print("In dev mode, not pinging dashboard") return diff --git a/dashboard/builders.go b/dashboard/builders.go index 61df9fe402..e28793d28e 100644 --- a/dashboard/builders.go +++ b/dashboard/builders.go @@ -30,6 +30,19 @@ type BuildConfig struct { RegularDisk bool // if true, use spinning disk instead of SSD TryOnly bool // only used for trybots, and not regular builds + // BuildletType optionally specifies the type of buildlet to + // request from the buildlet pool. If empty, it defaults to + // the value of Name. + // + // Note: we should probably start using this mechanism for + // more builder types, which combined with buildlet reuse + // could reduce latency. (e.g. "linux-386-387", "linux-amd64", + // and "linux-amd64-race" all sharing same buildlet and + // machine type, and able to jump onto each other's + // buildlets... they vary only in env/args). For now we're + // only using this for ARM trybots. 
+ BuildletType string + env []string // extra environment ("key=value") pairs } @@ -251,6 +264,68 @@ func init() { IsReverse: true, env: []string{"GOROOT_BOOTSTRAP=/usr/local/go"}, }) + // Sharded ARM trybots. Each shard's GOTESTONLY is a regexp over test names; the go_test: patterns are deliberately not end-anchored so they match full names like go_test:<pkg>. + addBuilder(BuildConfig{ + Name: "linux-arm-shard_test", + BuildletType: "linux-arm", + TryOnly: true, + IsReverse: true, + env: []string{ + "GOROOT_BOOTSTRAP=/usr/local/go", + "GOTESTONLY=^test$", + }, + }) + addBuilder(BuildConfig{ + Name: "linux-arm-shard_std_am", + BuildletType: "linux-arm", + TryOnly: true, + IsReverse: true, + env: []string{ + "GOROOT_BOOTSTRAP=/usr/local/go", + "GOTESTONLY=^go_test:[a-m]", + }, + }) + addBuilder(BuildConfig{ + Name: "linux-arm-shard_std_nz", + BuildletType: "linux-arm", + TryOnly: true, + IsReverse: true, + env: []string{ + "GOROOT_BOOTSTRAP=/usr/local/go", + "GOTESTONLY=^go_test:[n-z]", + }, + }) + addBuilder(BuildConfig{ + Name: "linux-arm-shard_runtimecpu", + BuildletType: "linux-arm", + TryOnly: true, + IsReverse: true, + env: []string{ + "GOROOT_BOOTSTRAP=/usr/local/go", + "GOTESTONLY=^runtime:cpu124$", + }, + }) + addBuilder(BuildConfig{ + Name: "linux-arm-shard_cgotest", + BuildletType: "linux-arm", + TryOnly: true, + IsReverse: true, + env: []string{ + "GOROOT_BOOTSTRAP=/usr/local/go", + "GOTESTONLY=^cgo_test$", + }, + }) + addBuilder(BuildConfig{ + Name: "linux-arm-shard_misc", + BuildletType: "linux-arm", + TryOnly: true, + IsReverse: true, + env: []string{ + "GOROOT_BOOTSTRAP=/usr/local/go", + "GOTESTONLY=!^(go_test:|test$|cgo_test$|runtime:cpu124$)", + }, + }) + addBuilder(BuildConfig{ Name: "linux-arm-arm5", IsReverse: true, @@ -291,10 +366,10 @@ func init() { Notes: "Plan 9 from 0intro; GCE VM is built from script in build/env/plan9-386; runs with GOTESTONLY=std (only stdlib tests)", VMImage: "plan9-386-v2", Go14URL: "https://storage.googleapis.com/go-builder-data/go1.4-plan9-386.tar.gz", - // It's named "partial" because the buildlet sets - // GOTESTONLY=std to stop after the "go test std" - // tests 
because it's so slow otherwise. - env: []string{"GOTESTONLY=std"}, + // It's named "partial" because the buildlet only runs + // the standard library tests ("go test std cmd", basically). + // TODO: run a full Plan 9 builder, or a sharded one. + env: []string{"GOTESTONLY=^go_test:"}, // We *were* using n1-standard-1 because Plan 9 can only // reliably use a single CPU. Using 2 or 4 and we see