Skip to content

Commit

Permalink
diagnostics: lock while populating hardware information
Browse files Browse the repository at this point in the history
The shirou/gopsutil/host library that we use to gather hardware information
during diagnostics reporting is not multi-thread safe. As one example, it
lazily initializes a global map the first time the Virtualization function
is called, but takes no lock while doing so. Work around this limitation by
taking our own lock.

This code never triggered race conditions before, but is doing so after recent
changes I made to the diagnostics reporting code. Previously, we were using a
single background goroutine to do both diagnostics reporting and checking for
updates. Now we are doing each of those on different goroutines, which triggers
race detection.

Fixes #61091

Release justification: fixes for high-priority or high-severity bugs in existing
functionality
Release note: None
  • Loading branch information
andy-kimball committed Feb 25, 2021
1 parent 059c22b commit 1aee317
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 42 deletions.
1 change: 1 addition & 0 deletions pkg/server/diagnostics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ go_library(
"//pkg/util/log/logcrash",
"//pkg/util/protoutil",
"//pkg/util/stop",
"//pkg/util/syncutil",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
Expand Down
53 changes: 53 additions & 0 deletions pkg/server/diagnostics/diagnostics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,23 @@
package diagnostics

import (
"context"
"math/rand"
"net/url"
"runtime"
"strconv"
"time"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server/diagnostics/diagnosticspb"
"github.com/cockroachdb/cockroach/pkg/util/cloudinfo"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/load"
"github.com/shirou/gopsutil/mem"
)

// updatesURL is the URL used to check for new versions. Can be nil if an empty
Expand Down Expand Up @@ -112,3 +120,48 @@ func addJitter(d time.Duration) time.Duration {
j := time.Duration(rand.Intn(jitterSeconds*2)-jitterSeconds) * time.Second
return d + j
}

var populateMutex syncutil.Mutex

// populateHardwareInfo populates OS, CPU, memory, etc. information about the
// environment in which CRDB is running.
func populateHardwareInfo(ctx context.Context, e *diagnosticspb.Environment) {
// The shirou/gopsutil/host library is not multi-thread safe. As one
// example, it lazily initializes a global map the first time the
// Virtualization function is called, but takes no lock while doing so.
// Work around this limitation by taking our own lock.
populateMutex.Lock()
defer populateMutex.Unlock()

if platform, family, version, err := host.PlatformInformation(); err == nil {
e.Os.Family = family
e.Os.Platform = platform
e.Os.Version = version
}

if virt, role, err := host.Virtualization(); err == nil && role == "guest" {
e.Hardware.Virtualization = virt
}

if m, err := mem.VirtualMemory(); err == nil {
e.Hardware.Mem.Available = m.Available
e.Hardware.Mem.Total = m.Total
}

e.Hardware.Cpu.Numcpu = int32(runtime.NumCPU())
if cpus, err := cpu.InfoWithContext(ctx); err == nil && len(cpus) > 0 {
e.Hardware.Cpu.Sockets = int32(len(cpus))
c := cpus[0]
e.Hardware.Cpu.Cores = c.Cores
e.Hardware.Cpu.Model = c.ModelName
e.Hardware.Cpu.Mhz = float32(c.Mhz)
e.Hardware.Cpu.Features = c.Flags
}

if l, err := load.AvgWithContext(ctx); err == nil {
e.Hardware.Loadavg15 = float32(l.Load15)
}

e.Hardware.Provider, e.Hardware.InstanceClass = cloudinfo.GetInstanceClass(ctx)
e.Topology.Provider, e.Topology.Region = cloudinfo.GetInstanceRegion(ctx)
}
42 changes: 0 additions & 42 deletions pkg/server/diagnostics/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"net/http"
"net/url"
"reflect"
"runtime"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
Expand All @@ -36,7 +35,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/util/cloudinfo"
"github.com/cockroachdb/cockroach/pkg/util/httputil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
Expand All @@ -46,10 +44,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/mitchellh/reflectwalk"
"github.com/shirou/gopsutil/cpu"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/load"
"github.com/shirou/gopsutil/mem"
"google.golang.org/protobuf/proto"
)

Expand Down Expand Up @@ -338,42 +332,6 @@ func getLicenseType(ctx context.Context, settings *cluster.Settings) string {
return licenseType
}

// populateHardwareInfo populates OS, CPU, memory, etc. information about the
// environment in which CRDB is running.
func populateHardwareInfo(ctx context.Context, e *diagnosticspb.Environment) {
if platform, family, version, err := host.PlatformInformation(); err == nil {
e.Os.Family = family
e.Os.Platform = platform
e.Os.Version = version
}

if virt, role, err := host.Virtualization(); err == nil && role == "guest" {
e.Hardware.Virtualization = virt
}

if m, err := mem.VirtualMemory(); err == nil {
e.Hardware.Mem.Available = m.Available
e.Hardware.Mem.Total = m.Total
}

e.Hardware.Cpu.Numcpu = int32(runtime.NumCPU())
if cpus, err := cpu.InfoWithContext(ctx); err == nil && len(cpus) > 0 {
e.Hardware.Cpu.Sockets = int32(len(cpus))
c := cpus[0]
e.Hardware.Cpu.Cores = c.Cores
e.Hardware.Cpu.Model = c.ModelName
e.Hardware.Cpu.Mhz = float32(c.Mhz)
e.Hardware.Cpu.Features = c.Flags
}

if l, err := load.AvgWithContext(ctx); err == nil {
e.Hardware.Loadavg15 = float32(l.Load15)
}

e.Hardware.Provider, e.Hardware.InstanceClass = cloudinfo.GetInstanceClass(ctx)
e.Topology.Provider, e.Topology.Region = cloudinfo.GetInstanceRegion(ctx)
}

func anonymizeZoneConfig(dst *zonepb.ZoneConfig, src zonepb.ZoneConfig, secret string) {
if src.RangeMinBytes != nil {
dst.RangeMinBytes = proto.Int64(*src.RangeMinBytes)
Expand Down

0 comments on commit 1aee317

Please sign in to comment.