Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
71740: server: create gauge metric for license expiry tracking r=rimadeodhar,abarganier a=dhartunian

Previously, the license expiry metric was manually appended to the
Prometheus output resulting in some unexpected behavior. First, the
expected Prometheus metadata that accompanies other metrics did not
accompany this metric. Second, the metric was unavailable in the DB
Console for viewing in custom graphs.

This change updates the license expiry metric to use the standard
`Metric` types and API. The metric is updated using a callback attached
to the cluster setting.

Resolves: cockroachdb#71525

Release note (ops change): The license expiry metric is now available in
the DB Console and includes the expected `HELP` and `TYPE` annotations
in the Prometheus output on `_status/vars`.

Co-authored-by: David Hartunian <[email protected]>
  • Loading branch information
craig[bot] and dhartunian committed Dec 15, 2021
2 parents 9dd7555 + e24de91 commit 0f69631
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 106 deletions.
2 changes: 2 additions & 0 deletions pkg/base/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ go_library(
"//pkg/util/errorutil",
"//pkg/util/humanizeutil",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/mon",
"//pkg/util/netutil/addr",
"//pkg/util/retry",
"//pkg/util/stop",
"//pkg/util/timeutil",
"//pkg/util/tracing",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
Expand Down
38 changes: 28 additions & 10 deletions pkg/base/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ package base

import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)
Expand All @@ -30,15 +32,31 @@ var CheckEnterpriseEnabled = func(_ *cluster.Settings, _ uuid.UUID, org, feature
return errEnterpriseNotEnabled // nb: this is squarely in the hot path on OSS builds
}

// TimeToEnterpriseLicenseExpiry returns a duration object that measures the time until
// the currently set enterprise license expires starting from the 3rd argument
// passed in.
//
// This function is overridden by an init hook in CCL builds
var TimeToEnterpriseLicenseExpiry = func(
ctx context.Context, _ *cluster.Settings, _ time.Time,
) (time.Duration, error) {
return 0, nil
// licenseTTLMetadata describes the gauge that reports the number of seconds
// remaining until the enterprise license expires.
var licenseTTLMetadata = metric.Metadata{
// This metric name isn't namespaced, for backwards compatibility:
// the prior version of this metric was manually inserted into the
// Prometheus output under this exact name.
Name: "seconds_until_enterprise_license_expiry",
Help: "Seconds until enterprise license expiry (0 if no license present or running without enterprise features)",
Measurement: "Seconds",
Unit: metric.Unit_SECONDS,
}

// LicenseTTL is a metric gauge that measures the number of seconds
// until the current enterprise license (if any) expires. It is built
// from licenseTTLMetadata and is therefore exported under the name
// "seconds_until_enterprise_license_expiry".
var LicenseTTL = metric.NewGauge(licenseTTLMetadata)

// UpdateMetricOnLicenseChange is a function that's called on startup
// in order to connect the enterprise license setting update to the
// prometheus metric provided as an argument.
//
// The default implementation assigned here is a no-op that always
// returns nil, so the gauge is never updated by it. This variable is
// overridden by an init hook in CCL builds with an implementation
// that keeps the gauge current.
var UpdateMetricOnLicenseChange = func(
ctx context.Context,
st *cluster.Settings,
metric *metric.Gauge,
ts timeutil.TimeSource,
stopper *stop.Stopper,
) error {
// Nothing to track in the default implementation.
return nil
}

// LicenseType returns what type of license the cluster is running with, or
Expand Down
5 changes: 5 additions & 0 deletions pkg/ccl/utilccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ go_library(
"//pkg/sql/types",
"//pkg/util/envutil",
"//pkg/util/grpcutil",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/stop",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_circuitbreaker//:circuitbreaker",
Expand All @@ -40,10 +43,12 @@ go_test(
],
embed = [":utilccl"],
deps = [
"//pkg/base",
"//pkg/ccl/utilccl/licenseccl",
"//pkg/settings/cluster",
"//pkg/testutils",
"//pkg/util/envutil",
"//pkg/util/stop",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_stretchr_testify//require",
Expand Down
57 changes: 45 additions & 12 deletions pkg/ccl/utilccl/license_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -122,23 +125,53 @@ func IsEnterpriseEnabled(st *cluster.Settings, cluster uuid.UUID, org, feature s
func init() {
base.CheckEnterpriseEnabled = CheckEnterpriseEnabled
base.LicenseType = getLicenseType
base.TimeToEnterpriseLicenseExpiry = TimeToEnterpriseLicenseExpiry
base.UpdateMetricOnLicenseChange = UpdateMetricOnLicenseChange
server.ApplyTenantLicense = ApplyTenantLicense
}

// TimeToEnterpriseLicenseExpiry returns a Duration from `asOf` until the current
// enterprise license expires. If a license does not exist, we return a
// zero duration.
func TimeToEnterpriseLicenseExpiry(
ctx context.Context, st *cluster.Settings, asOf time.Time,
) (time.Duration, error) {
// licenseMetricUpdateFrequency is the interval at which the background
// task started by UpdateMetricOnLicenseChange refreshes the license
// expiry metric.
var licenseMetricUpdateFrequency = 1 * time.Minute

// UpdateMetricOnLicenseChange keeps the given gauge in sync with the
// seconds remaining until the enterprise license expires. It does so
// in two ways:
//
//   - it registers an on-change callback on the enterprise license
//     setting so the gauge is recomputed whenever the setting changes;
//   - it starts an async task on the stopper that recomputes the gauge
//     every licenseMetricUpdateFrequency until the stopper quiesces.
//
// The returned error is whatever stopper.RunAsyncTask reports when
// asked to start the periodic task.
func UpdateMetricOnLicenseChange(
	ctx context.Context,
	st *cluster.Settings,
	metric *metric.Gauge,
	ts timeutil.TimeSource,
	stopper *stop.Stopper,
) error {
	// refresh recomputes the gauge from the current license setting.
	refresh := func(ctx context.Context) {
		updateMetricWithLicenseTTL(ctx, st, metric, ts)
	}
	enterpriseLicense.SetOnChange(&st.SV, refresh)

	// The remaining time shrinks even when the setting is untouched, so
	// the gauge also has to be refreshed on a timer.
	periodicRefresh := func(ctx context.Context) {
		tick := time.NewTicker(licenseMetricUpdateFrequency)
		defer tick.Stop()
		for {
			select {
			case <-stopper.ShouldQuiesce():
				return
			case <-tick.C:
				refresh(ctx)
			}
		}
	}
	return stopper.RunAsyncTask(ctx, "write-license-expiry-metric", periodicRefresh)
}

func updateMetricWithLicenseTTL(
ctx context.Context, st *cluster.Settings, metric *metric.Gauge, ts timeutil.TimeSource,
) {
license, err := getLicense(st)
if err != nil || license == nil {
return 0, err
if err != nil {
log.Errorf(ctx, "unable to update license expiry metric: %v", err)
metric.Update(0)
return
}

expiration := timeutil.Unix(license.ValidUntilUnixSec, 0)
return expiration.Sub(asOf), nil
if license == nil {
metric.Update(0)
return
}
sec := timeutil.Unix(license.ValidUntilUnixSec, 0).Sub(ts.Now()).Seconds()
metric.Update(int64(sec))
}

func checkEnterpriseEnabledAt(
Expand Down
29 changes: 16 additions & 13 deletions pkg/ccl/utilccl/license_check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import (
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -163,29 +165,30 @@ func TestTimeToEnterpriseLicenseExpiry(t *testing.T) {

st := cluster.MakeTestingClusterSettings()
updater := st.MakeUpdater()
stopper := stop.NewStopper()
defer stopper.Stop(ctx)
manualTime := timeutil.NewManualTime(t0)

err := UpdateMetricOnLicenseChange(context.Background(), st, base.LicenseTTL, manualTime, stopper)
require.NoError(t, err)

for _, tc := range []struct {
desc string
lic string
ttlHours float64
desc string
lic string
ttlSeconds int64
}{
{"One Month", lic1M, 24 * 31},
{"Two Month", lic2M, 24*31 + 24*30},
{"One Month", lic1M, 24 * 31 * 3600},
{"Two Month", lic2M, (24*31 + 24*30) * 3600},
{"Zero Month", lic0M, 0},
{"Expired", licExpired, -24 * 30},
{"Expired", licExpired, (-24 * 30) * 3600},
{"No License", "", 0},
} {
t.Run(tc.desc, func(t *testing.T) {
if err := updater.Set(ctx, "enterprise.license", tc.lic, "s"); err != nil {
t.Fatal(err)
}

actual, err := TimeToEnterpriseLicenseExpiry(context.Background(), st, t0)
if err != nil {
t.Fatal(err)
}

require.Equal(t, tc.ttlHours, actual.Hours())
actual := base.LicenseTTL.Value()
require.Equal(t, tc.ttlSeconds, actual)
})
}
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,12 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
runtimeSampler := status.NewRuntimeStatSampler(ctx, clock)
registry.AddMetricStruct(runtimeSampler)

registry.AddMetric(base.LicenseTTL)
err = base.UpdateMetricOnLicenseChange(ctx, cfg.Settings, base.LicenseTTL, timeutil.DefaultTimeSource{}, stopper)
if err != nil {
log.Errorf(ctx, "unable to initialize periodic license metric update: %v", err)
}

// Create and add KV metric rules
kvserver.CreateAndAddRules(ctx, ruleRegistry)

Expand Down Expand Up @@ -2701,6 +2707,7 @@ func startSampleEnvironment(ctx context.Context, cfg sampleEnvironmentCfg) error
curStats := goMemStats.Load().(*status.GoMemStats)
cgoStats := status.GetCGoMemStats(ctx)
cfg.runtime.SampleEnvironment(ctx, curStats, cgoStats)

if goroutineDumper != nil {
goroutineDumper.MaybeDump(ctx, cfg.st, cfg.runtime.Goroutines.Value())
}
Expand Down
22 changes: 0 additions & 22 deletions pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -1802,31 +1802,9 @@ func (h varsHandler) handleVars(w http.ResponseWriter, r *http.Request) {
http.Error(w, err.Error(), http.StatusInternalServerError)
}

h.appendLicenseExpiryMetric(ctx, w)
telemetry.Inc(telemetryPrometheusVars)
}

// appendLicenseExpiryMetric writes a single line of the form
// "seconds_until_enterprise_license_expiry <n>\n" to w, where <n> is the
// whole number of seconds between now and the expiry of the cluster's
// enterprise license. The value is computed on demand at call time.
//
// If the time to expiry cannot be determined, the error is logged and
// nothing is written. A failure to write to w is logged as well.
func (h varsHandler) appendLicenseExpiryMetric(ctx context.Context, w io.Writer) {
	ttl, err := base.TimeToEnterpriseLicenseExpiry(ctx, h.st, timeutil.Now())
	if err != nil {
		log.Errorf(ctx, "unable to generate time to license expiry: %v", err)
		return
	}

	// Integer division truncates the duration to whole seconds.
	line := fmt.Sprintf("seconds_until_enterprise_license_expiry %d\n", int64(ttl/time.Second))
	if _, err := w.Write([]byte(line)); err != nil {
		log.Errorf(ctx, "problem writing license expiry metric: %v", err)
	}
}

func (s *statusServer) handleVars(w http.ResponseWriter, r *http.Request) {
varsHandler{s.metricSource, s.st}.handleVars(w, r)
}
Expand Down
49 changes: 0 additions & 49 deletions pkg/server/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/server/diagnostics/diagnosticspb"
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/server/status/statuspb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
Expand Down Expand Up @@ -2620,54 +2619,6 @@ func TestRegionsResponseFromNodesResponse(t *testing.T) {
}
}

// TestLicenseExpiryMetricNoLicense checks the text that
// appendLicenseExpiryMetric writes for several license states: no license,
// short and long positive durations, a negative (expired) duration, and an
// error from the expiry hook (in which case nothing is written).
//
// NOTE(review): despite the name, this covers more than the "no license"
// case. It also overwrites the package-level hook
// base.TimeToEnterpriseLicenseExpiry and never restores it, so the last
// override stays installed after the test returns.
func TestLicenseExpiryMetricNoLicense(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ts := startServer(t)
defer ts.Stopper().Stop(context.Background())

for _, tc := range []struct {
name string
expected string
expiryFunc func(context.Context, *cluster.Settings, time.Time) (time.Duration, error)
}{
// A nil expiryFunc leaves the currently installed hook in place.
{"No License", "seconds_until_enterprise_license_expiry 0\n", nil},
{"Valid 1 second License", "seconds_until_enterprise_license_expiry 1\n", func(
_ context.Context, _ *cluster.Settings, _ time.Time,
) (time.Duration, error) {
return time.Second, nil
}},
{"Valid Long License", "seconds_until_enterprise_license_expiry 1603926294\n", func(
_ context.Context, _ *cluster.Settings, _ time.Time,
) (time.Duration, error) {
return timeutil.Unix(1603926294, 0).Sub(timeutil.Unix(0, 0)), nil
}},
{"Valid Long Past License", "seconds_until_enterprise_license_expiry -1603926294\n", func(
_ context.Context, _ *cluster.Settings, _ time.Time,
) (time.Duration, error) {
return timeutil.Unix(0, 0).Sub(timeutil.Unix(1603926294, 0)), nil
}},
// On error the handler is expected to write nothing at all.
{"Error License", "", func(
_ context.Context, _ *cluster.Settings, _ time.Time,
) (time.Duration, error) {
return 0, errors.New("bad license")
}},
} {
t.Run(tc.name, func(t *testing.T) {
vh := varsHandler{ts.status.metricSource, ts.status.st}
if tc.expiryFunc != nil {
// Replaces the global hook for this and all later subtests.
base.TimeToEnterpriseLicenseExpiry = tc.expiryFunc
}

buf := new(bytes.Buffer)
vh.appendLicenseExpiryMetric(context.Background(), buf)

require.Equal(t, tc.expected, buf.String())
})
}
}

func TestStatusServer_nodeStatusToResp(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down
6 changes: 6 additions & 0 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ var charts = []sectionDescription{
Percentiles: false,
Metrics: []string{"node-id"},
},
{
Title: "License TTL",
Downsampler: DescribeAggregator_MIN,
Percentiles: false,
Metrics: []string{"seconds_until_enterprise_license_expiry"},
},
},
},
{
Expand Down

0 comments on commit 0f69631

Please sign in to comment.