Skip to content

Commit

Permalink
server: add a configuration to enable GC of system.rangelog
Browse files Browse the repository at this point in the history
The system.rangelog table currently grows without bound. The rate of
growth is slow (as long as there is no replica rebalancing
thrashing), but it can still become a problem in long-running
clusters.

This commit adds cluster settings to specify the TTL for rows in
system.rangelog and system.eventlog; expired rows are garbage collected
on a fixed 10-minute interval.
By default, the TTL of system.rangelog is set to 30 days.

Fixes cockroachdb#21260

Release note: Add configuration to enable GC of system.rangelog
  • Loading branch information
Vijay Karthik committed Oct 9, 2018
1 parent 8e130b7 commit 759bb39
Show file tree
Hide file tree
Showing 6 changed files with 386 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@
<tr><td><code>server.clock.persist_upper_bound_interval</code></td><td>duration</td><td><code>0s</code></td><td>the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. Not setting this or setting a value of 0 disables this feature.</td></tr>
<tr><td><code>server.consistency_check.interval</code></td><td>duration</td><td><code>24h0m0s</code></td><td>the time between range consistency checks; set to 0 to disable consistency checking</td></tr>
<tr><td><code>server.declined_reservation_timeout</code></td><td>duration</td><td><code>1s</code></td><td>the amount of time to consider the store throttled for up-replication after a reservation was declined</td></tr>
<tr><td><code>server.eventlog.ttl</code></td><td>duration</td><td><code>0s</code></td><td>if non zero, event log entries older than this duration are deleted periodically (10m period).</td></tr>
<tr><td><code>server.failed_reservation_timeout</code></td><td>duration</td><td><code>5s</code></td><td>the amount of time to consider the store throttled for up-replication after a failed reservation call</td></tr>
<tr><td><code>server.heap_profile.max_profiles</code></td><td>integer</td><td><code>5</code></td><td>maximum number of profiles to be kept. Profiles with lower score are GC'ed, but latest profile is always kept</td></tr>
<tr><td><code>server.heap_profile.system_memory_threshold_fraction</code></td><td>float</td><td><code>0.85</code></td><td>fraction of system memory beyond which if Rss increases, then heap profile is triggered</td></tr>
<tr><td><code>server.rangelog.ttl</code></td><td>duration</td><td><code>720h0m0s</code></td><td>if non zero, range log entries older than this duration are deleted periodically (10m period).</td></tr>
<tr><td><code>server.remote_debugging.mode</code></td><td>string</td><td><code>local</code></td><td>set to enable remote debugging, localhost-only or disable (any, local, off)</td></tr>
<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with the rest of the shutdown process</td></tr>
<tr><td><code>server.shutdown.query_wait</code></td><td>duration</td><td><code>10s</code></td><td>the server will wait for at least this amount of time for active queries to finish</td></tr>
Expand Down
18 changes: 18 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ var (
"feature.",
0,
)

// RangeLogTTL is the cluster setting ("server.rangelog.ttl") holding the TTL
// for rows in system.rangelog. Range log entries older than the TTL are
// periodically garbage collected; the GC worker skips the table unless the
// TTL is greater than zero. The default is 30 days.
RangeLogTTL = settings.RegisterDurationSetting(
"server.rangelog.ttl",
"if non zero, range log entries older than this duration are deleted periodically (10m period).",
30*24*time.Hour, // 30 days
)

// EventLogTTL is the cluster setting ("server.eventlog.ttl") holding the TTL
// for rows in system.eventlog. Event log entries older than the TTL are
// periodically garbage collected; the GC worker skips the table unless the
// TTL is greater than zero. The default is 0, so event log GC is off unless
// the setting is changed.
EventLogTTL = settings.RegisterDurationSetting(
"server.eventlog.ttl",
"if non zero, event log entries older than this duration are deleted periodically (10m period).",
0,
)
)

// TODO(peter): Until go1.11, ServeMux.ServeHTTP was not safe to call
Expand Down Expand Up @@ -1679,6 +1695,8 @@ func (s *Server) Start(ctx context.Context) error {
})
}

s.startSystemLogsGC(ctx)

// Record that this node joined the cluster in the event log. Since this
// executes a SQL query, this must be done after the SQL layer is ready.
s.node.recordJoinEvent()
Expand Down
163 changes: 163 additions & 0 deletions pkg/server/server_systemlog_gc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package server

import (
"context"
"fmt"
"time"

"github.com/pkg/errors"

"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

// gcSystemLog deletes entries in the given system log table that are older
// than the given cutoffTimestamp, but only if this server owns a valid lease
// for range 1. The leaseholder constraint is present so that only one node in
// the cluster performs GC.
//
// The system log table is expected to have a "timestamp" column. Rows are
// deleted in batches of at most 1000, one transaction per batch, until a
// batch deletes no rows.
//
// It returns the number of rows deleted by successfully committed batches and
// the error (if any) that stopped the collection. If this server does not own
// the lease for range 1, it returns (0, nil) without issuing any query.
func (s *Server) gcSystemLog(
	ctx context.Context, table string, cutoffTimestamp time.Time,
) (int64, error) {
	repl, err := s.node.stores.GetReplicaForRangeID(roachpb.RangeID(1))
	if err != nil {
		return 0, err
	}
	if !repl.IsFirstRange() || !repl.OwnsValidLease(s.clock.Now()) {
		return 0, nil
	}

	deleteStmt := fmt.Sprintf(
		`SELECT count(1), max(timestamp) FROM
[DELETE FROM system.%s WHERE timestamp >= $1 AND timestamp <= $2 LIMIT 1000 RETURNING timestamp]`,
		table,
	)

	var totalRowsAffected int64
	// A timestamp lower-bound is used in the delete query to avoid hitting
	// tombstones. It is advanced to the largest deleted timestamp after every
	// successfully committed batch.
	timestampLowerBound := timeutil.Unix(0, 0)
	for {
		// The closure handed to Txn can be executed more than once if the
		// transaction is retried, and its effects are discarded if the
		// transaction ultimately fails. It therefore only writes to these
		// per-batch variables, which are reset on every attempt; the
		// loop-level state (totalRowsAffected, timestampLowerBound) is only
		// updated once the transaction has committed.
		var batchRows int64
		var batchMaxTimestamp time.Time
		err := s.db.Txn(ctx, func(ctx context.Context, txn *client.Txn) error {
			batchRows = 0
			batchMaxTimestamp = time.Time{}

			row, err := s.internalExecutor.QueryRow(
				ctx,
				table+"-gc",
				txn,
				deleteStmt,
				timestampLowerBound,
				cutoffTimestamp,
			)
			if err != nil {
				return err
			}
			if row == nil {
				return nil
			}

			rowCount, ok := row[0].(*tree.DInt)
			if !ok {
				return errors.Errorf("row count is of unknown type %T", row[0])
			}
			if rowCount == nil {
				return errors.New("error parsing row count")
			}
			deleted := int64(*rowCount)
			if deleted == 0 {
				return nil
			}

			// max(timestamp) is only meaningful when at least one row was
			// deleted, so it is only decoded in that case.
			maxTimestamp, ok := row[1].(*tree.DTimestamp)
			if !ok {
				return errors.Errorf("timestamp is of unknown type %T", row[1])
			}
			if maxTimestamp == nil {
				return errors.New("error parsing timestamp")
			}
			batchRows = deleted
			batchMaxTimestamp = maxTimestamp.Time
			return nil
		})
		if err != nil {
			// The failed batch was not committed, so it is not counted.
			return totalRowsAffected, err
		}
		if batchRows == 0 {
			return totalRowsAffected, nil
		}
		totalRowsAffected += batchRows
		timestampLowerBound = batchMaxTimestamp
	}
}

// startSystemLogsGC launches a stopper-managed worker that wakes up on a
// fixed period (10 minutes unless overridden by a store testing knob) and
// garbage collects system.rangelog and system.eventlog.
// The TTL for each table is read from its cluster setting on every pass; a
// table whose TTL is not positive is skipped.
func (s *Server) startSystemLogsGC(ctx context.Context) {
	ttlSettingByTable := map[string]*settings.DurationSetting{
		"rangelog": RangeLogTTL,
		"eventlog": EventLogTTL,
	}

	// gcAllTables performs a single GC pass over every table whose TTL is
	// currently positive, logging failures and non-empty results.
	gcAllTables := func(ctx context.Context) {
		for logTable, ttlSetting := range ttlSettingByTable {
			ttl := ttlSetting.Get(&s.cfg.Settings.SV)
			if ttl <= 0 {
				continue
			}

			// Rows with a timestamp at or before now-TTL are eligible.
			cutoff := timeutil.Unix(0, s.clock.PhysicalNow()-int64(ttl))
			deleted, err := s.gcSystemLog(ctx, logTable, cutoff)
			if err != nil {
				log.Errorf(
					ctx,
					"error garbage collecting %s %v",
					logTable,
					err,
				)
			}

			if deleted > 0 {
				log.Infof(ctx, "garbage collected %d rows from %s", deleted, logTable)
			}
		}
	}

	s.stopper.RunWorker(ctx, func(ctx context.Context) {
		interval := 10 * time.Minute
		if knobs, ok := s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs); ok && knobs.SystemLogsGCPeriod != 0 {
			interval = knobs.SystemLogsGCPeriod
		}

		ticker := time.NewTicker(interval)
		defer ticker.Stop()

		for {
			select {
			case <-s.stopper.ShouldStop():
				return
			case <-ticker.C:
			}

			gcAllTables(ctx)

			knobs, ok := s.cfg.TestingKnobs.Store.(*storage.StoreTestingKnobs)
			if !ok || knobs.SystemLogsGCGCDone == nil {
				continue
			}
			// Tell a listening test that a pass completed, without blocking
			// the worker when nobody is receiving.
			select {
			case knobs.SystemLogsGCGCDone <- struct{}{}:
			case <-s.stopper.ShouldStop():
				// Test has finished.
				return
			default:
			}
		}
	})
}
12 changes: 12 additions & 0 deletions pkg/server/testserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,18 @@ func (ts *TestServer) ExecutorConfig() interface{} {
return *ts.execCfg
}

// GCSystemLog is an exported wrapper that delegates to gcSystemLog.
// It deletes entries in the given system log table older than the
// given cutoffTimestamp if the server is the lease holder for range 1.
// The leaseholder constraint is present so that only one node in the cluster
// performs GC.
// The system log table is expected to have a "timestamp" column.
// It returns the number of rows affected and the error (if any).
func (ts *TestServer) GCSystemLog(
ctx context.Context, table string, cutoffTimestamp time.Time,
) (int64, error) {
return ts.gcSystemLog(ctx, table, cutoffTimestamp)
}

type testServerFactoryImpl struct{}

// TestServerFactory can be passed to serverutils.InitTestServerFactory
Expand Down
Loading

0 comments on commit 759bb39

Please sign in to comment.