Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
84282: sql: new cluster setting for index recommendation r=maryliag a=maryliag

As part of #83782, we will generate index recommendation
per statement fingerprint id. As a safety, creating the
cluster setting
`sql.metrics.statement_details.index_recommendation_collection.enabled`
that can be disable, in case there are any issues with
performance.

Release note (sql change): Creation of cluster setting
`sql.metrics.statement_details.index_recommendation_collection.enabled`
that can be disabled if index recommendation generation is
causing performance issues.

84316: authors: add Jake to authors r=j82w a=j82w

Release note: None

84367: kvserver: Fix missing increment in SSTSnapshotStorage refcount r=erikgrinaker a=itsbilal

Previously, in rare cases where we had two SSTSnapshotStorageScratch
for a given range, we'd have not incremented the refcount for the
second scratch creation, leading to an inconsistent refcount
panic down the line. This change fixes it and adds a test to
exercise the concurrent case.

Bug popped up in #84100.

Release note: None.

Co-authored-by: Marylia Gutierrez <[email protected]>
Co-authored-by: j82w <[email protected]>
Co-authored-by: Bilal Akhtar <[email protected]>
  • Loading branch information
4 people committed Jul 13, 2022
4 parents 351498d + ab2b9be + 26e1418 + 1f18df8 commit 76c45dc
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 10 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ Ivan Kozik <[email protected]>
Jaewan Park <[email protected]>
Jack Wu <[email protected]>
Jackson Owens <[email protected]> <[email protected]>
Jake Willey <[email protected]>
James Graves <[email protected]>
James Jung <[email protected]> <[email protected]>
James H. Linder <[email protected]>
Expand Down
1 change: 1 addition & 0 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ sql.metrics.max_mem_stmt_fingerprints integer 100000 the maximum number of state
sql.metrics.max_mem_txn_fingerprints integer 100000 the maximum number of transaction fingerprints stored in memory
sql.metrics.statement_details.dump_to_logs boolean false dump collected statement statistics to node logs when periodically cleared
sql.metrics.statement_details.enabled boolean true collect per-statement query statistics
sql.metrics.statement_details.index_recommendation_collection.enabled boolean true generate an index recommendation for each fingerprint ID
sql.metrics.statement_details.plan_collection.enabled boolean true periodically save a logical plan for each fingerprint
sql.metrics.statement_details.plan_collection.period duration 5m0s the time until a new logical plan is collected
sql.metrics.statement_details.threshold duration 0s minimum execution time to cause statement statistics to be collected. If configured, no transaction stats are collected.
Expand Down
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@
<tr><td><code>sql.metrics.max_mem_txn_fingerprints</code></td><td>integer</td><td><code>100000</code></td><td>the maximum number of transaction fingerprints stored in memory</td></tr>
<tr><td><code>sql.metrics.statement_details.dump_to_logs</code></td><td>boolean</td><td><code>false</code></td><td>dump collected statement statistics to node logs when periodically cleared</td></tr>
<tr><td><code>sql.metrics.statement_details.enabled</code></td><td>boolean</td><td><code>true</code></td><td>collect per-statement query statistics</td></tr>
<tr><td><code>sql.metrics.statement_details.index_recommendation_collection.enabled</code></td><td>boolean</td><td><code>true</code></td><td>generate an index recommendation for each fingerprint ID</td></tr>
<tr><td><code>sql.metrics.statement_details.plan_collection.enabled</code></td><td>boolean</td><td><code>true</code></td><td>periodically save a logical plan for each fingerprint</td></tr>
<tr><td><code>sql.metrics.statement_details.plan_collection.period</code></td><td>duration</td><td><code>5m0s</code></td><td>the time until a new logical plan is collected</td></tr>
<tr><td><code>sql.metrics.statement_details.threshold</code></td><td>duration</td><td><code>0s</code></td><td>minimum execution time to cause statement statistics to be collected. If configured, no transaction stats are collected.</td></tr>
Expand Down
17 changes: 7 additions & 10 deletions pkg/kv/kvserver/replica_sst_snapshot_storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type SSTSnapshotStorage struct {
dir string
mu struct {
syncutil.Mutex
ranges map[roachpb.RangeID]int
rangeRefCount map[roachpb.RangeID]int
}
}

Expand All @@ -46,8 +46,8 @@ func NewSSTSnapshotStorage(engine storage.Engine, limiter *rate.Limiter) SSTSnap
dir: filepath.Join(engine.GetAuxiliaryDir(), "sstsnapshot"),
mu: struct {
syncutil.Mutex
ranges map[roachpb.RangeID]int
}{ranges: make(map[roachpb.RangeID]int)},
rangeRefCount map[roachpb.RangeID]int
}{rangeRefCount: make(map[roachpb.RangeID]int)},
}
}

Expand All @@ -57,10 +57,7 @@ func (s *SSTSnapshotStorage) NewScratchSpace(
rangeID roachpb.RangeID, snapUUID uuid.UUID,
) *SSTSnapshotStorageScratch {
s.mu.Lock()
rangeStorage := s.mu.ranges[rangeID]
if rangeStorage == 0 {
s.mu.ranges[rangeID] = 1
}
s.mu.rangeRefCount[rangeID]++
s.mu.Unlock()
snapDir := filepath.Join(s.dir, strconv.Itoa(int(rangeID)), snapUUID.String())
return &SSTSnapshotStorageScratch{
Expand All @@ -82,14 +79,14 @@ func (s *SSTSnapshotStorage) Clear() error {
func (s *SSTSnapshotStorage) scratchClosed(rangeID roachpb.RangeID) {
s.mu.Lock()
defer s.mu.Unlock()
val := s.mu.ranges[rangeID]
val := s.mu.rangeRefCount[rangeID]
if val <= 0 {
panic("inconsistent scratch ref count")
}
val--
s.mu.ranges[rangeID] = val
s.mu.rangeRefCount[rangeID] = val
if val == 0 {
delete(s.mu.ranges, rangeID)
delete(s.mu.rangeRefCount, rangeID)
// Suppressing an error here is okay, as orphaned directories are at worst
// a performance issue when we later walk directories in pebble.Capacity()
// but not a correctness issue.
Expand Down
113 changes: 113 additions & 0 deletions pkg/kv/kvserver/replica_sst_snapshot_storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"io/ioutil"
"path/filepath"
"strconv"
"sync"
"testing"

"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer"
Expand All @@ -23,6 +24,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/errors/oserror"
"github.com/stretchr/testify/require"
"golang.org/x/time/rate"
Expand Down Expand Up @@ -114,6 +116,117 @@ func TestSSTSnapshotStorage(t *testing.T) {
}
}

func TestSSTSnapshotStorageConcurrentRange(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
testRangeID := roachpb.RangeID(1)
testSnapUUID := uuid.Must(uuid.FromBytes([]byte("foobar1234567890")))
testSnapUUID2 := uuid.Must(uuid.FromBytes([]byte("foobar2345678910")))
testLimiter := rate.NewLimiter(rate.Inf, 0)

cleanup, eng := newOnDiskEngine(ctx, t)
defer cleanup()
defer eng.Close()

sstSnapshotStorage := NewSSTSnapshotStorage(eng, testLimiter)

runForSnap := func(snapUUID uuid.UUID) error {
scratch := sstSnapshotStorage.NewScratchSpace(testRangeID, snapUUID)

// Check that the storage lazily creates the directories on first write.
_, err := eng.Stat(scratch.snapDir)
if !oserror.IsNotExist(err) {
return errors.Errorf("expected %s to not exist", scratch.snapDir)
}

f, err := scratch.NewFile(ctx, 0)
require.NoError(t, err)
defer func() {
require.NoError(t, f.Close())
}()

// Check that even though the files aren't created, they are still recorded in SSTs().
require.Equal(t, len(scratch.SSTs()), 1)

// Check that the storage lazily creates the files on write.
for _, fileName := range scratch.SSTs() {
_, err := eng.Stat(fileName)
if !oserror.IsNotExist(err) {
return errors.Errorf("expected %s to not exist", fileName)
}
}

_, err = f.Write([]byte("foo"))
require.NoError(t, err)

// After writing to files, check that they have been flushed to disk.
for _, fileName := range scratch.SSTs() {
f, err := eng.Open(fileName)
require.NoError(t, err)
data, err := ioutil.ReadAll(f)
require.NoError(t, err)
require.Equal(t, data, []byte("foo"))
require.NoError(t, f.Close())
}

// Check that closing is idempotent.
require.NoError(t, f.Close())
require.NoError(t, f.Close())

// Check that writing to a closed file is an error.
_, err = f.Write([]byte("foo"))
require.EqualError(t, err, "file has already been closed")

// Check that closing an empty file is an error.
f, err = scratch.NewFile(ctx, 0)
require.NoError(t, err)
require.EqualError(t, f.Close(), "file is empty")
_, err = f.Write([]byte("foo"))
require.NoError(t, err)

// Check that Close removes the snapshot directory.
require.NoError(t, scratch.Close())
_, err = eng.Stat(scratch.snapDir)
if !oserror.IsNotExist(err) {
return errors.Errorf("expected %s to not exist", scratch.snapDir)
}
return nil
}

var wg sync.WaitGroup
wg.Add(2)
errChan := make(chan error)
for _, snapID := range []uuid.UUID{testSnapUUID, testSnapUUID2} {
snapID := snapID
go func() {
defer wg.Done()
if err := runForSnap(snapID); err != nil {
errChan <- err
}
}()
}
wg.Wait()
select {
case err := <-errChan:
t.Fatal(err)
default:
}
// Ensure that the range directory was deleted after the scratches were
// closed.
rangeDir := filepath.Join(sstSnapshotStorage.dir, strconv.Itoa(int(testRangeID)))
_, err := eng.Stat(rangeDir)
if !oserror.IsNotExist(err) {
t.Fatalf("expected %s to not exist", rangeDir)
}
require.NoError(t, sstSnapshotStorage.Clear())
_, err = eng.Stat(sstSnapshotStorage.dir)
if !oserror.IsNotExist(err) {
t.Fatalf("expected %s to not exist", sstSnapshotStorage.dir)
}
}

// TestSSTSnapshotStorageContextCancellation verifies that writing to an
// SSTSnapshotStorage is reactive to context cancellation.
func TestSSTSnapshotStorageContextCancellation(t *testing.T) {
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/sqlstats/cluster_settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,12 @@ var MaxSQLStatReset = settings.RegisterDurationSetting(
time.Hour*2,
settings.NonNegativeDurationWithMaximum(time.Hour*24),
).WithPublic()

// SampleIndexRecommendation specifies whether we generate an index recommendation
// for each fingerprint ID.
var SampleIndexRecommendation = settings.RegisterBoolSetting(
settings.TenantWritable,
"sql.metrics.statement_details.index_recommendation_collection.enabled",
"generate an index recommendation for each fingerprint ID",
true,
).WithPublic()

0 comments on commit 76c45dc

Please sign in to comment.