Skip to content

Commit

Permalink
Merge #96669 #97039
Browse files Browse the repository at this point in the history
96669: storage: set default of storage.value_blocks.enabled to true r=nicktrav a=sumeerbhola

This enables the use of sstable format TableFormatPebblev3 which improves read performance with MVCC keys with multiple versions.

Informs #1170

Epic: CRDB-20378

Release note: None

97039: insights: detect failed executions at the stmt and txn levels r=gtr a=gtr

Fixes: #94381.

Previously, the insights subsystem only kept track of failed executions
that were also slow executions. This commit enables the insights
subsystem to keep track of all failed executions, regardless if they were
slow or not.

Release note (sql change): The insights subsystem in `sqlstats` is able
to detect failed executions, regardless if they were slow or not.

Co-authored-by: sumeerbhola <[email protected]>
Co-authored-by: gtr <[email protected]>
  • Loading branch information
3 people committed Feb 13, 2023
3 parents 47331b1 + a593861 + 2d5449d commit 5f1a3d7
Show file tree
Hide file tree
Showing 12 changed files with 86 additions and 17 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@
<tr><td><div id="setting-sql-ttl-default-select-batch-size" class="anchored"><code>sql.ttl.default_select_batch_size</code></div></td><td>integer</td><td><code>500</code></td><td>default amount of rows to select in a single query during a TTL job</td></tr>
<tr><td><div id="setting-sql-ttl-job-enabled" class="anchored"><code>sql.ttl.job.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>whether the TTL job is enabled</td></tr>
<tr><td><div id="setting-sql-txn-fingerprint-id-cache-capacity" class="anchored"><code>sql.txn_fingerprint_id_cache.capacity</code></div></td><td>integer</td><td><code>100</code></td><td>the maximum number of txn fingerprint IDs stored</td></tr>
<tr><td><div id="setting-storage-value-blocks-enabled" class="anchored"><code>storage.value_blocks.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>set to true to enable writing of value blocks in sstables</td></tr>
<tr><td><div id="setting-storage-value-blocks-enabled" class="anchored"><code>storage.value_blocks.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>set to true to enable writing of value blocks in sstables</td></tr>
<tr><td><div id="setting-timeseries-storage-enabled" class="anchored"><code>timeseries.storage.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>if set, periodic timeseries data is stored within the cluster; disabling is not recommended unless you are storing the data elsewhere</td></tr>
<tr><td><div id="setting-timeseries-storage-resolution-10s-ttl" class="anchored"><code>timeseries.storage.resolution_10s.ttl</code></div></td><td>duration</td><td><code>240h0m0s</code></td><td>the maximum age of time series data stored at the 10 second resolution. Data older than this is subject to rollup and deletion.</td></tr>
<tr><td><div id="setting-timeseries-storage-resolution-30m-ttl" class="anchored"><code>timeseries.storage.resolution_30m.ttl</code></div></td><td>duration</td><td><code>2160h0m0s</code></td><td>the maximum age of time series data stored at the 30 minute resolution. Data older than this is subject to deletion.</td></tr>
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvnemesis/applier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func TestApplier(t *testing.T) {
sstFile := &storage.MemFile{}
{
st := cluster.MakeTestingClusterSettings()
storage.ValueBlocksEnabled.Override(ctx, &st.SV, true)
w := storage.MakeIngestionSSTWriter(ctx, st, sstFile)
defer w.Close()

Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvnemesis/operations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func TestOperationsFormat(t *testing.T) {
sstFile := &storage.MemFile{}
{
st := cluster.MakeTestingClusterSettings()
storage.ValueBlocksEnabled.Override(ctx, &st.SV, false)
w := storage.MakeIngestionSSTWriter(ctx, st, sstFile)
defer w.Close()

Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvnemesis/testdata/TestApplier/addsstable
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
echo
----
db1.AddSSTable(ctx, tk(1), tk(4), ... /* @s1 */) // 1251 bytes (as writes)
db1.AddSSTable(ctx, tk(1), tk(4), ... /* @s1 */) // 1252 bytes (as writes)
// ^-- tk(1) -> sv(s1): /Table/100/"0000000000000001"/<ts> -> /BYTES/v1
// ^-- tk(2) -> sv(s1): /Table/100/"0000000000000002"/<ts> -> /<empty>
// ^-- [tk(3), tk(4)) -> sv(s1): /Table/100/"000000000000000{3"-4"} -> /<empty>
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
echo
----
db0.AddSSTable(ctx, tk(1), tk(3), ... /* @s1 */) // 1050 bytes
db0.AddSSTable(ctx, tk(1), tk(3), ... /* @s1 */) // 1051 bytes
// ^-- tk(1) -> sv(s1): /Table/100/"0000000000000001"/0.000000001,0 -> /BYTES/v1
// ^-- tk(2) -> sv(s1): /Table/100/"0000000000000002"/0.000000001,0 -> /<empty>
/Table/100/"0000000000000001"/0.000000001,0 @ s1 v1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ echo
----
db0.Put(ctx, tk(1), sv(1)) // @0.000000001,0 <nil>
db0.Put(ctx, tk(2), sv(1)) // @0.000000001,0 <nil>
db0.AddSSTable(ctx, tk(1), tk(3), ... /* @s2 */) // 1051 bytes
db0.AddSSTable(ctx, tk(1), tk(3), ... /* @s2 */) // 1052 bytes
// ^-- tk(1) -> sv(s2): /Table/100/"0000000000000001"/0.000000001,0 -> /BYTES/v2
// ^-- tk(2) -> sv(s2): /Table/100/"0000000000000002"/0.000000001,0 -> /<empty>
db0.Scan(ctx, tk(1), tk(3), 0) // @0.000000003,0 (/Table/100/"0000000000000001":v2, <nil>)
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvnemesis/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func TestValidate(t *testing.T) {
makeAddSSTable := func(seq kvnemesisutil.Seq, kvs []sstKV) Operation {
f := &storage.MemFile{}
st := cluster.MakeTestingClusterSettings()
storage.ValueBlocksEnabled.Override(ctx, &st.SV, true)
w := storage.MakeIngestionSSTWriter(ctx, st, f)
defer w.Close()

Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/sqlstats/insights/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,7 @@ func (d *latencyThresholdDetector) enabled() bool {
func (d *latencyThresholdDetector) isSlow(s *Statement) bool {
return d.enabled() && s.LatencyInSeconds >= LatencyThreshold.Get(&d.st.SV).Seconds()
}

func isFailed(s *Statement) bool {
return s.Status == Statement_Failed
}
57 changes: 57 additions & 0 deletions pkg/sql/sqlstats/insights/detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,63 @@ func TestLatencyQuantileDetector(t *testing.T) {
}
})

// Testing the slow and failure detectors at the same time.
t.Run("isSlow and isFailed", func(t *testing.T) {
ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
AnomalyDetectionEnabled.Override(ctx, &st.SV, true)
AnomalyDetectionLatencyThreshold.Override(ctx, &st.SV, 100*time.Millisecond)

tests := []struct {
name string
seedLatency time.Duration
candidateLatency time.Duration
status Statement_Status
isSlow bool
isFailed bool
}{{
name: "slow and failed statement",
seedLatency: 100 * time.Millisecond,
candidateLatency: 200 * time.Millisecond,
status: Statement_Failed,
isSlow: true,
isFailed: true,
}, {
name: "slow and non-failed statement",
seedLatency: 100 * time.Millisecond,
candidateLatency: 200 * time.Millisecond,
status: Statement_Completed,
isSlow: true,
isFailed: false,
}, {
name: "fast and non-failed statement",
seedLatency: 100 * time.Millisecond,
candidateLatency: 50 * time.Millisecond,
status: Statement_Completed,
isSlow: false,
isFailed: false,
}, {
name: "fast and failed statement",
seedLatency: 100 * time.Millisecond,
candidateLatency: 50 * time.Millisecond,
status: Statement_Failed,
isSlow: false,
isFailed: true,
}}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
d := newAnomalyDetector(st, NewMetrics())
for i := 0; i < 1000; i++ {
d.isSlow(&Statement{LatencyInSeconds: test.seedLatency.Seconds()})
}
stmt := &Statement{LatencyInSeconds: test.candidateLatency.Seconds(), Status: test.status}
require.Equal(t, test.isSlow, d.isSlow(stmt))
require.Equal(t, test.isFailed, isFailed(stmt))
})
}
})

t.Run("metrics", func(t *testing.T) {
ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
Expand Down
15 changes: 7 additions & 8 deletions pkg/sql/sqlstats/insights/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,11 @@ func (r *lockingRegistry) ObserveTransaction(sessionID clusterunique.ID, transac
delete(r.statements, sessionID)
defer statements.release()

var slowStatements intsets.Fast
// Mark statements which are detected as slow or have a failed status.
var slowOrFailedStatements intsets.Fast
for i, s := range *statements {
if r.detector.isSlow(s) {
slowStatements.Add(i)
if r.detector.isSlow(s) || isFailed(s) {
slowOrFailedStatements.Add(i)
}
}

Expand All @@ -112,8 +113,8 @@ func (r *lockingRegistry) ObserveTransaction(sessionID clusterunique.ID, transac
highContention = transaction.Contention.Seconds() >= LatencyThreshold.Get(&r.causes.st.SV).Seconds()
}

if slowStatements.Empty() && !highContention {
// We only record an insight if we have slow statements or high txn contention.
if slowOrFailedStatements.Empty() && !highContention {
// We only record an insight if we have slow or failed statements or high txn contention.
return
}

Expand All @@ -127,14 +128,12 @@ func (r *lockingRegistry) ObserveTransaction(sessionID clusterunique.ID, transac
}

for i, s := range *statements {
if slowStatements.Contains(i) {
if slowOrFailedStatements.Contains(i) {
switch s.Status {
case Statement_Completed:
s.Problem = Problem_SlowExecution
s.Causes = r.causes.examine(s.Causes, s)
case Statement_Failed:
// Note that we'll be building better failure support for 23.1.
// For now, we only mark failed statements that were also slow.
s.Problem = Problem_FailedExecution
}

Expand Down
12 changes: 8 additions & 4 deletions pkg/sql/sqlstats/insights/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ func newStmtWithProblemAndCauses(stmt *Statement, problem Problem, causes []Caus
return &newStmt
}

// Return a new failed statement.
func newFailedStmt(stmt *Statement) *Statement {
newStmt := *stmt
newStmt.Problem = Problem_FailedExecution
return &newStmt
}

func TestRegistry(t *testing.T) {
ctx := context.Background()

Expand Down Expand Up @@ -72,9 +79,6 @@ func TestRegistry(t *testing.T) {
})

t.Run("failure detection", func(t *testing.T) {
// Note that we don't fully support detecting and reporting statement failures yet.
// We only report failures when the statement was also slow.
// We'll be coming back to build a better failure story for 23.1.
statement := &Statement{
ID: clusterunique.IDFromBytes([]byte("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")),
FingerprintID: appstatspb.StmtFingerprintID(100),
Expand All @@ -93,7 +97,7 @@ func TestRegistry(t *testing.T) {
Session: session,
Transaction: transaction,
Statements: []*Statement{
newStmtWithProblemAndCauses(statement, Problem_FailedExecution, nil),
newFailedStmt(statement),
},
}}
var actual []*Insight
Expand Down
4 changes: 3 additions & 1 deletion pkg/storage/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/storage/fs"
"github.com/cockroachdb/cockroach/pkg/storage/pebbleiter"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
Expand Down Expand Up @@ -89,7 +90,8 @@ var ValueBlocksEnabled = settings.RegisterBoolSetting(
settings.SystemOnly,
"storage.value_blocks.enabled",
"set to true to enable writing of value blocks in sstables",
false).WithPublic()
util.ConstantWithMetamorphicTestBool(
"storage.value_blocks.enabled", true)).WithPublic()

// EngineKeyCompare compares cockroach keys, including the version (which
// could be MVCC timestamps).
Expand Down

0 comments on commit 5f1a3d7

Please sign in to comment.