Skip to content

Commit

Permalink
Merge #99876 #100120
Browse files Browse the repository at this point in the history
99876: sql: deflake and unskip TestTenantStatementTimeoutAdmissionQueueCancelation r=cucaroach a=cucaroach

Make sure we get 4 blockers parked in the admission control queues and no more and retry the
main query until it gets parked and booted from it, before we expected the main query to always 
reach the admission control q but that isn't reliable.
Fixes: #78494


100120: spanconfigs: enable span config bounds by default  r=knz a=arulajmani

We only clamp spanconfigs if the `spanconfig.bounds.enabled` cluster setting dictates as such. For 23.1, it's default off, but going forward we want it to be default on. Let's also make this cluster setting public.

Epic: none
Epic: CRDB-8035

Release note: None

Co-authored-by: Tommy Reilly <[email protected]>
Co-authored-by: Arul Ajmani <[email protected]>
  • Loading branch information
3 people committed Mar 30, 2023
3 parents dbf2dfb + 5bf7940 + 0d4c5bc commit b60a8cd
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 19 deletions.
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
<tr><td><div id="setting-server-user-login-upgrade-bcrypt-stored-passwords-to-scram-enabled" class="anchored"><code>server.user_login.upgrade_bcrypt_stored_passwords_to_scram.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>if server.user_login.password_encryption=scram-sha-256, this controls whether to automatically re-encode stored passwords using crdb-bcrypt to scram-sha-256</td></tr>
<tr><td><div id="setting-server-web-session-purge-ttl" class="anchored"><code>server.web_session.purge.ttl</code></div></td><td>duration</td><td><code>1h0m0s</code></td><td>if nonzero, entries in system.web_sessions older than this duration are periodically purged</td></tr>
<tr><td><div id="setting-server-web-session-timeout" class="anchored"><code>server.web_session_timeout</code></div></td><td>duration</td><td><code>168h0m0s</code></td><td>the duration that a newly created web session will be valid</td></tr>
<tr><td><div id="setting-spanconfig-bounds-enabled" class="anchored"><code>spanconfig.bounds.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>dictates whether span config bounds are consulted when serving span configs for secondary tenants</td></tr>
<tr><td><div id="setting-sql-auth-change-own-password-enabled" class="anchored"><code>sql.auth.change_own_password.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>controls whether a user is allowed to change their own password, even if they have no other privileges</td></tr>
<tr><td><div id="setting-sql-auth-resolve-membership-single-scan-enabled" class="anchored"><code>sql.auth.resolve_membership_single_scan.enabled</code></div></td><td>boolean</td><td><code>true</code></td><td>determines whether to populate the role membership cache with a single scan</td></tr>
<tr><td><div id="setting-sql-closed-session-cache-capacity" class="anchored"><code>sql.closed_session_cache.capacity</code></div></td><td>integer</td><td><code>1000</code></td><td>the maximum number of sessions in the cache</td></tr>
Expand Down
4 changes: 2 additions & 2 deletions pkg/spanconfig/spanconfigstore/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ var boundsEnabled = settings.RegisterBoolSetting(
settings.SystemOnly,
"spanconfig.bounds.enabled",
"dictates whether span config bounds are consulted when serving span configs for secondary tenants",
false,
)
true,
).WithPublic()

// Store is an in-memory data structure to store, retrieve, and incrementally
// update the span configuration state. Internally, it makes use of an interval
Expand Down
4 changes: 1 addition & 3 deletions pkg/spanconfig/spanconfigstore/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,10 @@ func TestDataDriven(t *testing.T) {

ctx := context.Background()
boundsReader := newMockBoundsReader()
settings := cluster.MakeTestingClusterSettings()
boundsEnabled.Override(ctx, &settings.SV, true)
datadriven.Walk(t, datapathutils.TestDataPath(t), func(t *testing.T, path string) {
store := New(
spanconfigtestutils.ParseConfig(t, "FALLBACK"),
settings,
cluster.MakeClusterSettings(),
boundsReader,
&spanconfig.TestingKnobs{
StoreIgnoreCoalesceAdjacentExceptions: true,
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ go_test(
"@com_github_jackc_pgx_v4//:pgx",
"@com_github_lib_pq//:pq",
"@com_github_lib_pq//oid",
"@com_github_petermattis_goid//:goid",
"@com_github_pmezard_go_difflib//difflib",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
Expand Down
52 changes: 38 additions & 14 deletions pkg/sql/run_control_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"net/url"
"strings"
"sync"
"sync/atomic"
"testing"
"time"

Expand All @@ -40,6 +41,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/petermattis/goid"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -836,7 +838,6 @@ func getUserConn(t *testing.T, username string, server serverutils.TestServerInt
// main statement with a timeout is blocked.
func TestTenantStatementTimeoutAdmissionQueueCancelation(t *testing.T) {
defer leaktest.AfterTest(t)()
skip.WithIssue(t, 78494, "flaky test")
defer log.Scope(t).Close(t)

skip.UnderStress(t, "times out under stress")
Expand All @@ -845,8 +846,9 @@ func TestTenantStatementTimeoutAdmissionQueueCancelation(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
tenantID := serverutils.TestTenantID()

var hitMainQuery uint64
numBlockers := 4
var matches int64

// We can't get the tableID programmatically here, checked below with assert.
const tableID = 104
Expand All @@ -865,9 +867,12 @@ func TestTenantStatementTimeoutAdmissionQueueCancelation(t *testing.T) {
matchBatch := func(ctx context.Context, req *kvpb.BatchRequest) bool {
tid, ok := roachpb.ClientTenantFromContext(ctx)
if ok && tid == tenantID && len(req.Requests) > 0 {
scan, ok := req.Requests[0].GetInner().(*kvpb.ScanRequest)
if ok && tableSpan.ContainsKey(scan.Key) {
return true
scan, ok := req.Requests[0].GetInner().(*kvpb.GetRequest)
if ok {
if tableSpan.ContainsKey(scan.Key) {
log.Infof(ctx, "matchBatch %d", goid.Get())
return true
}
}
}
return false
Expand All @@ -885,18 +890,30 @@ func TestTenantStatementTimeoutAdmissionQueueCancelation(t *testing.T) {
Store: &kvserver.StoreTestingKnobs{
TestingRequestFilter: func(ctx context.Context, req *kvpb.BatchRequest) *kvpb.Error {
if matchBatch(ctx, req) {
m := atomic.AddInt64(&matches, 1)
// If any of the blockers get retried just ignore.
if m > int64(numBlockers) {
log.Infof(ctx, "ignoring extra blocker %d", goid.Get())
return nil
}
// Notify we're blocking.
log.Infof(ctx, "blocking %d", goid.Get())
unblockClientCh <- struct{}{}
<-qBlockersCh
}
return nil
},
TestingResponseErrorEvent: func(ctx context.Context, req *kvpb.BatchRequest, err error) {
if matchBatch(ctx, req) {
tid, ok := roachpb.ClientTenantFromContext(ctx)
if ok && tid == tenantID && len(req.Requests) > 0 {
scan, ok := req.Requests[0].GetInner().(*kvpb.ScanRequest)
if ok && tableSpan.ContainsKey(scan.Key) {
cancel()
wg.Done()
log.Infof(ctx, "%s %d", scan, goid.Get())
if ok {
if tableSpan.ContainsKey(scan.Key) && atomic.CompareAndSwapUint64(&hitMainQuery, 0, 1) {
log.Infof(ctx, "got scan request error %d", goid.Get())
cancel()
wg.Done()
}
}
}
},
Expand All @@ -911,8 +928,8 @@ func TestTenantStatementTimeoutAdmissionQueueCancelation(t *testing.T) {
defer db.Close()

r1 := sqlutils.MakeSQLRunner(db)
r1.Exec(t, `CREATE TABLE foo (t int)`)

r1.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`)
r1.Exec(t, `CREATE TABLE foo (t int PRIMARY KEY)`)
row := r1.QueryRow(t, `SELECT id FROM system.namespace WHERE name = 'foo'`)
var id int64
row.Scan(&id)
Expand All @@ -931,18 +948,25 @@ func TestTenantStatementTimeoutAdmissionQueueCancelation(t *testing.T) {
for _, r := range blockers {
go func(r *sqlutils.SQLRunner) {
defer wg.Done()
r.Exec(t, `SELECT * FROM foo`)
r.Exec(t, `SELECT * FROM foo WHERE t = 1234`)
}(r)
}
// Wait till all blockers are parked.
for i := 0; i < numBlockers; i++ {
<-unblockClientCh
}
client.ExpectErr(t, "timeout", `SELECT * FROM foo`)
// Unblock the blockers.
log.Infof(ctx, "blockers parked")
// Because we don't know when statement timeout will happen we have to repeat
// till we get one into the KV layer.
for atomic.LoadUint64(&hitMainQuery) == 0 {
_, err := client.DB.ExecContext(context.Background(), `SELECT * FROM foo`)
require.Error(t, err)
log.Infof(ctx, "main req finished: %v", err)
}
for i := 0; i < numBlockers; i++ {
qBlockersCh <- struct{}{}
}
log.Infof(ctx, "unblocked blockers")
wg.Wait()
require.ErrorIs(t, ctx.Err(), context.Canceled)
}

0 comments on commit b60a8cd

Please sign in to comment.