Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
55797: opt: increase types supported for range calculations in histogram r=rytaft a=ArjunM98

Prior to this commit, it was not possible to calculate the range of non-numeric
types like strings/uuid/inet family for use in the histogram. To work around this,
the range of these data types would be reduced by half when filtered.

This commit fixes the problem by implementing support for range calculations of
non-numeric types. Leveraging the byte format of these types and treating each
byte as a base(16) number, we can estimate the range for an arbitrary set of
strings/uuid/inet family. Support for time/timetz range calculations was also
added.

Release note (performance improvement): Improved the accuracy of histogram
calculations for the following types: string/uuid/inet family. Support for
time/timetz histogram calculations was also added. This improves the
optimizer's estimates and results in better query plans in certain instances.

Resolves cockroachdb#49568

58612: sql: set running status for GC jobs r=postamar a=postamar

Previously, GC jobs didn't populate their RunningStatus, leaving users no
way to tell whether a job is waiting on the timer or actually doing
garbage collection. This change makes GC jobs set their running status.

Fixes cockroachdb#57826.

Release note (sql change): GC jobs now populate the running_status
column for SHOW JOBS.

Co-authored-by: ArjunM98 <[email protected]>
Co-authored-by: Marius Posta <[email protected]>
  • Loading branch information
3 people committed Jan 19, 2021
3 parents 8451341 + 7d4af69 + 1a17f57 commit 6a51818
Show file tree
Hide file tree
Showing 11 changed files with 794 additions and 363 deletions.
9 changes: 6 additions & 3 deletions pkg/sql/gcjob_test/gc_job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ func TestSchemaChangeGCJob(t *testing.T) {
DescriptorIDs: descpb.IDs{myTableID},
Details: details,
Progress: jobspb.SchemaChangeGCProgress{},
RunningStatus: sql.RunningStatusWaitingGC,
NonCancelable: true,
}

Expand All @@ -191,7 +192,7 @@ func TestSchemaChangeGCJob(t *testing.T) {

// Check that the job started.
jobIDStr := strconv.Itoa(int(*job.ID()))
if err := jobutils.VerifySystemJob(t, sqlDB, 0, jobspb.TypeSchemaChangeGC, jobs.StatusRunning, lookupJR); err != nil {
if err := jobutils.VerifyRunningSystemJob(t, sqlDB, 0, jobspb.TypeSchemaChangeGC, sql.RunningStatusWaitingGC, lookupJR); err != nil {
t.Fatal(err)
}

Expand Down Expand Up @@ -280,12 +281,14 @@ SELECT parent_id, table_id
// Now we should be able to find our GC job
var jobID int64
var status jobs.Status
var runningStatus jobs.RunningStatus
sqlDB.QueryRow(t, `
SELECT job_id, status
SELECT job_id, status, running_status
FROM crdb_internal.jobs
WHERE description LIKE 'GC for DROP TABLE db.public.foo';
`).Scan(&jobID, &status)
`).Scan(&jobID, &status, &runningStatus)
require.Equal(t, jobs.StatusRunning, status)
require.Equal(t, sql.RunningStatusWaitingGC, runningStatus)

// Manually delete the table.
require.NoError(t, kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
Expand Down
12 changes: 6 additions & 6 deletions pkg/sql/logictest/testdata/logic_test/alter_table
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ WHERE job_type = 'SCHEMA CHANGE' OR job_type = 'SCHEMA CHANGE GC'
ORDER BY created DESC
LIMIT 2
----
SCHEMA CHANGE GC GC for ROLLBACK of ALTER TABLE test.public.t ADD CONSTRAINT bar UNIQUE (c) root running NULL 0.00
SCHEMA CHANGE ALTER TABLE test.public.t ADD CONSTRAINT bar UNIQUE (c) root failed NULL 0.00
SCHEMA CHANGE GC GC for ROLLBACK of ALTER TABLE test.public.t ADD CONSTRAINT bar UNIQUE (c) root running waiting for GC TTL 0.00
SCHEMA CHANGE ALTER TABLE test.public.t ADD CONSTRAINT bar UNIQUE (c) root failed NULL 0.00

query IIII colnames,rowsort
SELECT * FROM t
Expand Down Expand Up @@ -199,8 +199,8 @@ WHERE job_type = 'SCHEMA CHANGE' OR job_type = 'SCHEMA CHANGE GC'
ORDER BY created DESC
LIMIT 2
----
SCHEMA CHANGE GC GC for DROP INDEX test.public.t@foo CASCADE root running NULL 0 ·
SCHEMA CHANGE DROP INDEX test.public.t@foo CASCADE root succeeded NULL 1 ·
SCHEMA CHANGE GC GC for DROP INDEX test.public.t@foo CASCADE root running waiting for GC TTL 0 ·
SCHEMA CHANGE DROP INDEX test.public.t@foo CASCADE root succeeded NULL 1 ·

query TTBITTBB colnames
SHOW INDEXES FROM t
Expand Down Expand Up @@ -281,8 +281,8 @@ WHERE job_type = 'SCHEMA CHANGE' OR job_type = 'SCHEMA CHANGE GC'
ORDER BY created DESC
LIMIT 2
----
SCHEMA CHANGE GC GC for DROP INDEX test.public.t@t_f_idx root running NULL 0 ·
SCHEMA CHANGE DROP INDEX test.public.t@t_f_idx root succeeded NULL 1 ·
SCHEMA CHANGE GC GC for DROP INDEX test.public.t@t_f_idx root running waiting for GC TTL 0 ·
SCHEMA CHANGE DROP INDEX test.public.t@t_f_idx root succeeded NULL 1 ·

statement ok
ALTER TABLE t DROP COLUMN f
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/opt/constraint/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ func parseDatumPath(evalCtx *tree.EvalContext, str string, typs []types.Family)
if err == nil {
val = tree.NewDOid(*dInt)
}
case types.UuidFamily:
val, err = tree.ParseDUuidFromString(valStr)
case types.INetFamily:
val, err = tree.ParseDIPAddrFromINetString(valStr)
case types.TimeFamily:
val, _, err = tree.ParseDTime(evalCtx, valStr, time.Microsecond)
case types.TimeTZFamily:
val, _, err = tree.ParseDTimeTZ(evalCtx, valStr, time.Microsecond)
default:
panic(errors.AssertionFailedf("type %s not supported", typs[i].String()))
}
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/opt/memo/testdata/stats/select
Original file line number Diff line number Diff line change
Expand Up @@ -1981,6 +1981,26 @@ select
└── filters
└── created:3 = '2020-04-11 06:25:41+00:00' [type=bool, outer=(3), constraints=(/3: [/'2020-04-11 06:25:41+00:00' - /'2020-04-11 06:25:41+00:00']; tight), fd=()-->(3)]

# Make sure that using a histogram produces correct stats with range
# calculations for non-numeric types.
norm
SELECT * FROM data WHERE name >= 'bbb'
----
select
├── columns: user_id:1(uuid!null) name:2(varchar!null) created:3(timestamptz)
├── stats: [rows=3090.89689, distinct(2)=309.433397, null(2)=0]
│ histogram(2)= 0 0 3089.9 1
│ <--- 'bbb' -------- 'c'
├── scan data
│ ├── columns: user_id:1(uuid!null) name:2(varchar) created:3(timestamptz)
│ └── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(2)=1000, null(2)=0]
│ histogram(1)= 0 1 4998 1 4999 1
│ <--- '3b57b3e4-a68a-9b47-2752-e365d7d8954e' ------ '6b49a786-387b-d5a2-6582-4e963eb4d537' ------ 'd9739a48-d5be-9a62-e752-34d877e56ba5'
│ histogram(2)= 0 1 4998 1 4999 1
│ <--- 'a' ------ 'b' ------ 'c'
└── filters
└── name:2 >= 'bbb' [type=bool, outer=(2), constraints=(/2: [/'bbb' - ]; tight)]

exec-ddl
ALTER TABLE a INJECT STATISTICS '[
{
Expand Down
Loading

0 comments on commit 6a51818

Please sign in to comment.