Skip to content

Commit

Permalink
backupccl: do not backup merged statistics
Browse files Browse the repository at this point in the history
Merged statistics are generated from partial statistics and the
related full statistic. Partial statistics are not currently included
in the backup, but will be soon, thus we are excluding merged
statistics from the backup.

Release note: None
  • Loading branch information
stevendanna committed Jan 12, 2023
1 parent 82cc3c1 commit c8060fd
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 12 deletions.
5 changes: 1 addition & 4 deletions pkg/ccl/backupccl/backup_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,10 +423,7 @@ func statShouldBeIncludedInBackupRestore(stat *stats.TableStatisticProto) bool {
// Forecasts and merged stats are computed from the persisted
// stats on demand and do not need to be backed up or
// restored.
if stat.Name == jobspb.ForecastStatsName || stat.Name == jobspb.MergedStatsName {
return false
}
return true
return stat.Name != jobspb.ForecastStatsName && stat.Name != jobspb.MergedStatsName
}

type backupResumer struct {
Expand Down
49 changes: 41 additions & 8 deletions pkg/ccl/backupccl/testdata/backup-restore/metadata
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
# This test that we don't back up forecasts or merged statistics.
#
# This also serves as a regression test for #86806 -- a bug in which 2
# or more forecsts would break metadata SST writing.
new-cluster name=s
----

# Make sure that the presence of forecasted stats doesn't break
# metadata SST writing. This is a regression test for #86806.
#
# We create 3 statistics manually which is the minimum needed to
# create a forecast. Automatic collection is disabled to avoid
# flakiness.
exec-sql
SET CLUSTER SETTING kv.bulkio.write_metadata_sst.enabled = true;
----

# Automatic collection is disabled to avoid flakiness.
exec-sql
SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false;
----
Expand All @@ -19,24 +18,58 @@ exec-sql
CREATE DATABASE db1;
USE db1;
CREATE TABLE tab (a INT PRIMARY KEY, b INT);
INSERT INTO tab VALUES (1, 1), (2, 2), (100, 100);
----

# We create 3 statistics manually which is the minimum needed to
# create a forecast and a partial statistic on column A so that a
# merged statistics is also generated.
exec-sql
CREATE STATISTICS __auto__ ON a FROM tab;
CREATE STATISTICS __auto__ ON b FROM tab;
CREATE STATISTICS __auto__ ON a FROM tab;
CREATE STATISTICS __auto__ ON b FROM tab;
CREATE STATISTICS __auto__ ON a FROM tab;
CREATE STATISTICS __auto__ ON b FROM tab;
CREATE STATISTICS partial ON a FROM tab USING EXTREMES;
----

query-sql
SELECT count(1) FROM [ SHOW STATISTICS FOR TABLE tab ]
----
7

# The previous bug occurs when there are two or more forecasts since
# forecasts do not hava a statistics ID.
query-sql
SELECT count(1) FROM [ SHOW STATISTICS FOR TABLE tab WITH FORECAST ] WHERE statistics_name = '__forecast__'
----
2

query-sql
SELECT count(1) FROM [ SHOW STATISTICS FOR TABLE tab WITH MERGE ] WHERE statistics_name = '__merged__'
----
1

exec-sql
BACKUP DATABASE db1 INTO 'nodelocal://0/test/'
----

# TODO(ssd): The expectation here is 6 stats rather than 7 because the
# 'partial' stat is not backed up even though it is persisted. I think
# we may want a different API for fetching statistics.
query-sql
SELECT
json_array_length(
crdb_internal.pb_to_json(
'cockroach.ccl.backupccl.StatsTable',
crdb_internal.read_file(
concat(
'nodelocal://0/test/',
(SELECT PATH FROM [SHOW BACKUPS IN 'nodelocal://0/test/']),
'/BACKUP-STATISTICS'
)
)
)
-> 'statistics'
)
----
6

0 comments on commit c8060fd

Please sign in to comment.