From ab64ed987b16cabe7da47b593941a17315637634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Nordstr=C3=B6m?= Date: Wed, 18 Sep 2024 14:07:33 +0200 Subject: [PATCH] Add dump/restore support for Hypercore TAM Add support for dumping and restoring hypertables that have chunks that use the Hypercore TAM. Dumping a Hypercore table requires special consideration because its data is internally stored in two separate relations: one for compressed data and one for non-compressed data. The TAM returns data from both relations, but they may be dumped as separate tables. This risks dumping the compressed data twice: once via the TAM and once via the compressed table in compressed format. The `pg_dump` tool uses `COPY TO` to create dumps of each table, and, to avoid data duplication when used on Hypercore tables, this change introduces a GUC that allows selecting one of these two behaviors: 1. A `COPY TO` on a Hypercore table returns all data via the TAM, including data stored in the compressed relation. A `COPY TO` on the internal compressed relation returns no data. 2. A `COPY TO` on a Hypercore returns only non-compressed data, while a `COPY TO` on the compressed relation returns compressed data. A `SELECT` still returns all the data as normal. The second approach is the default because it is consistent with compression when Hypercore TAM is not used. It will produce a `pg_dump` archive that includes data in compressed form (if data was compressed when dumped). Conversely, option (1) will produce an archive that looks identical to a dump from a non-compressed table. There are pros and cons of each dump format. A non-compressed archive is a platform-agnostic logical dump that can be restored to any platform and architecture, while a compressed archive includes data that is compressed in a platform-dependent way and needs to be restored to a compatible system. A test is added that tests both these settings and corresponding dumping and restoring. 
--- src/guc.c | 23 ++ src/guc.h | 19 ++ test/sql/utils/pg_dump_aux_dump.sh | 3 +- tsl/src/compression/api.c | 18 +- tsl/src/hypercore/hypercore_handler.c | 58 +++- tsl/src/hypercore/hypercore_handler.h | 7 +- tsl/src/nodes/columnar_scan/columnar_scan.c | 19 ++ tsl/src/planner.c | 2 +- tsl/src/process_utility.c | 75 +++++ tsl/test/expected/hypercore_dump_restore.out | 316 +++++++++++++++++++ tsl/test/sql/CMakeLists.txt | 1 + tsl/test/sql/hypercore_dump_restore.sql | 161 ++++++++++ 12 files changed, 682 insertions(+), 20 deletions(-) create mode 100644 tsl/test/expected/hypercore_dump_restore.out create mode 100644 tsl/test/sql/hypercore_dump_restore.sql diff --git a/src/guc.c b/src/guc.c index b5cf747db09..1e3e178363b 100644 --- a/src/guc.c +++ b/src/guc.c @@ -106,6 +106,12 @@ static const struct config_enum_entry transparent_decompression_options[] = { { NULL, 0, false } }; +static const struct config_enum_entry hypercore_copy_to_options[] = { + { "all_data", HYPERCORE_COPY_ALL_DATA, false }, + { "no_compressed_data", HYPERCORE_COPY_NO_COMPRESSED_DATA, false }, + { NULL, 0, false } +}; + bool ts_guc_enable_deprecation_warnings = true; bool ts_guc_enable_optimizations = true; bool ts_guc_restoring = false; @@ -147,6 +153,8 @@ bool ts_guc_enable_tss_callbacks = true; TSDLLEXPORT bool ts_guc_enable_delete_after_compression = false; TSDLLEXPORT bool ts_guc_enable_merge_on_cagg_refresh = false; TSDLLEXPORT char *ts_guc_hypercore_indexam_whitelist; +TSDLLEXPORT HypercoreCopyToBehavior ts_guc_hypercore_copy_to_behavior = + HYPERCORE_COPY_NO_COMPRESSED_DATA; /* default value of ts_guc_max_open_chunks_per_insert and * ts_guc_max_cached_chunks_per_hypertable will be set as their respective boot-value when the @@ -163,6 +171,7 @@ char *ts_last_tune_time = NULL; char *ts_last_tune_version = NULL; bool ts_guc_debug_require_batch_sorted_merge = false; + bool ts_guc_debug_allow_cagg_with_deprecated_funcs = false; #ifdef TS_DEBUG @@ -973,6 +982,20 @@ _guc_init(void) /* 
assign_hook= */ NULL, /* show_hook= */ NULL); + DefineCustomEnumVariable(MAKE_EXTOPTION("hypercore_copy_to_behavior"), + "The behavior of COPY TO on a hypercore table", + "Set to 'all_data' to return both compressed and uncompressed data " + "via the Hypercore table when using COPY TO. Set to " + "'no_compressed_data' to skip compressed data.", + /* valueAddr= */ (int *) &ts_guc_hypercore_copy_to_behavior, + /* bootValue= */ HYPERCORE_COPY_NO_COMPRESSED_DATA, + /* options= */ hypercore_copy_to_options, + /* context= */ PGC_USERSET, + 0, + NULL, + NULL, + NULL); + #ifdef TS_DEBUG DefineCustomBoolVariable(/* name= */ MAKE_EXTOPTION("shutdown_bgw_scheduler"), /* short_desc= */ "immediately shutdown the bgw scheduler", diff --git a/src/guc.h b/src/guc.h index a66653d6370..59766943086 100644 --- a/src/guc.h +++ b/src/guc.h @@ -100,6 +100,25 @@ extern TSDLLEXPORT bool ts_guc_debug_require_batch_sorted_merge; extern TSDLLEXPORT bool ts_guc_debug_allow_cagg_with_deprecated_funcs; extern TSDLLEXPORT char *ts_guc_hypercore_indexam_whitelist; +/* + * Defines the behavior of COPY TO when used on a Hypercore table. + * + * If set to COPY_ALL_DATA, all data is copied from a Hypercore table, + * including compressed data (but in uncompressed form) from the internal + * compressed relation. When doing a COPY TO on the internal compressed + * relation, no data is returned. + * + * If set to COPY_NO_COMPRESSED_DATA, then only uncompressed data is copied + * (if any). This behavior is compatible with compression without hypercore. 
+ */ +typedef enum HypercoreCopyToBehavior +{ + HYPERCORE_COPY_ALL_DATA, + HYPERCORE_COPY_NO_COMPRESSED_DATA, +} HypercoreCopyToBehavior; + +extern TSDLLEXPORT HypercoreCopyToBehavior ts_guc_hypercore_copy_to_behavior; + void _guc_init(void); typedef enum diff --git a/test/sql/utils/pg_dump_aux_dump.sh b/test/sql/utils/pg_dump_aux_dump.sh index 56015b26f58..1ad3c14351e 100755 --- a/test/sql/utils/pg_dump_aux_dump.sh +++ b/test/sql/utils/pg_dump_aux_dump.sh @@ -1,6 +1,7 @@ DUMPFILE=${DUMPFILE:-$1} +EXTRA_PGOPTIONS=${EXTRA_PGOPTIONS:-$2} # Override PGOPTIONS to remove verbose output -PGOPTIONS='--client-min-messages=warning' +PGOPTIONS="--client-min-messages=warning $EXTRA_PGOPTIONS" export PGOPTIONS diff --git a/tsl/src/compression/api.c b/tsl/src/compression/api.c index dcb5d769077..a6d268d5bc9 100644 --- a/tsl/src/compression/api.c +++ b/tsl/src/compression/api.c @@ -1116,13 +1116,11 @@ fetch_unmatched_uncompressed_chunk_into_tuplesort(Tuplesortstate *segment_tuples TableScanDesc scan; TupleTableSlot *slot = table_slot_create(uncompressed_chunk_rel, NULL); Snapshot snapshot = GetLatestSnapshot(); - ScanKeyData scankey = { - /* Let compression TAM know it should only return tuples from the - * non-compressed relation. No actual scankey necessary */ - .sk_flags = SK_NO_COMPRESSED, - }; - scan = table_beginscan(uncompressed_chunk_rel, snapshot, 0, &scankey); + scan = table_beginscan(uncompressed_chunk_rel, snapshot, 0, NULL); + /* If scan is using Hypercore, configure the scan to only return + * compressed data */ + hypercore_scan_set_skip_compressed(scan); while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) { @@ -1189,10 +1187,12 @@ fetch_matching_uncompressed_chunk_into_tuplesort(Tuplesortstate *segment_tupleso } snapshot = GetLatestSnapshot(); - /* Let compression TAM know it should only return tuples from the - * non-compressed relation. 
*/ - scankey->sk_flags = SK_NO_COMPRESSED; + scan = table_beginscan(uncompressed_chunk_rel, snapshot, nsegbycols_nonnull, scankey); + /* If scan is using Hypercore, configure the scan to only return + * compressed data */ + hypercore_scan_set_skip_compressed(scan); + TupleTableSlot *slot = table_slot_create(uncompressed_chunk_rel, NULL); while (table_scan_getnextslot(scan, ForwardScanDirection, slot)) diff --git a/tsl/src/hypercore/hypercore_handler.c b/tsl/src/hypercore/hypercore_handler.c index 4b6f240a0ed..a4a59fc2dce 100644 --- a/tsl/src/hypercore/hypercore_handler.c +++ b/tsl/src/hypercore/hypercore_handler.c @@ -62,6 +62,7 @@ #include "compression/compression.h" #include "compression/create.h" #include "debug_assert.h" +#include "extension.h" #include "guc.h" #include "hypercore_handler.h" #include "process_utility.h" @@ -81,6 +82,20 @@ static void convert_to_hypercore_finish(Oid relid); static List *partially_compressed_relids = NIL; /* Relids that needs to have * updated status set at end of * transaction */ +/* + * For COPY TO commands, track the relid of the hypercore + * being copied from. It is needed to filter out compressed data in the COPY + * scan so that pg_dump does not dump compressed data twice: once in + * uncompressed format via the hypercore rel and once in compressed format in + * the internal compressed rel that gets dumped separately. 
+ */ +static Oid hypercore_skip_compressed_data_relid = InvalidOid; + +void +hypercore_skip_compressed_data_for_relation(Oid relid) +{ + hypercore_skip_compressed_data_relid = relid; +} #define HYPERCORE_AM_INFO_SIZE(natts) \ (sizeof(HypercoreInfo) + (sizeof(ColumnCompressionSettings) * (natts))) @@ -167,7 +182,7 @@ static HypercoreInfo * lazy_build_hypercore_info_cache(Relation rel, bool create_chunk_constraints, bool *compressed_relation_created) { - Assert(OidIsValid(rel->rd_id) && !ts_is_hypertable(rel->rd_id)); + Assert(OidIsValid(rel->rd_id) && (!ts_extension_is_loaded() || !ts_is_hypertable(rel->rd_id))); HypercoreInfo *hsinfo; CompressionSettings *settings; @@ -372,6 +387,18 @@ static bool hypercore_getnextslot_noncompressed(HypercoreScanDesc scan, ScanDire static bool hypercore_getnextslot_compressed(HypercoreScanDesc scan, ScanDirection direction, TupleTableSlot *slot); +void +hypercore_scan_set_skip_compressed(TableScanDesc scan) +{ + HypercoreScanDesc hscan; + + if (scan->rs_rd->rd_tableam != hypercore_routine()) + return; + + hscan = (HypercoreScanDesc) scan; + hscan->hs_scan_state = HYPERCORE_SCAN_NON_COMPRESSED; +} + #if PG17_GE static int compute_targrows(Relation rel) @@ -468,6 +495,26 @@ get_scan_type(uint32 flags) } #endif +static inline bool +should_skip_compressed_data(const Relation rel) +{ + /* + * Skip compressed data in a scan if any of these apply: + * + * 1. Transaparent decompression (DecompressChunk) is enabled for + * hypercore. + * + * 2. The scan was started with a flag indicating no compressed data + * should be returned. + * + * 3. A COPY TO on the hypercore is executed and we + * want to ensure such commands issued by pg_dump doesn't lead to + * dumping compressed data twice. 
+ */ + return (ts_guc_enable_transparent_decompression == 2) || + RelationGetRelid(rel) == hypercore_skip_compressed_data_relid; +} + static TableScanDesc hypercore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey keys, ParallelTableScanDesc parallel_scan, uint32 flags) @@ -504,8 +551,7 @@ hypercore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key HypercoreInfo *hsinfo = RelationGetHypercoreInfo(relation); scan->compressed_rel = table_open(hsinfo->compressed_relid, AccessShareLock); - if ((ts_guc_enable_transparent_decompression == 2) || - (keys && keys->sk_flags & SK_NO_COMPRESSED)) + if (should_skip_compressed_data(relation)) { /* * Don't read compressed data if transparent decompression is enabled @@ -514,7 +560,7 @@ hypercore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key * Transparent decompression reads compressed data itself, directly * from the compressed chunk, so avoid reading it again here. */ - scan->hs_scan_state = HYPERCORE_SCAN_NON_COMPRESSED; + hypercore_scan_set_skip_compressed(&scan->rs_base); } initscan(scan, keys, nkeys); @@ -606,6 +652,9 @@ hypercore_endscan(TableScanDesc sscan) pfree(scan->rs_base.rs_key); pfree(scan); + + /* Clear the COPY TO filter state */ + hypercore_skip_compressed_data_relid = InvalidOid; } static bool @@ -3374,6 +3423,7 @@ hypercore_xact_event(XactEvent event, void *arg) Ensure(OidIsValid(hsinfo->compressed_relid), "hypercore \"%s\" has no compressed data relation", get_rel_name(relid)); + Chunk *chunk = ts_chunk_get_by_relid(relid, true); ts_chunk_set_partial(chunk); table_close(rel, NoLock); diff --git a/tsl/src/hypercore/hypercore_handler.h b/tsl/src/hypercore/hypercore_handler.h index 3cc5d5a68fe..91604c9f911 100644 --- a/tsl/src/hypercore/hypercore_handler.h +++ b/tsl/src/hypercore/hypercore_handler.h @@ -13,11 +13,6 @@ #include "hypertable.h" -/* Scan key flag (skey.h) to indicate that a table scan should only return - * tuples from the non-compressed 
relation. Bits 16-31 are reserved for - * individual access methods, so use bit 16. */ -#define SK_NO_COMPRESSED 0x8000 - extern void hypercore_set_analyze_relid(Oid relid); extern const TableAmRoutine *hypercore_routine(void); extern void hypercore_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Hypertable *ht); @@ -25,6 +20,8 @@ extern void hypercore_alter_access_method_begin(Oid relid, bool to_other_am); extern void hypercore_alter_access_method_finish(Oid relid, bool to_other_am); extern Datum hypercore_handler(PG_FUNCTION_ARGS); extern void hypercore_xact_event(XactEvent event, void *arg); +extern void hypercore_skip_compressed_data_for_relation(Oid relid); +extern void hypercore_scan_set_skip_compressed(TableScanDesc scan); typedef struct ColumnCompressionSettings { diff --git a/tsl/src/nodes/columnar_scan/columnar_scan.c b/tsl/src/nodes/columnar_scan/columnar_scan.c index 1231eeb85d3..d6f7b87e1b0 100644 --- a/tsl/src/nodes/columnar_scan/columnar_scan.c +++ b/tsl/src/nodes/columnar_scan/columnar_scan.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include "columnar_scan.h" #include "compression/arrow_c_data_interface.h" #include "compression/compression.h" +#include "guc.h" #include "hypercore/arrow_tts.h" #include "hypercore/hypercore_handler.h" #include "import/ts_explain.h" @@ -52,6 +54,7 @@ typedef struct ColumnarScanState List *scankey_quals; List *vectorized_quals_orig; SimpleProjInfo sprojinfo; + bool only_scan; } ColumnarScanState; static bool @@ -425,6 +428,10 @@ columnar_scan_exec(CustomScanState *state) cstate->nscankeys, cstate->scankeys); state->ss.ss_currentScanDesc = scandesc; + + if (cstate->only_scan && + (ts_guc_hypercore_copy_to_behavior == HYPERCORE_COPY_NO_COMPRESSED_DATA)) + hypercore_scan_set_skip_compressed(scandesc); } /* @@ -836,6 +843,7 @@ columnar_scan_state_create(CustomScan *cscan) #if PG16_GE cstate->css.slotOps = &TTSOpsArrowTuple; #endif + cstate->only_scan = 
linitial_int(cscan->custom_private); return (Node *) cstate; } @@ -897,6 +905,8 @@ columnar_scan_plan_create(PlannerInfo *root, RelOptInfo *rel, CustomPath *best_p columnar_scan_plan->methods = &columnar_scan_plan_methods; columnar_scan_plan->scan.scanrelid = rel->relid; + bool only_scan = (rel->reloptkind == RELOPT_BASEREL) && !ts_rte_is_marked_for_expansion(rte); + columnar_scan_plan->custom_private = list_make1_int(only_scan); /* output target list */ columnar_scan_plan->scan.plan.targetlist = tlist; @@ -989,6 +999,15 @@ columnar_scan_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Hypertable *h { ColumnarScanPath *cspath; Relids required_outer; + RangeTblEntry *rte = planner_rt_fetch(rel->relid, root); + + /* If the rel is NOT marked for expansion, it means this is a SELECT * + * FROM ONLY query and it is necessary to always do a ColumnarScan even if + * it is disabled. Only ColumnarScan has the functionality to tell the TAM + * to only return non-compressed data. */ + if (!ts_guc_enable_columnarscan && rel->reloptkind != RELOPT_BASEREL && + !ts_rte_is_marked_for_expansion(rte)) + return; /* * We don't support pushing join clauses into the quals of a seqscan, but diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 7ec6359cb4b..afcc44383e8 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -161,7 +161,7 @@ tsl_set_rel_pathlist_query(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeT */ else if (ts_is_hypercore_am(chunk->amoid)) { - if (ts_guc_enable_columnarscan) + if (ts_guc_enable_transparent_decompression != 2) columnar_scan_set_rel_pathlist(root, rel, ht); hypercore_set_rel_pathlist(root, rel, ht); diff --git a/tsl/src/process_utility.c b/tsl/src/process_utility.c index bb368f7e047..e1b77152d25 100644 --- a/tsl/src/process_utility.c +++ b/tsl/src/process_utility.c @@ -16,12 +16,84 @@ #include "compression/create.h" #include "continuous_aggs/create.h" +#include "guc.h" #include "hypercore/hypercore_handler.h" #include "hypercore/utils.h" 
#include "hypertable_cache.h" #include "process_utility.h" #include "ts_catalog/continuous_agg.h" +static DDLResult +process_copy(ProcessUtilityArgs *args) +{ + CopyStmt *stmt = castNode(CopyStmt, args->parsetree); + + if (!stmt->relation || stmt->is_from) + return DDL_CONTINUE; + + Oid relid = RangeVarGetRelid(stmt->relation, NoLock, true); + + if (!OidIsValid(relid)) + return DDL_CONTINUE; + + Oid amoid = ts_get_rel_am(relid); + + if (ts_is_hypercore_am(amoid)) + { + if (ts_guc_hypercore_copy_to_behavior == HYPERCORE_COPY_NO_COMPRESSED_DATA) + { + hypercore_skip_compressed_data_for_relation(relid); + ereport(NOTICE, + (errmsg("COPY: skipping compressed data for hypercore \"%s\"", + get_rel_name(relid)), + errdetail( + "Use timescaledb.hypercore_copy_to_behavior to change this behavior."))); + } + } + else if (ts_guc_hypercore_copy_to_behavior == HYPERCORE_COPY_ALL_DATA) + { + const Chunk *chunk = ts_chunk_get_by_relid(relid, false); + + if (!chunk) + return DDL_CONTINUE; + + const Chunk *parent = ts_chunk_get_compressed_chunk_parent(chunk); + Oid parent_amoid = ts_get_rel_am(parent->table_id); + + if (parent && ts_is_hypercore_am(parent_amoid)) + { + /* To avoid returning compressed data twice in a pg_dump, replace + * the 'COPY TO' with 'COPY (select where false) TO' so + * that the COPY on the internal compressed relation returns no + * data. The data is instead returned in uncompressed form via the + * parent hypercore relation. 
*/ + SelectStmt *select = makeNode(SelectStmt); + A_Const *aconst = makeNode(A_Const); +#if PG15_LT + aconst->val.type = T_Integer; + aconst->val.val.ival = 0; +#else + aconst->val.boolval.boolval = false; + aconst->val.boolval.type = T_Boolean; +#endif + select->whereClause = (Node *) aconst; + stmt->relation = NULL; + stmt->attlist = NIL; + stmt->query = (Node *) select; + ereport(NOTICE, + (errmsg("COPY: skipping data for internal compression relation \"%s\"", + get_rel_name(chunk->table_id)), + errdetail("Use COPY TO on hypercore relation \"%s\" to return data in " + "uncompressed form" + " or use timescaledb.hypercore_copy_to_behavior " + "to change this behavior.", + get_rel_name(parent->table_id)))); + } + } + + return DDL_CONTINUE; +} + DDLResult tsl_ddl_command_start(ProcessUtilityArgs *args) { @@ -98,6 +170,9 @@ tsl_ddl_command_start(ProcessUtilityArgs *args) result = DDL_DONE; break; } + case T_CopyStmt: + result = process_copy(args); + break; default: break; } diff --git a/tsl/test/expected/hypercore_dump_restore.out b/tsl/test/expected/hypercore_dump_restore.out new file mode 100644 index 00000000000..704f964f7a9 --- /dev/null +++ b/tsl/test/expected/hypercore_dump_restore.out @@ -0,0 +1,316 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
+select setseed(0.3); + setseed +--------- + +(1 row) + +create table hyperdump (time timestamptz, device int, tempc float, tempf float generated always as (tempc * 2 + 30) stored, status text default 'idle'); +select create_hypertable('hyperdump', by_range('time'), create_default_indexes => false); +NOTICE: adding not-null constraint to column "time" + create_hypertable +------------------- + (1,t) +(1 row) + +insert into hyperdump (time, device, tempc) +select t, ceil(random()*10), random()*60 +from generate_series('2022-06-01'::timestamptz, '2022-07-01', '5m') t; +create index time_device_idx on hyperdump (device, time desc); +alter table hyperdump set ( + timescaledb.compress_orderby='time', + timescaledb.compress_segmentby='device'); +\set TEST_BASE_NAME hypercore_pgdump +SELECT + format('%s/results/%s_results_original.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_ORIGINAL", + format('%s/results/%s_results_restored.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset +-- Save uncompressed table query output as a reference to compare against +\o :TEST_RESULTS_ORIGINAL +select * from hyperdump order by time, device; +\o +explain (costs off) +select * from hyperdump where device = 2 and time < '2022-06-03'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Append + -> Index Scan using _hyper_1_1_chunk_time_device_idx on _hyper_1_1_chunk + Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)) + -> Index Scan using _hyper_1_2_chunk_time_device_idx on _hyper_1_2_chunk + Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)) +(5 rows) + +-- Convert to hypercore +select compress_chunk(ch, compress_using=>'hypercore') from show_chunks('hyperdump') ch; + compress_chunk +---------------------------------------- + _timescaledb_internal._hyper_1_1_chunk + 
_timescaledb_internal._hyper_1_2_chunk + _timescaledb_internal._hyper_1_3_chunk + _timescaledb_internal._hyper_1_4_chunk + _timescaledb_internal._hyper_1_5_chunk + _timescaledb_internal._hyper_1_6_chunk +(6 rows) + +reindex table hyperdump; +explain (costs off) +select * from hyperdump where device = 2 and time < '2022-06-03'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Append + -> Index Scan using _hyper_1_1_chunk_time_device_idx on _hyper_1_1_chunk + Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)) + -> Index Scan using _hyper_1_2_chunk_time_device_idx on _hyper_1_2_chunk + Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)) +(5 rows) + +set timescaledb.hypercore_copy_to_behavior='all_data'; +select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset +\d+ :chunk + Table "_timescaledb_internal._hyper_1_3_chunk" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+--------------------------+-----------+----------+---------------------------------------------------------------------------------+----------+--------------+------------- + time | timestamp with time zone | | not null | | plain | | + device | integer | | | | plain | | + tempc | double precision | | | | plain | | + tempf | double precision | | | generated always as (tempc * 2::double precision + 30::double precision) stored | plain | | + status | text | | | 'idle'::text | extended | | +Indexes: + "_hyper_1_3_chunk_time_device_idx" btree (device, "time" DESC) +Check constraints: + "constraint_3" CHECK ("time" >= 'Wed Jun 08 17:00:00 2022 PDT'::timestamp with time zone AND "time" < 'Wed Jun 15 17:00:00 2022 PDT'::timestamp with time zone) +Inherits: hyperdump + +SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk +FROM _timescaledb_catalog.chunk c1 
+INNER JOIN _timescaledb_catalog.chunk c2 +ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset +-- This should not return any data when in this COPY mode. +copy :cchunk to stdout; +NOTICE: COPY: skipping data for internal compression relation "compress_hyper_2_9_chunk" +--- +-- Create a "compressed" dump where only uncompressed data is +-- returned dumped via the TAM relation. The rest of the data is +-- dumped via the internal compressed relation. This is compatible +-- with compression without TAM. +-- +-- When restoring from the compressed dump, it will create hypercore +-- relations that are also compressed. +-- +\c postgres :ROLE_SUPERUSER +\! utils/pg_dump_aux_dump.sh dump/hypercore-dump-compress.sql -ctimescaledb.hypercore_copy_to_behavior='no_compressed_data' +\c :TEST_DBNAME +create extension timescaledb; +select timescaledb_pre_restore(); + timescaledb_pre_restore +------------------------- + t +(1 row) + +\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-compress.sql +select timescaledb_post_restore(); + timescaledb_post_restore +-------------------------- + t +(1 row) + +-- Background workers restarted by post restore, so stop again +select _timescaledb_functions.stop_background_workers(); + stop_background_workers +------------------------- + t +(1 row) + +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER +\o :TEST_RESULTS_RESTORED +select * from hyperdump order by time, device; +\o +select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset +\d+ hyperdump + Table "public.hyperdump" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+--------------------------+-----------+----------+---------------------------------------------------------------------------------+----------+--------------+------------- + time | timestamp with time zone | | not null | | plain | | + device | integer | | | | plain | | + tempc | double precision | | | | plain | | + tempf | double precision | | | generated always as 
(tempc * 2::double precision + 30::double precision) stored | plain | | + status | text | | | 'idle'::text | extended | | +Indexes: + "time_device_idx" btree (device, "time" DESC) +Triggers: + ts_insert_blocker BEFORE INSERT ON hyperdump FOR EACH ROW EXECUTE FUNCTION _timescaledb_functions.insert_blocker() +Child tables: _timescaledb_internal._hyper_1_1_chunk, + _timescaledb_internal._hyper_1_2_chunk, + _timescaledb_internal._hyper_1_3_chunk, + _timescaledb_internal._hyper_1_4_chunk, + _timescaledb_internal._hyper_1_5_chunk, + _timescaledb_internal._hyper_1_6_chunk + +\d+ :chunk + Table "_timescaledb_internal._hyper_1_3_chunk" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+--------------------------+-----------+----------+---------------------------------------------------------------------------------+----------+--------------+------------- + time | timestamp with time zone | | not null | | plain | | + device | integer | | | | plain | | + tempc | double precision | | | | plain | | + tempf | double precision | | | generated always as (tempc * 2::double precision + 30::double precision) stored | plain | | + status | text | | | 'idle'::text | extended | | +Indexes: + "_hyper_1_3_chunk_time_device_idx" btree (device, "time" DESC) +Check constraints: + "constraint_3" CHECK ("time" >= 'Wed Jun 08 17:00:00 2022 PDT'::timestamp with time zone AND "time" < 'Wed Jun 15 17:00:00 2022 PDT'::timestamp with time zone) +Inherits: hyperdump + +explain (costs off) +select * from hyperdump where time < '2022-06-03'; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Append + -> Custom Scan (ColumnarScan) on _hyper_1_1_chunk + Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone) + -> Custom Scan (ColumnarScan) on _hyper_1_2_chunk + Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone) +(5 rows) + +reindex 
table hyperdump; +explain (costs off) +select * from hyperdump where time < '2022-06-03'; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Append + -> Custom Scan (ColumnarScan) on _hyper_1_1_chunk + Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone) + -> Custom Scan (ColumnarScan) on _hyper_1_2_chunk + Vectorized Filter: ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone) +(5 rows) + +select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset +-- Original output and restored output should be the same, i.e., no +-- diff +:DIFF_CMD +SELECT format('%s/results/%s_results_restored_2.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset +reindex table hyperdump; +explain (costs off) +select * from hyperdump where device = 2 and time < '2022-06-03'; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Append + -> Index Scan using _hyper_1_1_chunk_time_device_idx on _hyper_1_1_chunk + Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)) + -> Index Scan using _hyper_1_2_chunk_time_device_idx on _hyper_1_2_chunk + Index Cond: ((device = 2) AND ("time" < 'Fri Jun 03 00:00:00 2022 PDT'::timestamp with time zone)) +(5 rows) + +--- +-- Create an "uncompressed" dump where _all_ data is dumped via the +-- TAM relation. No data is dumped via the internal compressed +-- relation. This dump is compatible with uncompressed hypertables. +-- +-- When restoring from the uncompressed dump, it will create +-- hypercore relations that are also uncompressed. +-- +\c postgres :ROLE_SUPERUSER +\! 
utils/pg_dump_aux_dump.sh dump/hypercore-dump-uncompress.sql -ctimescaledb.hypercore_copy_to_behavior='all_data' +\c :TEST_DBNAME +create extension timescaledb; +select timescaledb_pre_restore(); + timescaledb_pre_restore +------------------------- + t +(1 row) + +\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-uncompress.sql +select timescaledb_post_restore(); + timescaledb_post_restore +-------------------------- + t +(1 row) + +-- Background workers restarted by post restore, so stop again +select _timescaledb_functions.stop_background_workers(); + stop_background_workers +------------------------- + t +(1 row) + +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER +select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset +select pg_relation_size(:'chunk'); + pg_relation_size +------------------ + 147456 +(1 row) + +-- If restore is OK, and TAM is used, we should see a ColumnarScan +explain (costs off) +select * from hyperdump order by time, device limit 10; + QUERY PLAN +-------------------------------------------------------------------- + Limit + -> Sort + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Custom Scan (ColumnarScan) on _hyper_1_1_chunk + -> Custom Scan (ColumnarScan) on _hyper_1_2_chunk + -> Custom Scan (ColumnarScan) on _hyper_1_3_chunk + -> Custom Scan (ColumnarScan) on _hyper_1_4_chunk + -> Custom Scan (ColumnarScan) on _hyper_1_5_chunk + -> Custom Scan (ColumnarScan) on _hyper_1_6_chunk +(10 rows) + +-- +-- After restore, the status of the compressed chunks should be +-- partial since, with an uncompressed dump, the restore inserts data +-- via the hypercore relation in uncompressed form. 
+select c1.table_name, c1.status from _timescaledb_catalog.chunk c1 +join _timescaledb_catalog.chunk c2 on (c1.compressed_chunk_id = c2.id) +order by c2.table_name; + table_name | status +------------------+-------- + _hyper_1_4_chunk | 9 + _hyper_1_5_chunk | 9 + _hyper_1_6_chunk | 9 + _hyper_1_1_chunk | 9 + _hyper_1_2_chunk | 9 + _hyper_1_3_chunk | 9 +(6 rows) + +-- Check that a compressed chunk holds no data +SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk +FROM _timescaledb_catalog.chunk c1 +INNER JOIN _timescaledb_catalog.chunk c2 +ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset +-- Compressed relation should hold no data +select count(*) from :cchunk; + count +------- + 0 +(1 row) + +-- Compress all chunks +select compress_chunk(ch) from show_chunks('hyperdump') ch; + compress_chunk +---------------------------------------- + _timescaledb_internal._hyper_1_1_chunk + _timescaledb_internal._hyper_1_2_chunk + _timescaledb_internal._hyper_1_3_chunk + _timescaledb_internal._hyper_1_4_chunk + _timescaledb_internal._hyper_1_5_chunk + _timescaledb_internal._hyper_1_6_chunk +(6 rows) + +-- Data should now be compressed +select count(*) from :cchunk; + count +------- + 10 +(1 row) + +-- Output data to a file for comparison with original contents +\o :TEST_RESULTS_RESTORED +select * from hyperdump order by time, device; +\o +select format('\! 
diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset +-- Outputs should be the same, i.e., no diff +:DIFF_CMD diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index 6d0fbc412ac..5339ddb1ac6 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -122,6 +122,7 @@ if((${PG_VERSION_MAJOR} GREATER_EQUAL "15")) hypercore_cursor.sql hypercore_ddl.sql hypercore_delete.sql + hypercore_dump_restore.sql hypercore_index_btree.sql hypercore_index_hash.sql hypercore_insert.sql diff --git a/tsl/test/sql/hypercore_dump_restore.sql b/tsl/test/sql/hypercore_dump_restore.sql new file mode 100644 index 00000000000..9bb45ee23d4 --- /dev/null +++ b/tsl/test/sql/hypercore_dump_restore.sql @@ -0,0 +1,161 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
+ +select setseed(0.3); +create table hyperdump (time timestamptz, device int, tempc float, tempf float generated always as (tempc * 2 + 30) stored, status text default 'idle'); +select create_hypertable('hyperdump', by_range('time'), create_default_indexes => false); + +insert into hyperdump (time, device, tempc) +select t, ceil(random()*10), random()*60 +from generate_series('2022-06-01'::timestamptz, '2022-07-01', '5m') t; + +create index time_device_idx on hyperdump (device, time desc); + +alter table hyperdump set ( + timescaledb.compress_orderby='time', + timescaledb.compress_segmentby='device'); + +\set TEST_BASE_NAME hypercore_pgdump + +SELECT + format('%s/results/%s_results_original.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_ORIGINAL", + format('%s/results/%s_results_restored.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset + +-- Save uncompressed table query output as a reference to compare against +\o :TEST_RESULTS_ORIGINAL +select * from hyperdump order by time, device; +\o + +explain (costs off) +select * from hyperdump where device = 2 and time < '2022-06-03'; + +-- Convert to hypercore +select compress_chunk(ch, compress_using=>'hypercore') from show_chunks('hyperdump') ch; + + +reindex table hyperdump; +explain (costs off) +select * from hyperdump where device = 2 and time < '2022-06-03'; + +set timescaledb.hypercore_copy_to_behavior='all_data'; + +select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset +\d+ :chunk + +SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk +FROM _timescaledb_catalog.chunk c1 +INNER JOIN _timescaledb_catalog.chunk c2 +ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset + +-- This should not return any data when in this COPY mode. +copy :cchunk to stdout; + +--- +-- Create a "compressed" dump where only uncompressed data is +-- returned dumped via the TAM relation. 
The rest of the data is +-- dumped via the internal compressed relation. This is compatible +-- with compression without TAM. +-- +-- When restoring from the compressed dump, it will create hypercore +-- relations that are also compressed. +-- +\c postgres :ROLE_SUPERUSER +\! utils/pg_dump_aux_dump.sh dump/hypercore-dump-compress.sql -ctimescaledb.hypercore_copy_to_behavior='no_compressed_data' +\c :TEST_DBNAME +create extension timescaledb; +select timescaledb_pre_restore(); +\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-compress.sql + +select timescaledb_post_restore(); +-- Background workers restarted by post restore, so stop again +select _timescaledb_functions.stop_background_workers(); + +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER + +\o :TEST_RESULTS_RESTORED +select * from hyperdump order by time, device; +\o + +select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset +\d+ hyperdump +\d+ :chunk +explain (costs off) +select * from hyperdump where time < '2022-06-03'; +reindex table hyperdump; +explain (costs off) +select * from hyperdump where time < '2022-06-03'; + +select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset + +-- Original output and restored output should be the same, i.e., no +-- diff +:DIFF_CMD + +SELECT format('%s/results/%s_results_restored_2.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_RESTORED" \gset + +reindex table hyperdump; +explain (costs off) +select * from hyperdump where device = 2 and time < '2022-06-03'; + + +--- +-- Create an "uncompressed" dump where _all_ data is dumped via the +-- TAM relation. No data is dumped via the internal compressed +-- relation. This dump is compatible with uncompressed hypertables. +-- +-- When restoring from the uncompressed dump, it will create +-- hypercore relations that are also uncompressed. +-- +\c postgres :ROLE_SUPERUSER +\! 
utils/pg_dump_aux_dump.sh dump/hypercore-dump-uncompress.sql -ctimescaledb.hypercore_copy_to_behavior='all_data' +\c :TEST_DBNAME +create extension timescaledb; +select timescaledb_pre_restore(); +\! utils/pg_dump_aux_restore.sh dump/hypercore-dump-uncompress.sql + +select timescaledb_post_restore(); +-- Background workers restarted by post restore, so stop again +select _timescaledb_functions.stop_background_workers(); + +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER + +select chunk from show_chunks('hyperdump') chunk offset 2 limit 1 \gset + +select pg_relation_size(:'chunk'); + +-- If restore is OK, and TAM is used, we should see a ColumnarScan +explain (costs off) +select * from hyperdump order by time, device limit 10; + +-- +-- After restore, the status of the compressed chunks should be +-- partial since, with an uncompressed dump, the restore inserts data +-- via the hypercore relation in uncompressed form. +select c1.table_name, c1.status from _timescaledb_catalog.chunk c1 +join _timescaledb_catalog.chunk c2 on (c1.compressed_chunk_id = c2.id) +order by c2.table_name; + +-- Check that a compressed chunk holds no data +SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk +FROM _timescaledb_catalog.chunk c1 +INNER JOIN _timescaledb_catalog.chunk c2 +ON (c1.compressed_chunk_id = c2.id) offset 2 limit 1 \gset + +-- Compressed relation should hold no data +select count(*) from :cchunk; + +-- Compress all chunks +select compress_chunk(ch) from show_chunks('hyperdump') ch; +-- Data should now be compressed +select count(*) from :cchunk; + +-- Output data to a file for comparison with original contents +\o :TEST_RESULTS_RESTORED +select * from hyperdump order by time, device; +\o + +select format('\! diff -u --label "hypercore original" --label "hypercore restored" %s %s', :'TEST_RESULTS_ORIGINAL', :'TEST_RESULTS_RESTORED') as "DIFF_CMD" \gset + +-- Outputs should be the same, i.e., no diff +:DIFF_CMD