diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 1756f1a4b67..b246ddc6341 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5444,6 +5444,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i the pg_stat_bgwriter view, archiver to reset all the counters shown in the pg_stat_archiver view, + io to reset all the counters shown in the + pg_stat_io view, wal to reset all the counters shown in the pg_stat_wal view or recovery_prefetch to reset all the counters shown diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index a80eda3cf43..7d7482dde02 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + pgstat_io.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index a2b872c24bf..518ee3f798e 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -9,6 +9,7 @@ backend_sources += files( 'pgstat_checkpointer.c', 'pgstat_database.c', 'pgstat_function.c', + 'pgstat_io.c', 'pgstat_relation.c', 'pgstat_replslot.c', 'pgstat_shmem.c', diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 0fa5370bcd2..60fc4e761f7 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -72,6 +72,7 @@ * - pgstat_checkpointer.c * - pgstat_database.c * - pgstat_function.c + * - pgstat_io.c * - pgstat_relation.c * - pgstat_replslot.c * - pgstat_slru.c @@ -359,6 +360,15 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { .snapshot_cb = pgstat_checkpointer_snapshot_cb, }, + [PGSTAT_KIND_IO] = { + .name = "io", + + .fixed_amount = true, + + .reset_all_cb = pgstat_io_reset_all_cb, + .snapshot_cb = 
pgstat_io_snapshot_cb, + }, + [PGSTAT_KIND_SLRU] = { .name = "slru", @@ -582,6 +592,7 @@ pgstat_report_stat(bool force) /* Don't expend a clock check if nothing to do */ if (dlist_is_empty(&pgStatPending) && + !have_iostats && !have_slrustats && !pgstat_have_pending_wal()) { @@ -628,6 +639,9 @@ pgstat_report_stat(bool force) /* flush database / relation / function / ... stats */ partial_flush |= pgstat_flush_pending_entries(nowait); + /* flush IO stats */ + partial_flush |= pgstat_flush_io(nowait); + /* flush wal stats */ partial_flush |= pgstat_flush_wal(nowait); @@ -1322,6 +1336,12 @@ pgstat_write_statsfile(void) pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER); write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer); + /* + * Write IO stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_IO); + write_chunk_s(fpout, &pgStatLocal.snapshot.io); + /* * Write SLRU stats struct */ @@ -1496,6 +1516,12 @@ pgstat_read_statsfile(void) if (!read_chunk_s(fpin, &shmem->checkpointer.stats)) goto error; + /* + * Read IO stats struct + */ + if (!read_chunk_s(fpin, &shmem->io.stats)) + goto error; + /* * Read SLRU stats struct */ diff --git a/src/backend/utils/activity/pgstat_bgwriter.c b/src/backend/utils/activity/pgstat_bgwriter.c index 9247f2dda2c..92be384b0d0 100644 --- a/src/backend/utils/activity/pgstat_bgwriter.c +++ b/src/backend/utils/activity/pgstat_bgwriter.c @@ -24,7 +24,7 @@ PgStat_BgWriterStats PendingBgWriterStats = {0}; /* - * Report bgwriter statistics + * Report bgwriter and IO statistics */ void pgstat_report_bgwriter(void) @@ -56,6 +56,11 @@ pgstat_report_bgwriter(void) * Clear out the statistics buffer, so it can be re-used. 
*/ MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats)); + + /* + * Report IO statistics + */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c index 3e9ab451033..26dec112f6c 100644 --- a/src/backend/utils/activity/pgstat_checkpointer.c +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -24,7 +24,7 @@ PgStat_CheckpointerStats PendingCheckpointerStats = {0}; /* - * Report checkpointer statistics + * Report checkpointer and IO statistics */ void pgstat_report_checkpointer(void) @@ -62,6 +62,11 @@ pgstat_report_checkpointer(void) * Clear out the statistics buffer, so it can be re-used. */ MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats)); + + /* + * Report IO statistics + */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c new file mode 100644 index 00000000000..0e07e0848d3 --- /dev/null +++ b/src/backend/utils/activity/pgstat_io.c @@ -0,0 +1,391 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io.c + * Implementation of IO statistics. + * + * This file contains the implementation of IO statistics. It is kept separate + * from pgstat.c to enforce the line between the statistics access / storage + * implementation and the details about individual types of statistics. + * + * Copyright (c) 2021-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +static PgStat_BktypeIO PendingIOStats; +bool have_iostats = false; + + +/* + * Check that stats have not been counted for any combination of IOObject, + * IOContext, and IOOp which are not tracked for the passed-in BackendType. 
The + * passed-in PgStat_BktypeIO must contain stats from the BackendType specified + * by the second parameter. Caller is responsible for locking the passed-in + * PgStat_BktypeIO, if needed. + */ +bool +pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, + BackendType bktype) +{ + bool bktype_tracked = pgstat_tracks_io_bktype(bktype); + + for (IOObject io_object = IOOBJECT_FIRST; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + /* + * Don't bother trying to skip to the next loop iteration if + * pgstat_tracks_io_object() would return false here. We still + * need to validate that each counter is zero anyway. + */ + for (IOOp io_op = IOOP_FIRST; io_op < IOOP_NUM_TYPES; io_op++) + { + /* No stats, so nothing to validate */ + if (backend_io->data[io_object][io_context][io_op] == 0) + continue; + + /* There are stats and there shouldn't be */ + if (!bktype_tracked || + !pgstat_tracks_io_op(bktype, io_object, io_context, io_op)) + return false; + } + } + } + + return true; +} + +void +pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) +{ + Assert(io_object < IOOBJECT_NUM_TYPES); + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); + + PendingIOStats.data[io_object][io_context][io_op]++; + + have_iostats = true; +} + +PgStat_IO * +pgstat_fetch_stat_io(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_IO); + + return &pgStatLocal.snapshot.io; +} + +/* + * Flush out locally pending IO statistics + * + * If no stats have been recorded, this function returns false. + * + * If nowait is true, this function returns true if the lock could not be + * acquired. Otherwise, return false. 
+ */ +bool +pgstat_flush_io(bool nowait) +{ + LWLock *bktype_lock; + PgStat_BktypeIO *bktype_shstats; + + if (!have_iostats) + return false; + + bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType]; + bktype_shstats = + &pgStatLocal.shmem->io.stats.stats[MyBackendType]; + + if (!nowait) + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE)) + return true; + + for (IOObject io_object = IOOBJECT_FIRST; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + for (IOOp io_op = IOOP_FIRST; + io_op < IOOP_NUM_TYPES; io_op++) + bktype_shstats->data[io_object][io_context][io_op] += + PendingIOStats.data[io_object][io_context][io_op]; + } + } + + Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType)); + + LWLockRelease(bktype_lock); + + memset(&PendingIOStats, 0, sizeof(PendingIOStats)); + + have_iostats = false; + + return false; +} + +const char * +pgstat_get_io_context_name(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_NORMAL: + return "normal"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); + pg_unreachable(); +} + +const char * +pgstat_get_io_object_name(IOObject io_object) +{ + switch (io_object) + { + case IOOBJECT_RELATION: + return "relation"; + case IOOBJECT_TEMP_RELATION: + return "temp relation"; + } + + elog(ERROR, "unrecognized IOObject value: %d", io_object); + pg_unreachable(); +} + +void +pgstat_io_reset_all_cb(TimestampTz ts) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; + + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + + /* + * Use the lock in the first BackendType's 
PgStat_BktypeIO to protect + * the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts; + + memset(bktype_shstats, 0, sizeof(*bktype_shstats)); + LWLockRelease(bktype_lock); + } +} + +void +pgstat_io_snapshot_cb(void) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; + PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i]; + + LWLockAcquire(bktype_lock, LW_SHARED); + + /* + * Use the lock in the first BackendType's PgStat_BktypeIO to protect + * the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.snapshot.io.stat_reset_timestamp = + pgStatLocal.shmem->io.stats.stat_reset_timestamp; + + /* using struct assignment due to better type safety */ + *bktype_snap = *bktype_shstats; + LWLockRelease(bktype_lock); + } +} + +/* + * IO statistics are not collected for all BackendTypes. + * + * The following BackendTypes do not participate in the cumulative stats + * subsystem or do not perform IO that we currently track: + * - Syslogger because it is not connected to shared memory + * - Archiver because most relevant archiving IO is delegated to a + * specialized command or module + * - WAL Receiver and WAL Writer, whose IO is not tracked in pg_stat_io for now + * + * Returns true if the BackendType participates in the cumulative stats + * subsystem for IO and false if it does not. + * + * When adding a new BackendType, also consider adding relevant restrictions to + * pgstat_tracks_io_object() and pgstat_tracks_io_op(). + */ +bool +pgstat_tracks_io_bktype(BackendType bktype) +{ + /* + * List every type so that new backend types trigger a warning about + * needing to adjust this switch. 
+ */ + switch (bktype) + { + case B_INVALID: + case B_ARCHIVER: + case B_LOGGER: + case B_WAL_RECEIVER: + case B_WAL_WRITER: + return false; + + case B_AUTOVAC_LAUNCHER: + case B_AUTOVAC_WORKER: + case B_BACKEND: + case B_BG_WORKER: + case B_BG_WRITER: + case B_CHECKPOINTER: + case B_STANDALONE_BACKEND: + case B_STARTUP: + case B_WAL_SENDER: + return true; + } + + return false; +} + +/* + * Some BackendTypes do not perform IO on certain IOObjects or in certain + * IOContexts. Some IOObjects are never operated on in some IOContexts. Check + * that the given BackendType is expected to do IO in the given IOContext and + * on the given IOObject and that the given IOObject is expected to be operated + * on in the given IOContext. + */ +bool +pgstat_tracks_io_object(BackendType bktype, IOObject io_object, + IOContext io_context) +{ + bool no_temp_rel; + + /* + * Some BackendTypes should never track IO statistics. + */ + if (!pgstat_tracks_io_bktype(bktype)) + return false; + + /* + * Currently, IO on temporary relations can only occur in the + * IOCONTEXT_NORMAL IOContext. + */ + if (io_context != IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries will use local buffers and operate on temporary + * relations. Parallel workers will not use local buffers (see + * InitLocalBuffers()); however, extensions leveraging background workers + * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for + * BackendType B_BG_WORKER. 
+ */ + no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || + bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (no_temp_rel && io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * Some BackendTypes do not currently perform any IO in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && + (io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || + io_context == IOCONTEXT_VACUUM)) + return false; + + if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) + return false; + + if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && + io_context == IOCONTEXT_BULKWRITE) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts or on certain IOObjects. Check that the given + * IOOp is valid for the given BackendType in the given IOContext and on the + * given IOObject. Note that there are currently no cases of an IOOp being + * invalid for a particular BackendType only within a certain IOContext and/or + * only on a certain IOObject. + */ +bool +pgstat_tracks_io_op(BackendType bktype, IOObject io_object, + IOContext io_context, IOOp io_op) +{ + bool strategy_io_context; + + /* if (io_context, io_object) will never collect stats, we're done */ + if (!pgstat_tracks_io_object(bktype, io_object, io_context)) + return false; + + /* + * Some BackendTypes will not do certain IOOps. 
+ */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. + */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. + */ + if (!strategy_io_context && io_op == IOOP_REUSE) + return false; + + /* + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_NORMAL IOContext. See comment in + * register_dirty_segment() for more details. + */ + if (strategy_io_context && io_op == IOOP_FSYNC) + return false; + + /* + * Temporary tables are not logged and thus do not require fsync'ing. + */ + if (io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC) + return false; + + return true; +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 2e20b93c202..f793ac15165 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -206,7 +206,7 @@ pgstat_drop_relation(Relation rel) } /* - * Report that the table was just vacuumed. + * Report that the table was just vacuumed and flush IO statistics. */ void pgstat_report_vacuum(Oid tableoid, bool shared, @@ -258,10 +258,18 @@ pgstat_report_vacuum(Oid tableoid, bool shared, } pgstat_unlock_entry(entry_ref); + + /* + * Flush IO statistics now. 
pgstat_report_stat() will flush IO stats, + * however this will not be called until after an entire autovacuum cycle + * is done -- which will likely vacuum many relations -- or until the + * VACUUM command has processed all tables and committed. + */ + pgstat_flush_io(false); } /* - * Report that the table was just analyzed. + * Report that the table was just analyzed and flush IO statistics. * * Caller must provide new live- and dead-tuples estimates, as well as a * flag indicating whether to reset the mod_since_analyze counter. @@ -341,6 +349,9 @@ pgstat_report_analyze(Relation rel, } pgstat_unlock_entry(entry_ref); + + /* see pgstat_report_vacuum() */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index c1506b53d08..09fffd0e82a 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -202,6 +202,10 @@ StatsShmemInit(void) LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA); LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA); LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA); + + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + LWLockInitialize(&ctl->io.locks[i], + LWTRANCHE_PGSTATS_DATA); } else { diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index e7a82b5feda..e8598b2f4e0 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -34,7 +34,7 @@ static WalUsage prevWalUsage; /* * Calculate how much WAL usage counters have increased and update - * shared statistics. + * shared WAL and IO statistics. * * Must be called by processes that generate WAL, that do not call * pgstat_report_stat(), like walwriter. 
@@ -43,6 +43,13 @@ void pgstat_report_wal(bool force) { - pgstat_flush_wal(force); + /* + * The flush routines take "nowait", which is the inverse of "force": only + * block on the stats locks when the caller insists on the flush happening. + */ + bool nowait = !force; + + pgstat_flush_wal(nowait); + pgstat_flush_io(nowait); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 67374934022..924698e6ae4 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1587,7 +1587,12 @@ pg_stat_reset(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -/* Reset some shared cluster-wide counters */ +/* + * Reset some shared cluster-wide counters + * + * When adding a new reset target, ideally the name should match that in + * pgstat_kind_infos, if relevant. + */ Datum pg_stat_reset_shared(PG_FUNCTION_ARGS) { @@ -1604,6 +1609,8 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER); pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER); } + else if (strcmp(target, "io") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_IO); else if (strcmp(target, "recovery_prefetch") == 0) XLogPrefetchResetStats(); else if (strcmp(target, "wal") == 0) @@ -1612,7 +1619,7 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized reset target: \"%s\"", target), - errhint("Target must be \"archiver\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); + errhint("Target must be \"archiver\", \"bgwriter\", \"io\", \"recovery_prefetch\", or \"wal\"."))); PG_RETURN_VOID(); } diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 96b3a1e1a07..c309e0233d6 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -332,6 +332,8 @@ typedef enum BackendType B_WAL_WRITER, } BackendType; +#define BACKEND_NUM_TYPES (B_WAL_WRITER + 1) + extern PGDLLIMPORT BackendType MyBackendType; extern const char *GetBackendTypeDesc(BackendType backendType); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5e3326a3b91..db9675884f3 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -48,6 +48,7 @@ typedef enum PgStat_Kind PGSTAT_KIND_ARCHIVER, 
PGSTAT_KIND_BGWRITER, PGSTAT_KIND_CHECKPOINTER, + PGSTAT_KIND_IO, PGSTAT_KIND_SLRU, PGSTAT_KIND_WAL, } PgStat_Kind; @@ -242,7 +243,7 @@ typedef struct PgStat_TableXactStatus * ------------------------------------------------------------ */ -#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA9 +#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAA typedef struct PgStat_ArchiverStats { @@ -276,6 +277,55 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; + +/* + * Types related to counting IO operations + */ +typedef enum IOObject +{ + IOOBJECT_RELATION, + IOOBJECT_TEMP_RELATION, +} IOObject; + +#define IOOBJECT_FIRST IOOBJECT_RELATION +#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1) + +typedef enum IOContext +{ + IOCONTEXT_BULKREAD, + IOCONTEXT_BULKWRITE, + IOCONTEXT_NORMAL, + IOCONTEXT_VACUUM, +} IOContext; + +#define IOCONTEXT_FIRST IOCONTEXT_BULKREAD +#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) + +typedef enum IOOp +{ + IOOP_EVICT, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_REUSE, + IOOP_WRITE, +} IOOp; + +#define IOOP_FIRST IOOP_EVICT +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef struct PgStat_BktypeIO +{ + PgStat_Counter data[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; +} PgStat_BktypeIO; + +typedef struct PgStat_IO +{ + TimestampTz stat_reset_timestamp; + PgStat_BktypeIO stats[BACKEND_NUM_TYPES]; +} PgStat_IO; + + typedef struct PgStat_StatDBEntry { PgStat_Counter xact_commit; @@ -453,6 +503,24 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io.c + */ + +extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, + BackendType bktype); +extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op); +extern PgStat_IO *pgstat_fetch_stat_io(void); +extern const char *pgstat_get_io_context_name(IOContext io_context); +extern const char 
*pgstat_get_io_object_name(IOObject io_object); + +extern bool pgstat_tracks_io_bktype(BackendType bktype); +extern bool pgstat_tracks_io_object(BackendType bktype, + IOObject io_object, IOContext io_context); +extern bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object, + IOContext io_context, IOOp io_op); + + /* * Functions in pgstat_database.c */ diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 12fd51f1ae3..6badb2fde4b 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -329,6 +329,17 @@ typedef struct PgStatShared_Checkpointer PgStat_CheckpointerStats reset_offset; } PgStatShared_Checkpointer; +/* Shared-memory ready PgStat_IO */ +typedef struct PgStatShared_IO +{ + /* + * locks[i] protects stats.stats[i]. locks[0] also protects + * stats.stat_reset_timestamp. + */ + LWLock locks[BACKEND_NUM_TYPES]; + PgStat_IO stats; +} PgStatShared_IO; + typedef struct PgStatShared_SLRU { /* lock protects ->stats */ @@ -419,6 +430,7 @@ typedef struct PgStat_ShmemControl PgStatShared_Archiver archiver; PgStatShared_BgWriter bgwriter; PgStatShared_Checkpointer checkpointer; + PgStatShared_IO io; PgStatShared_SLRU slru; PgStatShared_Wal wal; } PgStat_ShmemControl; @@ -442,6 +454,8 @@ typedef struct PgStat_Snapshot PgStat_CheckpointerStats checkpointer; + PgStat_IO io; + PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS]; PgStat_WalStats wal; @@ -549,6 +563,15 @@ extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, Time extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +/* + * Functions in pgstat_io.c + */ + +extern bool pgstat_flush_io(bool nowait); +extern void pgstat_io_reset_all_cb(TimestampTz ts); +extern void pgstat_io_snapshot_cb(void); + + /* * Functions in pgstat_relation.c */ @@ -643,6 +666,13 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid) extern PGDLLIMPORT PgStat_LocalState pgStatLocal; +/* + * 
Variables in pgstat_io.c + */ + +extern PGDLLIMPORT bool have_iostats; + + /* * Variables in pgstat_slru.c */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index d3224dfc36e..36d1dc01177 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1108,7 +1108,10 @@ ID INFIX INT128 INTERFACE_INFO +IOContext IOFuncSelector +IOObject +IOOp IPCompareMethod ITEM IV @@ -2017,6 +2020,7 @@ PgStatShared_Common PgStatShared_Database PgStatShared_Function PgStatShared_HashEntry +PgStatShared_IO PgStatShared_Relation PgStatShared_ReplSlot PgStatShared_SLRU @@ -2026,6 +2030,7 @@ PgStat_ArchiverStats PgStat_BackendFunctionEntry PgStat_BackendSubEntry PgStat_BgWriterStats +PgStat_BktypeIO PgStat_CheckpointerStats PgStat_Counter PgStat_EntryRef @@ -2034,6 +2039,7 @@ PgStat_FetchConsistency PgStat_FunctionCallUsage PgStat_FunctionCounts PgStat_HashKey +PgStat_IO PgStat_Kind PgStat_KindInfo PgStat_LocalState