diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 1756f1a4b67..b246ddc6341 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -5444,6 +5444,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
the pg_stat_bgwriter
view, archiver to reset all the counters shown in
the pg_stat_archiver view,
+ io to reset all the counters shown in the
+ pg_stat_io view,
wal to reset all the counters shown in the
pg_stat_wal view or
recovery_prefetch to reset all the counters shown
diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile
index a80eda3cf43..7d7482dde02 100644
--- a/src/backend/utils/activity/Makefile
+++ b/src/backend/utils/activity/Makefile
@@ -22,6 +22,7 @@ OBJS = \
pgstat_checkpointer.o \
pgstat_database.o \
pgstat_function.o \
+ pgstat_io.o \
pgstat_relation.o \
pgstat_replslot.o \
pgstat_shmem.o \
diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build
index a2b872c24bf..518ee3f798e 100644
--- a/src/backend/utils/activity/meson.build
+++ b/src/backend/utils/activity/meson.build
@@ -9,6 +9,7 @@ backend_sources += files(
'pgstat_checkpointer.c',
'pgstat_database.c',
'pgstat_function.c',
+ 'pgstat_io.c',
'pgstat_relation.c',
'pgstat_replslot.c',
'pgstat_shmem.c',
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 0fa5370bcd2..60fc4e761f7 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -72,6 +72,7 @@
* - pgstat_checkpointer.c
* - pgstat_database.c
* - pgstat_function.c
+ * - pgstat_io.c
* - pgstat_relation.c
* - pgstat_replslot.c
* - pgstat_slru.c
@@ -359,6 +360,15 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = {
.snapshot_cb = pgstat_checkpointer_snapshot_cb,
},
+ [PGSTAT_KIND_IO] = {
+ .name = "io",
+
+ .fixed_amount = true,
+
+ .reset_all_cb = pgstat_io_reset_all_cb,
+ .snapshot_cb = pgstat_io_snapshot_cb,
+ },
+
[PGSTAT_KIND_SLRU] = {
.name = "slru",
@@ -582,6 +592,7 @@ pgstat_report_stat(bool force)
/* Don't expend a clock check if nothing to do */
if (dlist_is_empty(&pgStatPending) &&
+ !have_iostats &&
!have_slrustats &&
!pgstat_have_pending_wal())
{
@@ -628,6 +639,9 @@ pgstat_report_stat(bool force)
/* flush database / relation / function / ... stats */
partial_flush |= pgstat_flush_pending_entries(nowait);
+ /* flush IO stats */
+ partial_flush |= pgstat_flush_io(nowait);
+
/* flush wal stats */
partial_flush |= pgstat_flush_wal(nowait);
@@ -1322,6 +1336,12 @@ pgstat_write_statsfile(void)
pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER);
write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer);
+ /*
+ * Write IO stats struct
+ */
+ pgstat_build_snapshot_fixed(PGSTAT_KIND_IO);
+ write_chunk_s(fpout, &pgStatLocal.snapshot.io);
+
/*
* Write SLRU stats struct
*/
@@ -1496,6 +1516,12 @@ pgstat_read_statsfile(void)
if (!read_chunk_s(fpin, &shmem->checkpointer.stats))
goto error;
+ /*
+ * Read IO stats struct
+ */
+ if (!read_chunk_s(fpin, &shmem->io.stats))
+ goto error;
+
/*
* Read SLRU stats struct
*/
diff --git a/src/backend/utils/activity/pgstat_bgwriter.c b/src/backend/utils/activity/pgstat_bgwriter.c
index 9247f2dda2c..92be384b0d0 100644
--- a/src/backend/utils/activity/pgstat_bgwriter.c
+++ b/src/backend/utils/activity/pgstat_bgwriter.c
@@ -24,7 +24,7 @@ PgStat_BgWriterStats PendingBgWriterStats = {0};
/*
- * Report bgwriter statistics
+ * Report bgwriter and IO statistics
*/
void
pgstat_report_bgwriter(void)
@@ -56,6 +56,11 @@ pgstat_report_bgwriter(void)
* Clear out the statistics buffer, so it can be re-used.
*/
MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats));
+
+ /*
+ * Report IO statistics
+ */
+ pgstat_flush_io(false);
}
/*
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c
index 3e9ab451033..26dec112f6c 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -24,7 +24,7 @@ PgStat_CheckpointerStats PendingCheckpointerStats = {0};
/*
- * Report checkpointer statistics
+ * Report checkpointer and IO statistics
*/
void
pgstat_report_checkpointer(void)
@@ -62,6 +62,11 @@ pgstat_report_checkpointer(void)
* Clear out the statistics buffer, so it can be re-used.
*/
MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats));
+
+ /*
+ * Report IO statistics
+ */
+ pgstat_flush_io(false);
}
/*
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
new file mode 100644
index 00000000000..0e07e0848d3
--- /dev/null
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -0,0 +1,391 @@
+/* -------------------------------------------------------------------------
+ *
+ * pgstat_io.c
+ * Implementation of IO statistics.
+ *
+ * This file contains the implementation of IO statistics. It is kept separate
+ * from pgstat.c to enforce the line between the statistics access / storage
+ * implementation and the details about individual types of statistics.
+ *
+ * Copyright (c) 2021-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/activity/pgstat_io.c
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/pgstat_internal.h"
+
+/* Locally pending IO counts; have_iostats is true while any await flushing. */
+static PgStat_BktypeIO PendingIOStats;
+bool have_iostats = false;
+
+
+/*
+ * Verify that the passed-in PgStat_BktypeIO, which must contain stats from
+ * the BackendType given as the second parameter, holds no counts for any
+ * combination of IOObject, IOContext, and IOOp that is not tracked for that
+ * BackendType.
+ *
+ * Returns false as soon as such a count is found and true otherwise. The
+ * caller is responsible for locking the PgStat_BktypeIO, if needed.
+ */
+bool
+pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
+							 BackendType bktype)
+{
+	bool		tracked = pgstat_tracks_io_bktype(bktype);
+
+	for (IOObject obj = IOOBJECT_FIRST;
+		 obj < IOOBJECT_NUM_TYPES; obj++)
+	{
+		for (IOContext ctx = IOCONTEXT_FIRST;
+			 ctx < IOCONTEXT_NUM_TYPES; ctx++)
+		{
+			/*
+			 * Visit every counter, even where pgstat_tracks_io_object()
+			 * would return false for this (io_object, io_context) pair:
+			 * those counters must still be checked to be zero.
+			 */
+			for (IOOp op = IOOP_FIRST; op < IOOP_NUM_TYPES; op++)
+			{
+				bool		counted = backend_io->data[obj][ctx][op] != 0;
+
+				/* a nonzero counter is only acceptable if it is tracked */
+				if (counted &&
+					!(tracked && pgstat_tracks_io_op(bktype, obj, ctx, op)))
+					return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+/* Count one IO operation in the locally pending IO statistics. */
+void
+pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
+{
+	/* cast so that negative values fail too, should the enum type be signed */
+	Assert((unsigned int) io_object < IOOBJECT_NUM_TYPES);
+	Assert((unsigned int) io_context < IOCONTEXT_NUM_TYPES);
+	Assert((unsigned int) io_op < IOOP_NUM_TYPES);
+	Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
+	PendingIOStats.data[io_object][io_context][io_op]++;
+	have_iostats = true;
+}
+
+/* Return the IO stats in pgStatLocal.snapshot after pgstat_snapshot_fixed(). */
+PgStat_IO *
+pgstat_fetch_stat_io(void)
+{
+ pgstat_snapshot_fixed(PGSTAT_KIND_IO);
+ return &pgStatLocal.snapshot.io;
+}
+
+/*
+ * Flush out locally pending IO statistics
+ *
+ * The pending counters are added to the shared-memory counters of this
+ * backend's BackendType under that BackendType's lock, then zeroed.
+ *
+ * Returns true only if nowait is true and the lock could not be acquired;
+ * nothing is flushed in that case. Returns false otherwise, including when
+ * no stats have been recorded.
+ */
+bool
+pgstat_flush_io(bool nowait)
+{
+	LWLock	   *lock;
+	PgStat_BktypeIO *shared;
+
+	if (!have_iostats)
+		return false;
+
+	lock = &pgStatLocal.shmem->io.locks[MyBackendType];
+	shared = &pgStatLocal.shmem->io.stats.stats[MyBackendType];
+
+	if (nowait)
+	{
+		if (!LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
+			return true;
+	}
+	else
+		LWLockAcquire(lock, LW_EXCLUSIVE);
+
+	for (IOObject obj = IOOBJECT_FIRST; obj < IOOBJECT_NUM_TYPES; obj++)
+	{
+		for (IOContext ctx = IOCONTEXT_FIRST; ctx < IOCONTEXT_NUM_TYPES; ctx++)
+		{
+			for (IOOp op = IOOP_FIRST; op < IOOP_NUM_TYPES; op++)
+				shared->data[obj][ctx][op] += PendingIOStats.data[obj][ctx][op];
+		}
+	}
+
+	Assert(pgstat_bktype_io_stats_valid(shared, MyBackendType));
+
+	LWLockRelease(lock);
+
+	/* everything pending is now in shared memory */
+	memset(&PendingIOStats, 0, sizeof(PendingIOStats));
+	have_iostats = false;
+
+	return false;
+}
+
+/* Map an IOContext to its display name; unrecognized values raise an ERROR. */
+const char *
+pgstat_get_io_context_name(IOContext io_context)
+{
+	switch (io_context)
+	{
+		case IOCONTEXT_NORMAL:
+			return "normal";
+		case IOCONTEXT_VACUUM:
+			return "vacuum";
+		case IOCONTEXT_BULKREAD:
+			return "bulkread";
+		case IOCONTEXT_BULKWRITE:
+			return "bulkwrite";
+	}
+	elog(ERROR, "unrecognized IOContext value: %d", io_context);
+	pg_unreachable();
+}
+
+/* Map an IOObject to its display name; unrecognized values raise an ERROR. */
+const char *
+pgstat_get_io_object_name(IOObject io_object)
+{
+	switch (io_object)
+	{
+		case IOOBJECT_TEMP_RELATION:
+			return "temp relation";
+		case IOOBJECT_RELATION:
+			return "relation";
+	}
+	elog(ERROR, "unrecognized IOObject value: %d", io_object);
+	pg_unreachable();
+}
+
+/* Zero every BackendType's shared IO counters and record the reset time. */
+void
+pgstat_io_reset_all_cb(TimestampTz ts)
+{
+	PgStatShared_IO *shared_io = &pgStatLocal.shmem->io;
+
+	for (int bktype = 0; bktype < BACKEND_NUM_TYPES; bktype++)
+	{
+		LWLock	   *lock = &shared_io->locks[bktype];
+
+		LWLockAcquire(lock, LW_EXCLUSIVE);
+
+		/* the reset timestamp is guarded by the first BackendType's lock */
+		if (bktype == 0)
+			shared_io->stats.stat_reset_timestamp = ts;
+
+		memset(&shared_io->stats.stats[bktype], 0,
+			   sizeof(shared_io->stats.stats[bktype]));
+		LWLockRelease(lock);
+	}
+}
+
+/* Copy the shared IO stats into the snapshot, one BackendType at a time. */
+void
+pgstat_io_snapshot_cb(void)
+{
+	PgStatShared_IO *shared_io = &pgStatLocal.shmem->io;
+	PgStat_IO  *snap = &pgStatLocal.snapshot.io;
+
+	for (int bktype = 0; bktype < BACKEND_NUM_TYPES; bktype++)
+	{
+		LWLock	   *lock = &shared_io->locks[bktype];
+
+		LWLockAcquire(lock, LW_SHARED);
+
+		/* the reset timestamp is guarded by the first BackendType's lock */
+		if (bktype == 0)
+			snap->stat_reset_timestamp =
+				shared_io->stats.stat_reset_timestamp;
+
+		/* struct assignment keeps the copy type-checked */
+		snap->stats[bktype] = shared_io->stats.stats[bktype];
+
+		LWLockRelease(lock);
+	}
+}
+
+/*
+ * Report whether IO statistics are collected for the given BackendType.
+ *
+ * Returns false for BackendTypes that do not participate in the cumulative
+ * stats subsystem or whose IO is not currently tracked:
+ * - Syslogger, because it is not connected to shared memory
+ * - Archiver, because most relevant archiving IO is delegated to a
+ *   specialized command or module
+ * - WAL Receiver and WAL Writer, whose IO is not tracked in pg_stat_io for
+ *   now
+ *
+ * Returns true for all other BackendTypes except B_INVALID.
+ *
+ * When adding a new BackendType, also consider adding relevant restrictions
+ * to pgstat_tracks_io_object() and pgstat_tracks_io_op().
+ */
+bool
+pgstat_tracks_io_bktype(BackendType bktype)
+{
+	/*
+	 * Every BackendType is spelled out, with no default case, so that adding
+	 * a new one triggers a warning about needing to adjust this switch.
+	 */
+	switch (bktype)
+	{
+		case B_AUTOVAC_LAUNCHER:
+		case B_AUTOVAC_WORKER:
+		case B_BACKEND:
+		case B_BG_WORKER:
+		case B_BG_WRITER:
+		case B_CHECKPOINTER:
+		case B_STANDALONE_BACKEND:
+		case B_STARTUP:
+		case B_WAL_SENDER:
+			return true;
+
+		case B_INVALID:
+		case B_ARCHIVER:
+		case B_LOGGER:
+		case B_WAL_RECEIVER:
+		case B_WAL_WRITER:
+			return false;
+	}
+
+	return false;
+}
+
+/*
+ * Report whether the given BackendType is expected to do IO on the given
+ * IOObject in the given IOContext, and whether that IOObject is expected to
+ * be operated on in that IOContext at all.
+ */
+bool
+pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
+						IOContext io_context)
+{
+	/* Some BackendTypes should never track IO statistics. */
+	if (!pgstat_tracks_io_bktype(bktype))
+		return false;
+
+	if (io_object == IOOBJECT_TEMP_RELATION)
+	{
+		/*
+		 * Currently, IO on temporary relations can only occur in the
+		 * IOCONTEXT_NORMAL IOContext.
+		 */
+		if (io_context != IOCONTEXT_NORMAL)
+			return false;
+
+		/*
+		 * In core Postgres, only regular backends and WAL Sender processes
+		 * executing queries use local buffers and operate on temporary
+		 * relations. Parallel workers do not (see InitLocalBuffers()), but
+		 * extensions using background workers may, so B_BG_WORKER is tracked.
+		 */
+		if (bktype == B_AUTOVAC_LAUNCHER || bktype == B_AUTOVAC_WORKER ||
+			bktype == B_BG_WRITER || bktype == B_CHECKPOINTER ||
+			bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP)
+			return false;
+	}
+
+	/*
+	 * Some BackendTypes do not currently perform any IO in certain
+	 * IOContexts, and, while it may not be inherently incorrect for them to
+	 * do so, excluding those rows from the view makes the view easier to use.
+	 */
+	switch (bktype)
+	{
+		case B_BG_WRITER:
+		case B_CHECKPOINTER:
+			if (io_context == IOCONTEXT_BULKREAD ||
+				io_context == IOCONTEXT_BULKWRITE ||
+				io_context == IOCONTEXT_VACUUM)
+				return false;
+			break;
+		case B_AUTOVAC_LAUNCHER:
+			if (io_context == IOCONTEXT_VACUUM ||
+				io_context == IOCONTEXT_BULKWRITE)
+				return false;
+			break;
+		case B_AUTOVAC_WORKER:
+			if (io_context == IOCONTEXT_BULKWRITE)
+				return false;
+			break;
+		default:
+			break;
+	}
+
+	return true;
+}
+
+/*
+ * Report whether the given IOOp is valid for the given BackendType on the
+ * given IOObject in the given IOContext.
+ *
+ * Some BackendTypes will never do certain IOOps, and some IOOps should not
+ * occur in certain IOContexts or on certain IOObjects. Note that there are
+ * currently no cases of an IOOp being invalid for a particular BackendType
+ * only within a certain IOContext and/or only on a certain IOObject.
+ */
+bool
+pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
+					IOContext io_context, IOOp io_op)
+{
+	bool		uses_strategy;
+
+	/* if (io_context, io_object) will never collect stats, we're done */
+	if (!pgstat_tracks_io_object(bktype, io_object, io_context))
+		return false;
+
+	uses_strategy = (io_context == IOCONTEXT_BULKREAD ||
+					 io_context == IOCONTEXT_BULKWRITE ||
+					 io_context == IOCONTEXT_VACUUM);
+
+	switch (io_op)
+	{
+		case IOOP_EVICT:
+		case IOOP_READ:
+			/* never tracked for the bgwriter and the checkpointer */
+			if (bktype == B_BG_WRITER || bktype == B_CHECKPOINTER)
+				return false;
+			break;
+		case IOOP_EXTEND:
+			/* not tracked for these BackendTypes or in IOCONTEXT_BULKREAD */
+			if (bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
+				bktype == B_CHECKPOINTER)
+				return false;
+			if (io_context == IOCONTEXT_BULKREAD)
+				return false;
+			break;
+		case IOOP_REUSE:
+			/* only relevant when a BufferAccessStrategy is in use */
+			if (!uses_strategy)
+				return false;
+			break;
+		case IOOP_FSYNC:
+			/*
+			 * Fsyncs by a backend using a BufferAccessStrategy are counted
+			 * in IOCONTEXT_NORMAL (see register_dirty_segment()); temporary
+			 * tables are not logged and thus do not require fsync'ing.
+			 */
+			if (uses_strategy)
+				return false;
+			if (io_context == IOCONTEXT_NORMAL &&
+				io_object == IOOBJECT_TEMP_RELATION)
+				return false;
+			break;
+		case IOOP_WRITE:
+			break;
+	}
+
+	return true;
+}
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c
index 2e20b93c202..f793ac15165 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -206,7 +206,7 @@ pgstat_drop_relation(Relation rel)
}
/*
- * Report that the table was just vacuumed.
+ * Report that the table was just vacuumed and flush IO statistics.
*/
void
pgstat_report_vacuum(Oid tableoid, bool shared,
@@ -258,10 +258,18 @@ pgstat_report_vacuum(Oid tableoid, bool shared,
}
pgstat_unlock_entry(entry_ref);
+
+ /*
+ * Flush IO statistics now. pgstat_report_stat() will flush IO stats,
+ * however this will not be called until after an entire autovacuum cycle
+ * is done -- which will likely vacuum many relations -- or until the
+ * VACUUM command has processed all tables and committed.
+ */
+ pgstat_flush_io(false);
}
/*
- * Report that the table was just analyzed.
+ * Report that the table was just analyzed and flush IO statistics.
*
* Caller must provide new live- and dead-tuples estimates, as well as a
* flag indicating whether to reset the mod_since_analyze counter.
@@ -341,6 +349,9 @@ pgstat_report_analyze(Relation rel,
}
pgstat_unlock_entry(entry_ref);
+
+ /* see pgstat_report_vacuum() */
+ pgstat_flush_io(false);
}
/*
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index c1506b53d08..09fffd0e82a 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -202,6 +202,10 @@ StatsShmemInit(void)
LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA);
LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA);
LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA);
+
+ for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+ LWLockInitialize(&ctl->io.locks[i],
+ LWTRANCHE_PGSTATS_DATA);
}
else
{
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
index e7a82b5feda..e8598b2f4e0 100644
--- a/src/backend/utils/activity/pgstat_wal.c
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -34,7 +34,7 @@ static WalUsage prevWalUsage;
/*
* Calculate how much WAL usage counters have increased and update
- * shared statistics.
+ * shared WAL and IO statistics.
*
* Must be called by processes that generate WAL, that do not call
* pgstat_report_stat(), like walwriter.
@@ -43,6 +43,8 @@ void
pgstat_report_wal(bool force)
{
pgstat_flush_wal(force);
+	/* takes "nowait", i.e. !force; NOTE(review): so does pgstat_flush_wal() above */
+	pgstat_flush_io(!force);
}
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 67374934022..924698e6ae4 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1587,7 +1587,12 @@ pg_stat_reset(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
-/* Reset some shared cluster-wide counters */
+/*
+ * Reset some shared cluster-wide counters
+ *
+ * When adding a new reset target, ideally the name should match that in
+ * pgstat_kind_infos, if relevant.
+ */
Datum
pg_stat_reset_shared(PG_FUNCTION_ARGS)
{
@@ -1604,6 +1609,8 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS)
pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER);
pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER);
}
+ else if (strcmp(target, "io") == 0)
+ pgstat_reset_of_kind(PGSTAT_KIND_IO);
else if (strcmp(target, "recovery_prefetch") == 0)
XLogPrefetchResetStats();
else if (strcmp(target, "wal") == 0)
@@ -1612,7 +1619,7 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized reset target: \"%s\"", target),
- errhint("Target must be \"archiver\", \"bgwriter\", \"recovery_prefetch\", or \"wal\".")));
+ errhint("Target must be \"archiver\", \"bgwriter\", \"io\", \"recovery_prefetch\", or \"wal\".")));
PG_RETURN_VOID();
}
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 96b3a1e1a07..c309e0233d6 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -332,6 +332,8 @@ typedef enum BackendType
B_WAL_WRITER,
} BackendType;
+#define BACKEND_NUM_TYPES (B_WAL_WRITER + 1) /* B_WAL_WRITER is the last BackendType */
+
extern PGDLLIMPORT BackendType MyBackendType;
extern const char *GetBackendTypeDesc(BackendType backendType);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5e3326a3b91..db9675884f3 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -48,6 +48,7 @@ typedef enum PgStat_Kind
PGSTAT_KIND_ARCHIVER,
PGSTAT_KIND_BGWRITER,
PGSTAT_KIND_CHECKPOINTER,
+ PGSTAT_KIND_IO,
PGSTAT_KIND_SLRU,
PGSTAT_KIND_WAL,
} PgStat_Kind;
@@ -242,7 +243,7 @@ typedef struct PgStat_TableXactStatus
* ------------------------------------------------------------
*/
-#define PGSTAT_FILE_FORMAT_ID 0x01A5BCA9
+#define PGSTAT_FILE_FORMAT_ID 0x01A5BCAA
typedef struct PgStat_ArchiverStats
{
@@ -276,6 +277,55 @@ typedef struct PgStat_CheckpointerStats
PgStat_Counter buf_fsync_backend;
} PgStat_CheckpointerStats;
+
+/*
+ * Types related to counting IO operations
+ */
+typedef enum IOObject
+{
+ IOOBJECT_RELATION,
+ IOOBJECT_TEMP_RELATION,
+} IOObject;
+
+#define IOOBJECT_FIRST IOOBJECT_RELATION /* first value, for loops over IOObject */
+#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1) /* last value + 1 */
+
+typedef enum IOContext
+{
+ IOCONTEXT_BULKREAD,
+ IOCONTEXT_BULKWRITE,
+ IOCONTEXT_NORMAL,
+ IOCONTEXT_VACUUM,
+} IOContext;
+
+#define IOCONTEXT_FIRST IOCONTEXT_BULKREAD /* first value, for loops over IOContext */
+#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) /* last value + 1 */
+
+typedef enum IOOp
+{
+ IOOP_EVICT,
+ IOOP_EXTEND,
+ IOOP_FSYNC,
+ IOOP_READ,
+ IOOP_REUSE,
+ IOOP_WRITE,
+} IOOp;
+
+#define IOOP_FIRST IOOP_EVICT /* first value, for loops over IOOp */
+#define IOOP_NUM_TYPES (IOOP_WRITE + 1) /* last value + 1 */
+
+typedef struct PgStat_BktypeIO
+{
+ PgStat_Counter data[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
+} PgStat_BktypeIO; /* counters indexed by [IOObject][IOContext][IOOp] */
+
+typedef struct PgStat_IO
+{
+ TimestampTz stat_reset_timestamp; /* set by pgstat_io_reset_all_cb() */
+ PgStat_BktypeIO stats[BACKEND_NUM_TYPES]; /* indexed by BackendType */
+} PgStat_IO;
+
+
typedef struct PgStat_StatDBEntry
{
PgStat_Counter xact_commit;
@@ -453,6 +503,24 @@ extern void pgstat_report_checkpointer(void);
extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
+/*
+ * Functions in pgstat_io.c
+ */
+
+extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
+										 BackendType bktype);
+extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
+extern PgStat_IO *pgstat_fetch_stat_io(void);
+extern const char *pgstat_get_io_context_name(IOContext io_context);
+extern const char *pgstat_get_io_object_name(IOObject io_object);
+
+extern bool pgstat_tracks_io_bktype(BackendType bktype);
+extern bool pgstat_tracks_io_object(BackendType bktype,
+ IOObject io_object, IOContext io_context);
+extern bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
+ IOContext io_context, IOOp io_op);
+
+
/*
* Functions in pgstat_database.c
*/
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 12fd51f1ae3..6badb2fde4b 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -329,6 +329,17 @@ typedef struct PgStatShared_Checkpointer
PgStat_CheckpointerStats reset_offset;
} PgStatShared_Checkpointer;
+/* Shared-memory form of PgStat_IO: the stats plus one LWLock per BackendType */
+typedef struct PgStatShared_IO
+{
+ /*
+ * locks[i] protects stats.stats[i]. locks[0] also protects
+ * stats.stat_reset_timestamp.
+ */
+ LWLock locks[BACKEND_NUM_TYPES];
+ PgStat_IO stats;
+} PgStatShared_IO;
+
typedef struct PgStatShared_SLRU
{
/* lock protects ->stats */
@@ -419,6 +430,7 @@ typedef struct PgStat_ShmemControl
PgStatShared_Archiver archiver;
PgStatShared_BgWriter bgwriter;
PgStatShared_Checkpointer checkpointer;
+ PgStatShared_IO io;
PgStatShared_SLRU slru;
PgStatShared_Wal wal;
} PgStat_ShmemControl;
@@ -442,6 +454,8 @@ typedef struct PgStat_Snapshot
PgStat_CheckpointerStats checkpointer;
+ PgStat_IO io;
+
PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS];
PgStat_WalStats wal;
@@ -549,6 +563,15 @@ extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, Time
extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+/*
+ * Functions in pgstat_io.c
+ */
+
+extern bool pgstat_flush_io(bool nowait);
+extern void pgstat_io_reset_all_cb(TimestampTz ts);
+extern void pgstat_io_snapshot_cb(void);
+
+
/*
* Functions in pgstat_relation.c
*/
@@ -643,6 +666,13 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid)
extern PGDLLIMPORT PgStat_LocalState pgStatLocal;
+/*
+ * Variables in pgstat_io.c
+ */
+
+extern PGDLLIMPORT bool have_iostats;
+
+
/*
* Variables in pgstat_slru.c
*/
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d3224dfc36e..36d1dc01177 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1108,7 +1108,10 @@ ID
INFIX
INT128
INTERFACE_INFO
+IOContext
IOFuncSelector
+IOObject
+IOOp
IPCompareMethod
ITEM
IV
@@ -2017,6 +2020,7 @@ PgStatShared_Common
PgStatShared_Database
PgStatShared_Function
PgStatShared_HashEntry
+PgStatShared_IO
PgStatShared_Relation
PgStatShared_ReplSlot
PgStatShared_SLRU
@@ -2026,6 +2030,7 @@ PgStat_ArchiverStats
PgStat_BackendFunctionEntry
PgStat_BackendSubEntry
PgStat_BgWriterStats
+PgStat_BktypeIO
PgStat_CheckpointerStats
PgStat_Counter
PgStat_EntryRef
@@ -2034,6 +2039,7 @@ PgStat_FetchConsistency
PgStat_FunctionCallUsage
PgStat_FunctionCounts
PgStat_HashKey
+PgStat_IO
PgStat_Kind
PgStat_KindInfo
PgStat_LocalState