diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index b246ddc6341..dca50707ad4 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -469,6 +469,16 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
+
+ pg_stat_iopg_stat_io
+
+ One row for each combination of backend type, context, and target object
+ containing cluster-wide I/O statistics.
+ See
+ pg_stat_io for details.
+
+
+
pg_stat_replication_slotspg_stat_replication_slotsOne row per replication slot, showing statistics about the
@@ -665,20 +675,16 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
- The pg_statio_ views are primarily useful to
- determine the effectiveness of the buffer cache. When the number
- of actual disk reads is much smaller than the number of buffer
- hits, then the cache is satisfying most read requests without
- invoking a kernel call. However, these statistics do not give the
- entire story: due to the way in which PostgreSQL
- handles disk I/O, data that is not in the
- PostgreSQL buffer cache might still reside in the
- kernel's I/O cache, and might therefore still be fetched without
- requiring a physical read. Users interested in obtaining more
- detailed information on PostgreSQL I/O behavior are
- advised to use the PostgreSQL statistics views
- in combination with operating system utilities that allow insight
- into the kernel's handling of I/O.
+ The pg_stat_io and
+ pg_statio_ set of views are useful for determining
+ the effectiveness of the buffer cache. They can be used to calculate a cache
+ hit ratio. Note that while PostgreSQL's I/O
+ statistics capture most instances in which the kernel was invoked in order
+ to perform I/O, they do not differentiate between data which had to be
+ fetched from disk and that which already resided in the kernel page cache.
+ Users are advised to use the PostgreSQL
+ statistics views in combination with operating system utilities for a more
+ complete picture of their database's I/O performance.
@@ -3669,6 +3675,293 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
last_archived_wal have also been successfully
archived.
+
+
+
+ pg_stat_io
+
+
+ pg_stat_io
+
+
+
+ The pg_stat_io view will contain one row for each
+ combination of backend type, target I/O object, and I/O context, showing
+ cluster-wide I/O statistics. Combinations which do not make sense are
+ omitted.
+
+
+
+ Currently, I/O on relations (e.g. tables, indexes) is tracked. However,
+ relation I/O which bypasses shared buffers (e.g. when moving a table from one
+ tablespace to another) is currently not tracked.
+
+
+
+ pg_stat_io View
+
+
+
+
+
+ Column Type
+
+
+ Description
+
+
+
+
+
+
+
+
+ backend_typetext
+
+
+ Type of backend (e.g. background worker, autovacuum worker). See
+ pg_stat_activity for more information
+ on backend_types. Some
+ backend_types do not accumulate I/O operation
+ statistics and will not be included in the view.
+
+
+
+
+
+
+
+ io_objecttext
+
+
+ Target object of an I/O operation. Possible values are:
+
+
+
+ relation: Permanent relations.
+
+
+
+
+ temp relation: Temporary relations.
+
+
+
+
+
+
+
+
+
+
+ io_contexttext
+
+
+ The context of an I/O operation. Possible values are:
+
+
+
+
+ normal: The default or standard
+ io_context for a type of I/O operation. For
+ example, by default, relation data is read into and written out from
+ shared buffers. Thus, reads and writes of relation data to and from
+ shared buffers are tracked in io_context
+ normal.
+
+
+
+
+ vacuum: I/O operations performed outside of shared
+ buffers while vacuuming and analyzing permanent relations. Temporary
+ table vacuums use the same local buffer pool as other temporary table
+ IO operations and are tracked in io_context
+ normal.
+
+
+
+
+ bulkread: Certain large read I/O operations
+ done outside of shared buffers, for example, a sequential scan of a
+ large table.
+
+
+
+
+ bulkwrite: Certain large write I/O operations
+ done outside of shared buffers, such as COPY.
+
+
+
+
+
+
+
+
+
+ readsbigint
+
+
+ Number of read operations, each of the size specified in
+ op_bytes.
+
+
+
+
+
+
+
+ writesbigint
+
+
+ Number of write operations, each of the size specified in
+ op_bytes.
+
+
+
+
+
+
+
+ extendsbigint
+
+
+ Number of relation extend operations, each of the size specified in
+ op_bytes.
+
+
+
+
+
+
+
+ op_bytesbigint
+
+
+ The number of bytes per unit of I/O read, written, or extended.
+
+
+ Relation data reads, writes, and extends are done in
+ block_size units, derived from the build-time
+ parameter BLCKSZ, which is 8192 by
+ default.
+
+
+
+
+
+
+
+ evictionsbigint
+
+
+ Number of times a block has been written out from a shared or local
+ buffer in order to make it available for another use.
+
+
+ In io_contextnormal, this counts
+ the number of times a block was evicted from a buffer and replaced with
+ another block. In io_contexts
+ bulkwrite, bulkread, and
+ vacuum, this counts the number of times a block was
+ evicted from shared buffers in order to add the shared buffer to a
+ separate, size-limited ring buffer for use in a bulk I/O operation.
+
+
+
+
+
+
+
+ reusesbigint
+
+
+ The number of times an existing buffer in a size-limited ring buffer
+ outside of shared buffers was reused as part of an I/O operation in the
+ bulkread, bulkwrite, or
+ vacuumio_contexts.
+
+
+
+
+
+
+
+ fsyncsbigint
+
+
+ Number of fsync calls. These are only tracked in
+ io_contextnormal.
+
+
+
+
+
+
+
+ stats_resettimestamp with time zone
+
+
+ Time at which these statistics were last reset.
+
+
+
+
+
+
+
+
+ Some backend types never perform I/O operations on some I/O objects and/or
+ in some I/O contexts. These rows are omitted from the view. For example, the
+ checkpointer does not checkpoint temporary tables, so there will be no rows
+ for backend_typecheckpointer and
+ io_objecttemp relation.
+
+
+
+ In addition, some I/O operations will never be performed either by certain
+ backend types or on certain I/O objects and/or in certain I/O contexts.
+ These cells will be NULL. For example, temporary tables are not
+ fsynced, so fsyncs will be NULL for
+ io_objecttemp relation. Also, the
+ background writer does not perform reads, so reads will
+ be NULL in rows for backend_typebackground
+ writer.
+
+
+
+ pg_stat_io can be used to inform database tuning.
+ For example:
+
+
+
+ A high evictions count can indicate that shared
+ buffers should be increased.
+
+
+
+
+ Client backends rely on the checkpointer to ensure data is persisted to
+ permanent storage. Large numbers of fsyncs by
+ client backends could indicate a misconfiguration of
+ shared buffers or of the checkpointer. More information on configuring
+ the checkpointer can be found in .
+
+
+
+
+ Normally, client backends should be able to rely on auxiliary processes
+ like the checkpointer and the background writer to write out dirty data
+ as much as possible. Large numbers of writes by client backends could
+ indicate a misconfiguration of shared buffers or of the checkpointer.
+ More information on configuring the checkpointer can be found in .
+
+
+
+
+
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 8608e3fa5b1..34ca0e739f5 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1117,6 +1117,21 @@ CREATE VIEW pg_stat_bgwriter AS
pg_stat_get_buf_alloc() AS buffers_alloc,
pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
+CREATE VIEW pg_stat_io AS
+SELECT
+ b.backend_type,
+ b.io_object,
+ b.io_context,
+ b.reads,
+ b.writes,
+ b.extends,
+ b.op_bytes,
+ b.evictions,
+ b.reuses,
+ b.fsyncs,
+ b.stats_reset
+FROM pg_stat_get_io() b;
+
CREATE VIEW pg_stat_wal AS
SELECT
w.wal_records,
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 924698e6ae4..9d707c35216 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1245,6 +1245,147 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc);
}
+/*
+* When adding a new column to the pg_stat_io view, add a new enum value
+* here above IO_NUM_COLUMNS.
+*/
+typedef enum io_stat_col
+{
+ IO_COL_BACKEND_TYPE,
+ IO_COL_IO_OBJECT,
+ IO_COL_IO_CONTEXT,
+ IO_COL_READS,
+ IO_COL_WRITES,
+ IO_COL_EXTENDS,
+ IO_COL_CONVERSION,
+ IO_COL_EVICTIONS,
+ IO_COL_REUSES,
+ IO_COL_FSYNCS,
+ IO_COL_RESET_TIME,
+ IO_NUM_COLUMNS,
+} io_stat_col;
+
+/*
+ * When adding a new IOOp, add a new io_stat_col and add a case to this
+ * function returning the corresponding io_stat_col.
+ */
+static io_stat_col
+pgstat_get_io_op_index(IOOp io_op)
+{
+ switch (io_op)
+ {
+ case IOOP_EVICT:
+ return IO_COL_EVICTIONS;
+ case IOOP_READ:
+ return IO_COL_READS;
+ case IOOP_REUSE:
+ return IO_COL_REUSES;
+ case IOOP_WRITE:
+ return IO_COL_WRITES;
+ case IOOP_EXTEND:
+ return IO_COL_EXTENDS;
+ case IOOP_FSYNC:
+ return IO_COL_FSYNCS;
+ }
+
+ elog(ERROR, "unrecognized IOOp value: %d", io_op);
+ pg_unreachable();
+}
+
+Datum
+pg_stat_get_io(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo;
+ PgStat_IO *backends_io_stats;
+ Datum reset_time;
+
+ InitMaterializedSRF(fcinfo, 0);
+ rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ backends_io_stats = pgstat_fetch_stat_io();
+
+ reset_time = TimestampTzGetDatum(backends_io_stats->stat_reset_timestamp);
+
+ for (BackendType bktype = B_INVALID; bktype < BACKEND_NUM_TYPES; bktype++)
+ {
+ Datum bktype_desc = CStringGetTextDatum(GetBackendTypeDesc(bktype));
+ PgStat_BktypeIO *bktype_stats = &backends_io_stats->stats[bktype];
+
+ /*
+ * In Assert builds, we can afford an extra loop through all of the
+ * counters checking that only expected stats are non-zero, since it
+ * keeps the non-Assert code cleaner.
+ */
+ Assert(pgstat_bktype_io_stats_valid(bktype_stats, bktype));
+
+ /*
+ * For those BackendTypes without IO Operation stats, skip
+ * representing them in the view altogether.
+ */
+ if (!pgstat_tracks_io_bktype(bktype))
+ continue;
+
+ for (IOObject io_obj = IOOBJECT_FIRST;
+ io_obj < IOOBJECT_NUM_TYPES; io_obj++)
+ {
+ const char *obj_name = pgstat_get_io_object_name(io_obj);
+
+ for (IOContext io_context = IOCONTEXT_FIRST;
+ io_context < IOCONTEXT_NUM_TYPES; io_context++)
+ {
+ const char *context_name = pgstat_get_io_context_name(io_context);
+
+ Datum values[IO_NUM_COLUMNS] = {0};
+ bool nulls[IO_NUM_COLUMNS] = {0};
+
+ /*
+ * Some combinations of BackendType, IOObject, and IOContext
+ * are not valid for any type of IOOp. In such cases, omit the
+ * entire row from the view.
+ */
+ if (!pgstat_tracks_io_object(bktype, io_obj, io_context))
+ continue;
+
+ values[IO_COL_BACKEND_TYPE] = bktype_desc;
+ values[IO_COL_IO_CONTEXT] = CStringGetTextDatum(context_name);
+ values[IO_COL_IO_OBJECT] = CStringGetTextDatum(obj_name);
+ values[IO_COL_RESET_TIME] = TimestampTzGetDatum(reset_time);
+
+ /*
+ * Hard-code this to the value of BLCKSZ for now. Future
+ * values could include XLOG_BLCKSZ, once WAL IO is tracked,
+ * and constant multipliers, once non-block-oriented IO (e.g.
+ * temporary file IO) is tracked.
+ */
+ values[IO_COL_CONVERSION] = Int64GetDatum(BLCKSZ);
+
+ for (IOOp io_op = IOOP_FIRST; io_op < IOOP_NUM_TYPES; io_op++)
+ {
+ int col_idx = pgstat_get_io_op_index(io_op);
+
+ /*
+ * Some combinations of BackendType and IOOp, of IOContext
+ * and IOOp, and of IOObject and IOOp are not tracked. Set
+ * these cells in the view NULL.
+ */
+ nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op);
+
+ if (nulls[col_idx])
+ continue;
+
+ values[col_idx] =
+ Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]);
+ }
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+ }
+ }
+
+ return (Datum) 0;
+}
+
/*
* Returns statistics of WAL activity
*/
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index c1ce0b76e14..9c298cb1253 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202301301
+#define CATALOG_VERSION_NO 202302111
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index c0f2a8a77c8..66b73c3900d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5717,6 +5717,15 @@
proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
+{ oid => '8459', descr => 'statistics: per backend type IO statistics',
+ proname => 'pg_stat_get_io', provolatile => 'v',
+ prorows => '30', proretset => 't',
+ proparallel => 'r', prorettype => 'record', proargtypes => '',
+ proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
+ proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
+ proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
+ prosrc => 'pg_stat_get_io' },
+
{ oid => '1136', descr => 'statistics: information about WAL activity',
proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's',
proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index e7a2f5856aa..174b725fff1 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1876,6 +1876,18 @@ pg_stat_gssapi| SELECT pid,
gss_enc AS encrypted
FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, query_id)
WHERE (client_port IS NOT NULL);
+pg_stat_io| SELECT backend_type,
+ io_object,
+ io_context,
+ reads,
+ writes,
+ extends,
+ op_bytes,
+ evictions,
+ reuses,
+ fsyncs,
+ stats_reset
+ FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
pg_stat_progress_analyze| SELECT s.pid,
s.datid,
d.datname,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 36d1dc01177..45fc5759ced 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3378,6 +3378,7 @@ intset_internal_node
intset_leaf_node
intset_node
intvKEY
+io_stat_col
itemIdCompact
itemIdCompactData
iterator