From 77947c51c08179b8bc12347a7fbcb2c8d7908302 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 27 May 2007 03:50:39 +0000 Subject: [PATCH] Fix up pgstats counting of live and dead tuples to recognize that committed and aborted transactions have different effects; also teach it not to assume that prepared transactions are always committed. Along the way, simplify the pgstats API by tying counting directly to Relations; I cannot detect any redeeming social value in having stats pointers in HeapScanDesc and IndexScanDesc structures. And fix a few corner cases in which counts might be missed because the relation's pgstat_info pointer hadn't been set. --- src/backend/access/gin/ginscan.c | 4 +- src/backend/access/gist/gistget.c | 4 +- src/backend/access/hash/hashsearch.c | 4 +- src/backend/access/heap/heapam.c | 40 +- src/backend/access/index/genam.c | 4 +- src/backend/access/index/indexam.c | 12 +- src/backend/access/nbtree/nbtsearch.c | 4 +- src/backend/access/transam/twophase.c | 5 +- src/backend/access/transam/twophase_rmgr.c | 12 +- src/backend/access/transam/xact.c | 15 +- src/backend/executor/nodeBitmapHeapscan.c | 8 +- src/backend/postmaster/bgwriter.c | 14 +- src/backend/postmaster/pgstat.c | 836 +++++++++++++++------ src/backend/storage/buffer/bufmgr.c | 12 +- src/backend/utils/cache/relcache.c | 4 +- src/include/access/heapam.h | 6 +- src/include/access/relscan.h | 6 +- src/include/access/twophase_rmgr.h | 5 +- src/include/pgstat.h | 206 +++-- src/include/utils/rel.h | 15 +- 20 files changed, 802 insertions(+), 414 deletions(-) diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 22896bc5d77..2eb1ba95b4b 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.9 2007/01/31 15:09:45 teodor Exp $ + * $PostgreSQL: 
pgsql/src/backend/access/gin/ginscan.c,v 1.10 2007/05/27 03:50:38 tgl Exp $ *------------------------------------------------------------------------- */ @@ -189,7 +189,7 @@ newScanKey(IndexScanDesc scan) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("GIN index does not support search with void query"))); - pgstat_count_index_scan(&scan->xs_pgstat_info); + pgstat_count_index_scan(scan->indexRelation); } Datum diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index 226812322aa..ed839de4034 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.65 2007/04/06 22:33:41 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.66 2007/05/27 03:50:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -165,7 +165,7 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, stk->next = NULL; stk->block = GIST_ROOT_BLKNO; - pgstat_count_index_scan(&scan->xs_pgstat_info); + pgstat_count_index_scan(scan->indexRelation); } else if (so->curbuf == InvalidBuffer) { diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 5de0f402297..104a0c14de3 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.49 2007/05/03 16:45:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.50 2007/05/27 03:50:38 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -127,7 +127,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) ItemPointer current; OffsetNumber offnum; - pgstat_count_index_scan(&scan->xs_pgstat_info); + pgstat_count_index_scan(rel); current = 
&(so->hashso_curpos); ItemPointerSetInvalid(current); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index ee2be7cfdb1..9edeaff1306 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.232 2007/04/08 01:26:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $ * * * INTERFACE ROUTINES @@ -100,7 +100,7 @@ initscan(HeapScanDesc scan, ScanKey key) if (key != NULL) memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData)); - pgstat_count_heap_scan(&scan->rs_pgstat_info); + pgstat_count_heap_scan(scan->rs_rd); } /* @@ -701,6 +701,8 @@ relation_open(Oid relationId, LOCKMODE lockmode) if (!RelationIsValid(r)) elog(ERROR, "could not open relation with OID %u", relationId); + pgstat_initstats(r); + return r; } @@ -743,6 +745,8 @@ try_relation_open(Oid relationId, LOCKMODE lockmode) if (!RelationIsValid(r)) elog(ERROR, "could not open relation with OID %u", relationId); + pgstat_initstats(r); + return r; } @@ -787,6 +791,8 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode) if (!RelationIsValid(r)) elog(ERROR, "could not open relation with OID %u", relationId); + pgstat_initstats(r); + return r; } @@ -873,8 +879,6 @@ heap_open(Oid relationId, LOCKMODE lockmode) errmsg("\"%s\" is a composite type", RelationGetRelationName(r)))); - pgstat_initstats(&r->pgstat_info, r); - return r; } @@ -903,8 +907,6 @@ heap_openrv(const RangeVar *relation, LOCKMODE lockmode) errmsg("\"%s\" is a composite type", RelationGetRelationName(r)))); - pgstat_initstats(&r->pgstat_info, r); - return r; } @@ -954,8 +956,6 @@ heap_beginscan(Relation relation, Snapshot snapshot, else scan->rs_key = NULL; - pgstat_initstats(&scan->rs_pgstat_info, relation); - initscan(scan, key); return scan; @@ -1059,7 +1059,7 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) */ 
HEAPDEBUG_3; /* heap_getnext returning tuple */ - pgstat_count_heap_getnext(&scan->rs_pgstat_info); + pgstat_count_heap_getnext(scan->rs_rd); return &(scan->rs_ctup); } @@ -1086,6 +1086,10 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) * and return it in *userbuf (so the caller must eventually unpin it); when * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer. * + * stats_relation is the relation to charge the heap_fetch operation against + * for statistical purposes. (This could be the heap rel itself, an + * associated index, or NULL to not count the fetch at all.) + * * It is somewhat inconsistent that we ereport() on invalid block number but * return false on invalid item number. There are a couple of reasons though. * One is that the caller can relatively easily check the block number for @@ -1101,12 +1105,12 @@ heap_fetch(Relation relation, HeapTuple tuple, Buffer *userbuf, bool keep_buf, - PgStat_Info *pgstat_info) + Relation stats_relation) { /* Assume *userbuf is undefined on entry */ *userbuf = InvalidBuffer; return heap_release_fetch(relation, snapshot, tuple, - userbuf, keep_buf, pgstat_info); + userbuf, keep_buf, stats_relation); } /* @@ -1125,7 +1129,7 @@ heap_release_fetch(Relation relation, HeapTuple tuple, Buffer *userbuf, bool keep_buf, - PgStat_Info *pgstat_info) + Relation stats_relation) { ItemPointer tid = &(tuple->t_self); ItemId lp; @@ -1210,9 +1214,9 @@ heap_release_fetch(Relation relation, */ *userbuf = buffer; - /* Count the successful fetch in *pgstat_info, if given. 
*/ - if (pgstat_info != NULL) - pgstat_count_heap_fetch(pgstat_info); + /* Count the successful fetch against appropriate rel, if any */ + if (stats_relation != NULL) + pgstat_count_heap_fetch(stats_relation); return true; } @@ -1517,7 +1521,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CacheInvalidateHeapTuple(relation, heaptup); - pgstat_count_heap_insert(&relation->pgstat_info); + pgstat_count_heap_insert(relation); /* * If heaptup is a private copy, release it. Don't forget to copy t_self @@ -1807,7 +1811,7 @@ heap_delete(Relation relation, ItemPointer tid, if (have_tuple_lock) UnlockTuple(relation, &(tp.t_self), ExclusiveLock); - pgstat_count_heap_delete(&relation->pgstat_info); + pgstat_count_heap_delete(relation); return HeapTupleMayBeUpdated; } @@ -2269,7 +2273,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, if (have_tuple_lock) UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock); - pgstat_count_heap_update(&relation->pgstat_info); + pgstat_count_heap_update(relation); /* * If heaptup is a private copy, release it. Don't forget to copy t_self diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 49ffff6e51d..0009739180c 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.61 2007/01/20 18:43:35 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.62 2007/05/27 03:50:38 tgl Exp $ * * NOTES * many of the old access method routines have been turned into @@ -96,8 +96,6 @@ RelationGetIndexScan(Relation indexRelation, scan->xs_ctup.t_data = NULL; scan->xs_cbuf = InvalidBuffer; - pgstat_initstats(&scan->xs_pgstat_info, indexRelation); - /* * Let the AM fill in the key and any opaque data it wants. 
*/ diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 23522ba740e..d905013a5fc 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.97 2007/01/05 22:19:23 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.98 2007/05/27 03:50:38 tgl Exp $ * * INTERFACE ROUTINES * index_open - open an index relation by relation OID @@ -145,8 +145,6 @@ index_open(Oid relationId, LOCKMODE lockmode) errmsg("\"%s\" is not an index", RelationGetRelationName(r)))); - pgstat_initstats(&r->pgstat_info, r); - return r; } @@ -433,14 +431,14 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) return NULL; /* failure exit */ } - pgstat_count_index_tuples(&scan->xs_pgstat_info, 1); + pgstat_count_index_tuples(scan->indexRelation, 1); /* * Fetch the heap tuple and see if it matches the snapshot. */ if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot, heapTuple, &scan->xs_cbuf, true, - &scan->xs_pgstat_info)) + scan->indexRelation)) break; /* Skip if no undeleted tuple at this location */ @@ -502,7 +500,7 @@ index_getnext_indexitem(IndexScanDesc scan, Int32GetDatum(direction))); if (found) - pgstat_count_index_tuples(&scan->xs_pgstat_info, 1); + pgstat_count_index_tuples(scan->indexRelation, 1); return found; } @@ -543,7 +541,7 @@ index_getmulti(IndexScanDesc scan, Int32GetDatum(max_tids), PointerGetDatum(returned_tids))); - pgstat_count_index_tuples(&scan->xs_pgstat_info, *returned_tids); + pgstat_count_index_tuples(scan->indexRelation, *returned_tids); return found; } diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 036a97a8d04..b947d770aa2 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * 
IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.112 2007/04/06 22:33:42 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.113 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -453,7 +453,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) int i; StrategyNumber strat_total; - pgstat_count_index_scan(&scan->xs_pgstat_info); + pgstat_count_index_scan(rel); /* * Examine the scan keys and eliminate any redundant keys; also mark the diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 6f495a84087..7fdf5a7eed3 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.30 2007/04/30 21:01:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.31 2007/05/27 03:50:39 tgl Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -1211,7 +1211,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) else ProcessRecords(bufptr, xid, twophase_postabort_callbacks); - pgstat_count_xact_commit(); + /* Count the prepared xact as committed or aborted */ + AtEOXact_PgStat(isCommit); /* * And now we can clean up our mess. 
diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c index e93bac7b2d8..9c2f14a1a38 100644 --- a/src/backend/access/transam/twophase_rmgr.c +++ b/src/backend/access/transam/twophase_rmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.4 2007/01/05 22:19:23 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.5 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "access/twophase_rmgr.h" #include "commands/async.h" +#include "pgstat.h" #include "storage/lock.h" #include "utils/flatfiles.h" #include "utils/inval.h" @@ -27,7 +28,8 @@ const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] = lock_twophase_recover, /* Lock */ NULL, /* Inval */ NULL, /* flat file update */ - NULL /* notify/listen */ + NULL, /* notify/listen */ + NULL /* pgstat */ }; const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = @@ -36,7 +38,8 @@ const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = lock_twophase_postcommit, /* Lock */ inval_twophase_postcommit, /* Inval */ flatfile_twophase_postcommit, /* flat file update */ - notify_twophase_postcommit /* notify/listen */ + notify_twophase_postcommit, /* notify/listen */ + pgstat_twophase_postcommit /* pgstat */ }; const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = @@ -45,5 +48,6 @@ const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = lock_twophase_postabort, /* Lock */ NULL, /* Inval */ NULL, /* flat file update */ - NULL /* notify/listen */ + NULL, /* notify/listen */ + pgstat_twophase_postabort /* pgstat */ }; diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index f2685ee0b34..c16b4fa6be9 100644 --- a/src/backend/access/transam/xact.c +++ 
b/src/backend/access/transam/xact.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.242 2007/04/30 21:01:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.243 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1661,8 +1661,7 @@ CommitTransaction(void) AtEOXact_Files(); AtEOXact_ComboCid(); AtEOXact_HashTables(true); - pgstat_clear_snapshot(); - pgstat_count_xact_commit(); + AtEOXact_PgStat(true); pgstat_report_txn_timestamp(0); CurrentResourceOwner = NULL; @@ -1796,6 +1795,7 @@ PrepareTransaction(void) AtPrepare_UpdateFlatFiles(); AtPrepare_Inval(); AtPrepare_Locks(); + AtPrepare_PgStat(); /* * Here is where we really truly prepare. @@ -1853,6 +1853,8 @@ PrepareTransaction(void) /* notify and flatfiles don't need a postprepare call */ + PostPrepare_PgStat(); + PostPrepare_Inval(); PostPrepare_smgr(); @@ -1880,7 +1882,7 @@ PrepareTransaction(void) AtEOXact_Files(); AtEOXact_ComboCid(); AtEOXact_HashTables(true); - pgstat_clear_snapshot(); + /* don't call AtEOXact_PgStat here */ CurrentResourceOwner = NULL; ResourceOwnerDelete(TopTransactionResourceOwner); @@ -2035,8 +2037,7 @@ AbortTransaction(void) AtEOXact_Files(); AtEOXact_ComboCid(); AtEOXact_HashTables(false); - pgstat_clear_snapshot(); - pgstat_count_xact_rollback(); + AtEOXact_PgStat(false); pgstat_report_txn_timestamp(0); /* @@ -3749,6 +3750,7 @@ CommitSubTransaction(void) AtEOSubXact_Files(true, s->subTransactionId, s->parent->subTransactionId); AtEOSubXact_HashTables(true, s->nestingLevel); + AtEOSubXact_PgStat(true, s->nestingLevel); /* * We need to restore the upper transaction's read-only state, in case the @@ -3861,6 +3863,7 @@ AbortSubTransaction(void) AtEOSubXact_Files(false, s->subTransactionId, s->parent->subTransactionId); AtEOSubXact_HashTables(false, s->nestingLevel); + AtEOSubXact_PgStat(false, s->nestingLevel); } /* diff --git 
a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 3e9a91de2f5..07729da2be6 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -21,7 +21,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.16 2007/01/05 22:19:28 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.17 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -189,7 +189,7 @@ BitmapHeapNext(BitmapHeapScanState *node) scan->rs_ctup.t_len = ItemIdGetLength(lp); ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); - pgstat_count_heap_fetch(&scan->rs_pgstat_info); + pgstat_count_heap_fetch(scan->rs_rd); /* * Set up the result slot to point to this tuple. Note that the slot @@ -389,7 +389,7 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt) heap_rescan(node->ss.ss_currentScanDesc, NULL); /* undo bogus "seq scan" count (see notes in ExecInitBitmapHeapScan) */ - pgstat_discount_heap_scan(&node->ss.ss_currentScanDesc->rs_pgstat_info); + pgstat_discount_heap_scan(node->ss.ss_currentScanDesc->rs_rd); if (node->tbm) tbm_free(node->tbm); @@ -535,7 +535,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) * when we actually aren't doing any such thing. Reverse out the added * scan count. (Eventually we may want to count bitmap scans separately.) */ - pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info); + pgstat_discount_heap_scan(scanstate->ss.ss_currentScanDesc->rs_rd); /* * get the scan type from the relation descriptor. 
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 273588424eb..10f57f00b8f 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.37 2007/03/30 18:34:55 mha Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.38 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -125,13 +125,6 @@ typedef struct static BgWriterShmemStruct *BgWriterShmem; -/* - * BgWriter statistics counters. - * Stored directly in a stats message structure so it can be sent - * without needing to copy things around. - */ -PgStat_MsgBgWriter BgWriterStats; - /* * GUC parameters */ @@ -250,11 +243,6 @@ BackgroundWriterMain(void) ALLOCSET_DEFAULT_MAXSIZE); MemoryContextSwitchTo(bgwriter_context); - /* - * Initialize statistics counters to zero - */ - memset(&BgWriterStats, 0, sizeof(BgWriterStats)); - /* * If an exception is encountered, processing resumes here. * diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 1fac5af284b..b41a16de44c 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -13,7 +13,7 @@ * * Copyright (c) 2001-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.155 2007/04/30 16:37:08 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.156 2007/05/27 03:50:39 tgl Exp $ * ---------- */ #include "postgres.h" @@ -39,6 +39,7 @@ #include "access/heapam.h" #include "access/transam.h" +#include "access/twophase_rmgr.h" #include "access/xact.h" #include "catalog/pg_database.h" #include "libpq/ip.h" @@ -98,6 +99,13 @@ bool pgstat_collect_tuplelevel = false; bool pgstat_collect_blocklevel = false; bool pgstat_collect_querystring = false; +/* + * BgWriter global statistics counters (unused in other processes). 
+ * Stored directly in a stats message structure so it can be sent + * without needing to copy things around. We assume this inits to zeroes. + */ +PgStat_MsgBgWriter BgWriterStats; + /* ---------- * Local data * ---------- @@ -111,43 +119,63 @@ static time_t last_pgstat_start_time; static bool pgStatRunningInCollector = false; /* - * Place where backends store per-table info to be sent to the collector. - * We store shared relations separately from non-shared ones, to be able to - * send them in separate messages. + * Structures in which backends store per-table info that's waiting to be + * sent to the collector. * - * NOTE: once allocated, a PgStat_MsgTabstat struct belonging to a - * TabStatArray is never moved or deleted for the life of the backend. - * Also, we zero out the t_id fields of the contained PgStat_TableEntry - * structs whenever they are not actively in use. This allows PgStat_Info - * pointers to be treated as long-lived data, avoiding repeated searches in - * pgstat_initstats() when a relation is repeatedly heap_open'd or - * index_open'd during a transaction. + * NOTE: once allocated, TabStatusArray structures are never moved or deleted + * for the life of the backend. Also, we zero out the t_id fields of the + * contained PgStat_TableStatus structs whenever they are not actively in use. + * This allows relcache pgstat_info pointers to be treated as long-lived data, + * avoiding repeated searches in pgstat_initstats() when a relation is + * repeatedly opened during a transaction. 
*/ -typedef struct TabStatArray +#define TABSTAT_QUANTUM 100 /* we alloc this many at a time */ + +typedef struct TabStatusArray { - int tsa_alloc; /* num allocated */ - int tsa_used; /* num actually used */ - PgStat_MsgTabstat **tsa_messages; /* the array itself */ -} TabStatArray; + struct TabStatusArray *tsa_next; /* link to next array, if any */ + int tsa_used; /* # entries currently used */ + PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */ +} TabStatusArray; -#define TABSTAT_QUANTUM 4 /* we alloc this many at a time */ +static TabStatusArray *pgStatTabList = NULL; + +/* + * Tuple insertion/deletion counts for an open transaction can't be propagated + * into PgStat_TableStatus counters until we know if it is going to commit + * or abort. Hence, we keep these counts in per-subxact structs that live + * in TopTransactionContext. This data structure is designed on the assumption + * that subxacts won't usually modify very many tables. + */ +typedef struct PgStat_SubXactStatus +{ + int nest_level; /* subtransaction nest level */ + struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */ + PgStat_TableXactStatus *first; /* head of list for this subxact */ +} PgStat_SubXactStatus; -static TabStatArray RegularTabStat = {0, 0, NULL}; -static TabStatArray SharedTabStat = {0, 0, NULL}; +static PgStat_SubXactStatus *pgStatXactStack = NULL; static int pgStatXactCommit = 0; static int pgStatXactRollback = 0; +/* Record that's written to 2PC state file when pgstat state is persisted */ +typedef struct TwoPhasePgStatRecord +{ + PgStat_Counter tuples_inserted; /* tuples inserted in xact */ + PgStat_Counter tuples_deleted; /* tuples deleted in xact */ + Oid t_id; /* table's OID */ + bool t_shared; /* is it a shared catalog? 
*/ +} TwoPhasePgStatRecord; + +/* + * Info about current "snapshot" of stats file + */ static MemoryContext pgStatLocalContext = NULL; static HTAB *pgStatDBHash = NULL; static PgBackendStatus *localBackendStatusTable = NULL; static int localNumBackends = 0; -/* - * BgWriter global statistics counters, from bgwriter.c - */ -extern PgStat_MsgBgWriter BgWriterStats; - /* * Cluster wide statistics, kept in the stats collector. * Contains statistics that are not collected per database @@ -177,9 +205,12 @@ static void pgstat_write_statsfile(void); static HTAB *pgstat_read_statsfile(Oid onlydb); static void backend_read_statsfile(void); static void pgstat_read_current_status(void); -static void pgstat_report_one_tabstat(TabStatArray *tsarr, Oid dbid); + +static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg); static HTAB *pgstat_collect_oids(Oid catalogid); +static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared); + static void pgstat_setup_memcxt(void); static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype); @@ -617,12 +648,19 @@ void allow_immediate_pgstat_restart(void) void pgstat_report_tabstat(bool force) { + /* we assume this inits to all zeroes: */ + static const PgStat_TableCounts all_zeroes; static TimestampTz last_report = 0; + TimestampTz now; + PgStat_MsgTabstat regular_msg; + PgStat_MsgTabstat shared_msg; + TabStatusArray *tsa; + int i; /* Don't expend a clock check if nothing to do */ - if (RegularTabStat.tsa_used == 0 && - SharedTabStat.tsa_used == 0) + if (pgStatTabList == NULL || + pgStatTabList->tsa_used == 0) return; /* @@ -636,51 +674,101 @@ pgstat_report_tabstat(bool force) last_report = now; /* - * For each message buffer used during the last queries, set the header - * fields and send it out; then mark the entries unused. + * Scan through the TabStatusArray struct(s) to find tables that actually + * have counts, and build messages to send. 
We have to separate shared + * relations from regular ones because the databaseid field in the + * message header has to depend on that. */ - pgstat_report_one_tabstat(&RegularTabStat, MyDatabaseId); - pgstat_report_one_tabstat(&SharedTabStat, InvalidOid); + regular_msg.m_databaseid = MyDatabaseId; + shared_msg.m_databaseid = InvalidOid; + regular_msg.m_nentries = 0; + shared_msg.m_nentries = 0; + + for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next) + { + for (i = 0; i < tsa->tsa_used; i++) + { + PgStat_TableStatus *entry = &tsa->tsa_entries[i]; + PgStat_MsgTabstat *this_msg; + PgStat_TableEntry *this_ent; + + /* Shouldn't have any pending transaction-dependent counts */ + Assert(entry->trans == NULL); + + /* + * Ignore entries that didn't accumulate any actual counts, + * such as indexes that were opened by the planner but not used. + */ + if (memcmp(&entry->t_counts, &all_zeroes, + sizeof(PgStat_TableCounts)) == 0) + continue; + /* + * OK, insert data into the appropriate message, and send if full. + */ + this_msg = entry->t_shared ? &shared_msg : &regular_msg; + this_ent = &this_msg->m_entry[this_msg->m_nentries]; + this_ent->t_id = entry->t_id; + memcpy(&this_ent->t_counts, &entry->t_counts, + sizeof(PgStat_TableCounts)); + if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES) + { + pgstat_send_tabstat(this_msg); + this_msg->m_nentries = 0; + } + } + /* zero out TableStatus structs after use */ + MemSet(tsa->tsa_entries, 0, + tsa->tsa_used * sizeof(PgStat_TableStatus)); + tsa->tsa_used = 0; + } + + /* + * Send partial messages. If force is true, make sure that any pending + * xact commit/abort gets counted, even if no table stats to send. 
+ */ + if (regular_msg.m_nentries > 0 || + (force && (pgStatXactCommit > 0 || pgStatXactRollback > 0))) + pgstat_send_tabstat(&regular_msg); + if (shared_msg.m_nentries > 0) + pgstat_send_tabstat(&shared_msg); } +/* + * Subroutine for pgstat_report_tabstat: finish and send a tabstat message + */ static void -pgstat_report_one_tabstat(TabStatArray *tsarr, Oid dbid) +pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg) { - int i; - - for (i = 0; i < tsarr->tsa_used; i++) - { - PgStat_MsgTabstat *tsmsg = tsarr->tsa_messages[i]; - int n; - int len; + int n; + int len; - n = tsmsg->m_nentries; - len = offsetof(PgStat_MsgTabstat, m_entry[0]) + - n * sizeof(PgStat_TableEntry); + /* It's unlikely we'd get here with no socket, but maybe not impossible */ + if (pgStatSock < 0) + return; + /* + * Report accumulated xact commit/rollback whenever we send a normal + * tabstat message + */ + if (OidIsValid(tsmsg->m_databaseid)) + { tsmsg->m_xact_commit = pgStatXactCommit; tsmsg->m_xact_rollback = pgStatXactRollback; pgStatXactCommit = 0; pgStatXactRollback = 0; + } + else + { + tsmsg->m_xact_commit = 0; + tsmsg->m_xact_rollback = 0; + } - /* - * It's unlikely we'd get here with no socket, but maybe not - * impossible - */ - if (pgStatSock >= 0) - { - pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT); - tsmsg->m_databaseid = dbid; - pgstat_send(tsmsg, len); - } + n = tsmsg->m_nentries; + len = offsetof(PgStat_MsgTabstat, m_entry[0]) + + n * sizeof(PgStat_TableEntry); - /* - * Zero out the entries, to mark them unused and prepare them - * for next use. 
- */ - MemSet(tsmsg, 0, len); - } - tsarr->tsa_used = 0; + pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT); + pgstat_send(tsmsg, len); } @@ -1016,209 +1104,489 @@ pgstat_ping(void) pgstat_send(&msg, sizeof(msg)); } -/* - * Enlarge a TabStatArray - */ -static void -more_tabstat_space(TabStatArray *tsarr) -{ - PgStat_MsgTabstat *newMessages; - PgStat_MsgTabstat **msgArray; - int newAlloc; - int i; - - AssertArg(PointerIsValid(tsarr)); - - newAlloc = tsarr->tsa_alloc + TABSTAT_QUANTUM; - - /* Create (another) quantum of message buffers, and zero them */ - newMessages = (PgStat_MsgTabstat *) - MemoryContextAllocZero(TopMemoryContext, - sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM); - - /* Create or enlarge the pointer array */ - if (tsarr->tsa_messages == NULL) - msgArray = (PgStat_MsgTabstat **) - MemoryContextAlloc(TopMemoryContext, - sizeof(PgStat_MsgTabstat *) * newAlloc); - else - msgArray = (PgStat_MsgTabstat **) - repalloc(tsarr->tsa_messages, - sizeof(PgStat_MsgTabstat *) * newAlloc); - - for (i = 0; i < TABSTAT_QUANTUM; i++) - msgArray[tsarr->tsa_alloc + i] = newMessages++; - tsarr->tsa_messages = msgArray; - tsarr->tsa_alloc = newAlloc; - - Assert(tsarr->tsa_used < tsarr->tsa_alloc); -} /* ---------- * pgstat_initstats() - * - * Called from various places usually dealing with initialization - * of Relation or Scan structures. The data placed into these - * structures from here tell where later to count for buffer reads, - * scans and tuples fetched. - * - * NOTE: PgStat_Info pointers in scan structures are really redundant - * with those in relcache entries. The passed stats pointer might point - * either to the Relation struct's own pgstat_info field, or to one in - * a scan structure; we'll set the Relation pg_statinfo and copy it to - * the scan struct. + * Initialize a relcache entry to count access statistics. + * Called whenever a relation is opened. 
* * We assume that a relcache entry's pgstat_info field is zeroed by * relcache.c when the relcache entry is made; thereafter it is long-lived - * data. We can avoid repeated searches of the TabStat arrays when the + * data. We can avoid repeated searches of the TabStatus arrays when the * same relation is touched repeatedly within a transaction. * ---------- */ void -pgstat_initstats(PgStat_Info *stats, Relation rel) +pgstat_initstats(Relation rel) { Oid rel_id = rel->rd_id; - PgStat_TableEntry *useent; - TabStatArray *tsarr; - PgStat_MsgTabstat *tsmsg; - int mb; - int i; + char relkind = rel->rd_rel->relkind; + + /* We only count stats for things that have storage */ + if (!(relkind == RELKIND_RELATION || + relkind == RELKIND_INDEX || + relkind == RELKIND_TOASTVALUE)) + { + rel->pgstat_info = NULL; + return; + } if (pgStatSock < 0 || !(pgstat_collect_tuplelevel || pgstat_collect_blocklevel)) { - /* We're not counting at all. */ - stats->tabentry = NULL; + /* We're not counting at all */ + rel->pgstat_info = NULL; return; } /* * If we already set up this relation in the current transaction, - * just copy the pointer. + * nothing to do. */ - if (rel->pgstat_info.tabentry != NULL && - ((PgStat_TableEntry *) rel->pgstat_info.tabentry)->t_id == rel_id) - { - stats->tabentry = rel->pgstat_info.tabentry; + if (rel->pgstat_info != NULL && + rel->pgstat_info->t_id == rel_id) return; - } + + /* Else find or make the PgStat_TableStatus entry, and update link */ + rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared); +} + +/* + * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel + */ +static PgStat_TableStatus * +get_tabstat_entry(Oid rel_id, bool isshared) +{ + PgStat_TableStatus *entry; + TabStatusArray *tsa; + TabStatusArray *prev_tsa; + int i; /* - * Search the already-used message slots for this relation. + * Search the already-used tabstat slots for this relation. */ - tsarr = rel->rd_rel->relisshared ? 
&SharedTabStat : &RegularTabStat; - - for (mb = 0; mb < tsarr->tsa_used; mb++) + prev_tsa = NULL; + for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next) { - tsmsg = tsarr->tsa_messages[mb]; - - for (i = tsmsg->m_nentries; --i >= 0;) + for (i = 0; i < tsa->tsa_used; i++) { - if (tsmsg->m_entry[i].t_id == rel_id) - { - rel->pgstat_info.tabentry = (void *) &(tsmsg->m_entry[i]); - stats->tabentry = rel->pgstat_info.tabentry; - return; - } + entry = &tsa->tsa_entries[i]; + if (entry->t_id == rel_id) + return entry; } - if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES) - continue; - - /* - * Not found, but found a message buffer with an empty slot instead. - * Fine, let's use this one. We assume the entry was already zeroed, - * either at creation or after last use. - */ - i = tsmsg->m_nentries++; - useent = &tsmsg->m_entry[i]; - useent->t_id = rel_id; - rel->pgstat_info.tabentry = (void *) useent; - stats->tabentry = rel->pgstat_info.tabentry; - return; + if (tsa->tsa_used < TABSTAT_QUANTUM) + { + /* + * It must not be present, but we found a free slot instead. + * Fine, let's use this one. We assume the entry was already + * zeroed, either at creation or after last use. + */ + entry = &tsa->tsa_entries[tsa->tsa_used++]; + entry->t_id = rel_id; + entry->t_shared = isshared; + return entry; + } } /* - * If we ran out of message buffers, we just allocate more. + * We ran out of tabstat slots, so allocate more. Be sure they're zeroed. */ - if (tsarr->tsa_used >= tsarr->tsa_alloc) - more_tabstat_space(tsarr); + tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext, + sizeof(TabStatusArray)); + if (prev_tsa) + prev_tsa->tsa_next = tsa; + else + pgStatTabList = tsa; + + /* + * Use the first entry of the new TabStatusArray. 
+ */ + entry = &tsa->tsa_entries[tsa->tsa_used++]; + entry->t_id = rel_id; + entry->t_shared = isshared; + return entry; +} + +/* + * get_tabstat_stack_level - add a new (sub)transaction stack entry if needed + */ +static PgStat_SubXactStatus * +get_tabstat_stack_level(int nest_level) +{ + PgStat_SubXactStatus *xact_state; + + xact_state = pgStatXactStack; + if (xact_state == NULL || xact_state->nest_level != nest_level) + { + xact_state = (PgStat_SubXactStatus *) + MemoryContextAlloc(TopTransactionContext, + sizeof(PgStat_SubXactStatus)); + xact_state->nest_level = nest_level; + xact_state->prev = pgStatXactStack; + xact_state->first = NULL; + pgStatXactStack = xact_state; + } + return xact_state; +} + +/* + * add_tabstat_xact_level - add a new (sub)transaction state record + */ +static void +add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level) +{ + PgStat_SubXactStatus *xact_state; + PgStat_TableXactStatus *trans; /* - * Use the first entry of the next message buffer. + * If this is the first rel to be modified at the current nest level, + * we first have to push a transaction stack entry. 
*/ - mb = tsarr->tsa_used++; - tsmsg = tsarr->tsa_messages[mb]; - tsmsg->m_nentries = 1; - useent = &tsmsg->m_entry[0]; - useent->t_id = rel_id; - rel->pgstat_info.tabentry = (void *) useent; - stats->tabentry = rel->pgstat_info.tabentry; + xact_state = get_tabstat_stack_level(nest_level); + + /* Now make a per-table stack entry */ + trans = (PgStat_TableXactStatus *) + MemoryContextAllocZero(TopTransactionContext, + sizeof(PgStat_TableXactStatus)); + trans->nest_level = nest_level; + trans->upper = pgstat_info->trans; + trans->parent = pgstat_info; + trans->next = xact_state->first; + xact_state->first = trans; + pgstat_info->trans = trans; +} + +/* + * pgstat_count_heap_insert - count a tuple insertion + */ +void +pgstat_count_heap_insert(Relation rel) +{ + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + if (pgstat_collect_tuplelevel && pgstat_info != NULL) + { + int nest_level = GetCurrentTransactionNestLevel(); + + /* t_tuples_inserted is nontransactional, so just advance it */ + pgstat_info->t_counts.t_tuples_inserted++; + + /* We have to log the transactional effect at the proper level */ + if (pgstat_info->trans == NULL || + pgstat_info->trans->nest_level != nest_level) + add_tabstat_xact_level(pgstat_info, nest_level); + + pgstat_info->trans->tuples_inserted++; + } +} + +/* + * pgstat_count_heap_update - count a tuple update + */ +void +pgstat_count_heap_update(Relation rel) +{ + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + if (pgstat_collect_tuplelevel && pgstat_info != NULL) + { + int nest_level = GetCurrentTransactionNestLevel(); + + /* t_tuples_updated is nontransactional, so just advance it */ + pgstat_info->t_counts.t_tuples_updated++; + + /* We have to log the transactional effect at the proper level */ + if (pgstat_info->trans == NULL || + pgstat_info->trans->nest_level != nest_level) + add_tabstat_xact_level(pgstat_info, nest_level); + + /* An UPDATE both inserts a new tuple and deletes the old */ + 
pgstat_info->trans->tuples_inserted++; + pgstat_info->trans->tuples_deleted++; + } +} + +/* + * pgstat_count_heap_delete - count a tuple deletion + */ +void +pgstat_count_heap_delete(Relation rel) +{ + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + if (pgstat_collect_tuplelevel && pgstat_info != NULL) + { + int nest_level = GetCurrentTransactionNestLevel(); + + /* t_tuples_deleted is nontransactional, so just advance it */ + pgstat_info->t_counts.t_tuples_deleted++; + + /* We have to log the transactional effect at the proper level */ + if (pgstat_info->trans == NULL || + pgstat_info->trans->nest_level != nest_level) + add_tabstat_xact_level(pgstat_info, nest_level); + + pgstat_info->trans->tuples_deleted++; + } } /* ---------- - * pgstat_count_xact_commit() - + * AtEOXact_PgStat * - * Called from access/transam/xact.c to count transaction commits. + * Called from access/transam/xact.c at top-level transaction commit/abort. * ---------- */ void -pgstat_count_xact_commit(void) +AtEOXact_PgStat(bool isCommit) { - if (!pgstat_collect_tuplelevel && - !pgstat_collect_blocklevel) - return; - - pgStatXactCommit++; + PgStat_SubXactStatus *xact_state; /* - * If there was no relation activity yet, just make one existing message - * buffer used without slots, causing the next report to tell new - * xact-counters. + * Count transaction commit or abort. (We use counters, not just bools, + * in case the reporting message isn't sent right away.) */ - if (RegularTabStat.tsa_alloc == 0) - more_tabstat_space(&RegularTabStat); + if (isCommit) + pgStatXactCommit++; + else + pgStatXactRollback++; - if (RegularTabStat.tsa_used == 0) + /* + * Transfer transactional insert/update counts into the base tabstat + * entries. We don't bother to free any of the transactional state, + * since it's all in TopTransactionContext and will go away anyway. 
+ */ + xact_state = pgStatXactStack; + if (xact_state != NULL) { - RegularTabStat.tsa_used++; - RegularTabStat.tsa_messages[0]->m_nentries = 0; + PgStat_TableXactStatus *trans; + + Assert(xact_state->nest_level == 1); + Assert(xact_state->prev == NULL); + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + + Assert(trans->nest_level == 1); + Assert(trans->upper == NULL); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + if (isCommit) + { + tabstat->t_counts.t_new_live_tuples += trans->tuples_inserted; + tabstat->t_counts.t_new_dead_tuples += trans->tuples_deleted; + } + else + { + /* inserted tuples are dead, deleted tuples are unaffected */ + tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted; + } + tabstat->trans = NULL; + } } -} + pgStatXactStack = NULL; + /* Make sure any stats snapshot is thrown away */ + pgstat_clear_snapshot(); +} /* ---------- - * pgstat_count_xact_rollback() - + * AtEOSubXact_PgStat * - * Called from access/transam/xact.c to count transaction rollbacks. + * Called from access/transam/xact.c at subtransaction commit/abort. * ---------- */ void -pgstat_count_xact_rollback(void) +AtEOSubXact_PgStat(bool isCommit, int nestDepth) { - if (!pgstat_collect_tuplelevel && - !pgstat_collect_blocklevel) - return; - - pgStatXactRollback++; + PgStat_SubXactStatus *xact_state; /* - * If there was no relation activity yet, just make one existing message - * buffer used without slots, causing the next report to tell new - * xact-counters. + * Transfer transactional insert/update counts into the next higher + * subtransaction state. 
*/ - if (RegularTabStat.tsa_alloc == 0) - more_tabstat_space(&RegularTabStat); + xact_state = pgStatXactStack; + if (xact_state != NULL && + xact_state->nest_level >= nestDepth) + { + PgStat_TableXactStatus *trans; + PgStat_TableXactStatus *next_trans; + + /* delink xact_state from stack immediately to simplify reuse case */ + pgStatXactStack = xact_state->prev; + + for (trans = xact_state->first; trans != NULL; trans = next_trans) + { + PgStat_TableStatus *tabstat; + + next_trans = trans->next; + Assert(trans->nest_level == nestDepth); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + if (isCommit) + { + if (trans->upper && trans->upper->nest_level == nestDepth - 1) + { + trans->upper->tuples_inserted += trans->tuples_inserted; + trans->upper->tuples_deleted += trans->tuples_deleted; + tabstat->trans = trans->upper; + pfree(trans); + } + else + { + /* + * When there isn't an immediate parent state, we can + * just reuse the record instead of going through a + * palloc/pfree pushup (this works since it's all in + * TopTransactionContext anyway). We have to re-link + * it into the parent level, though, and that might mean + * pushing a new entry into the pgStatXactStack. + */ + PgStat_SubXactStatus *upper_xact_state; + + upper_xact_state = get_tabstat_stack_level(nestDepth - 1); + trans->next = upper_xact_state->first; + upper_xact_state->first = trans; + trans->nest_level = nestDepth - 1; + } + } + else + { + /* + * On abort, inserted tuples are dead (and can be bounced out + * to the top-level tabstat), deleted tuples are unaffected + */ + tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted; + tabstat->trans = trans->upper; + pfree(trans); + } + } + pfree(xact_state); + } +} + + +/* + * AtPrepare_PgStat + * Save the transactional stats state at 2PC transaction prepare. + * + * In this phase we just generate 2PC records for all the pending + * transaction-dependent stats work. 
+ */ +void +AtPrepare_PgStat(void) +{ + PgStat_SubXactStatus *xact_state; - if (RegularTabStat.tsa_used == 0) + xact_state = pgStatXactStack; + if (xact_state != NULL) { - RegularTabStat.tsa_used++; - RegularTabStat.tsa_messages[0]->m_nentries = 0; + PgStat_TableXactStatus *trans; + + Assert(xact_state->nest_level == 1); + Assert(xact_state->prev == NULL); + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + TwoPhasePgStatRecord record; + + Assert(trans->nest_level == 1); + Assert(trans->upper == NULL); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + + record.tuples_inserted = trans->tuples_inserted; + record.tuples_deleted = trans->tuples_deleted; + record.t_id = tabstat->t_id; + record.t_shared = tabstat->t_shared; + + RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, + &record, sizeof(TwoPhasePgStatRecord)); + } } } +/* + * PostPrepare_PgStat + * Clean up after successful PREPARE. + * + * All we need do here is unlink the transaction stats state from the + * nontransactional state. The nontransactional action counts will be + * reported to the stats collector immediately, while the effects on live + * and dead tuple counts are preserved in the 2PC state file. + * + * Note: AtEOXact_PgStat is not called during PREPARE. + */ +void +PostPrepare_PgStat(void) +{ + PgStat_SubXactStatus *xact_state; + + /* + * We don't bother to free any of the transactional state, + * since it's all in TopTransactionContext and will go away anyway. + */ + xact_state = pgStatXactStack; + if (xact_state != NULL) + { + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + + tabstat = trans->parent; + tabstat->trans = NULL; + } + } + pgStatXactStack = NULL; + + /* Make sure any stats snapshot is thrown away */ + pgstat_clear_snapshot(); +} + +/* + * 2PC processing routine for COMMIT PREPARED case. 
+ * + * Load the saved counts into our local pgstats state. + */ +void +pgstat_twophase_postcommit(TransactionId xid, uint16 info, + void *recdata, uint32 len) +{ + TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; + PgStat_TableStatus *pgstat_info; + + /* Find or create a tabstat entry for the rel */ + pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared); + + pgstat_info->t_counts.t_new_live_tuples += rec->tuples_inserted; + pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_deleted; +} + +/* + * 2PC processing routine for ROLLBACK PREPARED case. + * + * Load the saved counts into our local pgstats state, but treat them + * as aborted. + */ +void +pgstat_twophase_postabort(TransactionId xid, uint16 info, + void *recdata, uint32 len) +{ + TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; + PgStat_TableStatus *pgstat_info; + + /* Find or create a tabstat entry for the rel */ + pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared); + + /* inserted tuples are dead, deleted tuples are no-ops */ + pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_inserted; +} + /* ---------- * pgstat_fetch_stat_dbentry() - @@ -1725,18 +2093,15 @@ pgstat_send(void *msg, int len) void pgstat_send_bgwriter(void) { + /* We assume this initializes to zeroes */ + static const PgStat_MsgBgWriter all_zeroes; + /* * This function can be called even if nothing at all has happened. * In this case, avoid sending a completely empty message to * the stats collector. 
*/ - if (BgWriterStats.m_timed_checkpoints == 0 && - BgWriterStats.m_requested_checkpoints == 0 && - BgWriterStats.m_buf_written_checkpoints == 0 && - BgWriterStats.m_buf_written_lru == 0 && - BgWriterStats.m_buf_written_all == 0 && - BgWriterStats.m_maxwritten_lru == 0 && - BgWriterStats.m_maxwritten_all == 0) + if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0) return; /* @@ -1746,10 +2111,9 @@ pgstat_send_bgwriter(void) pgstat_send(&BgWriterStats, sizeof(BgWriterStats)); /* - * Clear out the bgwriter statistics buffer, so it can be - * re-used. + * Clear out the statistics buffer, so it can be re-used. */ - memset(&BgWriterStats, 0, sizeof(BgWriterStats)); + MemSet(&BgWriterStats, 0, sizeof(BgWriterStats)); } @@ -2509,60 +2873,50 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len) * If it's a new table entry, initialize counters to the values we * just got. */ - tabentry->numscans = tabmsg[i].t_numscans; - tabentry->tuples_returned = tabmsg[i].t_tuples_returned; - tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched; - tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted; - tabentry->tuples_updated = tabmsg[i].t_tuples_updated; - tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted; - - tabentry->n_live_tuples = tabmsg[i].t_tuples_inserted; - tabentry->n_dead_tuples = tabmsg[i].t_tuples_updated + - tabmsg[i].t_tuples_deleted; + tabentry->numscans = tabmsg[i].t_counts.t_numscans; + tabentry->tuples_returned = tabmsg[i].t_counts.t_tuples_returned; + tabentry->tuples_fetched = tabmsg[i].t_counts.t_tuples_fetched; + tabentry->tuples_inserted = tabmsg[i].t_counts.t_tuples_inserted; + tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated; + tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted; + tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples; + tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples; + tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched; + tabentry->blocks_hit = 
tabmsg[i].t_counts.t_blocks_hit; + tabentry->last_anl_tuples = 0; tabentry->vacuum_timestamp = 0; tabentry->autovac_vacuum_timestamp = 0; tabentry->analyze_timestamp = 0; tabentry->autovac_analyze_timestamp = 0; - - tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched; - tabentry->blocks_hit = tabmsg[i].t_blocks_hit; } else { /* * Otherwise add the values to the existing entry. */ - tabentry->numscans += tabmsg[i].t_numscans; - tabentry->tuples_returned += tabmsg[i].t_tuples_returned; - tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched; - tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted; - tabentry->tuples_updated += tabmsg[i].t_tuples_updated; - tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted; - - tabentry->n_live_tuples += tabmsg[i].t_tuples_inserted - - tabmsg[i].t_tuples_deleted; - tabentry->n_dead_tuples += tabmsg[i].t_tuples_updated + - tabmsg[i].t_tuples_deleted; - - tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched; - tabentry->blocks_hit += tabmsg[i].t_blocks_hit; + tabentry->numscans += tabmsg[i].t_counts.t_numscans; + tabentry->tuples_returned += tabmsg[i].t_counts.t_tuples_returned; + tabentry->tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched; + tabentry->tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted; + tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated; + tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted; + tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples; + tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples; + tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched; + tabentry->blocks_hit += tabmsg[i].t_counts.t_blocks_hit; } /* - * Add table stats to the database entry. 
- */ - dbentry->n_tuples_returned += tabmsg[i].t_tuples_returned; - dbentry->n_tuples_fetched += tabmsg[i].t_tuples_fetched; - dbentry->n_tuples_inserted += tabmsg[i].t_tuples_inserted; - dbentry->n_tuples_updated += tabmsg[i].t_tuples_updated; - dbentry->n_tuples_deleted += tabmsg[i].t_tuples_deleted; - - /* - * And add the block IO to the database entry. + * Add per-table stats to the per-database entry, too. */ - dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched; - dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit; + dbentry->n_tuples_returned += tabmsg[i].t_counts.t_tuples_returned; + dbentry->n_tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched; + dbentry->n_tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted; + dbentry->n_tuples_updated += tabmsg[i].t_counts.t_tuples_updated; + dbentry->n_tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted; + dbentry->n_blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched; + dbentry->n_blocks_hit += tabmsg[i].t_counts.t_blocks_hit; } } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 9f4876a6050..e2cfc870e2e 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.218 2007/05/02 23:34:48 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.219 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -88,12 +88,6 @@ static bool IsForInput; /* local state for LockBufferForCleanup */ static volatile BufferDesc *PinCountWaitBuf = NULL; -/* - * Global statistics for the bgwriter. The contents of this variable - * only makes sense in the bgwriter process. 
- */ -extern PgStat_MsgBgWriter BgWriterStats; - static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage); @@ -174,7 +168,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage) if (isExtend) blockNum = smgrnblocks(reln->rd_smgr); - pgstat_count_buffer_read(&reln->pgstat_info, reln); + pgstat_count_buffer_read(reln); if (isLocalBuf) { @@ -204,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage) if (!isExtend) { /* Just need to update stats before we exit */ - pgstat_count_buffer_hit(&reln->pgstat_info, reln); + pgstat_count_buffer_hit(reln); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageHit; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 7d554c2ada2..45cb103adee 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.260 2007/05/02 21:08:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.261 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1802,6 +1802,7 @@ RelationClearRelation(Relation relation, bool rebuild) int old_refcnt = relation->rd_refcnt; SubTransactionId old_createSubid = relation->rd_createSubid; SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid; + struct PgStat_TableStatus *old_pgstat_info = relation->pgstat_info; TupleDesc old_att = relation->rd_att; RuleLock *old_rules = relation->rd_rules; MemoryContext old_rulescxt = relation->rd_rulescxt; @@ -1821,6 +1822,7 @@ RelationClearRelation(Relation relation, bool rebuild) relation->rd_refcnt = old_refcnt; relation->rd_createSubid = old_createSubid; relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid; + relation->pgstat_info = old_pgstat_info; if (equalTupleDescs(old_att, relation->rd_att)) { diff --git 
a/src/include/access/heapam.h b/src/include/access/heapam.h index 5ea66e74672..ebb2e984c24 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.123 2007/04/08 01:26:33 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -147,10 +147,10 @@ extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); extern bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf, - PgStat_Info *pgstat_info); + Relation stats_relation); extern bool heap_release_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf, - PgStat_Info *pgstat_info); + Relation stats_relation); extern void heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 77bca6be482..7a1ea39352a 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.52 2007/01/20 18:43:35 neilc Exp $ + * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,8 +37,6 @@ typedef struct HeapScanDescData /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ ItemPointerData rs_mctid; /* marked scan position, if any */ - PgStat_Info rs_pgstat_info; /* statistics collector hook */ - /* these fields only used in 
page-at-a-time mode */ int rs_cindex; /* current tuple's index in vistuples */ int rs_mindex; /* marked tuple's saved index */ @@ -78,8 +76,6 @@ typedef struct IndexScanDescData HeapTupleData xs_ctup; /* current heap tuple, if any */ Buffer xs_cbuf; /* current heap buffer in scan, if any */ /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ - - PgStat_Info xs_pgstat_info; /* statistics collector hook */ } IndexScanDescData; typedef IndexScanDescData *IndexScanDesc; diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h index 0dbcd226fbd..e98ad7cb375 100644 --- a/src/include/access/twophase_rmgr.h +++ b/src/include/access/twophase_rmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.4 2007/01/05 22:19:51 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.5 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId; #define TWOPHASE_RM_INVAL_ID 2 #define TWOPHASE_RM_FLATFILES_ID 3 #define TWOPHASE_RM_NOTIFY_ID 4 -#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_NOTIFY_ID +#define TWOPHASE_RM_PGSTAT_ID 5 +#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_PGSTAT_ID extern const TwoPhaseCallback twophase_recover_callbacks[]; extern const TwoPhaseCallback twophase_postcommit_callbacks[]; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 694ee44db19..476fd47dc7b 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -5,7 +5,7 @@ * * Copyright (c) 2001-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.58 2007/04/30 16:37:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.59 2007/05/27 03:50:39 tgl Exp $ * ---------- */ #ifndef PGSTAT_H @@ -40,6 +40,90 @@ typedef enum 
StatMsgType */ typedef int64 PgStat_Counter; +/* ---------- + * PgStat_TableCounts The actual per-table counts kept by a backend + * + * This struct should contain only actual event counters, because we memcmp + * it against zeroes to detect whether there are any counts to transmit. + * It is a component of PgStat_TableStatus (within-backend state) and + * PgStat_TableEntry (the transmitted message format). + * + * Note: for a table, tuples_returned is the number of tuples successfully + * fetched by heap_getnext, while tuples_fetched is the number of tuples + * successfully fetched by heap_fetch under the control of bitmap indexscans. + * For an index, tuples_returned is the number of index entries returned by + * the index AM, while tuples_fetched is the number of tuples successfully + * fetched by heap_fetch under the control of simple indexscans for this index. + * + * tuples_inserted/tuples_updated/tuples_deleted count attempted actions, + * regardless of whether the transaction committed. new_live_tuples and + * new_dead_tuples are properly adjusted depending on commit or abort. + * ---------- + */ +typedef struct PgStat_TableCounts +{ + PgStat_Counter t_numscans; + + PgStat_Counter t_tuples_returned; + PgStat_Counter t_tuples_fetched; + + PgStat_Counter t_tuples_inserted; + PgStat_Counter t_tuples_updated; + PgStat_Counter t_tuples_deleted; + + PgStat_Counter t_new_live_tuples; + PgStat_Counter t_new_dead_tuples; + + PgStat_Counter t_blocks_fetched; + PgStat_Counter t_blocks_hit; +} PgStat_TableCounts; + + +/* ------------------------------------------------------------ + * Structures kept in backend local memory while accumulating counts + * ------------------------------------------------------------ + */ + + +/* ---------- + * PgStat_TableStatus Per-table status within a backend + * + * Most of the event counters are nontransactional, ie, we count events + * in committed and aborted transactions alike. 
For these, we just count + * directly in the PgStat_TableStatus. However, new_live_tuples and + * new_dead_tuples must be derived from tuple insertion and deletion counts + * with awareness of whether the transaction or subtransaction committed or + * aborted. Hence, we also keep a stack of per-(sub)transaction status + * records for every table modified in the current transaction. At commit + * or abort, we propagate tuples_inserted and tuples_deleted up to the + * parent subtransaction level, or out to the parent PgStat_TableStatus, + * as appropriate. + * ---------- + */ +typedef struct PgStat_TableStatus +{ + Oid t_id; /* table's OID */ + bool t_shared; /* is it a shared catalog? */ + struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */ + PgStat_TableCounts t_counts; /* event counts to be sent */ +} PgStat_TableStatus; + +/* ---------- + * PgStat_TableXactStatus Per-table, per-subtransaction status + * ---------- + */ +typedef struct PgStat_TableXactStatus +{ + PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */ + PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */ + int nest_level; /* subtransaction nest level */ + /* links to other structs for same relation: */ + struct PgStat_TableXactStatus *upper; /* next higher subxact if any */ + PgStat_TableStatus *parent; /* per-table status */ + /* structs of same subxact level are linked here: */ + struct PgStat_TableXactStatus *next; /* next of same subxact */ +} PgStat_TableXactStatus; + /* ------------------------------------------------------------ * Message formats follow @@ -78,30 +162,12 @@ typedef struct PgStat_MsgDummy /* ---------- * PgStat_TableEntry Per-table info in a MsgTabstat - * - * Note: for a table, tuples_returned is the number of tuples successfully - * fetched by heap_getnext, while tuples_fetched is the number of tuples - * successfully fetched by heap_fetch under the control of bitmap indexscans. 
- * For an index, tuples_returned is the number of index entries returned by - * the index AM, while tuples_fetched is the number of tuples successfully - * fetched by heap_fetch under the control of simple indexscans for this index. * ---------- */ typedef struct PgStat_TableEntry { Oid t_id; - - PgStat_Counter t_numscans; - - PgStat_Counter t_tuples_returned; - PgStat_Counter t_tuples_fetched; - - PgStat_Counter t_tuples_inserted; - PgStat_Counter t_tuples_updated; - PgStat_Counter t_tuples_deleted; - - PgStat_Counter t_blocks_fetched; - PgStat_Counter t_blocks_hit; + PgStat_TableCounts t_counts; } PgStat_TableEntry; /* ---------- @@ -393,6 +459,10 @@ extern bool pgstat_collect_tuplelevel; extern bool pgstat_collect_blocklevel; extern bool pgstat_collect_querystring; +/* + * BgWriter statistics counters are updated directly by bgwriter and bufmgr + */ +extern PgStat_MsgBgWriter BgWriterStats; /* ---------- * Functions called from postmaster @@ -436,83 +506,67 @@ extern void pgstat_report_activity(const char *what); extern void pgstat_report_txn_timestamp(TimestampTz tstamp); extern void pgstat_report_waiting(bool waiting); -extern void pgstat_initstats(PgStat_Info *stats, Relation rel); +extern void pgstat_initstats(Relation rel); +/* nontransactional event counts are simple enough to inline */ -#define pgstat_count_heap_scan(s) \ +#define pgstat_count_heap_scan(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_numscans++; \ } while (0) /* kluge for bitmap scans: */ -#define pgstat_discount_heap_scan(s) \ +#define pgstat_discount_heap_scan(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_numscans--; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_numscans--; \ } while 
(0) -#define pgstat_count_heap_getnext(s) \ +#define pgstat_count_heap_getnext(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_tuples_returned++; \ } while (0) -#define pgstat_count_heap_fetch(s) \ +#define pgstat_count_heap_fetch(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_fetched++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_tuples_fetched++; \ } while (0) -#define pgstat_count_heap_insert(s) \ +#define pgstat_count_index_scan(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_inserted++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_numscans++; \ } while (0) -#define pgstat_count_heap_update(s) \ +#define pgstat_count_index_tuples(rel, n) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_updated++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_tuples_returned += (n); \ } while (0) -#define pgstat_count_heap_delete(s) \ +#define pgstat_count_buffer_read(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_deleted++; \ + if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_blocks_fetched++; \ } while (0) -#define pgstat_count_index_scan(s) \ +#define pgstat_count_buffer_hit(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \ - } while (0) -#define pgstat_count_index_tuples(s, n) \ - do { \ - if 
(pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned += (n); \ - } while (0) -#define pgstat_count_buffer_read(s,r) \ - do { \ - if (pgstat_collect_blocklevel) { \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \ - else { \ - pgstat_initstats((s), (r)); \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \ - } \ - } \ - } while (0) -#define pgstat_count_buffer_hit(s,r) \ - do { \ - if (pgstat_collect_blocklevel) { \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \ - else { \ - pgstat_initstats((s), (r)); \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \ - } \ - } \ + if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_blocks_hit++; \ } while (0) +extern void pgstat_count_heap_insert(Relation rel); +extern void pgstat_count_heap_update(Relation rel); +extern void pgstat_count_heap_delete(Relation rel); + +extern void AtEOXact_PgStat(bool isCommit); +extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth); + +extern void AtPrepare_PgStat(void); +extern void PostPrepare_PgStat(void); + +extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, + void *recdata, uint32 len); -extern void pgstat_count_xact_commit(void); -extern void pgstat_count_xact_rollback(void); extern void pgstat_send_bgwriter(void); /* ---------- diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 33795de2bf8..bc6bf190b86 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: 
pgsql/src/include/utils/rel.h,v 1.100 2007/03/29 00:15:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.101 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -89,15 +89,6 @@ typedef struct TriggerDesc } TriggerDesc; -/* - * Same for the statistics collector data in Relation and scan data. - */ -typedef struct PgStat_Info -{ - void *tabentry; -} PgStat_Info; - - /* * Cached lookup information for the index access method functions defined * by the pg_am row associated with an index relation. @@ -200,8 +191,8 @@ typedef struct RelationData List *rd_indpred; /* index predicate tree, if any */ void *rd_amcache; /* available for use by index AM */ - /* statistics collection area */ - PgStat_Info pgstat_info; + /* use "struct" here to avoid needing to include pgstat.h: */ + struct PgStat_TableStatus *pgstat_info; /* statistics collection area */ } RelationData; typedef RelationData *Relation;