diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba30b..c79e02971b4f6 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -901,21 +901,11 @@ pgss_planner(Query *parse, && pgss_track_planning && query_string && parse->queryId != UINT64CONST(0)) { + InstrumentUsage *usage; instr_time start; instr_time duration; - BufferUsage bufusage_start, - bufusage; - WalUsage walusage_start, - walusage; - /* We need to track buffer usage as the planner can access them. */ - bufusage_start = pgBufferUsage; - - /* - * Similarly the planner could write some WAL records in some cases - * (e.g. setting a hint bit with those being WAL-logged) - */ - walusage_start = pgWalUsage; + InstrUsageStart(); INSTR_TIME_SET_CURRENT(start); nesting_level++; @@ -936,14 +926,7 @@ pgss_planner(Query *parse, INSTR_TIME_SET_CURRENT(duration); INSTR_TIME_SUBTRACT(duration, start); - - /* calc differences of buffer counters. */ - memset(&bufusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); - - /* calc differences of WAL counters. */ - memset(&walusage, 0, sizeof(WalUsage)); - WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start); + usage = InstrUsageStop(); pgss_store(query_string, parse->queryId, @@ -952,8 +935,8 @@ pgss_planner(Query *parse, PGSS_PLAN, INSTR_TIME_GET_MILLISEC(duration), 0, - &bufusage, - &walusage, + &usage->bufusage, + &usage->walusage, NULL, NULL, 0, @@ -1094,8 +1077,8 @@ pgss_ExecutorEnd(QueryDesc *queryDesc) PGSS_EXEC, queryDesc->totaltime->total * 1000.0, /* convert to msec */ queryDesc->estate->es_total_processed, - &queryDesc->totaltime->bufusage, - &queryDesc->totaltime->walusage, + &queryDesc->totaltime->instrusage.bufusage, + &queryDesc->totaltime->instrusage.walusage, queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL, NULL, queryDesc->estate->es_parallel_workers_to_launch, @@ -1158,16 +1141,12 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, !IsA(parsetree, ExecuteStmt) && !IsA(parsetree, PrepareStmt)) { + InstrumentUsage *usage; instr_time start; instr_time duration; uint64 rows; - BufferUsage bufusage_start, - bufusage; - WalUsage walusage_start, - walusage; - bufusage_start = pgBufferUsage; - walusage_start = pgWalUsage; + InstrUsageStart(); INSTR_TIME_SET_CURRENT(start); nesting_level++; @@ -1200,6 +1179,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, INSTR_TIME_SET_CURRENT(duration); INSTR_TIME_SUBTRACT(duration, start); + usage = InstrUsageStop(); /* * Track the total number of rows retrieved or affected by the utility @@ -1212,14 +1192,6 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ? qc->nprocessed : 0; - /* calc differences of buffer counters. */ - memset(&bufusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); - - /* calc differences of WAL counters. */ - memset(&walusage, 0, sizeof(WalUsage)); - WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start); - pgss_store(queryString, saved_queryId, saved_stmt_location, @@ -1227,8 +1199,8 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, PGSS_EXEC, INSTR_TIME_GET_MILLISEC(duration), rows, - &bufusage, - &walusage, + &usage->bufusage, + &usage->walusage, NULL, NULL, 0, diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 01e1db7f856be..8d47978a51c1e 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -47,8 +47,7 @@ #define PARALLEL_KEY_BRIN_SHARED UINT64CONST(0xB000000000000001) #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005) +#define PARALLEL_KEY_INSTR_USAGE UINT64CONST(0xB000000000000004) /* * Status for index builds performed in parallel. This is allocated in a @@ -144,8 +143,7 @@ typedef struct BrinLeader BrinShared *brinshared; Sharedsort *sharedsort; Snapshot snapshot; - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *instrusage; } BrinLeader; /* @@ -2371,8 +2369,7 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, BrinShared *brinshared; Sharedsort *sharedsort; BrinLeader *brinleader = (BrinLeader *) palloc0(sizeof(BrinLeader)); - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *instrusage = NULL; bool leaderparticipates = true; int querylen; @@ -2414,19 +2411,14 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, shm_toc_estimate_keys(&pcxt->estimator, 2); /* - * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE - * and PARALLEL_KEY_BUFFER_USAGE. - * - * If there are no extensions loaded that care, we could skip this. We - * have no way of knowing whether anyone's looking at pgWalUsage or - * pgBufferUsage, so do it unconditionally. + * Estimate space for InstrUsage -- PARALLEL_KEY_INSTR_USAGE, if needed. */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); + if (InstrumentUsageActive()) + { + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ if (debug_query_string) @@ -2501,12 +2493,12 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, * Allocate space for each worker's WalUsage and BufferUsage; no need to * initialize. */ - walusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); - bufferusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + if (InstrumentUsageActive()) + { + instrusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTR_USAGE, instrusage); + } /* Launch workers, saving status for leader/caller */ LaunchParallelWorkers(pcxt); @@ -2517,8 +2509,7 @@ _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index, brinleader->brinshared = brinshared; brinleader->sharedsort = sharedsort; brinleader->snapshot = snapshot; - brinleader->walusage = walusage; - brinleader->bufferusage = bufferusage; + brinleader->instrusage = instrusage; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -2553,11 +2544,14 @@ _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state) WaitForParallelWorkersToFinish(brinleader->pcxt); /* - * Next, accumulate WAL usage. (This must wait for the workers to finish, - * or we might get incomplete data.) + * Next, accumulate usage data. (This must wait for the workers to + * finish, or we might get incomplete data.) */ - for (i = 0; i < brinleader->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&brinleader->bufferusage[i], &brinleader->walusage[i]); + if (InstrumentUsageActive()) + { + for (i = 0; i < brinleader->pcxt->nworkers_launched; i++) + InstrUsageAddToCurrent(&brinleader->instrusage[i]); + } /* Free last reference to MVCC snapshot, if one was used */ if (IsMVCCSnapshot(brinleader->snapshot)) @@ -2870,8 +2864,7 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc) Relation indexRel; LOCKMODE heapLockmode; LOCKMODE indexLockmode; - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *shm_usage; int sortmem; /* @@ -2918,8 +2911,10 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc) sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false); tuplesort_attach_shared(sharedsort, seg); - /* Prepare to track buffer usage during parallel execution */ - InstrStartParallelQuery(); + /* Prepare to track buffer usage during parallel execution, if needed */ + shm_usage = shm_toc_lookup(toc, PARALLEL_KEY_INSTR_USAGE, true); + if (shm_usage) + InstrUsageStart(); /* * Might as well use reliable figure when doling out maintenance_work_mem @@ -2932,10 +2927,12 @@ _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc) heapRel, indexRel, sortmem, false); /* Report WAL/buffer usage during parallel execution */ - bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], - &walusage[ParallelWorkerNumber]); + if (shm_usage) + { + InstrumentUsage *usage = InstrUsageStop(); + + memcpy(&shm_usage[ParallelWorkerNumber], usage, sizeof(InstrumentUsage)); + } index_close(indexRel, indexLockmode); table_close(heapRel, heapLockmode); diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index a65acd8910493..733416faf0e49 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -39,8 +39,7 @@ #define PARALLEL_KEY_GIN_SHARED UINT64CONST(0xB000000000000001) #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005) +#define PARALLEL_KEY_INSTR_USAGE UINT64CONST(0xB000000000000004) /* * Status for index builds performed in parallel. This is allocated in a @@ -132,8 +131,7 @@ typedef struct GinLeader GinBuildShared *ginshared; Sharedsort *sharedsort; Snapshot snapshot; - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *instrusage; } GinLeader; typedef struct @@ -904,8 +902,7 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, GinBuildShared *ginshared; Sharedsort *sharedsort; GinLeader *ginleader = (GinLeader *) palloc0(sizeof(GinLeader)); - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *instrusage = NULL; bool leaderparticipates = true; int querylen; @@ -946,19 +943,14 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, shm_toc_estimate_keys(&pcxt->estimator, 2); /* - * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE - * and PARALLEL_KEY_BUFFER_USAGE. - * - * If there are no extensions loaded that care, we could skip this. We - * have no way of knowing whether anyone's looking at pgWalUsage or - * pgBufferUsage, so do it unconditionally. + * Estimate space for InstrUsage -- PARALLEL_KEY_INSTR_USAGE, if needed. */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); + if (InstrumentUsageActive()) + { + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ if (debug_query_string) @@ -1028,12 +1020,12 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, * Allocate space for each worker's WalUsage and BufferUsage; no need to * initialize. */ - walusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); - bufferusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + if (InstrumentUsageActive()) + { + instrusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTR_USAGE, instrusage); + } /* Launch workers, saving status for leader/caller */ LaunchParallelWorkers(pcxt); @@ -1044,8 +1036,7 @@ _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, ginleader->ginshared = ginshared; ginleader->sharedsort = sharedsort; ginleader->snapshot = snapshot; - ginleader->walusage = walusage; - ginleader->bufferusage = bufferusage; + ginleader->instrusage = instrusage; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -1083,8 +1074,11 @@ _gin_end_parallel(GinLeader *ginleader, GinBuildState *state) * Next, accumulate WAL usage. (This must wait for the workers to finish, * or we might get incomplete data.) */ - for (i = 0; i < ginleader->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]); + if (InstrumentUsageActive()) + { + for (i = 0; i < ginleader->pcxt->nworkers_launched; i++) + InstrUsageAddToCurrent(&ginleader->instrusage[i]); + } /* Free last reference to MVCC snapshot, if one was used */ if (IsMVCCSnapshot(ginleader->snapshot)) @@ -2079,8 +2073,7 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) Relation indexRel; LOCKMODE heapLockmode; LOCKMODE indexLockmode; - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *shm_usage; int sortmem; /* @@ -2147,7 +2140,9 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) tuplesort_attach_shared(sharedsort, seg); /* Prepare to track buffer usage during parallel execution */ - InstrStartParallelQuery(); + shm_usage = shm_toc_lookup(toc, PARALLEL_KEY_INSTR_USAGE, true); + if (shm_usage) + InstrUsageStart(); /* * Might as well use reliable figure when doling out maintenance_work_mem @@ -2160,10 +2155,12 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) heapRel, indexRel, sortmem, false); /* Report WAL/buffer usage during parallel execution */ - bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], - &walusage[ParallelWorkerNumber]); + if (shm_usage) + { + InstrumentUsage *usage = InstrUsageStop(); + + memcpy(&shm_usage[ParallelWorkerNumber], usage, sizeof(InstrumentUsage)); + } index_close(indexRel, indexLockmode); table_close(heapRel, heapLockmode); diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f28326bad0951..2b16f291971ab 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -629,8 +629,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, TimestampTz starttime = 0; PgStat_Counter startreadtime = 0, startwritetime = 0; - WalUsage startwalusage = pgWalUsage; - BufferUsage startbufferusage = pgBufferUsage; ErrorContextCallback errcallback; char **indnames = NULL; @@ -639,6 +637,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, params->log_min_duration >= 0)); if (instrument) { + InstrUsageStart(); pg_rusage_init(&ru0); if (track_io_timing) { @@ -945,6 +944,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, if (instrument) { TimestampTz endtime = GetCurrentTimestamp(); + InstrumentUsage *usage; + + /* support summary tracking of utility statements by extensions */ + InstrUsageAccumToPrevious(); + usage = InstrUsageStop(); if (verbose || params->log_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, @@ -952,8 +956,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, { long secs_dur; int usecs_dur; - WalUsage walusage; - BufferUsage bufferusage; StringInfoData buf; char *msgfmt; int32 diff; @@ -964,17 +966,13 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, int64 total_blks_dirtied; TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur); - memset(&walusage, 0, sizeof(WalUsage)); - WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage); - memset(&bufferusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage); - - total_blks_hit = bufferusage.shared_blks_hit + - bufferusage.local_blks_hit; - total_blks_read = bufferusage.shared_blks_read + - bufferusage.local_blks_read; - total_blks_dirtied = bufferusage.shared_blks_dirtied + - bufferusage.local_blks_dirtied; + + total_blks_hit = usage->bufusage.shared_blks_hit + + usage->bufusage.local_blks_hit; + total_blks_read = usage->bufusage.shared_blks_read + + usage->bufusage.local_blks_read; + total_blks_dirtied = usage->bufusage.shared_blks_dirtied + + usage->bufusage.local_blks_dirtied; initStringInfo(&buf); if (verbose) @@ -1136,10 +1134,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, total_blks_dirtied); appendStringInfo(&buf, _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRId64 " buffers full\n"), - walusage.wal_records, - walusage.wal_fpi, - walusage.wal_bytes, - walusage.wal_buffers_full); + usage->walusage.wal_records, + usage->walusage.wal_fpi, + usage->walusage.wal_bytes, + usage->walusage.wal_buffers_full); appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0)); ereport(verbose ? INFO : LOG, diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 3794cc924ad46..fb07219da32e1 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -62,8 +62,7 @@ #define PARALLEL_KEY_TUPLESORT UINT64CONST(0xA000000000000002) #define PARALLEL_KEY_TUPLESORT_SPOOL2 UINT64CONST(0xA000000000000003) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000004) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xA000000000000005) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xA000000000000006) +#define PARALLEL_KEY_INSTR_USAGE UINT64CONST(0xA000000000000005) /* * DISABLE_LEADER_PARTICIPATION disables the leader's participation in @@ -191,8 +190,7 @@ typedef struct BTLeader Sharedsort *sharedsort; Sharedsort *sharedsort2; Snapshot snapshot; - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *instrusage; } BTLeader; /* @@ -1406,8 +1404,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) Sharedsort *sharedsort2; BTSpool *btspool = buildstate->spool; BTLeader *btleader = (BTLeader *) palloc0(sizeof(BTLeader)); - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *instrusage = NULL; bool leaderparticipates = true; int querylen; @@ -1462,17 +1459,13 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) /* * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE * and PARALLEL_KEY_BUFFER_USAGE. - * - * If there are no extensions loaded that care, we could skip this. We - * have no way of knowing whether anyone's looking at pgWalUsage or - * pgBufferUsage, so do it unconditionally. */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); + if (InstrumentUsageActive()) + { + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ if (debug_query_string) @@ -1561,12 +1554,12 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) * Allocate space for each worker's WalUsage and BufferUsage; no need to * initialize. */ - walusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); - bufferusage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + if (InstrumentUsageActive()) + { + instrusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTR_USAGE, instrusage); + } /* Launch workers, saving status for leader/caller */ LaunchParallelWorkers(pcxt); @@ -1578,8 +1571,7 @@ _bt_begin_parallel(BTBuildState *buildstate, bool isconcurrent, int request) btleader->sharedsort = sharedsort; btleader->sharedsort2 = sharedsort2; btleader->snapshot = snapshot; - btleader->walusage = walusage; - btleader->bufferusage = bufferusage; + btleader->instrusage = instrusage; /* If no workers were successfully launched, back out (do serial build) */ if (pcxt->nworkers_launched == 0) @@ -1617,8 +1609,11 @@ _bt_end_parallel(BTLeader *btleader) * Next, accumulate WAL usage. (This must wait for the workers to finish, * or we might get incomplete data.) */ - for (i = 0; i < btleader->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&btleader->bufferusage[i], &btleader->walusage[i]); + if (InstrumentUsageActive()) + { + for (i = 0; i < btleader->pcxt->nworkers_launched; i++) + InstrUsageAddToCurrent(&btleader->instrusage[i]); + } /* Free last reference to MVCC snapshot, if one was used */ if (IsMVCCSnapshot(btleader->snapshot)) @@ -1751,8 +1746,7 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) Relation indexRel; LOCKMODE heapLockmode; LOCKMODE indexLockmode; - WalUsage *walusage; - BufferUsage *bufferusage; + InstrumentUsage *shm_usage; int sortmem; #ifdef BTREE_BUILD_STATS @@ -1825,8 +1819,10 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) tuplesort_attach_shared(sharedsort2, seg); } - /* Prepare to track buffer usage during parallel execution */ - InstrStartParallelQuery(); + /* Prepare to track buffer usage during parallel execution, if needed */ + shm_usage = shm_toc_lookup(toc, PARALLEL_KEY_INSTR_USAGE, true); + if (shm_usage) + InstrUsageStart(); /* Perform sorting of spool, and possibly a spool2 */ sortmem = maintenance_work_mem / btshared->scantuplesortstates; @@ -1834,10 +1830,12 @@ _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc) sharedsort2, sortmem, false); /* Report WAL/buffer usage during parallel execution */ - bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], - &walusage[ParallelWorkerNumber]); + if (shm_usage) + { + InstrumentUsage *usage = InstrUsageStop(); + + memcpy(&shm_usage[ParallelWorkerNumber], usage, sizeof(InstrumentUsage)); + } #ifdef BTREE_BUILD_STATS if (log_btree_build_stats) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b885513f76541..08106ef8fb7ca 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -4916,6 +4916,25 @@ AbortOutOfAnyTransaction(void) * we need to shut down before doing CleanupTransaction. */ AtAbort_Portals(); + + /* + * Release any resources that were initialized after an + * earlier AbortTransaction and as such would otherwise leak. + * + * AtAbort_Portals initially unsets the portal's resowner + * since it assumes it called inside AbortTransaction, which + * is not the case here. + */ + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_BEFORE_LOCKS, + false, true); + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_LOCKS, + false, true); + ResourceOwnerRelease(TopTransactionResourceOwner, + RESOURCE_RELEASE_AFTER_LOCKS, + false, true); + CleanupTransaction(); s->blockState = TBLOCK_DEFAULT; break; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1914859b2eed7..ec067dcfc2ffd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -1089,6 +1089,9 @@ XLogInsertRecord(XLogRecData *rdata, /* Report WAL traffic to the instrumentation. */ if (inserted) { + INSTR_WALUSAGE_ADD(wal_bytes, rechdr->xl_tot_len); + INSTR_WALUSAGE_INCR(wal_records); + INSTR_WALUSAGE_ADD(wal_fpi, num_fpi); pgWalUsage.wal_bytes += rechdr->xl_tot_len; pgWalUsage.wal_records++; pgWalUsage.wal_fpi += num_fpi; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 739a92bdcc1ca..1b971c18ccfe7 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2131,6 +2131,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) indexrelid; LOCKTAG heaplocktag; LOCKMODE lockmode; + InstrumentUsage *instrusage = NULL; /* * A temporary relation uses a non-concurrent DROP. Other backends can't @@ -2255,10 +2256,19 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock); LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock); + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Now we must wait until no running transaction could be using the * index for a query. Use AccessExclusiveLock here to check for @@ -2288,6 +2298,9 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) /* Finish invalidation of index and mark it as dead */ index_concurrently_set_dead(heapId, indexId); + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); /* @@ -2297,6 +2310,12 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Wait till every transaction that saw the old index state has * finished. See above about progress reporting. diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4fffb76e55735..19d783a748d68 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -303,9 +303,6 @@ do_analyze_rel(Relation onerel, VacuumParams *params, Oid save_userid; int save_sec_context; int save_nestlevel; - WalUsage startwalusage = pgWalUsage; - BufferUsage startbufferusage = pgBufferUsage; - BufferUsage bufferusage; PgStat_Counter startreadtime = 0; PgStat_Counter startwritetime = 0; @@ -355,6 +352,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, startwritetime = pgStatBlockWriteTime; } + InstrUsageStart(); pg_rusage_init(&ru0); } @@ -735,13 +733,17 @@ do_analyze_rel(Relation onerel, VacuumParams *params, if (instrument) { TimestampTz endtime = GetCurrentTimestamp(); + InstrumentUsage *usage; + + /* support summary tracking of utility statements by extensions */ + InstrUsageAccumToPrevious(); + usage = InstrUsageStop(); if (verbose || params->log_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, params->log_min_duration)) { long delay_in_ms; - WalUsage walusage; double read_rate = 0; double write_rate = 0; char *msgfmt; @@ -750,17 +752,12 @@ do_analyze_rel(Relation onerel, VacuumParams *params, int64 total_blks_read; int64 total_blks_dirtied; - memset(&bufferusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage); - memset(&walusage, 0, sizeof(WalUsage)); - WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage); - - total_blks_hit = bufferusage.shared_blks_hit + - bufferusage.local_blks_hit; - total_blks_read = bufferusage.shared_blks_read + - bufferusage.local_blks_read; - total_blks_dirtied = bufferusage.shared_blks_dirtied + - bufferusage.local_blks_dirtied; + total_blks_hit = usage->bufusage.shared_blks_hit + + usage->bufusage.local_blks_hit; + total_blks_read = usage->bufusage.shared_blks_read + + usage->bufusage.local_blks_read; + total_blks_dirtied = usage->bufusage.shared_blks_dirtied + + usage->bufusage.local_blks_dirtied; /* * We do not expect an analyze to take > 25 days and it simplifies @@ -833,10 +830,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params, total_blks_dirtied); appendStringInfo(&buf, _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRId64 " buffers full\n"), - walusage.wal_records, - walusage.wal_fpi, - walusage.wal_bytes, - walusage.wal_buffers_full); + usage->walusage.wal_records, + usage->walusage.wal_fpi, + usage->walusage.wal_bytes, + usage->walusage.wal_buffers_full); appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0)); ereport(verbose ? INFO : LOG, diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 54a08e4102e14..57839f61ccc01 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -245,9 +245,6 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) /* Do the job. */ cluster_multiple_rels(rtcs, ¶ms); - /* Start a new transaction for the cleanup work. */ - StartTransactionCommand(); - /* Clean up working storage */ MemoryContextDelete(cluster_context); } @@ -263,6 +260,10 @@ static void cluster_multiple_rels(List *rtcs, ClusterParams *params) { ListCell *lc; + InstrumentUsage *instrusage = NULL; + + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); /* Commit to get out of starting transaction */ PopActiveSnapshot(); @@ -277,6 +278,12 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params) /* Start a new transaction for each relation. */ StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); @@ -286,9 +293,21 @@ cluster_multiple_rels(List *rtcs, ClusterParams *params) cluster_rel(rel, rtc->indexOid, params); /* cluster_rel closes the relation, but keeps lock */ + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); CommitTransactionCommand(); } + + /* Start a new transaction for the cleanup work. */ + StartTransactionCommand(); + + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } } /* diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 5fbbcdaabb1d2..0c62e510f0781 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1993,6 +1993,7 @@ movedb(const char *dbname, const char *tblspcname) DIR *dstdir; struct dirent *xlde; movedb_failure_params fparms; + InstrumentUsage *initial_usage = NULL; /* * Look up the target database's OID, and get exclusive lock on it. We @@ -2245,6 +2246,9 @@ movedb(const char *dbname, const char *tblspcname) PG_END_ENSURE_ERROR_CLEANUP(movedb_failure_callback, PointerGetDatum(&fparms)); + if (InstrumentUsageActive()) + initial_usage = InstrUsageStop(); + /* * Commit the transaction so that the pg_database update is committed. If * we crash while removing files, the database won't be corrupt, we'll @@ -2262,6 +2266,12 @@ movedb(const char *dbname, const char *tblspcname) /* Start new transaction for the remaining work; don't need a snapshot */ StartTransactionCommand(); + if (initial_usage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(initial_usage); + } + /* * Remove files from the old tablespace */ diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 786ee865f147d..095f72c51ee5a 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -143,7 +143,7 @@ static void show_instrumentation_count(const char *qlabel, int which, static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage); -static void show_buffer_usage(ExplainState *es, const BufferUsage *usage); +static void show_buffer_usage(ExplainState *es, const BufferUsage *usage, hyperLogLogState *shared_blks_hit_distinct); static void show_wal_usage(ExplainState *es, const WalUsage *usage); static void show_memory_counters(ExplainState *es, const MemoryContextCounters *mem_counters); @@ -323,8 +323,7 @@ standard_ExplainOneQuery(Query *query, int cursorOptions, PlannedStmt *plan; instr_time planstart, planduration; - BufferUsage bufusage_start, - bufusage; + InstrumentUsage *usage = NULL; MemoryContextCounters mem_counters; MemoryContext planner_ctx = NULL; MemoryContext saved_ctx = NULL; @@ -346,7 +345,7 @@ standard_ExplainOneQuery(Query *query, int cursorOptions, } if (es->buffers) - bufusage_start = pgBufferUsage; + InstrUsageStart(); INSTR_TIME_SET_CURRENT(planstart); /* plan the query */ @@ -361,17 +360,17 @@ standard_ExplainOneQuery(Query *query, int cursorOptions, MemoryContextMemConsumed(planner_ctx, &mem_counters); } - /* calc differences of buffer counters. */ if (es->buffers) { - memset(&bufusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); + /* support summary tracking of utility statements by extensions */ + InstrUsageAccumToPrevious(); + usage = InstrUsageStop(); } /* run it (if needed) and produce output */ ExplainOnePlan(plan, NULL, NULL, -1, into, es, queryString, params, queryEnv, - &planduration, (es->buffers ? &bufusage : NULL), + &planduration, (es->buffers ? &usage->bufusage : NULL), es->memory ? &mem_counters : NULL); } @@ -517,6 +516,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, if (es->buffers) instrument_option |= INSTRUMENT_BUFFERS; + if (es->buffers_distinct) + instrument_option |= INSTRUMENT_SHARED_HIT_DISTINCT; if (es->wal) instrument_option |= INSTRUMENT_WAL; @@ -622,7 +623,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan, } if (bufusage) - show_buffer_usage(es, bufusage); + show_buffer_usage(es, bufusage, NULL); if (mem_counters) show_memory_counters(es, mem_counters); @@ -1043,7 +1044,7 @@ ExplainPrintSerialize(ExplainState *es, SerializeMetrics *metrics) if (es->buffers && peek_buffer_usage(es, &metrics->bufferUsage)) { es->indent++; - show_buffer_usage(es, &metrics->bufferUsage); + show_buffer_usage(es, &metrics->bufferUsage, NULL); es->indent--; } } @@ -1057,7 +1058,7 @@ ExplainPrintSerialize(ExplainState *es, SerializeMetrics *metrics) BYTES_TO_KILOBYTES(metrics->bytesSent), es); ExplainPropertyText("Format", format, es); if (es->buffers) - show_buffer_usage(es, &metrics->bufferUsage); + show_buffer_usage(es, &metrics->bufferUsage, NULL); } ExplainCloseGroup("Serialization", "Serialization", true, es); @@ -2295,9 +2296,9 @@ ExplainNode(PlanState *planstate, List *ancestors, /* Show buffer/WAL usage */ if (es->buffers && planstate->instrument) - show_buffer_usage(es, &planstate->instrument->bufusage); + show_buffer_usage(es, &planstate->instrument->instrusage.bufusage, planstate->instrument->instrusage.shared_blks_hit_distinct); if (es->wal && planstate->instrument) - show_wal_usage(es, &planstate->instrument->walusage); + show_wal_usage(es, &planstate->instrument->instrusage.walusage); /* Prepare per-worker buffer/WAL usage */ if (es->workers_state && (es->buffers || es->wal) && es->verbose) @@ -2314,9 +2315,9 @@ ExplainNode(PlanState *planstate, List *ancestors, ExplainOpenWorker(n, es); if (es->buffers) - show_buffer_usage(es, &instrument->bufusage); + show_buffer_usage(es, &instrument->instrusage.bufusage, NULL); if (es->wal) - show_wal_usage(es, &instrument->walusage); + show_wal_usage(es, &instrument->instrusage.walusage); ExplainCloseWorker(n, es); } } @@ -4095,7 +4096,7 @@ peek_buffer_usage(ExplainState *es, const BufferUsage *usage) * Show buffer usage details. This better be sync with peek_buffer_usage. */ static void -show_buffer_usage(ExplainState *es, const BufferUsage *usage) +show_buffer_usage(ExplainState *es, const BufferUsage *usage, hyperLogLogState *shared_blks_hit_distinct) { if (es->format == EXPLAIN_FORMAT_TEXT) { @@ -4128,6 +4129,9 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage) if (usage->shared_blks_hit > 0) appendStringInfo(es->str, " hit=%" PRId64, usage->shared_blks_hit); + if (shared_blks_hit_distinct) + appendStringInfo(es->str, " hit distinct=%lld", + (long long) estimateHyperLogLog(shared_blks_hit_distinct)); if (usage->shared_blks_read > 0) appendStringInfo(es->str, " read=%" PRId64, usage->shared_blks_read); @@ -4218,6 +4222,9 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage) { ExplainPropertyInteger("Shared Hit Blocks", NULL, usage->shared_blks_hit, es); + if (shared_blks_hit_distinct) + ExplainPropertyInteger("Shared Hit Distinct Blocks", NULL, + estimateHyperLogLog(shared_blks_hit_distinct), es); ExplainPropertyInteger("Shared Read Blocks", NULL, usage->shared_blks_read, es); ExplainPropertyInteger("Shared Dirtied Blocks", NULL, diff --git a/src/backend/commands/explain_dr.c b/src/backend/commands/explain_dr.c index 5715546cf437b..ef213c420e4df 100644 --- a/src/backend/commands/explain_dr.c +++ b/src/backend/commands/explain_dr.c @@ -111,13 +111,12 @@ serializeAnalyzeReceive(TupleTableSlot *slot, DestReceiver *self) int natts = typeinfo->natts; instr_time start, end; - BufferUsage instr_start; /* only measure time, buffers if requested */ if (myState->es->timing) INSTR_TIME_SET_CURRENT(start); if (myState->es->buffers) - instr_start = pgBufferUsage; + InstrUsageStart(); /* Set or update my derived attribute info, if needed */ if (myState->attrinfo != typeinfo || myState->nattrs != natts) @@ -194,9 +193,14 @@ serializeAnalyzeReceive(TupleTableSlot *slot, DestReceiver *self) /* Update buffer metrics */ if (myState->es->buffers) - BufferUsageAccumDiff(&myState->metrics.bufferUsage, - &pgBufferUsage, - &instr_start); + { + InstrumentUsage *usage; + + /* support summary tracking of utility statements by extensions */ + InstrUsageAccumToPrevious(); + usage = InstrUsageStop(); + BufferUsageAdd(&myState->metrics.bufferUsage, &usage->bufusage); + } return true; } diff --git a/src/backend/commands/explain_state.c b/src/backend/commands/explain_state.c index 60d98d63a62e2..5d307ef16729e 100644 --- a/src/backend/commands/explain_state.c +++ b/src/backend/commands/explain_state.c @@ -95,7 +95,13 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate) else if (strcmp(opt->defname, "buffers") == 0) { buffers_set = true; - es->buffers = defGetBoolean(opt); + if (opt->arg != NULL && strcmp(defGetString(opt), "distinct") == 0) + { + es->buffers = true; + es->buffers_distinct = true; + } + else + es->buffers = defGetBoolean(opt); } else if (strcmp(opt->defname, "wal") == 0) es->wal = defGetBoolean(opt); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index d962fe392cd27..75e295828b82c 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -590,6 +590,7 @@ DefineIndex(Oid tableId, Oid root_save_userid; int root_save_sec_context; int root_save_nestlevel; + InstrumentUsage *instrusage = NULL; root_save_nestlevel = NewGUCNestLevel(); @@ -1641,10 +1642,19 @@ DefineIndex(Oid tableId, */ LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock); + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* Tell concurrent index builds to ignore us, if index qualifies */ if (safe_index) set_indexsafe_procflags(); @@ -1708,6 +1718,9 @@ DefineIndex(Oid tableId, /* Perform concurrent build of index */ index_concurrently_build(tableId, indexRelationId); + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + /* we can do away with our snapshot */ PopActiveSnapshot(); @@ -1717,6 +1730,12 @@ DefineIndex(Oid tableId, CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* Tell concurrent index builds to ignore us, if index qualifies */ if (safe_index) set_indexsafe_procflags(); @@ -1763,6 +1782,9 @@ DefineIndex(Oid tableId, */ limitXmin = snapshot->xmin; + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); UnregisterSnapshot(snapshot); @@ -1777,6 +1799,12 @@ DefineIndex(Oid tableId, CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* Tell concurrent index builds to ignore us, if index qualifies */ if (safe_index) set_indexsafe_procflags(); @@ -3408,6 +3436,10 @@ static void ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const ReindexParams *params) { ListCell *l; + InstrumentUsage *instrusage = NULL; + + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); PopActiveSnapshot(); CommitTransactionCommand(); @@ -3420,12 +3452,20 @@ ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const Reind StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); /* check if the relation still exists */ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid))) { + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); PopActiveSnapshot(); CommitTransactionCommand(); continue; @@ -3500,10 +3540,19 @@ ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const Reind PopActiveSnapshot(); } + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + CommitTransactionCommand(); } StartTransactionCommand(); + + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } } @@ -3560,6 +3609,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein PROGRESS_CREATEIDX_ACCESS_METHOD_OID }; int64 progress_vals[4]; + InstrumentUsage *instrusage = NULL; /* * Create a memory context that will survive forced transaction commits we @@ -4030,10 +4080,19 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock); } + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Because we don't take a snapshot in this transaction, there's no need * to set the PROC_IN_SAFE_IC flag here. @@ -4052,6 +4111,10 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_WAIT_1); WaitForLockersMultiple(lockTags, ShareLock, true); + + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + CommitTransactionCommand(); foreach(lc, newIndexIds) @@ -4061,6 +4124,12 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein /* Start new transaction for this index's concurrent build */ StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Check for user-requested abort. This is inside a transaction so as * xact.c does not issue a useless WARNING, and ensures that @@ -4089,12 +4158,21 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein /* Perform concurrent build of new index */ index_concurrently_build(newidx->tableId, newidx->indexId); + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); CommitTransactionCommand(); } StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Because we don't take a snapshot or Xid in this transaction, there's no * need to set the PROC_IN_SAFE_IC flag here. @@ -4111,6 +4189,10 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE, PROGRESS_CREATEIDX_PHASE_WAIT_2); WaitForLockersMultiple(lockTags, ShareLock, true); + + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + CommitTransactionCommand(); foreach(lc, newIndexIds) @@ -4121,6 +4203,12 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Check for user-requested abort. This is inside a transaction so as * xact.c does not issue a useless WARNING, and ensures that @@ -4166,9 +4254,19 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein * transaction, and do our wait before any snapshot has been taken in * it. */ + + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * The index is now valid in the sense that it contains all currently * interesting tuples. But since it might not contain tuples deleted @@ -4182,6 +4280,9 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein PROGRESS_CREATEIDX_PHASE_WAIT_3); WaitForOlderSnapshots(limitXmin, true); + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + CommitTransactionCommand(); } @@ -4198,6 +4299,12 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Because this transaction only does catalog manipulations and doesn't do * any index operations, we can set the PROC_IN_SAFE_IC flag here @@ -4256,10 +4363,19 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein CommandCounterIncrement(); } + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + /* Commit this transaction and make index swaps visible */ CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no * real need for that, because we only acquire an Xid after the wait is @@ -4300,10 +4416,19 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein PopActiveSnapshot(); } + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + /* Commit this transaction to make the updates visible. */ CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no * real need for that, because we only acquire an Xid after the wait is @@ -4345,6 +4470,9 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL); } + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + PopActiveSnapshot(); CommitTransactionCommand(); @@ -4361,6 +4489,12 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein /* Start a new transaction to finish process properly */ StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* Log what we did */ if ((params->options & REINDEXOPT_VERBOSE) != 0) { diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index bf7d2b2309fc2..863ffc07fb409 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -581,8 +581,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, EState *estate = NULL; instr_time planstart; instr_time planduration; - BufferUsage bufusage_start, - bufusage; + InstrumentUsage *usage = NULL; MemoryContextCounters mem_counters; MemoryContext planner_ctx = NULL; MemoryContext saved_ctx = NULL; @@ -599,7 +598,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, } if (es->buffers) - bufusage_start = pgBufferUsage; + InstrUsageStart(); INSTR_TIME_SET_CURRENT(planstart); /* Look it up in the hash table */ @@ -644,12 +643,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, MemoryContextMemConsumed(planner_ctx, &mem_counters); } - /* calc differences of buffer counters. */ if (es->buffers) - { - memset(&bufusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start); - } + usage = InstrUsageStop(); plan_list = cplan->stmt_list; @@ -661,7 +656,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, if (pstmt->commandType != CMD_UTILITY) ExplainOnePlan(pstmt, cplan, entry->plansource, query_index, into, es, query_string, paramLI, pstate->p_queryEnv, - &planduration, (es->buffers ? &bufusage : NULL), + &planduration, (es->buffers ? &usage->bufusage : NULL), es->memory ? &mem_counters : NULL); else ExplainOneUtility(pstmt->utilityStmt, into, es, pstate, paramLI); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 54ad38247aa32..d2e973d824ae7 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -20888,6 +20888,7 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel, LOCKTAG tag; char *parentrelname; char *partrelname; + InstrumentUsage *instrusage = NULL; /* * Add a new constraint to the partition being detached, which @@ -20920,12 +20921,21 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel, table_close(rel, NoLock); tab->rel = NULL; + if (InstrumentUsageActive()) + instrusage = InstrUsageStop(); + /* Make updated catalog entry visible */ PopActiveSnapshot(); CommitTransactionCommand(); StartTransactionCommand(); + if (instrusage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(instrusage); + } + /* * Now wait. This ensures that all queries that were planned * including the partition are finished before we remove the rest of diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 33a33bf6b1cfa..eda0193c385f8 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -501,6 +501,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, const char *stmttype; volatile bool in_outer_xact, use_own_xacts; + InstrumentUsage *initial_usage = NULL; Assert(params != NULL); @@ -608,6 +609,9 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, if (ActiveSnapshotSet()) PopActiveSnapshot(); + if (InstrumentUsageActive()) + initial_usage = InstrUsageStop(); + /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } @@ -700,6 +704,12 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * PostgresMain(). */ StartTransactionCommand(); + + if (initial_usage != NULL) + { + InstrUsageStart(); + InstrUsageAddToCurrent(initial_usage); + } } if ((params->options & VACOPT_VACUUM) && diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 2b9d548cdeb10..dae312ee64f52 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -46,9 +46,8 @@ */ #define PARALLEL_VACUUM_KEY_SHARED 1 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 2 -#define PARALLEL_VACUUM_KEY_BUFFER_USAGE 3 -#define PARALLEL_VACUUM_KEY_WAL_USAGE 4 -#define PARALLEL_VACUUM_KEY_INDEX_STATS 5 +#define PARALLEL_VACUUM_KEY_INSTR_USAGE 3 +#define PARALLEL_VACUUM_KEY_INDEX_STATS 4 /* * Shared information among parallel workers. So this is allocated in the DSM @@ -187,11 +186,8 @@ struct ParallelVacuumState /* Shared dead items space among parallel vacuum workers */ TidStore *dead_items; - /* Points to buffer usage area in DSM */ - BufferUsage *buffer_usage; - - /* Points to WAL usage area in DSM */ - WalUsage *wal_usage; + /* Points to instrument usage area in DSM */ + InstrumentUsage *instrusage; /* * False if the index is totally unsuitable target for all parallel @@ -249,8 +245,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, PVShared *shared; TidStore *dead_items; PVIndStats *indstats; - BufferUsage *buffer_usage; - WalUsage *wal_usage; + InstrumentUsage *instrusage; bool *will_parallel_vacuum; Size est_indstats_len; Size est_shared_len; @@ -303,19 +298,15 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, shm_toc_estimate_keys(&pcxt->estimator, 1); /* - * Estimate space for BufferUsage and WalUsage -- - * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE. - * - * If there are no extensions loaded that care, we could skip this. We - * have no way of knowing whether anyone's looking at pgBufferUsage or - * pgWalUsage, so do it unconditionally. + * Estimate space for InstrumentUsage -- PARALLEL_VACUUM_KEY_INSTR_USAGE, + * if needed. */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); + if (InstrumentUsageActive()) + { + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */ if (debug_query_string) @@ -395,17 +386,15 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, pvs->shared = shared; /* - * Allocate space for each worker's BufferUsage and WalUsage; no need to - * initialize + * Allocate space for each worker's InstrumentUsage; no need to initialize */ - buffer_usage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage); - pvs->buffer_usage = buffer_usage; - wal_usage = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage); - pvs->wal_usage = wal_usage; + if (InstrumentUsageActive()) + { + instrusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_INSTR_USAGE, instrusage); + pvs->instrusage = instrusage; + } /* Store query string for workers */ if (debug_query_string) @@ -737,7 +726,7 @@ parallel_vacuum_process_all_indexes(ParallelVacuumState *pvs, int num_index_scan WaitForParallelWorkersToFinish(pvs->pcxt); for (int i = 0; i < pvs->pcxt->nworkers_launched; i++) - InstrAccumParallelQuery(&pvs->buffer_usage[i], &pvs->wal_usage[i]); + InstrUsageAddToCurrent(&pvs->instrusage[i]); } /* @@ -994,8 +983,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) PVIndStats *indstats; PVShared *shared; TidStore *dead_items; - BufferUsage *buffer_usage; - WalUsage *wal_usage; + InstrumentUsage *shm_usage; int nindexes; char *sharedquery; ErrorContextCallback errcallback; @@ -1083,16 +1071,20 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) error_context_stack = &errcallback; /* Prepare to track buffer usage during parallel execution */ - InstrStartParallelQuery(); + shm_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_INSTR_USAGE, true); + if (shm_usage) + InstrUsageStart(); /* Process indexes to perform vacuum/cleanup */ parallel_vacuum_process_safe_indexes(&pvs); /* Report buffer/WAL usage during parallel execution */ - buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false); - wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], - &wal_usage[ParallelWorkerNumber]); + if (shm_usage) + { + InstrumentUsage *usage = InstrUsageStop(); + + memcpy(&shm_usage[ParallelWorkerNumber], usage, sizeof(InstrumentUsage)); + } /* Report any remaining cost-based vacuum delay time */ if (track_cost_delay_timing) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 7230f968101a6..52e07e89e14b2 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -400,7 +400,7 @@ standard_ExecutorRun(QueryDesc *queryDesc, /* Allow instrumentation of Executor overall runtime */ if (queryDesc->totaltime) - InstrStartNode(queryDesc->totaltime); + InstrStart(queryDesc->totaltime, true); /* * extract information from the query descriptor and the query feature. @@ -452,7 +452,11 @@ standard_ExecutorRun(QueryDesc *queryDesc, dest->rShutdown(dest); if (queryDesc->totaltime) - InstrStopNode(queryDesc->totaltime, estate->es_processed); + { + /* allow a potential calling EXPLAIN statement to capture the usage */ + InstrUsageAccumToPrevious(); + InstrStop(queryDesc->totaltime, estate->es_processed, true); + } MemoryContextSwitchTo(oldcontext); } @@ -505,7 +509,7 @@ standard_ExecutorFinish(QueryDesc *queryDesc) /* Allow instrumentation of Executor overall runtime */ if (queryDesc->totaltime) - InstrStartNode(queryDesc->totaltime); + InstrStart(queryDesc->totaltime, true); /* Run ModifyTable nodes to completion */ ExecPostprocessPlan(estate); @@ -515,7 +519,11 @@ standard_ExecutorFinish(QueryDesc *queryDesc) AfterTriggerEndQuery(estate); if (queryDesc->totaltime) - InstrStopNode(queryDesc->totaltime, 0); + { + /* allow a potential calling EXPLAIN statement to capture the usage */ + InstrUsageAccumToPrevious(); + InstrStop(queryDesc->totaltime, 0, true); + } MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 39c990ae638d5..e3780db9ae64f 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -58,13 +58,12 @@ #define PARALLEL_KEY_EXECUTOR_FIXED UINT64CONST(0xE000000000000001) #define PARALLEL_KEY_PLANNEDSTMT UINT64CONST(0xE000000000000002) #define PARALLEL_KEY_PARAMLISTINFO UINT64CONST(0xE000000000000003) -#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xE000000000000004) +#define PARALLEL_KEY_INSTR_USAGE UINT64CONST(0xE000000000000004) #define PARALLEL_KEY_TUPLE_QUEUE UINT64CONST(0xE000000000000005) #define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xE000000000000006) #define PARALLEL_KEY_DSA UINT64CONST(0xE000000000000007) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008) #define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009) -#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xE00000000000000A) #define PARALLEL_TUPLE_QUEUE_SIZE 65536 @@ -608,8 +607,6 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, char *pstmt_data; char *pstmt_space; char *paramlistinfo_space; - BufferUsage *bufusage_space; - WalUsage *walusage_space; SharedExecutorInstrumentation *instrumentation = NULL; SharedJitInstrumentation *jit_instrumentation = NULL; int pstmt_len; @@ -673,22 +670,14 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_estimate_keys(&pcxt->estimator, 1); /* - * Estimate space for BufferUsage. - * - * If EXPLAIN is not in use and there are no extensions loaded that care, - * we could skip this. But we have no way of knowing whether anyone's - * looking at pgBufferUsage, so do it unconditionally. - */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); - - /* - * Same thing for WalUsage. + * Estimate space for InstrumentUsage, if needed. */ - shm_toc_estimate_chunk(&pcxt->estimator, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_estimate_keys(&pcxt->estimator, 1); + if (InstrumentUsageActive()) + { + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } /* Estimate space for tuple queues. */ shm_toc_estimate_chunk(&pcxt->estimator, @@ -773,17 +762,18 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARAMLISTINFO, paramlistinfo_space); SerializeParamList(estate->es_param_list_info, ¶mlistinfo_space); - /* Allocate space for each worker's BufferUsage; no need to initialize. */ - bufusage_space = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(BufferUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufusage_space); - pei->buffer_usage = bufusage_space; + /* + * Allocate space for each worker's InstrumentUsage; no need to + * initialize. + */ + if (InstrumentUsageActive()) + { + InstrumentUsage *instrusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(InstrumentUsage), pcxt->nworkers)); - /* Same for WalUsage. */ - walusage_space = shm_toc_allocate(pcxt->toc, - mul_size(sizeof(WalUsage), pcxt->nworkers)); - shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage_space); - pei->wal_usage = walusage_space; + shm_toc_insert(pcxt->toc, PARALLEL_KEY_INSTR_USAGE, instrusage); + pei->instrusage = instrusage; + } /* Set up the tuple queues that the workers will write into. */ pei->tqueue = ExecParallelSetupTupleQueues(pcxt, false); @@ -1193,8 +1183,11 @@ ExecParallelFinish(ParallelExecutorInfo *pei) * Next, accumulate buffer/WAL usage. (This must wait for the workers to * finish, or we might get incomplete data.) */ - for (i = 0; i < nworkers; i++) - InstrAccumParallelQuery(&pei->buffer_usage[i], &pei->wal_usage[i]); + if (InstrumentUsageActive()) + { + for (i = 0; i < nworkers; i++) + InstrUsageAddToCurrent(&pei->instrusage[i]); + } pei->finished = true; } @@ -1436,8 +1429,7 @@ void ParallelQueryMain(dsm_segment *seg, shm_toc *toc) { FixedParallelExecutorState *fpes; - BufferUsage *buffer_usage; - WalUsage *wal_usage; + InstrumentUsage *shm_usage; DestReceiver *receiver; QueryDesc *queryDesc; SharedExecutorInstrumentation *instrumentation; @@ -1497,7 +1489,9 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc) * leader, which also doesn't count buffer accesses and WAL activity that * occur during executor startup. */ - InstrStartParallelQuery(); + shm_usage = shm_toc_lookup(toc, PARALLEL_KEY_INSTR_USAGE, true); + if (shm_usage) + InstrUsageStart(); /* * Run the plan. If we specified a tuple bound, be careful not to demand @@ -1511,10 +1505,12 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc) ExecutorFinish(queryDesc); /* Report buffer/WAL usage during parallel execution. */ - buffer_usage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); - wal_usage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); - InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], - &wal_usage[ParallelWorkerNumber]); + if (shm_usage) + { + InstrumentUsage *usage = InstrUsageStop(); + + memcpy(&shm_usage[ParallelWorkerNumber], usage, sizeof(InstrumentUsage)); + } /* Report instrumentation data if any instrumentation options are set. */ if (instrumentation != NULL) diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index f5f9cfbeeadad..b6326fb9148c9 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -823,8 +823,21 @@ ExecShutdownNode_walker(PlanState *node, void *context) /* Stop the node if we started it above, reporting 0 tuples. */ if (node->instrument && node->instrument->running) + { InstrStopNode(node->instrument, 0); + /* + * Accumulate usage stats into active one (if any) + * + * This ensures that if we tracked buffer/WAL usage for EXPLAIN ANALYZE, a + * potential extension interested in summary data can also get it. + * + * This is also what's responsible for propagating the stats + * to the parent node (which is where InstrStopNode returned us to). + */ + InstrUsageAddToCurrent(&node->instrument->instrusage); + } + return false; } diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 56e635f47000d..8c1548470e43e 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -16,15 +16,42 @@ #include #include "executor/instrument.h" +#include "tcop/pquery.h" +#include "utils/memutils.h" -BufferUsage pgBufferUsage; -static BufferUsage save_pgBufferUsage; WalUsage pgWalUsage; -static WalUsage save_pgWalUsage; +InstrumentUsage *pgInstrumentUsageStack = NULL; -static void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); -static void WalUsageAdd(WalUsage *dst, WalUsage *add); +static void WalUsageAdd(WalUsage *dst, const WalUsage *add); +/* + * To make sure we don't leak, use ResourceOwner mechanism to reset stack on abort. + */ +static void PushInstrumentUsage(InstrumentUsage * usage); +static void PushInstrumentUsageWithOwner(InstrumentUsage * usage); +static void PopInstrumentUsage(InstrumentUsage * usage); +static void PopInstrumentUsageWithOwner(InstrumentUsageResource * usage); +static void ResOwnerReleaseInstrumentUsage(Datum res); + +static const ResourceOwnerDesc instrument_usage_resowner_desc = +{ + .name = "instrument usage scope", + .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS, + .release_priority = RELEASE_PRIO_FIRST, + .ReleaseResource = ResOwnerReleaseInstrumentUsage, + .DebugPrint = NULL, /* default message is fine */ +}; + +static inline void +ResourceOwnerRememberInstrumentUsage(ResourceOwner owner, InstrumentUsageResource * scope) +{ + ResourceOwnerRemember(owner, PointerGetDatum(scope), &instrument_usage_resowner_desc); +} +static inline void +ResourceOwnerForgetInstrumentUsage(ResourceOwner owner, InstrumentUsageResource * scope) +{ + ResourceOwnerForget(owner, PointerGetDatum(scope), &instrument_usage_resowner_desc); +} /* Allocate new instrumentation structure(s) */ Instrumentation * @@ -34,11 +61,12 @@ InstrAlloc(int n, int instrument_options, bool async_mode) /* initialize all fields to zeroes, then modify as needed */ instr = palloc0(n * sizeof(Instrumentation)); - if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL)) + if (instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_TIMER | INSTRUMENT_WAL | INSTRUMENT_SHARED_HIT_DISTINCT)) { bool need_buffers = (instrument_options & INSTRUMENT_BUFFERS) != 0; bool need_wal = (instrument_options & INSTRUMENT_WAL) != 0; bool need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; + bool need_shared_hit_distinct = (instrument_options & INSTRUMENT_SHARED_HIT_DISTINCT) != 0; int i; for (i = 0; i < n; i++) @@ -46,6 +74,7 @@ InstrAlloc(int n, int instrument_options, bool async_mode) instr[i].need_bufusage = need_buffers; instr[i].need_walusage = need_wal; instr[i].need_timer = need_timer; + instr[i].need_shared_hit_distinct = need_shared_hit_distinct; instr[i].async_mode = async_mode; } } @@ -61,27 +90,40 @@ InstrInit(Instrumentation *instr, int instrument_options) instr->need_bufusage = (instrument_options & INSTRUMENT_BUFFERS) != 0; instr->need_walusage = (instrument_options & INSTRUMENT_WAL) != 0; instr->need_timer = (instrument_options & INSTRUMENT_TIMER) != 0; + instr->need_shared_hit_distinct = (instrument_options & INSTRUMENT_SHARED_HIT_DISTINCT) != 0; } -/* Entry to a plan node */ void -InstrStartNode(Instrumentation *instr) +InstrStart(Instrumentation *instr, bool use_resowner) { if (instr->need_timer && !INSTR_TIME_SET_CURRENT_LAZY(instr->starttime)) elog(ERROR, "InstrStartNode called twice in a row"); - /* save buffer usage totals at node entry, if needed */ - if (instr->need_bufusage) - instr->bufusage_start = pgBufferUsage; + if (instr->need_bufusage || instr->need_walusage || instr->need_shared_hit_distinct) + { + if (use_resowner) + PushInstrumentUsageWithOwner(&instr->instrusage); + else + PushInstrumentUsage(&instr->instrusage); + } - if (instr->need_walusage) - instr->walusage_start = pgWalUsage; + if (instr->need_shared_hit_distinct && !instr->instrusage.shared_blks_hit_distinct) + { + instr->instrusage.shared_blks_hit_distinct = palloc0(sizeof(hyperLogLogState)); + initHyperLogLog(instr->instrusage.shared_blks_hit_distinct, 16); + } } -/* Exit from a plan node */ +/* Entry to a plan node */ void -InstrStopNode(Instrumentation *instr, double nTuples) +InstrStartNode(Instrumentation *instr) +{ + InstrStart(instr, false); +} + +void +InstrStop(Instrumentation *instr, double nTuples, bool use_resowner) { double save_tuplecount = instr->tuplecount; instr_time endtime; @@ -101,14 +143,13 @@ InstrStopNode(Instrumentation *instr, double nTuples) INSTR_TIME_SET_ZERO(instr->starttime); } - /* Add delta of buffer usage since entry to node's totals */ - if (instr->need_bufusage) - BufferUsageAccumDiff(&instr->bufusage, - &pgBufferUsage, &instr->bufusage_start); - - if (instr->need_walusage) - WalUsageAccumDiff(&instr->walusage, - &pgWalUsage, &instr->walusage_start); + if (instr->need_bufusage || instr->need_walusage || instr->need_shared_hit_distinct) + { + if (use_resowner) + PopInstrumentUsageWithOwner(instr->instrusage.res); + else + PopInstrumentUsage(&instr->instrusage); + } /* Is this the first tuple of this cycle? */ if (!instr->running) @@ -127,6 +168,13 @@ InstrStopNode(Instrumentation *instr, double nTuples) } } +/* Exit from a plan node */ +void +InstrStopNode(Instrumentation *instr, double nTuples) +{ + InstrStop(instr, nTuples, false); +} + /* Update tuple count */ void InstrUpdateTupleCount(Instrumentation *instr, double nTuples) @@ -187,42 +235,115 @@ InstrAggNode(Instrumentation *dst, Instrumentation *add) dst->nfiltered1 += add->nfiltered1; dst->nfiltered2 += add->nfiltered2; - /* Add delta of buffer usage since entry to node's totals */ - if (dst->need_bufusage) - BufferUsageAdd(&dst->bufusage, &add->bufusage); + /* Add delta of buffer/WAL usage since entry to node's totals */ + if (dst->need_bufusage || dst->need_walusage) + InstrUsageAdd(&dst->instrusage, &add->instrusage); +} + +static void +PushInstrumentUsage(InstrumentUsage * usage) +{ + usage->previous = pgInstrumentUsageStack; + pgInstrumentUsageStack = usage; +} + +static void +PushInstrumentUsageWithOwner(InstrumentUsage * usage) +{ + InstrumentUsageResource *usageRes = MemoryContextAllocZero(TopMemoryContext, sizeof(InstrumentUsageResource)); + ResourceOwner owner = CurrentResourceOwner; + + Assert(owner != NULL); + + usageRes->previous = pgInstrumentUsageStack; + usageRes->owner = owner; + + ResourceOwnerEnlarge(owner); + ResourceOwnerRememberInstrumentUsage(owner, usageRes); + + usage->res = usageRes; + PushInstrumentUsage(usage); +} - if (dst->need_walusage) - WalUsageAdd(&dst->walusage, &add->walusage); +static void +PopInstrumentUsage(InstrumentUsage * usage) +{ + pgInstrumentUsageStack = usage->previous; } -/* note current values during parallel executor startup */ +static void +PopInstrumentUsageWithOwner(InstrumentUsageResource * usageRes) +{ + pgInstrumentUsageStack = usageRes->previous; + Assert(usageRes != NULL); + if (usageRes->owner != NULL) + ResourceOwnerForgetInstrumentUsage(usageRes->owner, usageRes); + pfree(usageRes); +} + +static void +ResOwnerReleaseInstrumentUsage(Datum res) +{ + InstrumentUsageResource *usageRes = (InstrumentUsageResource *) DatumGetPointer(res); + + usageRes->owner = NULL; + PopInstrumentUsageWithOwner(usageRes); +} + +/* Start buffer/WAL usage measurement */ void -InstrStartParallelQuery(void) +InstrUsageStart() { - save_pgBufferUsage = pgBufferUsage; - save_pgWalUsage = pgWalUsage; + InstrumentUsage *usage = MemoryContextAllocZero(TopMemoryContext, sizeof(InstrumentUsage)); + + PushInstrumentUsageWithOwner(usage); } -/* report usage after parallel executor shutdown */ +/* + * Call this before calling stop to add the usage metrics to the previous item + * on the stack (if it exists) + */ void -InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage) +InstrUsageAccumToPrevious() { - memset(bufusage, 0, sizeof(BufferUsage)); - BufferUsageAccumDiff(bufusage, &pgBufferUsage, &save_pgBufferUsage); - memset(walusage, 0, sizeof(WalUsage)); - WalUsageAccumDiff(walusage, &pgWalUsage, &save_pgWalUsage); + if (!pgInstrumentUsageStack || !pgInstrumentUsageStack->previous) + return; + + InstrUsageAdd(pgInstrumentUsageStack->previous, pgInstrumentUsageStack); +} + +/* Stop usage measurement and return results */ +InstrumentUsage * +InstrUsageStop() +{ + InstrumentUsage *result = pgInstrumentUsageStack; + + Assert(result != NULL); + PopInstrumentUsageWithOwner(result->res); + + /* Avoid returning references that were freed */ + result->res = NULL; + result->previous = NULL; + + return result; +} + +void +InstrUsageAdd(InstrumentUsage * dst, const InstrumentUsage * add) +{ + BufferUsageAdd(&dst->bufusage, &add->bufusage); + WalUsageAdd(&dst->walusage, &add->walusage); } -/* accumulate work done by workers in leader's stats */ void -InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage) +InstrUsageAddToCurrent(InstrumentUsage * instrusage) { - BufferUsageAdd(&pgBufferUsage, bufusage); - WalUsageAdd(&pgWalUsage, walusage); + if (pgInstrumentUsageStack != NULL) + InstrUsageAdd(pgInstrumentUsageStack, instrusage); } /* dst += add */ -static void +void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add) { dst->shared_blks_hit += add->shared_blks_hit; @@ -243,39 +364,9 @@ BufferUsageAdd(BufferUsage *dst, const BufferUsage *add) INSTR_TIME_ADD(dst->temp_blk_write_time, add->temp_blk_write_time); } -/* dst += add - sub */ -void -BufferUsageAccumDiff(BufferUsage *dst, - const BufferUsage *add, - const BufferUsage *sub) -{ - dst->shared_blks_hit += add->shared_blks_hit - sub->shared_blks_hit; - dst->shared_blks_read += add->shared_blks_read - sub->shared_blks_read; - dst->shared_blks_dirtied += add->shared_blks_dirtied - sub->shared_blks_dirtied; - dst->shared_blks_written += add->shared_blks_written - sub->shared_blks_written; - dst->local_blks_hit += add->local_blks_hit - sub->local_blks_hit; - dst->local_blks_read += add->local_blks_read - sub->local_blks_read; - dst->local_blks_dirtied += add->local_blks_dirtied - sub->local_blks_dirtied; - dst->local_blks_written += add->local_blks_written - sub->local_blks_written; - dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read; - dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written; - INSTR_TIME_ACCUM_DIFF(dst->shared_blk_read_time, - add->shared_blk_read_time, sub->shared_blk_read_time); - INSTR_TIME_ACCUM_DIFF(dst->shared_blk_write_time, - add->shared_blk_write_time, sub->shared_blk_write_time); - INSTR_TIME_ACCUM_DIFF(dst->local_blk_read_time, - add->local_blk_read_time, sub->local_blk_read_time); - INSTR_TIME_ACCUM_DIFF(dst->local_blk_write_time, - add->local_blk_write_time, sub->local_blk_write_time); - INSTR_TIME_ACCUM_DIFF(dst->temp_blk_read_time, - add->temp_blk_read_time, sub->temp_blk_read_time); - INSTR_TIME_ACCUM_DIFF(dst->temp_blk_write_time, - add->temp_blk_write_time, sub->temp_blk_write_time); -} - /* helper functions for WAL usage accumulation */ static void -WalUsageAdd(WalUsage *dst, WalUsage *add) +WalUsageAdd(WalUsage *dst, const WalUsage *add) { dst->wal_bytes += add->wal_bytes; dst->wal_records += add->wal_records; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0b5652071d119..bef591bf62142 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -1830,6 +1830,7 @@ opt_boolean_or_string: TRUE_P { $$ = "true"; } | FALSE_P { $$ = "false"; } | ON { $$ = "on"; } + | DISTINCT { $$ = "distinct"; } /* * OFF is also accepted as a boolean value, but is handled by * the NonReservedWord rule. The action for booleans and strings diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index f93131a645ea8..cfd30b2c620e1 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -705,7 +705,7 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN { PinLocalBuffer(bufHdr, true); - pgBufferUsage.local_blks_hit++; + INSTR_BUFUSAGE_INCR(local_blks_hit); return true; } @@ -737,7 +737,8 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN else PinBuffer_Locked(bufHdr); /* pin for first time */ - pgBufferUsage.shared_blks_hit++; + INSTR_BUFUSAGE_INCR(shared_blks_hit); + INSTR_BUFUSAGE_COUNT_SHARED_HIT(bufHdr->buf_id); return true; } @@ -1147,14 +1148,14 @@ PinBufferForBlock(Relation rel, { bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, foundPtr); if (*foundPtr) - pgBufferUsage.local_blks_hit++; + INSTR_BUFUSAGE_INCR(local_blks_hit); } else { bufHdr = BufferAlloc(smgr, persistence, forkNum, blockNum, strategy, foundPtr, io_context); if (*foundPtr) - pgBufferUsage.shared_blks_hit++; + INSTR_BUFUSAGE_COUNT_SHARED_HIT(bufHdr->buf_id); } if (rel) { @@ -1893,9 +1894,9 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress) true); if (persistence == RELPERSISTENCE_TEMP) - pgBufferUsage.local_blks_hit += 1; + INSTR_BUFUSAGE_INCR(local_blks_hit); else - pgBufferUsage.shared_blks_hit += 1; + INSTR_BUFUSAGE_COUNT_SHARED_HIT(blocknum); if (operation->rel) pgstat_count_buffer_hit(operation->rel); @@ -1963,9 +1964,9 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress) io_start, 1, io_buffers_len * BLCKSZ); if (persistence == RELPERSISTENCE_TEMP) - pgBufferUsage.local_blks_read += io_buffers_len; + INSTR_BUFUSAGE_ADD(local_blks_read, io_buffers_len); else - pgBufferUsage.shared_blks_read += io_buffers_len; + INSTR_BUFUSAGE_ADD(shared_blks_read, io_buffers_len); /* * Track vacuum cost when issuing IO, not after waiting for it. @@ -2873,7 +2874,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, TerminateBufferIO(buf_hdr, false, BM_VALID, true, false); } - pgBufferUsage.shared_blks_written += extend_by; + INSTR_BUFUSAGE_ADD(shared_blks_written, extend_by); *extended_by = extend_by; @@ -2991,7 +2992,7 @@ MarkBufferDirty(Buffer buffer) */ if (!(old_buf_state & BM_DIRTY)) { - pgBufferUsage.shared_blks_dirtied++; + INSTR_BUFUSAGE_INCR(shared_blks_dirtied); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; } @@ -4399,7 +4400,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, IOOP_WRITE, io_start, 1, BLCKSZ); - pgBufferUsage.shared_blks_written++; + INSTR_BUFUSAGE_INCR(shared_blks_written); /* * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and @@ -5557,7 +5558,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) if (dirtied) { - pgBufferUsage.shared_blks_dirtied++; + INSTR_BUFUSAGE_INCR(shared_blks_dirtied); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 63101d56a074b..87a050cbbc39d 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -216,7 +216,7 @@ FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln) /* Mark not-dirty */ TerminateLocalBufferIO(bufHdr, true, 0, false); - pgBufferUsage.local_blks_written++; + INSTR_BUFUSAGE_INCR(local_blks_written); } static Buffer @@ -476,7 +476,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr, *extended_by = extend_by; - pgBufferUsage.local_blks_written += extend_by; + INSTR_BUFUSAGE_ADD(local_blks_written, extend_by); return first_block; } @@ -507,7 +507,7 @@ MarkLocalBufferDirty(Buffer buffer) buf_state = pg_atomic_read_u32(&bufHdr->state); if (!(buf_state & BM_DIRTY)) - pgBufferUsage.local_blks_dirtied++; + INSTR_BUFUSAGE_INCR(local_blks_dirtied); buf_state |= BM_DIRTY; diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 366d70d38a195..9d39df998cb33 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -474,13 +474,13 @@ BufFileLoadBuffer(BufFile *file) if (track_io_timing) { INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start); + INSTR_BUFUSAGE_TIME_ACCUM_DIFF(temp_blk_read_time, io_time, io_start); } /* we choose not to advance curOffset here */ if (file->nbytes > 0) - pgBufferUsage.temp_blks_read++; + INSTR_BUFUSAGE_INCR(temp_blks_read); } /* @@ -548,13 +548,13 @@ BufFileDumpBuffer(BufFile *file) if (track_io_timing) { INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start); + INSTR_BUFUSAGE_TIME_ACCUM_DIFF(temp_blk_write_time, io_time, io_start); } file->curOffset += bytestowrite; wpos += bytestowrite; - pgBufferUsage.temp_blks_written++; + INSTR_BUFUSAGE_INCR(temp_blks_written); } file->dirty = false; diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index d8d26379a571e..f466eb809d5d4 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -134,17 +134,17 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, { pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); if (io_object == IOOBJECT_RELATION) - INSTR_TIME_ADD(pgBufferUsage.shared_blk_write_time, io_time); + INSTR_BUFUSAGE_TIME_ADD(shared_blk_write_time, io_time); else if (io_object == IOOBJECT_TEMP_RELATION) - INSTR_TIME_ADD(pgBufferUsage.local_blk_write_time, io_time); + INSTR_BUFUSAGE_TIME_ADD(local_blk_write_time, io_time); } else if (io_op == IOOP_READ) { pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); if (io_object == IOOBJECT_RELATION) - INSTR_TIME_ADD(pgBufferUsage.shared_blk_read_time, io_time); + INSTR_BUFUSAGE_TIME_ADD(shared_blk_read_time, io_time); else if (io_object == IOOBJECT_TEMP_RELATION) - INSTR_TIME_ADD(pgBufferUsage.local_blk_read_time, io_time); + INSTR_BUFUSAGE_TIME_ADD(local_blk_read_time, io_time); } } diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d313099c027f0..5d19999a5d418 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -156,7 +156,6 @@ extern PGDLLIMPORT bool XLOG_DEBUG; #define XLOG_INCLUDE_ORIGIN 0x01 /* include the replication origin */ #define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */ - /* Checkpoint statistics */ typedef struct CheckpointStatsData { diff --git a/src/include/commands/explain_state.h b/src/include/commands/explain_state.h index 32728f5d1a175..9bff759f86606 100644 --- a/src/include/commands/explain_state.h +++ b/src/include/commands/explain_state.h @@ -49,6 +49,8 @@ typedef struct ExplainState bool analyze; /* print actual times */ bool costs; /* print estimated costs */ bool buffers; /* print buffer usage */ + bool buffers_distinct; /* print estimated distinct shared buffer + * usage */ bool wal; /* print WAL usage */ bool timing; /* print detailed node timing */ bool summary; /* print total planning and execution timing */ diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h index 5e7106c397a26..90881188729e3 100644 --- a/src/include/executor/execParallel.h +++ b/src/include/executor/execParallel.h @@ -25,8 +25,7 @@ typedef struct ParallelExecutorInfo { PlanState *planstate; /* plan subtree we're running in parallel */ ParallelContext *pcxt; /* parallel context we're using */ - BufferUsage *buffer_usage; /* points to bufusage area in DSM */ - WalUsage *wal_usage; /* walusage area in DSM */ + InstrumentUsage *instrusage; /* points to instrument usage area in DSM */ SharedExecutorInstrumentation *instrumentation; /* optional */ struct SharedJitInstrumentation *jit_instrumentation; /* optional */ dsa_area *area; /* points to DSA area in DSM */ diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 03653ab6c6cde..b3ce3bcfb9ba4 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -13,14 +13,12 @@ #ifndef INSTRUMENT_H #define INSTRUMENT_H +#include "common/hashfn.h" +#include "lib/hyperloglog.h" #include "portability/instr_time.h" +#include "utils/resowner.h" -/* - * BufferUsage and WalUsage counters keep being incremented infinitely, - * i.e., must never be reset to zero, so that we can calculate how much - * the counters are incremented in an arbitrary period. - */ typedef struct BufferUsage { int64 shared_blks_hit; /* # of shared buffer hits */ @@ -63,15 +61,37 @@ typedef enum InstrumentOption INSTRUMENT_BUFFERS = 1 << 1, /* needs buffer usage */ INSTRUMENT_ROWS = 1 << 2, /* needs row count */ INSTRUMENT_WAL = 1 << 3, /* needs WAL usage */ + INSTRUMENT_SHARED_HIT_DISTINCT = 1 << 4, /* needs estimated distinct + * shared hit buffer count */ INSTRUMENT_ALL = PG_INT32_MAX } InstrumentOption; +typedef struct InstrumentUsageResource +{ + struct InstrumentUsage *previous; + + ResourceOwner owner; +} InstrumentUsageResource; + +typedef struct InstrumentUsage +{ + struct InstrumentUsage *previous; + BufferUsage bufusage; + WalUsage walusage; + hyperLogLogState *shared_blks_hit_distinct; + + InstrumentUsageResource *res; +} InstrumentUsage; + typedef struct Instrumentation { /* Parameters set at node creation: */ bool need_timer; /* true if we need timer data */ bool need_bufusage; /* true if we need buffer usage data */ bool need_walusage; /* true if we need WAL usage data */ + bool need_shared_hit_distinct; /* true if we need estimated + * distinct shared hit buffer + * count */ bool async_mode; /* true if node is in async mode */ /* Info about current plan cycle: */ bool running; /* true if we've completed first tuple */ @@ -79,8 +99,6 @@ typedef struct Instrumentation instr_time counter; /* accumulated runtime for this node */ double firsttuple; /* time for first tuple of this cycle */ double tuplecount; /* # of tuples emitted so far this cycle */ - BufferUsage bufusage_start; /* buffer usage at start */ - WalUsage walusage_start; /* WAL usage at start */ /* Accumulated statistics across all completed cycles: */ double startup; /* total startup time (in seconds) */ double total; /* total time (in seconds) */ @@ -89,8 +107,7 @@ typedef struct Instrumentation double nloops; /* # of run cycles for this node */ double nfiltered1; /* # of tuples removed by scanqual or joinqual */ double nfiltered2; /* # of tuples removed by "other" quals */ - BufferUsage bufusage; /* total buffer usage */ - WalUsage walusage; /* total WAL usage */ + InstrumentUsage instrusage; /* total buffer/WAL usage */ } Instrumentation; typedef struct WorkerInstrumentation @@ -99,23 +116,67 @@ typedef struct WorkerInstrumentation Instrumentation instrument[FLEXIBLE_ARRAY_MEMBER]; } WorkerInstrumentation; -extern PGDLLIMPORT BufferUsage pgBufferUsage; extern PGDLLIMPORT WalUsage pgWalUsage; +extern PGDLLIMPORT InstrumentUsage * pgInstrumentUsageStack; extern Instrumentation *InstrAlloc(int n, int instrument_options, bool async_mode); extern void InstrInit(Instrumentation *instr, int instrument_options); +extern void InstrStart(Instrumentation *instr, bool use_resowner); extern void InstrStartNode(Instrumentation *instr); +extern void InstrStop(Instrumentation *instr, double nTuples, bool use_resowner); extern void InstrStopNode(Instrumentation *instr, double nTuples); extern void InstrUpdateTupleCount(Instrumentation *instr, double nTuples); extern void InstrEndLoop(Instrumentation *instr); extern void InstrAggNode(Instrumentation *dst, Instrumentation *add); -extern void InstrStartParallelQuery(void); -extern void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage); -extern void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage); -extern void BufferUsageAccumDiff(BufferUsage *dst, - const BufferUsage *add, const BufferUsage *sub); extern void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub); +static inline bool +InstrumentUsageActive(void) +{ + return pgInstrumentUsageStack != NULL; +} + +#define INSTR_BUFUSAGE_INCR(fld) do { \ + if (pgInstrumentUsageStack) \ + pgInstrumentUsageStack->bufusage.fld++; \ + } while(0) +#define INSTR_BUFUSAGE_ADD(fld,val) do { \ + if (pgInstrumentUsageStack) \ + pgInstrumentUsageStack->bufusage.fld += val; \ + } while(0) +#define INSTR_BUFUSAGE_TIME_ADD(fld,val) do { \ + if (pgInstrumentUsageStack) \ + INSTR_TIME_ADD(pgInstrumentUsageStack->bufusage.fld, val); \ + } while (0) +#define INSTR_BUFUSAGE_TIME_ACCUM_DIFF(fld,endval,startval) do { \ + if (pgInstrumentUsageStack) \ + INSTR_TIME_ACCUM_DIFF(pgInstrumentUsageStack->bufusage.fld, endval, startval); \ + } while (0) + +#define INSTR_BUFUSAGE_COUNT_SHARED_HIT(bufId) do { \ + if (pgInstrumentUsageStack) { \ + pgInstrumentUsageStack->bufusage.shared_blks_hit++; \ + if (pgInstrumentUsageStack->shared_blks_hit_distinct) \ + addHyperLogLog(pgInstrumentUsageStack->shared_blks_hit_distinct, DatumGetUInt32(hash_any((unsigned char *) &bufId, sizeof(int)))); \ + } \ + } while(0) + +#define INSTR_WALUSAGE_INCR(fld) do { \ + if (pgInstrumentUsageStack) \ + pgInstrumentUsageStack->walusage.fld++; \ + } while(0) +#define INSTR_WALUSAGE_ADD(fld,val) do { \ + if (pgInstrumentUsageStack) \ + pgInstrumentUsageStack->walusage.fld += val; \ + } while(0) + +extern void InstrUsageStart(void); +extern InstrumentUsage * InstrUsageStop(void); +extern void InstrUsageAccumToPrevious(void); +extern void InstrUsageAdd(InstrumentUsage * dst, const InstrumentUsage * add); +extern void InstrUsageAddToCurrent(InstrumentUsage * instrusage); +extern void BufferUsageAdd(BufferUsage *dst, const BufferUsage *add); + #endif /* INSTRUMENT_H */