From 6123fe2d5e0e860fc3da74422d094927ecd6191e Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik
Date: Thu, 5 Jun 2025 14:23:39 +0300
Subject: [PATCH] Add query execution time histogram (#10050)

## Problem

It would be useful to understand what kinds of queries our clients execute.
One of the most important characteristics of a query is its execution time:
at the very least, it lets us distinguish OLAP from OLTP queries. Monitoring
query execution time can also help detect performance problems (assuming the
workload is more or less stable).

## Summary of changes

Add a query execution time histogram.

---------

Co-authored-by: Konstantin Knizhnik
---
 pgxn/neon/neon.c                            |  76 ++++++++++++
 pgxn/neon/neon_perf_counters.c              | 122 ++++++++++++++++----
 pgxn/neon/neon_perf_counters.h              |  28 +++++
 test_runner/regress/test_compute_metrics.py |   5 +
 4 files changed, 208 insertions(+), 23 deletions(-)

diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c
index a6a7021756..5b4ced7cf0 100644
--- a/pgxn/neon/neon.c
+++ b/pgxn/neon/neon.c
@@ -16,6 +16,7 @@
 #if PG_MAJORVERSION_NUM >= 15
 #include "access/xlogrecovery.h"
 #endif
+#include "executor/instrument.h"
 #include "replication/logical.h"
 #include "replication/logicallauncher.h"
 #include "replication/slot.h"
@@ -33,6 +34,7 @@
 #include "file_cache.h"
 #include "neon.h"
 #include "neon_lwlsncache.h"
+#include "neon_perf_counters.h"
 #include "control_plane_connector.h"
 #include "logical_replication_monitor.h"
 #include "unstable_extensions.h"
@@ -46,6 +48,13 @@ void _PG_init(void);
 
 static int running_xacts_overflow_policy;
 
+static bool monitor_query_exec_time = false;
+
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
+
+static void neon_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void neon_ExecutorEnd(QueryDesc *queryDesc);
 
 #if PG_MAJORVERSION_NUM >= 16
 static shmem_startup_hook_type prev_shmem_startup_hook;
@@ -470,6 +479,16 @@ _PG_init(void)
 			0,
 			NULL, NULL, NULL);
 
+	DefineCustomBoolVariable(
+			"neon.monitor_query_exec_time",
+			"Collect information about query execution time",
+			NULL,
+			&monitor_query_exec_time,
+			false,
+			PGC_USERSET,
+			0,
+			NULL, NULL, NULL);
+
 	DefineCustomBoolVariable(
 			"neon.allow_replica_misconfig",
 			"Allow replica startup when some critical GUCs have smaller value than on primary node",
@@ -508,6 +527,11 @@ _PG_init(void)
 	EmitWarningsOnPlaceholders("neon");
 
 	ReportSearchPath();
+
+	prev_ExecutorStart = ExecutorStart_hook;
+	ExecutorStart_hook = neon_ExecutorStart;
+	prev_ExecutorEnd = ExecutorEnd_hook;
+	ExecutorEnd_hook = neon_ExecutorEnd;
 }
 
 PG_FUNCTION_INFO_V1(pg_cluster_size);
@@ -581,3 +605,55 @@ neon_shmem_startup_hook(void)
 #endif
 }
 #endif
+
+/*
+ * ExecutorStart hook: start up tracking if needed
+ */
+static void
+neon_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+	if (prev_ExecutorStart)
+		prev_ExecutorStart(queryDesc, eflags);
+	else
+		standard_ExecutorStart(queryDesc, eflags);
+
+	if (monitor_query_exec_time)
+	{
+		/*
+		 * Set up to track total elapsed time in ExecutorRun. Make sure the
+		 * space is allocated in the per-query context so it will go away at
+		 * ExecutorEnd.
+ */ + if (queryDesc->totaltime == NULL) + { + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt); + queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_TIMER, false); + MemoryContextSwitchTo(oldcxt); + } + } +} + +/* + * ExecutorEnd hook: store results if needed + */ +static void +neon_ExecutorEnd(QueryDesc *queryDesc) +{ + if (monitor_query_exec_time && queryDesc->totaltime) + { + /* + * Make sure stats accumulation is done. (Note: it's okay if several + * levels of hook all do this.) + */ + InstrEndLoop(queryDesc->totaltime); + + inc_query_time(queryDesc->totaltime->total*1000000); /* convert to usec */ + } + + if (prev_ExecutorEnd) + prev_ExecutorEnd(queryDesc); + else + standard_ExecutorEnd(queryDesc); +} diff --git a/pgxn/neon/neon_perf_counters.c b/pgxn/neon/neon_perf_counters.c index c77d99d636..d0a3d15108 100644 --- a/pgxn/neon/neon_perf_counters.c +++ b/pgxn/neon/neon_perf_counters.c @@ -71,6 +71,27 @@ inc_iohist(IOHistogram hist, uint64 latency_us) hist->wait_us_count++; } +static inline void +inc_qthist(QTHistogram hist, uint64 elapsed_us) +{ + int lo = 0; + int hi = NUM_QT_BUCKETS - 1; + + /* Find the right bucket with binary search */ + while (lo < hi) + { + int mid = (lo + hi) / 2; + + if (elapsed_us < qt_bucket_thresholds[mid]) + hi = mid; + else + lo = mid + 1; + } + hist->elapsed_us_bucket[lo]++; + hist->elapsed_us_sum += elapsed_us; + hist->elapsed_us_count++; +} + /* * Count a GetPage wait operation. */ @@ -98,6 +119,13 @@ inc_page_cache_write_wait(uint64 latency) inc_iohist(&MyNeonCounters->file_cache_write_hist, latency); } + +void +inc_query_time(uint64 elapsed) +{ + inc_qthist(&MyNeonCounters->query_time_hist, elapsed); +} + /* * Support functions for the views, neon_backend_perf_counters and * neon_perf_counters. @@ -112,11 +140,11 @@ typedef struct } metric_t; static int -histogram_to_metrics(IOHistogram histogram, - metric_t *metrics, - const char *count, - const char *sum, - const char *bucket) +io_histogram_to_metrics(IOHistogram histogram, + metric_t *metrics, + const char *count, + const char *sum, + const char *bucket) { int i = 0; uint64 bucket_accum = 0; @@ -145,10 +173,44 @@ histogram_to_metrics(IOHistogram histogram, return i; } +static int +qt_histogram_to_metrics(QTHistogram histogram, + metric_t *metrics, + const char *count, + const char *sum, + const char *bucket) +{ + int i = 0; + uint64 bucket_accum = 0; + + metrics[i].name = count; + metrics[i].is_bucket = false; + metrics[i].value = (double) histogram->elapsed_us_count; + i++; + metrics[i].name = sum; + metrics[i].is_bucket = false; + metrics[i].value = (double) histogram->elapsed_us_sum / 1000000.0; + i++; + for (int bucketno = 0; bucketno < NUM_QT_BUCKETS; bucketno++) + { + uint64 threshold = qt_bucket_thresholds[bucketno]; + + bucket_accum += histogram->elapsed_us_bucket[bucketno]; + + metrics[i].name = bucket; + metrics[i].is_bucket = true; + metrics[i].bucket_le = (threshold == UINT64_MAX) ? 
INFINITY : ((double) threshold) / 1000000.0; + metrics[i].value = (double) bucket_accum; + i++; + } + + return i; +} + static metric_t * neon_perf_counters_to_metrics(neon_per_backend_counters *counters) { -#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 12) +#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + (2 + NUM_QT_BUCKETS) + 12) metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t)); int i = 0; @@ -159,10 +221,10 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters) i++; \ } while (false) - i += histogram_to_metrics(&counters->getpage_hist, &metrics[i], - "getpage_wait_seconds_count", - "getpage_wait_seconds_sum", - "getpage_wait_seconds_bucket"); + i += io_histogram_to_metrics(&counters->getpage_hist, &metrics[i], + "getpage_wait_seconds_count", + "getpage_wait_seconds_sum", + "getpage_wait_seconds_bucket"); APPEND_METRIC(getpage_prefetch_requests_total); APPEND_METRIC(getpage_sync_requests_total); @@ -178,14 +240,19 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters) APPEND_METRIC(file_cache_hits_total); - i += histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i], - "file_cache_read_wait_seconds_count", - "file_cache_read_wait_seconds_sum", - "file_cache_read_wait_seconds_bucket"); - i += histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i], - "file_cache_write_wait_seconds_count", - "file_cache_write_wait_seconds_sum", - "file_cache_write_wait_seconds_bucket"); + i += io_histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i], + "file_cache_read_wait_seconds_count", + "file_cache_read_wait_seconds_sum", + "file_cache_read_wait_seconds_bucket"); + i += io_histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i], + "file_cache_write_wait_seconds_count", + "file_cache_write_wait_seconds_sum", + "file_cache_write_wait_seconds_bucket"); + + i += qt_histogram_to_metrics(&counters->query_time_hist, &metrics[i], + "query_time_seconds_count", + "query_time_seconds_sum", + "query_time_seconds_bucket"); Assert(i == NUM_METRICS); @@ -257,7 +324,7 @@ neon_get_backend_perf_counters(PG_FUNCTION_ARGS) } static inline void -histogram_merge_into(IOHistogram into, IOHistogram from) +io_histogram_merge_into(IOHistogram into, IOHistogram from) { into->wait_us_count += from->wait_us_count; into->wait_us_sum += from->wait_us_sum; @@ -265,6 +332,15 @@ histogram_merge_into(IOHistogram into, IOHistogram from) into->wait_us_bucket[bucketno] += from->wait_us_bucket[bucketno]; } +static inline void +qt_histogram_merge_into(QTHistogram into, QTHistogram from) +{ + into->elapsed_us_count += from->elapsed_us_count; + into->elapsed_us_sum += from->elapsed_us_sum; + for (int bucketno = 0; bucketno < NUM_QT_BUCKETS; bucketno++) + into->elapsed_us_bucket[bucketno] += from->elapsed_us_bucket[bucketno]; +} + PG_FUNCTION_INFO_V1(neon_get_perf_counters); Datum neon_get_perf_counters(PG_FUNCTION_ARGS) @@ -283,7 +359,7 @@ neon_get_perf_counters(PG_FUNCTION_ARGS) { neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno]; - histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist); + io_histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist); totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total; totals.getpage_sync_requests_total += counters->getpage_sync_requests_total; totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total; @@ -294,13 +370,13 @@ neon_get_perf_counters(PG_FUNCTION_ARGS) totals.pageserver_open_requests += 
counters->pageserver_open_requests; totals.getpage_prefetches_buffered += counters->getpage_prefetches_buffered; totals.file_cache_hits_total += counters->file_cache_hits_total; - histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist); - histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist); - totals.compute_getpage_stuck_requests_total += counters->compute_getpage_stuck_requests_total; totals.compute_getpage_max_inflight_stuck_time_ms = Max( totals.compute_getpage_max_inflight_stuck_time_ms, counters->compute_getpage_max_inflight_stuck_time_ms); + io_histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist); + io_histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist); + qt_histogram_merge_into(&totals.query_time_hist, &counters->query_time_hist); } metrics = neon_perf_counters_to_metrics(&totals); diff --git a/pgxn/neon/neon_perf_counters.h b/pgxn/neon/neon_perf_counters.h index 10cf094d4a..4b611b0636 100644 --- a/pgxn/neon/neon_perf_counters.h +++ b/pgxn/neon/neon_perf_counters.h @@ -36,6 +36,28 @@ typedef struct IOHistogramData typedef IOHistogramData *IOHistogram; +static const uint64 qt_bucket_thresholds[] = { + 2, 3, 6, 10, /* 0 us - 10 us */ + 20, 30, 60, 100, /* 10 us - 100 us */ + 200, 300, 600, 1000, /* 100 us - 1 ms */ + 2000, 3000, 6000, 10000, /* 1 ms - 10 ms */ + 20000, 30000, 60000, 100000, /* 10 ms - 100 ms */ + 200000, 300000, 600000, 1000000, /* 100 ms - 1 s */ + 2000000, 3000000, 6000000, 10000000, /* 1 s - 10 s */ + 20000000, 30000000, 60000000, 100000000, /* 10 s - 100 s */ + UINT64_MAX, +}; +#define NUM_QT_BUCKETS (lengthof(qt_bucket_thresholds)) + +typedef struct QTHistogramData +{ + uint64 elapsed_us_count; + uint64 elapsed_us_sum; + uint64 elapsed_us_bucket[NUM_QT_BUCKETS]; +} QTHistogramData; + +typedef QTHistogramData *QTHistogram; + typedef struct { /* @@ -127,6 +149,11 @@ typedef struct /* LFC I/O time buckets */ IOHistogramData file_cache_read_hist; IOHistogramData file_cache_write_hist; + + /* + * Histogram of query execution time. + */ + QTHistogramData query_time_hist; } neon_per_backend_counters; /* Pointer to the shared memory array of neon_per_backend_counters structs */ @@ -149,6 +176,7 @@ extern neon_per_backend_counters *neon_per_backend_counters_shared; extern void inc_getpage_wait(uint64 latency); extern void inc_page_cache_read_wait(uint64 latency); extern void inc_page_cache_write_wait(uint64 latency); +extern void inc_query_time(uint64 elapsed); extern Size NeonPerfCountersShmemSize(void); extern void NeonPerfCountersShmemInit(void); diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py index 2cb2ee7b58..c751a3e7cc 100644 --- a/test_runner/regress/test_compute_metrics.py +++ b/test_runner/regress/test_compute_metrics.py @@ -466,8 +466,13 @@ def test_perf_counters(neon_simple_env: NeonEnv): # # 1.5 is the minimum version to contain these views. cur.execute("CREATE EXTENSION neon VERSION '1.5'") + cur.execute("set neon.monitor_query_exec_time = on") cur.execute("SELECT * FROM neon_perf_counters") cur.execute("SELECT * FROM neon_backend_perf_counters") + cur.execute( + "select value from neon_backend_perf_counters where metric='query_time_seconds_count' and pid=pg_backend_pid()" + ) + assert cur.fetchall()[0][0] == 2 def collect_metric(