Add query execution time histogram (#10050)

## Problem


It is useful to understand what kinds of queries our clients are executing,
and one of the most important characteristics of a query is its execution
time: at a minimum, it lets us distinguish OLAP from OLTP queries. Monitoring
query execution time can also help detect performance problems (assuming the
workload is more or less stable).

## Summary of changes

Add query execution time histogram.
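
The histogram is exposed through the existing neon_perf_counters and
neon_backend_perf_counters views as query_time_seconds_count,
query_time_seconds_sum, and query_time_seconds_bucket, and it is populated
only when the new neon.monitor_query_exec_time GUC (off by default) is
enabled.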

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Author: Konstantin Knizhnik
Date: 2025-06-05 14:23:39 +03:00
Committed by: GitHub
Parent: 1577665c20
Commit: 6123fe2d5e
4 changed files with 208 additions and 23 deletions


@@ -16,6 +16,7 @@
#if PG_MAJORVERSION_NUM >= 15
#include "access/xlogrecovery.h"
#endif
#include "executor/instrument.h"
#include "replication/logical.h"
#include "replication/logicallauncher.h"
#include "replication/slot.h"
@@ -33,6 +34,7 @@
#include "file_cache.h"
#include "neon.h"
#include "neon_lwlsncache.h"
#include "neon_perf_counters.h"
#include "control_plane_connector.h"
#include "logical_replication_monitor.h"
#include "unstable_extensions.h"
@@ -46,6 +48,13 @@ void _PG_init(void);
static int running_xacts_overflow_policy;
static bool monitor_query_exec_time = false;
static ExecutorStart_hook_type prev_ExecutorStart = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
static void neon_ExecutorStart(QueryDesc *queryDesc, int eflags);
static void neon_ExecutorEnd(QueryDesc *queryDesc);
#if PG_MAJORVERSION_NUM >= 16
static shmem_startup_hook_type prev_shmem_startup_hook;
@@ -470,6 +479,16 @@ _PG_init(void)
0,
NULL, NULL, NULL);
DefineCustomBoolVariable(
"neon.monitor_query_exec_time",
"Collect infortmation about query execution time",
NULL,
&monitor_query_exec_time,
false,
PGC_USERSET,
0,
NULL, NULL, NULL);
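
Because the GUC is PGC_USERSET, a client can enable collection for its own
session with a plain SET neon.monitor_query_exec_time = on, which is exactly
what the regression test at the bottom of this diff does.
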
DefineCustomBoolVariable(
"neon.allow_replica_misconfig",
"Allow replica startup when some critical GUCs have smaller value than on primary node",
@@ -508,6 +527,11 @@ _PG_init(void)
EmitWarningsOnPlaceholders("neon");
ReportSearchPath();
prev_ExecutorStart = ExecutorStart_hook;
ExecutorStart_hook = neon_ExecutorStart;
prev_ExecutorEnd = ExecutorEnd_hook;
ExecutorEnd_hook = neon_ExecutorEnd;
}
PG_FUNCTION_INFO_V1(pg_cluster_size);
@@ -581,3 +605,55 @@ neon_shmem_startup_hook(void)
#endif
}
#endif
/*
* ExecutorStart hook: start up tracking if needed
*/
static void
neon_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
if (prev_ExecutorStart)
prev_ExecutorStart(queryDesc, eflags);
else
standard_ExecutorStart(queryDesc, eflags);
if (monitor_query_exec_time)
{
/*
* Set up to track total elapsed time in ExecutorRun. Make sure the
* space is allocated in the per-query context so it will go away at
* ExecutorEnd.
*/
if (queryDesc->totaltime == NULL)
{
MemoryContext oldcxt;
oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_TIMER, false);
MemoryContextSwitchTo(oldcxt);
}
}
}
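
Allocating the Instrumentation struct in es_query_cxt, as the comment above
notes, means it is freed automatically with the rest of the per-query state,
so the ExecutorEnd hook below never has to release it explicitly.
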
/*
* ExecutorEnd hook: store results if needed
*/
static void
neon_ExecutorEnd(QueryDesc *queryDesc)
{
if (monitor_query_exec_time && queryDesc->totaltime)
{
/*
* Make sure stats accumulation is done. (Note: it's okay if several
* levels of hook all do this.)
*/
InstrEndLoop(queryDesc->totaltime);
inc_query_time(queryDesc->totaltime->total * 1000000); /* totaltime->total is in seconds; convert to microseconds */
}
if (prev_ExecutorEnd)
prev_ExecutorEnd(queryDesc);
else
standard_ExecutorEnd(queryDesc);
}
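
As a worked example of the conversion above: a query whose totaltime->total
comes out at 0.0025 seconds is passed to inc_query_time() as 2500 us, which
the bucket search in the next file places in the bucket whose threshold is
3000 us (2000 us <= 2500 us < 3000 us).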


@@ -71,6 +71,27 @@ inc_iohist(IOHistogram hist, uint64 latency_us)
hist->wait_us_count++;
}
static inline void
inc_qthist(QTHistogram hist, uint64 elapsed_us)
{
int lo = 0;
int hi = NUM_QT_BUCKETS - 1;
/* Find the right bucket with binary search */
while (lo < hi)
{
int mid = (lo + hi) / 2;
if (elapsed_us < qt_bucket_thresholds[mid])
hi = mid;
else
lo = mid + 1;
}
hist->elapsed_us_bucket[lo]++;
hist->elapsed_us_sum += elapsed_us;
hist->elapsed_us_count++;
}
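
To see how the bucket search behaves at the edges, here is a minimal
standalone sketch of the same loop run against a truncated copy of the
threshold table (an illustration, not part of the patch). Note that a value
exactly equal to a threshold falls into the next bucket, because the
comparison is strict:

```c
#include <stdint.h>
#include <stdio.h>

/* Truncated copy of qt_bucket_thresholds; the real table continues with
 * the same 2-3-6-10 pattern up to 100000000 before UINT64_MAX. */
static const uint64_t thresholds[] = {
	2, 3, 6, 10, 20, 30, 60, 100, 200, 300, 600, 1000,
	UINT64_MAX,
};
#define NUM_BUCKETS (sizeof(thresholds) / sizeof(thresholds[0]))

/* Same binary search as inc_qthist: returns the index of the first
 * threshold strictly greater than elapsed_us. */
static int
bucket_for(uint64_t elapsed_us)
{
	int lo = 0;
	int hi = NUM_BUCKETS - 1;

	while (lo < hi)
	{
		int mid = (lo + hi) / 2;
		if (elapsed_us < thresholds[mid])
			hi = mid;
		else
			lo = mid + 1;
	}
	return lo;
}

int
main(void)
{
	uint64_t samples[] = {1, 2, 150, 5000};
	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
	{
		int b = bucket_for(samples[i]);
		printf("%llu us -> bucket %d (threshold %llu us)\n",
			   (unsigned long long) samples[i], b,
			   (unsigned long long) thresholds[b]);
	}
	return 0;
}
```

With this table, 1 us lands in bucket 0 (threshold 2), 2 us in bucket 1
(threshold 3), 150 us in bucket 8 (threshold 200), and 5000 us in bucket 12,
the UINT64_MAX overflow bucket, since the truncated table stops at 1000.
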
/*
* Count a GetPage wait operation.
*/
@@ -98,6 +119,13 @@ inc_page_cache_write_wait(uint64 latency)
inc_iohist(&MyNeonCounters->file_cache_write_hist, latency);
}
void
inc_query_time(uint64 elapsed)
{
inc_qthist(&MyNeonCounters->query_time_hist, elapsed);
}
/*
* Support functions for the views, neon_backend_perf_counters and
* neon_perf_counters.
@@ -112,11 +140,11 @@ typedef struct
} metric_t;
static int
-histogram_to_metrics(IOHistogram histogram,
-                     metric_t *metrics,
-                     const char *count,
-                     const char *sum,
-                     const char *bucket)
+io_histogram_to_metrics(IOHistogram histogram,
+                        metric_t *metrics,
+                        const char *count,
+                        const char *sum,
+                        const char *bucket)
{
int i = 0;
uint64 bucket_accum = 0;
@@ -145,10 +173,44 @@ histogram_to_metrics(IOHistogram histogram,
return i;
}
static int
qt_histogram_to_metrics(QTHistogram histogram,
metric_t *metrics,
const char *count,
const char *sum,
const char *bucket)
{
int i = 0;
uint64 bucket_accum = 0;
metrics[i].name = count;
metrics[i].is_bucket = false;
metrics[i].value = (double) histogram->elapsed_us_count;
i++;
metrics[i].name = sum;
metrics[i].is_bucket = false;
metrics[i].value = (double) histogram->elapsed_us_sum / 1000000.0;
i++;
for (int bucketno = 0; bucketno < NUM_QT_BUCKETS; bucketno++)
{
uint64 threshold = qt_bucket_thresholds[bucketno];
bucket_accum += histogram->elapsed_us_bucket[bucketno];
metrics[i].name = bucket;
metrics[i].is_bucket = true;
metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
metrics[i].value = (double) bucket_accum;
i++;
}
return i;
}
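
The bucket values emitted above are cumulative (bucket_accum), the le labels
are converted from microseconds to seconds, and the UINT64_MAX sentinel
becomes +Inf, matching the Prometheus histogram convention, so
histogram_quantile() can be applied directly to the exported series. For
illustration, a quantile can be recovered from such cumulative counts roughly
as follows (a sketch; estimate_quantile is a hypothetical helper, not part of
the patch):

```c
#include <math.h>
#include <stdio.h>

/* Estimate quantile q (0..1) from cumulative bucket counts `cum` with
 * upper bounds `le` (seconds, last one +Inf), using the same linear
 * interpolation as Prometheus's histogram_quantile(). */
static double
estimate_quantile(double q, const double *le, const double *cum, int nbuckets)
{
	double total = cum[nbuckets - 1];
	double rank = q * total;

	if (total <= 0)
		return NAN;				/* empty histogram */

	for (int i = 0; i < nbuckets; i++)
	{
		if (cum[i] >= rank)
		{
			double lower = (i == 0) ? 0.0 : le[i - 1];
			double prev = (i == 0) ? 0.0 : cum[i - 1];
			double in_bucket = cum[i] - prev;

			if (isinf(le[i]))	/* quantile falls in the +Inf bucket */
				return lower;
			if (in_bucket <= 0)
				return le[i];
			/* Linear interpolation within the bucket */
			return lower + (le[i] - lower) * (rank - prev) / in_bucket;
		}
	}
	return NAN;
}

int
main(void)
{
	/* Toy histogram: 10 queries took <= 1 ms, 8 more <= 10 ms, 2 longer. */
	double le[] = {0.001, 0.010, INFINITY};
	double cum[] = {10, 18, 20};

	printf("p90 ~= %.4f s\n", estimate_quantile(0.90, le, cum, 3));	/* 0.0100 */
	return 0;
}
```
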
static metric_t *
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
{
-#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 12)
+#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + (2 + NUM_QT_BUCKETS) + 12)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
int i = 0;
@@ -159,10 +221,10 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
i++; \
} while (false)
-	i += histogram_to_metrics(&counters->getpage_hist, &metrics[i],
-							  "getpage_wait_seconds_count",
-							  "getpage_wait_seconds_sum",
-							  "getpage_wait_seconds_bucket");
+	i += io_histogram_to_metrics(&counters->getpage_hist, &metrics[i],
+								 "getpage_wait_seconds_count",
+								 "getpage_wait_seconds_sum",
+								 "getpage_wait_seconds_bucket");
APPEND_METRIC(getpage_prefetch_requests_total);
APPEND_METRIC(getpage_sync_requests_total);
@@ -178,14 +240,19 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
APPEND_METRIC(file_cache_hits_total);
-	i += histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i],
-							  "file_cache_read_wait_seconds_count",
-							  "file_cache_read_wait_seconds_sum",
-							  "file_cache_read_wait_seconds_bucket");
-	i += histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i],
-							  "file_cache_write_wait_seconds_count",
-							  "file_cache_write_wait_seconds_sum",
-							  "file_cache_write_wait_seconds_bucket");
+	i += io_histogram_to_metrics(&counters->file_cache_read_hist, &metrics[i],
+								 "file_cache_read_wait_seconds_count",
+								 "file_cache_read_wait_seconds_sum",
+								 "file_cache_read_wait_seconds_bucket");
+	i += io_histogram_to_metrics(&counters->file_cache_write_hist, &metrics[i],
+								 "file_cache_write_wait_seconds_count",
+								 "file_cache_write_wait_seconds_sum",
+								 "file_cache_write_wait_seconds_bucket");
+	i += qt_histogram_to_metrics(&counters->query_time_hist, &metrics[i],
+								 "query_time_seconds_count",
+								 "query_time_seconds_sum",
+								 "query_time_seconds_bucket");
Assert(i == NUM_METRICS);
@@ -257,7 +324,7 @@ neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
}
static inline void
-histogram_merge_into(IOHistogram into, IOHistogram from)
+io_histogram_merge_into(IOHistogram into, IOHistogram from)
{
into->wait_us_count += from->wait_us_count;
into->wait_us_sum += from->wait_us_sum;
@@ -265,6 +332,15 @@ histogram_merge_into(IOHistogram into, IOHistogram from)
into->wait_us_bucket[bucketno] += from->wait_us_bucket[bucketno];
}
static inline void
qt_histogram_merge_into(QTHistogram into, QTHistogram from)
{
into->elapsed_us_count += from->elapsed_us_count;
into->elapsed_us_sum += from->elapsed_us_sum;
for (int bucketno = 0; bucketno < NUM_QT_BUCKETS; bucketno++)
into->elapsed_us_bucket[bucketno] += from->elapsed_us_bucket[bucketno];
}
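
Summing the per-backend histograms bucket by bucket is valid because every
field is a plain monotonically increasing counter; neon_get_perf_counters
below relies on this to produce the instance-wide totals.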
PG_FUNCTION_INFO_V1(neon_get_perf_counters);
Datum
neon_get_perf_counters(PG_FUNCTION_ARGS)
@@ -283,7 +359,7 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
{
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
-	histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist);
+	io_histogram_merge_into(&totals.getpage_hist, &counters->getpage_hist);
totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
@@ -294,13 +370,13 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
totals.pageserver_open_requests += counters->pageserver_open_requests;
totals.getpage_prefetches_buffered += counters->getpage_prefetches_buffered;
totals.file_cache_hits_total += counters->file_cache_hits_total;
-	histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);
-	histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);
+	io_histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);
+	io_histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);
+	qt_histogram_merge_into(&totals.query_time_hist, &counters->query_time_hist);
	totals.compute_getpage_stuck_requests_total += counters->compute_getpage_stuck_requests_total;
	totals.compute_getpage_max_inflight_stuck_time_ms = Max(
		totals.compute_getpage_max_inflight_stuck_time_ms,
		counters->compute_getpage_max_inflight_stuck_time_ms);
}
metrics = neon_perf_counters_to_metrics(&totals);


@@ -36,6 +36,28 @@ typedef struct IOHistogramData
typedef IOHistogramData *IOHistogram;
static const uint64 qt_bucket_thresholds[] = {
2, 3, 6, 10, /* 0 us - 10 us */
20, 30, 60, 100, /* 10 us - 100 us */
200, 300, 600, 1000, /* 100 us - 1 ms */
2000, 3000, 6000, 10000, /* 1 ms - 10 ms */
20000, 30000, 60000, 100000, /* 10 ms - 100 ms */
200000, 300000, 600000, 1000000, /* 100 ms - 1 s */
2000000, 3000000, 6000000, 10000000, /* 1 s - 10 s */
20000000, 30000000, 60000000, 100000000, /* 10 s - 100 s */
UINT64_MAX,
};
#define NUM_QT_BUCKETS (lengthof(qt_bucket_thresholds))
typedef struct QTHistogramData
{
uint64 elapsed_us_count;
uint64 elapsed_us_sum;
uint64 elapsed_us_bucket[NUM_QT_BUCKETS];
} QTHistogramData;
typedef QTHistogramData *QTHistogram;
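
The 2-3-6-10 progression keeps adjacent thresholds within a factor of two of
each other, so within the covered range a bucketed duration is off from the
true value by at most about 2x, while only 33 buckets span eight decades
(microseconds to 100 seconds). A quick standalone check of that spacing
(illustration only, not part of the patch):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The finite thresholds from qt_bucket_thresholds (UINT64_MAX omitted). */
static const uint64_t thresholds[] = {
	2, 3, 6, 10, 20, 30, 60, 100,
	200, 300, 600, 1000, 2000, 3000, 6000, 10000,
	20000, 30000, 60000, 100000, 200000, 300000, 600000, 1000000,
	2000000, 3000000, 6000000, 10000000,
	20000000, 30000000, 60000000, 100000000,
};

int
main(void)
{
	double worst = 0;

	for (size_t i = 1; i < sizeof(thresholds) / sizeof(thresholds[0]); i++)
	{
		assert(thresholds[i] > thresholds[i - 1]);	/* strictly increasing */
		double ratio = (double) thresholds[i] / (double) thresholds[i - 1];
		if (ratio > worst)
			worst = ratio;
	}
	printf("worst adjacent ratio: %.2f\n", worst);	/* prints 2.00 */
	return 0;
}
```
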
typedef struct
{
/*
@@ -127,6 +149,11 @@ typedef struct
/* LFC I/O time buckets */
IOHistogramData file_cache_read_hist;
IOHistogramData file_cache_write_hist;
/*
* Histogram of query execution time.
*/
QTHistogramData query_time_hist;
} neon_per_backend_counters;
/* Pointer to the shared memory array of neon_per_backend_counters structs */
@@ -149,6 +176,7 @@ extern neon_per_backend_counters *neon_per_backend_counters_shared;
extern void inc_getpage_wait(uint64 latency);
extern void inc_page_cache_read_wait(uint64 latency);
extern void inc_page_cache_write_wait(uint64 latency);
extern void inc_query_time(uint64 elapsed);
extern Size NeonPerfCountersShmemSize(void);
extern void NeonPerfCountersShmemInit(void);


@@ -466,8 +466,13 @@ def test_perf_counters(neon_simple_env: NeonEnv):
#
# 1.5 is the minimum version to contain these views.
cur.execute("CREATE EXTENSION neon VERSION '1.5'")
cur.execute("set neon.monitor_query_exec_time = on")
cur.execute("SELECT * FROM neon_perf_counters")
cur.execute("SELECT * FROM neon_backend_perf_counters")
cur.execute(
"select value from neon_backend_perf_counters where metric='query_time_seconds_count' and pid=pg_backend_pid()"
)
assert cur.fetchall()[0][0] == 2
def collect_metric(