From 263dfba6eeef448864dba151e2d8d34a418b9629 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 23 Sep 2024 21:28:50 +0300
Subject: [PATCH] Add views for metrics about pageserver requests (#9008)

The metrics include a histogram of how long we need to wait for a GetPage
request, the number of reconnects, and the number of requests, among other
things. The metrics are not yet exported anywhere, but you can query them
manually.

Note: This does *not* bump the default version of the 'neon' extension. We
will do that later, as a separate PR. The reason is that this allows us to
roll back the compute image smoothly, if necessary. Once the image that
includes the new extension .so file with the new functions has been rolled
out, and we're confident that we don't need to roll back the image anymore,
we can change the default extension version and actually start using the new
functions and views.

This is what the view looks like:

```
postgres=# select * from neon_perf_counters ;
                metric                 | bucket_le |  value
---------------------------------------+-----------+----------
 getpage_wait_seconds_count            |           |      300
 getpage_wait_seconds_sum              |           | 0.048506
 getpage_wait_seconds_bucket           |     2e-05 |        0
 getpage_wait_seconds_bucket           |     3e-05 |        0
 getpage_wait_seconds_bucket           |     6e-05 |       71
 getpage_wait_seconds_bucket           |    0.0001 |      124
 getpage_wait_seconds_bucket           |    0.0002 |      248
 getpage_wait_seconds_bucket           |    0.0003 |      279
 getpage_wait_seconds_bucket           |    0.0006 |      297
 getpage_wait_seconds_bucket           |     0.001 |      298
 getpage_wait_seconds_bucket           |     0.002 |      298
 getpage_wait_seconds_bucket           |     0.003 |      298
 getpage_wait_seconds_bucket           |     0.006 |      300
 getpage_wait_seconds_bucket           |      0.01 |      300
 getpage_wait_seconds_bucket           |      0.02 |      300
 getpage_wait_seconds_bucket           |      0.03 |      300
 getpage_wait_seconds_bucket           |      0.06 |      300
 getpage_wait_seconds_bucket           |       0.1 |      300
 getpage_wait_seconds_bucket           |       0.2 |      300
 getpage_wait_seconds_bucket           |       0.3 |      300
 getpage_wait_seconds_bucket           |       0.6 |      300
 getpage_wait_seconds_bucket           |         1 |      300
 getpage_wait_seconds_bucket           |         2 |      300
 getpage_wait_seconds_bucket           |         3 |      300
 getpage_wait_seconds_bucket           |         6 |      300
 getpage_wait_seconds_bucket           |        10 |      300
 getpage_wait_seconds_bucket           |        20 |      300
 getpage_wait_seconds_bucket           |        30 |      300
 getpage_wait_seconds_bucket           |        60 |      300
 getpage_wait_seconds_bucket           |       100 |      300
 getpage_wait_seconds_bucket           |  Infinity |      300
 getpage_prefetch_requests_total       |           |       69
 getpage_sync_requests_total           |           |      231
 getpage_prefetch_misses_total         |           |        0
 getpage_prefetch_discards_total       |           |        0
 pageserver_requests_sent_total        |           |      323
 pageserver_requests_disconnects_total |           |        0
 pageserver_send_flushes_total         |           |      323
 file_cache_hits_total                 |           |        0
(39 rows)
```
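Since the `_bucket` rows are cumulative counts, simple latency summaries can
be computed directly from the view. For example, this query (a usage sketch
against the schema above, not something added by this patch) estimates the
fraction of GetPage requests that completed within 1 ms:

```
-- Fraction of GetPage requests that completed within 1 ms, computed from
-- the cumulative histogram. NULLIF avoids division by zero on an idle system.
SELECT b.value / NULLIF(c.value, 0) AS fraction_within_1ms
FROM neon_perf_counters b, neon_perf_counters c
WHERE b.metric = 'getpage_wait_seconds_bucket' AND b.bucket_le = 0.001
  AND c.metric = 'getpage_wait_seconds_count';
```

The bucket thresholds follow a 2/3/6/10-per-decade progression from 20 us to
100 s, so common cutoffs such as 1 ms or 100 ms land exactly on bucket
boundaries.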
---
 pgxn/neon/Makefile                          |   4 +-
 pgxn/neon/libpagestore.c                    |  10 +-
 pgxn/neon/neon--1.4--1.5.sql                |  39 +++
 pgxn/neon/neon--1.5--1.4.sql                |   4 +
 pgxn/neon/neon.control                      |   2 +
 pgxn/neon/neon_perf_counters.c              | 261 ++++++++++++++++++++
 pgxn/neon/neon_perf_counters.h              | 111 +++++++++
 pgxn/neon/neon_pgversioncompat.c            |  44 ++++
 pgxn/neon/neon_pgversioncompat.h            |   6 +
 pgxn/neon/pagestore_smgr.c                  |  47 ++--
 test_runner/regress/test_compute_metrics.py |  21 ++
 test_runner/regress/test_neon_extension.py  |   4 +-
 12 files changed, 533 insertions(+), 20 deletions(-)
 create mode 100644 pgxn/neon/neon--1.4--1.5.sql
 create mode 100644 pgxn/neon/neon--1.5--1.4.sql
 create mode 100644 pgxn/neon/neon_perf_counters.c
 create mode 100644 pgxn/neon/neon_perf_counters.h
 create mode 100644 pgxn/neon/neon_pgversioncompat.c
 create mode 100644 test_runner/regress/test_compute_metrics.py

diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile
index 3b755bb042..ddc8155ff3 100644
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -9,6 +9,8 @@ OBJS = \
 	hll.o \
 	libpagestore.o \
 	neon.o \
+	neon_pgversioncompat.o \
+	neon_perf_counters.o \
 	neon_utils.o \
 	neon_walreader.o \
 	pagestore_smgr.o \
@@ -23,7 +25,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl
 
 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
+DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"
 
 EXTRA_CLEAN = \
diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index df7000acc0..07a19a7114 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -30,6 +30,7 @@
 #include "utils/guc.h"
 
 #include "neon.h"
+#include "neon_perf_counters.h"
 #include "neon_utils.h"
 #include "pagestore_client.h"
 #include "walproposer.h"
@@ -331,6 +332,7 @@ CLEANUP_AND_DISCONNECT(PageServer *shard)
 	}
 	if (shard->conn)
 	{
+		MyNeonCounters->pageserver_disconnects_total++;
 		PQfinish(shard->conn);
 		shard->conn = NULL;
 	}
@@ -737,6 +739,8 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
 	PageServer *shard = &page_servers[shard_no];
 	PGconn	   *pageserver_conn;
 
+	MyNeonCounters->pageserver_requests_sent_total++;
+
 	/* If the connection was lost for some reason, reconnect */
 	if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
 	{
@@ -889,6 +893,7 @@ pageserver_flush(shardno_t shard_no)
 	}
 	else
 	{
+		MyNeonCounters->pageserver_send_flushes_total++;
 		if (PQflush(pageserver_conn))
 		{
 			char	   *msg = pchomp(PQerrorMessage(pageserver_conn));
@@ -922,7 +927,7 @@ check_neon_id(char **newval, void **extra, GucSource source)
 static Size
 PagestoreShmemSize(void)
 {
-	return sizeof(PagestoreShmemState);
+	return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize());
 }
 
 static bool
@@ -941,6 +946,9 @@ PagestoreShmemInit(void)
 		memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
 		AssignPageserverConnstring(page_server_connstring, NULL);
 	}
+
+	NeonPerfCountersShmemInit();
+
 	LWLockRelease(AddinShmemInitLock);
 	return found;
 }
diff --git a/pgxn/neon/neon--1.4--1.5.sql b/pgxn/neon/neon--1.4--1.5.sql
new file mode 100644
index 0000000000..a1db7bf1b1
--- /dev/null
+++ b/pgxn/neon/neon--1.4--1.5.sql
@@ -0,0 +1,39 @@
+\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
+
+
+CREATE FUNCTION get_backend_perf_counters()
+RETURNS SETOF RECORD
+AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'
+LANGUAGE C PARALLEL SAFE;
+
+CREATE FUNCTION get_perf_counters()
+RETURNS SETOF RECORD
+AS 'MODULE_PATHNAME', 'neon_get_perf_counters'
+LANGUAGE C PARALLEL SAFE;
+
+-- Show various metrics, for each backend. Note that the values are not reset
+-- when a backend exits. When a new backend starts with the same backend ID, it
+-- will continue accumulating the values from where the old backend left off.
+-- If you are only interested in the changes from your own session, store the
+-- values at the beginning of the session somewhere, and subtract them later.
+--
+-- For histograms, 'bucket_le' is the upper bound of the histogram bucket.
+CREATE VIEW neon_backend_perf_counters AS
+  SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value
+  FROM get_backend_perf_counters() AS P (
+    procno integer,
+    pid integer,
+    metric text,
+    bucket_le float8,
+    value float8
+  );
+
+-- Summary across all backends. (This could also be implemented with
+-- an aggregate query over the neon_backend_perf_counters view.)
+CREATE VIEW neon_perf_counters AS
+  SELECT P.metric, P.bucket_le, P.value
+  FROM get_perf_counters() AS P (
+    metric text,
+    bucket_le float8,
+    value float8
+  );
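To make the "store the values at the beginning of the session, and subtract
them later" suggestion in the comment above concrete, a session could do
something like the following (a sketch assuming the 1.5 views; the temp table
name is arbitrary):

```
-- Snapshot this backend's counters at the start of a session, then compute
-- per-session deltas later. bucket_le is NULL for non-histogram rows, hence
-- IS NOT DISTINCT FROM in the join condition.
CREATE TEMP TABLE perf_counters_start AS
  SELECT metric, bucket_le, value
  FROM neon_backend_perf_counters
  WHERE pid = pg_backend_pid();

-- ... run the workload of interest ...

SELECT c.metric, c.bucket_le, c.value - s.value AS delta
FROM neon_backend_perf_counters c
JOIN perf_counters_start s
  ON s.metric = c.metric
 AND s.bucket_le IS NOT DISTINCT FROM c.bucket_le
WHERE c.pid = pg_backend_pid();
```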
diff --git a/pgxn/neon/neon--1.5--1.4.sql b/pgxn/neon/neon--1.5--1.4.sql
new file mode 100644
index 0000000000..7939fd8aa9
--- /dev/null
+++ b/pgxn/neon/neon--1.5--1.4.sql
@@ -0,0 +1,4 @@
+DROP VIEW IF EXISTS neon_perf_counters;
+DROP VIEW IF EXISTS neon_backend_perf_counters;
+DROP FUNCTION IF EXISTS get_perf_counters();
+DROP FUNCTION IF EXISTS get_backend_perf_counters();
diff --git a/pgxn/neon/neon.control b/pgxn/neon/neon.control
index 03bdb9a0b4..0b36bdbb65 100644
--- a/pgxn/neon/neon.control
+++ b/pgxn/neon/neon.control
@@ -1,5 +1,7 @@
 # neon extension
 comment = 'cloud storage for PostgreSQL'
+# TODO: bump default version to 1.5, after we are certain that we don't
+# need to roll back the compute image
 default_version = '1.4'
 module_pathname = '$libdir/neon'
 relocatable = true
diff --git a/pgxn/neon/neon_perf_counters.c b/pgxn/neon/neon_perf_counters.c
new file mode 100644
index 0000000000..3e86d5b262
--- /dev/null
+++ b/pgxn/neon/neon_perf_counters.c
@@ -0,0 +1,261 @@
+/*-------------------------------------------------------------------------
+ *
+ * neon_perf_counters.c
+ *      Collect statistics about Neon I/O
+ *
+ * Each backend has its own set of counters in shared memory.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/proc.h"
+#include "storage/shmem.h"
+#include "utils/builtins.h"
+
+#include "neon_perf_counters.h"
+#include "neon_pgversioncompat.h"
+
+neon_per_backend_counters *neon_per_backend_counters_shared;
+
+Size
+NeonPerfCountersShmemSize(void)
+{
+    Size        size = 0;
+
+    size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters)));
+
+    return size;
+}
+
+void
+NeonPerfCountersShmemInit(void)
+{
+    bool        found;
+
+    neon_per_backend_counters_shared =
+        ShmemInitStruct("Neon perf counters",
+                        mul_size(MaxBackends,
+                                 sizeof(neon_per_backend_counters)),
+                        &found);
+    Assert(found == IsUnderPostmaster);
+    if (!found)
+    {
+        /* shared memory is initialized to zeros, so nothing to do here */
+    }
+}
+
+/*
+ * Count a GetPage wait operation.
+ */
+void
+inc_getpage_wait(uint64 latency_us)
+{
+    int         lo = 0;
+    int         hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
+
+    /* Find the right bucket with binary search */
+    while (lo < hi)
+    {
+        int         mid = (lo + hi) / 2;
+
+        if (latency_us < getpage_wait_bucket_thresholds[mid])
+            hi = mid;
+        else
+            lo = mid + 1;
+    }
+    MyNeonCounters->getpage_wait_us_bucket[lo]++;
+    MyNeonCounters->getpage_wait_us_sum += latency_us;
+    MyNeonCounters->getpage_wait_us_count++;
+}
+
+/*
+ * Support functions for the views, neon_backend_perf_counters and
+ * neon_perf_counters.
+ */
+
+typedef struct
+{
+    char       *name;
+    bool        is_bucket;
+    double      bucket_le;
+    double      value;
+} metric_t;
+
+static metric_t *
+neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
+{
+#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
+    metric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
+    uint64      bucket_accum;
+    int         i = 0;
+
+    metrics[i].name = "getpage_wait_seconds_count";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->getpage_wait_us_count;
+    i++;
+    metrics[i].name = "getpage_wait_seconds_sum";
+    metrics[i].is_bucket = false;
+    metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
+    i++;
+
+    bucket_accum = 0;
+    for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
+    {
+        uint64      threshold = getpage_wait_bucket_thresholds[bucketno];
+
+        bucket_accum += counters->getpage_wait_us_bucket[bucketno];
+
+        metrics[i].name = "getpage_wait_seconds_bucket";
+        metrics[i].is_bucket = true;
+        metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
+        metrics[i].value = (double) bucket_accum;
+        i++;
+    }
+    metrics[i].name = "getpage_prefetch_requests_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->getpage_prefetch_requests_total;
+    i++;
+    metrics[i].name = "getpage_sync_requests_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->getpage_sync_requests_total;
+    i++;
+    metrics[i].name = "getpage_prefetch_misses_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->getpage_prefetch_misses_total;
+    i++;
+    metrics[i].name = "getpage_prefetch_discards_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->getpage_prefetch_discards_total;
+    i++;
+    metrics[i].name = "pageserver_requests_sent_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->pageserver_requests_sent_total;
+    i++;
+    metrics[i].name = "pageserver_requests_disconnects_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->pageserver_disconnects_total;
+    i++;
+    metrics[i].name = "pageserver_send_flushes_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->pageserver_send_flushes_total;
+    i++;
+    metrics[i].name = "file_cache_hits_total";
+    metrics[i].is_bucket = false;
+    metrics[i].value = (double) counters->file_cache_hits_total;
+    i++;
+
+    Assert(i == NUM_METRICS);
+
+    /* NULL entry marks end of array */
+    metrics[i].name = NULL;
+    metrics[i].value = 0;
+
+    return metrics;
+}
+
+/*
+ * Write metric to three output Datums
+ */
+static void
+metric_to_datums(metric_t *m, Datum *values, bool *nulls)
+{
+    values[0] = CStringGetTextDatum(m->name);
+    nulls[0] = false;
+    if (m->is_bucket)
+    {
+        values[1] = Float8GetDatum(m->bucket_le);
+        nulls[1] = false;
+    }
+    else
+    {
+        values[1] = (Datum) 0;
+        nulls[1] = true;
+    }
+    values[2] = Float8GetDatum(m->value);
+    nulls[2] = false;
+}
+
+PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);
+Datum
+neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
+{
+    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+    Datum       values[5];
+    bool        nulls[5];
+
+    /* We put all the tuples into a tuplestore in one go. */
+    InitMaterializedSRF(fcinfo, 0);
+
+    for (int procno = 0; procno < MaxBackends; procno++)
+    {
+        PGPROC     *proc = GetPGProcByNumber(procno);
+        int         pid = proc->pid;
+        neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
+        metric_t   *metrics = neon_perf_counters_to_metrics(counters);
+
+        values[0] = Int32GetDatum(procno);
+        nulls[0] = false;
+        values[1] = Int32GetDatum(pid);
+        nulls[1] = false;
+
+        for (int i = 0; metrics[i].name != NULL; i++)
+        {
+            metric_to_datums(&metrics[i], &values[2], &nulls[2]);
+            tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+        }
+
+        pfree(metrics);
+    }
+
+    return (Datum) 0;
+}
+
+PG_FUNCTION_INFO_V1(neon_get_perf_counters);
+Datum
+neon_get_perf_counters(PG_FUNCTION_ARGS)
+{
+    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+    Datum       values[3];
+    bool        nulls[3];
+    neon_per_backend_counters totals = {0};
+    metric_t   *metrics;
+
+    /* We put all the tuples into a tuplestore in one go. */
+    InitMaterializedSRF(fcinfo, 0);
+
+    /* Aggregate the counters across all backends */
+    for (int procno = 0; procno < MaxBackends; procno++)
+    {
+        neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
+
+        totals.getpage_wait_us_count += counters->getpage_wait_us_count;
+        totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
+        for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
+            totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
+        totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
+        totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
+        totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
+        totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
+        totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
+        totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
+        totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
+        totals.file_cache_hits_total += counters->file_cache_hits_total;
+    }
+
+    metrics = neon_perf_counters_to_metrics(&totals);
+    for (int i = 0; metrics[i].name != NULL; i++)
+    {
+        metric_to_datums(&metrics[i], &values[0], &nulls[0]);
+        tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+    }
+    pfree(metrics);
+
+    return (Datum) 0;
+}
diff --git a/pgxn/neon/neon_perf_counters.h b/pgxn/neon/neon_perf_counters.h
new file mode 100644
index 0000000000..ae35e8c3a5
--- /dev/null
+++ b/pgxn/neon/neon_perf_counters.h
@@ -0,0 +1,111 @@
+/*-------------------------------------------------------------------------
+ *
+ * neon_perf_counters.h
+ *      Performance counters for neon storage requests
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef NEON_PERF_COUNTERS_H
+#define NEON_PERF_COUNTERS_H
+
+#if PG_VERSION_NUM >= 170000
+#include "storage/procnumber.h"
+#else
+#include "storage/backendid.h"
+#include "storage/proc.h"
+#endif
+
+static const uint64 getpage_wait_bucket_thresholds[] = {
+    20, 30, 60, 100,                            /* 0 - 100 us */
+    200, 300, 600, 1000,                        /* 100 us - 1 ms */
+    2000, 3000, 6000, 10000,                    /* 1 ms - 10 ms */
+    20000, 30000, 60000, 100000,                /* 10 ms - 100 ms */
+    200000, 300000, 600000, 1000000,            /* 100 ms - 1 s */
+    2000000, 3000000, 6000000, 10000000,        /* 1 s - 10 s */
+    20000000, 30000000, 60000000, 100000000,    /* 10 s - 100 s */
+    UINT64_MAX,
+};
+#define NUM_GETPAGE_WAIT_BUCKETS (lengthof(getpage_wait_bucket_thresholds))
+
+typedef struct
+{
+    /*
+     * Histogram of how long an smgrread() request needs to wait for a
+     * response from the pageserver. When prefetching is effective, these
+     * wait times can be lower than the network latency to the pageserver,
+     * even zero, if the page has already been prefetched by the time we
+     * need to read it.
+     *
+     * Note: we accumulate these in microseconds, because that's convenient
+     * in the backend, but the 'neon_backend_perf_counters' view will convert
+     * them to seconds, to make them more idiomatic as prometheus metrics.
+     */
+    uint64      getpage_wait_us_count;
+    uint64      getpage_wait_us_sum;
+    uint64      getpage_wait_us_bucket[NUM_GETPAGE_WAIT_BUCKETS];
+
+    /*
+     * Total number of speculative prefetch GetPage requests and synchronous
+     * GetPage requests sent.
+     */
+    uint64      getpage_prefetch_requests_total;
+    uint64      getpage_sync_requests_total;
+
+    /* XXX: It's not clear to me when these misses happen. */
+    uint64      getpage_prefetch_misses_total;
+
+    /*
+     * Number of prefetched responses that were discarded because the
+     * prefetched page was not needed or because it was concurrently fetched /
+     * modified by another backend.
+     */
+    uint64      getpage_prefetch_discards_total;
+
+    /*
+     * Total number of requests sent to the pageserver.
+     * (getpage_prefetch_requests_total and getpage_sync_requests_total count
+     * only GetPage requests; this counts all request types.)
+     */
+    uint64      pageserver_requests_sent_total;
+
+    /*
+     * Number of times the connection to the pageserver was lost and the
+     * backend had to reconnect. Note that this doesn't count the first
+     * connection in each backend, only reconnects.
+     */
+    uint64      pageserver_disconnects_total;
+
+    /*
+     * Number of network flushes to the pageserver. Synchronous requests are
+     * flushed immediately, but when prefetch requests are sent in batches,
+     * this can be smaller than pageserver_requests_sent_total.
+     */
+    uint64      pageserver_send_flushes_total;
+
+    /*
+     * Number of requests satisfied from the LFC.
+     *
+     * This is redundant with the server-wide file_cache_hits, but this gives
+     * per-backend granularity, and it's handy to have this in the same place
+     * as the counters for requests that went to the pageserver. Maybe move
+     * all the LFC stats to this struct in the future?
+     */
+    uint64      file_cache_hits_total;
+
+} neon_per_backend_counters;
+
+/* Pointer to the shared memory array of neon_per_backend_counters structs */
+extern neon_per_backend_counters *neon_per_backend_counters_shared;
+
+#if PG_VERSION_NUM >= 170000
+#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])
+#else
+#define MyNeonCounters (&neon_per_backend_counters_shared[MyProc->pgprocno])
+#endif
+
+extern void inc_getpage_wait(uint64 latency_us);
+
+extern Size NeonPerfCountersShmemSize(void);
+extern void NeonPerfCountersShmemInit(void);
+
+
+#endif                          /* NEON_PERF_COUNTERS_H */
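Because the counters are kept per backend and the neon_backend_perf_counters
view exposes each backend's pid, they can be correlated with other per-backend
statistics. For example (a usage sketch assuming the 1.5 views, not part of
this patch):

```
-- Per-backend GetPage wait counts alongside pg_stat_activity, to see which
-- connections are waiting on the pageserver the most.
SELECT a.pid, a.application_name, c.value AS getpage_wait_count
FROM neon_backend_perf_counters c
JOIN pg_stat_activity a ON a.pid = c.pid
WHERE c.metric = 'getpage_wait_seconds_count' AND c.value > 0
ORDER BY c.value DESC;
```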
diff --git a/pgxn/neon/neon_pgversioncompat.c b/pgxn/neon/neon_pgversioncompat.c
new file mode 100644
index 0000000000..a0dbddde4b
--- /dev/null
+++ b/pgxn/neon/neon_pgversioncompat.c
@@ -0,0 +1,44 @@
+/*
+ * Support functions for the compatibility macros in neon_pgversioncompat.h
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/tuplestore.h"
+
+#include "neon_pgversioncompat.h"
+
+#if PG_MAJORVERSION_NUM < 15
+void
+InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
+{
+    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+    Tuplestorestate *tupstore;
+    MemoryContext old_context,
+                per_query_ctx;
+    TupleDesc   stored_tupdesc;
+
+    /* check to see if caller supports returning a tuplestore */
+    if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+        ereport(ERROR,
+                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                 errmsg("set-valued function called in context that cannot accept a set")));
+
+    /*
+     * Store the tuplestore and the tuple descriptor in ReturnSetInfo. This
+     * must be done in the per-query memory context.
+     */
+    per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+    old_context = MemoryContextSwitchTo(per_query_ctx);
+
+    if (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE)
+        elog(ERROR, "return type must be a row type");
+
+    tupstore = tuplestore_begin_heap(false, false, work_mem);
+    rsinfo->returnMode = SFRM_Materialize;
+    rsinfo->setResult = tupstore;
+    rsinfo->setDesc = stored_tupdesc;
+    MemoryContextSwitchTo(old_context);
+}
+#endif
diff --git a/pgxn/neon/neon_pgversioncompat.h b/pgxn/neon/neon_pgversioncompat.h
index 59b97d64fe..e4754ec7ea 100644
--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -6,6 +6,8 @@
 #ifndef NEON_PGVERSIONCOMPAT_H
 #define NEON_PGVERSIONCOMPAT_H
 
+#include "fmgr.h"
+
 #if PG_MAJORVERSION_NUM < 17
 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
 #else
@@ -123,4 +125,8 @@
 #define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
 #endif
 
+#if PG_MAJORVERSION_NUM < 15
+extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
+#endif
+
 #endif                          /* NEON_PGVERSIONCOMPAT_H */
diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c
index 36538ea5e2..1c87f4405c 100644
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -66,6 +66,7 @@
 #include "storage/md.h"
 #include "storage/smgr.h"
 
+#include "neon_perf_counters.h"
 #include "pagestore_client.h"
 #include "bitmap.h"
 
@@ -289,7 +290,6 @@ static PrefetchState *MyPState;
 
 static bool compact_prefetch_buffers(void);
 static void consume_prefetch_responses(void);
-static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns);
 static bool prefetch_read(PrefetchRequest *slot);
 static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
 static bool prefetch_wait_for(uint64 ring_index);
@@ -780,21 +780,27 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
 }
 
 /*
- * prefetch_register_buffer() - register and prefetch buffer
+ * prefetch_register_bufferv() - register and prefetch buffers
  *
  * Register that we may want the contents of BufferTag in the near future.
+ * This is used when issuing a speculative prefetch request, but also when
+ * performing a synchronous request and we need the buffer right now.
  *
  * If force_request_lsns is not NULL, those values are sent to the
  * pageserver. If NULL, we utilize the lastWrittenLsn infrastructure
  * to calculate the LSNs to send.
  *
+ * When performing a prefetch rather than a synchronous request,
+ * is_prefetch==true. Currently, it only affects how the request is accounted
+ * for in the perf counters.
+ *
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
-
 static uint64
 prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
-                          BlockNumber nblocks, const bits8 *mask)
+                          BlockNumber nblocks, const bits8 *mask,
+                          bool is_prefetch)
 {
@@ -815,6 +821,7 @@ Retry:
 		PrfHashEntry *entry = NULL;
 		uint64		ring_index;
 		neon_request_lsns *lsns;
+
 		if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
 			continue;
 
@@ -858,6 +865,7 @@ Retry:
 				prefetch_set_unused(ring_index);
 				entry = NULL;
 				slot = NULL;
+				MyNeonCounters->getpage_prefetch_discards_total++;
 			}
 		}
 
@@ -972,6 +980,11 @@ Retry:
 
 		min_ring_index = Min(min_ring_index, ring_index);
 
+		if (is_prefetch)
+			MyNeonCounters->getpage_prefetch_requests_total++;
+		else
+			MyNeonCounters->getpage_sync_requests_total++;
+
 		prefetch_do_request(slot, lsns);
 	}
 
@@ -1000,13 +1013,6 @@ Retry:
 	}
 
-static uint64
-prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns)
-{
-	return prefetch_register_bufferv(tag, force_request_lsns, 1, NULL);
-}
-
-
 /*
  * Note: this function can get canceled and use a long jump to the next catch
  * context. Take care.
  */
@@ -2612,7 +2618,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				lfc_present[i] = ~(lfc_present[i]);
 
 			ring_index = prefetch_register_bufferv(tag, NULL, iterblocks,
-												   lfc_present);
+												   lfc_present, true);
 
 			nblocks -= iterblocks;
 			blocknum += iterblocks;
@@ -2656,7 +2662,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 
 	CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
 
-	ring_index = prefetch_register_buffer(tag, NULL);
+	ring_index = prefetch_register_bufferv(tag, NULL, 1, NULL, true);
 
 	Assert(ring_index < MyPState->ring_unused &&
 		   MyPState->ring_last <= ring_index);
@@ -2747,17 +2753,20 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
 	 * weren't for the behaviour of the LwLsn cache that uses the highest
 	 * value of the LwLsn cache when the entry is not found.
 	 */
-	prefetch_register_bufferv(buftag, request_lsns, nblocks, mask);
+	prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
 
 	for (int i = 0; i < nblocks; i++)
 	{
 		void	   *buffer = buffers[i];
 		BlockNumber blockno = base_blockno + i;
 		neon_request_lsns *reqlsns = &request_lsns[i];
+		TimestampTz start_ts, end_ts;
 
 		if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
 			continue;
 
+		start_ts = GetCurrentTimestamp();
+
 		if (RecoveryInProgress() && MyBackendType != B_STARTUP)
 			XLogWaitForReplayOf(reqlsns[0].request_lsn);
 
@@ -2794,6 +2803,7 @@ Retry:
 			/* drop caches */
 			prefetch_set_unused(slot->my_ring_index);
 			pgBufferUsage.prefetch.expired += 1;
+			MyNeonCounters->getpage_prefetch_discards_total++;
 			/* make it look like a prefetch cache miss */
 			entry = NULL;
 		}
@@ -2804,8 +2814,9 @@ Retry:
 		if (entry == NULL)
 		{
 			pgBufferUsage.prefetch.misses += 1;
+			MyNeonCounters->getpage_prefetch_misses_total++;
 
-			ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL);
+			ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
 			Assert(ring_index != UINT64_MAX);
 			slot = GetPrfSlot(ring_index);
 		}
@@ -2860,6 +2871,9 @@ Retry:
 		/* buffer was used, clean up for later reuse */
 		prefetch_set_unused(ring_index);
 		prefetch_cleanup_trailing_unused();
+
+		end_ts = GetCurrentTimestamp();
+		inc_getpage_wait(end_ts >= start_ts ? (end_ts - start_ts) : 0);
 	}
 }
 
@@ -2913,6 +2927,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 	/* Try to read from local file cache */
 	if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
 	{
+		MyNeonCounters->file_cache_hits_total++;
 		return;
 	}
 
@@ -3097,7 +3112,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			/* assume heap */
 			RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
 			RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
-
+
 			if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
 			{
 				neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py
new file mode 100644
index 0000000000..6138c322d7
--- /dev/null
+++ b/test_runner/regress/test_compute_metrics.py
@@ -0,0 +1,21 @@
+from fixtures.neon_fixtures import NeonEnv
+
+
+def test_compute_metrics(neon_simple_env: NeonEnv):
+    """
+    Test compute metrics, exposed in the neon_backend_perf_counters and
+    neon_perf_counters views
+    """
+    env = neon_simple_env
+    endpoint = env.endpoints.create_start("main")
+
+    conn = endpoint.connect()
+    cur = conn.cursor()
+
+    # We don't check that the values make sense; this is just a very
+    # basic check that the server doesn't crash or error out.
+    #
+    # 1.5 is the minimum version to contain these views.
+ cur.execute("CREATE EXTENSION neon VERSION '1.5'") + cur.execute("SELECT * FROM neon_perf_counters") + cur.execute("SELECT * FROM neon_backend_perf_counters") diff --git a/test_runner/regress/test_neon_extension.py b/test_runner/regress/test_neon_extension.py index bb844244e3..22a6013225 100644 --- a/test_runner/regress/test_neon_extension.py +++ b/test_runner/regress/test_neon_extension.py @@ -50,8 +50,8 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder): # Ensure that the default version is also updated in the neon.control file assert cur.fetchone() == ("1.4",) cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE") - all_versions = ["1.4", "1.3", "1.2", "1.1", "1.0"] - current_version = "1.4" + all_versions = ["1.5", "1.4", "1.3", "1.2", "1.1", "1.0"] + current_version = "1.5" for idx, begin_version in enumerate(all_versions): for target_version in all_versions[idx + 1 :]: if current_version != begin_version: