Add views for metrics about pageserver requests (#9008)

The metrics include a histogram of how long we have to wait for GetPage
requests, the number of reconnects, and the number of requests, among other
things.

The metrics are not yet exported anywhere, but you can query them
manually.

Note: This does *not* bump the default version of the 'neon' extension. We
will do that later, in a separate PR. The reason is that this allows us to roll back
the compute image smoothly, if necessary. Once the image that includes the
new extension .so file with the new functions has been rolled out, and we're
confident that we won't need to roll back the image anymore, we can bump the
default extension version and actually start using the new functions and views.
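
Until then, the new views can be exercised by installing or upgrading the
extension to 1.5 explicitly. The first statement below is what the new
regression test does on a fresh database; the second is the in-place upgrade
path printed by the upgrade script:

```
-- On a database without the extension, install it at the new version directly:
CREATE EXTENSION neon VERSION '1.5';

-- If the extension is already installed at the default 1.4, upgrade it in place:
ALTER EXTENSION neon UPDATE TO '1.5';
```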

This is what the view looks like:

```
postgres=# select * from neon_perf_counters ;
                metric                 | bucket_le |  value   
---------------------------------------+-----------+----------
 getpage_wait_seconds_count            |           |      300
 getpage_wait_seconds_sum              |           | 0.048506
 getpage_wait_seconds_bucket           |     2e-05 |        0
 getpage_wait_seconds_bucket           |     3e-05 |        0
 getpage_wait_seconds_bucket           |     6e-05 |       71
 getpage_wait_seconds_bucket           |    0.0001 |      124
 getpage_wait_seconds_bucket           |    0.0002 |      248
 getpage_wait_seconds_bucket           |    0.0003 |      279
 getpage_wait_seconds_bucket           |    0.0006 |      297
 getpage_wait_seconds_bucket           |     0.001 |      298
 getpage_wait_seconds_bucket           |     0.002 |      298
 getpage_wait_seconds_bucket           |     0.003 |      298
 getpage_wait_seconds_bucket           |     0.006 |      300
 getpage_wait_seconds_bucket           |      0.01 |      300
 getpage_wait_seconds_bucket           |      0.02 |      300
 getpage_wait_seconds_bucket           |      0.03 |      300
 getpage_wait_seconds_bucket           |      0.06 |      300
 getpage_wait_seconds_bucket           |       0.1 |      300
 getpage_wait_seconds_bucket           |       0.2 |      300
 getpage_wait_seconds_bucket           |       0.3 |      300
 getpage_wait_seconds_bucket           |       0.6 |      300
 getpage_wait_seconds_bucket           |         1 |      300
 getpage_wait_seconds_bucket           |         2 |      300
 getpage_wait_seconds_bucket           |         3 |      300
 getpage_wait_seconds_bucket           |         6 |      300
 getpage_wait_seconds_bucket           |        10 |      300
 getpage_wait_seconds_bucket           |        20 |      300
 getpage_wait_seconds_bucket           |        30 |      300
 getpage_wait_seconds_bucket           |        60 |      300
 getpage_wait_seconds_bucket           |       100 |      300
 getpage_wait_seconds_bucket           |  Infinity |      300
 getpage_prefetch_requests_total       |           |       69
 getpage_sync_requests_total           |           |      231
 getpage_prefetch_misses_total         |           |        0
 getpage_prefetch_discards_total       |           |        0
 pageserver_requests_sent_total        |           |      323
 pageserver_requests_disconnects_total |           |        0
 pageserver_send_flushes_total         |           |      323
 file_cache_hits_total                 |           |        0
(39 rows)
```
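
Because the bucket values are cumulative (Prometheus-style), approximate
latency percentiles can be read straight off the view: a rough p99 of the
GetPage wait time is the smallest bucket_le whose cumulative count covers 99%
of all requests. A minimal sketch of that query (only as precise as the bucket
boundaries):

```
WITH total AS (
    SELECT value AS cnt
    FROM neon_perf_counters
    WHERE metric = 'getpage_wait_seconds_count'
)
SELECT min(bucket_le) AS approx_p99_seconds
FROM neon_perf_counters, total
WHERE metric = 'getpage_wait_seconds_bucket'
  AND value >= 0.99 * total.cnt;
```

With the sample output above this returns 0.0006, i.e. 99% of the 300 requests
completed in at most 0.6 ms.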

Commit 263dfba6ee (parent df3996265f)
Author: Heikki Linnakangas
Date: 2024-09-23 21:28:50 +03:00
Committed by: GitHub
12 changed files with 533 additions and 20 deletions

File: Makefile

@@ -9,6 +9,8 @@ OBJS = \
hll.o \
libpagestore.o \
neon.o \
neon_pgversioncompat.o \
neon_perf_counters.o \
neon_utils.o \
neon_walreader.o \
pagestore_smgr.o \
@@ -23,7 +25,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
SHLIB_LINK = -lcurl
EXTENSION = neon
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
PGFILEDESC = "neon - cloud storage for PostgreSQL"
EXTRA_CLEAN = \

File: libpagestore.c

@@ -30,6 +30,7 @@
#include "utils/guc.h"
#include "neon.h"
#include "neon_perf_counters.h"
#include "neon_utils.h"
#include "pagestore_client.h"
#include "walproposer.h"
@@ -331,6 +332,7 @@ CLEANUP_AND_DISCONNECT(PageServer *shard)
}
if (shard->conn)
{
MyNeonCounters->pageserver_disconnects_total++;
PQfinish(shard->conn);
shard->conn = NULL;
}
@@ -737,6 +739,8 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
PageServer *shard = &page_servers[shard_no];
PGconn *pageserver_conn;
MyNeonCounters->pageserver_requests_sent_total++;
/* If the connection was lost for some reason, reconnect */
if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
{
@@ -889,6 +893,7 @@ pageserver_flush(shardno_t shard_no)
}
else
{
MyNeonCounters->pageserver_send_flushes_total++;
if (PQflush(pageserver_conn))
{
char *msg = pchomp(PQerrorMessage(pageserver_conn));
@@ -922,7 +927,7 @@ check_neon_id(char **newval, void **extra, GucSource source)
static Size
PagestoreShmemSize(void)
{
return sizeof(PagestoreShmemState);
return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize());
}
static bool
@@ -941,6 +946,9 @@ PagestoreShmemInit(void)
memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
AssignPageserverConnstring(page_server_connstring, NULL);
}
NeonPerfCountersShmemInit();
LWLockRelease(AddinShmemInitLock);
return found;
}

File: neon--1.4--1.5.sql

@@ -0,0 +1,39 @@
\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
CREATE FUNCTION get_backend_perf_counters()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'
LANGUAGE C PARALLEL SAFE;
CREATE FUNCTION get_perf_counters()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'neon_get_perf_counters'
LANGUAGE C PARALLEL SAFE;
-- Show various metrics, for each backend. Note that the values are not reset
-- when a backend exits. When a new backend starts with the same backend ID, it will
-- continue accumulating the values from where the old backend left off. If you are
-- only interested in the changes from your own session, store the values at the
-- beginning of the session somewhere, and subtract them on subsequent calls.
--
-- For histograms, 'bucket_le' is the upper bound of the histogram bucket.
CREATE VIEW neon_backend_perf_counters AS
SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value
FROM get_backend_perf_counters() AS P (
procno integer,
pid integer,
metric text,
bucket_le float8,
value float8
);
-- Summary across all backends. (This could also be implemented with
-- an aggregate query over neon_backend_perf_counters view.)
CREATE VIEW neon_perf_counters AS
SELECT P.metric, P.bucket_le, P.value
FROM get_perf_counters() AS P (
metric text,
bucket_le float8,
value float8
);
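
As the comment above suggests, per-session numbers can be obtained by
snapshotting the counters at the start of the session and subtracting later. A
minimal sketch of that pattern, using a temporary table (the table name is just
an example):

```
-- At the start of the session, snapshot this backend's current values:
CREATE TEMP TABLE neon_perf_baseline AS
SELECT metric, bucket_le, value
FROM neon_backend_perf_counters
WHERE pid = pg_backend_pid();

-- Later, report only what this session has accumulated since the snapshot:
SELECT cur.metric, cur.bucket_le, cur.value - base.value AS delta
FROM neon_backend_perf_counters AS cur
JOIN neon_perf_baseline AS base
  ON base.metric = cur.metric
 AND base.bucket_le IS NOT DISTINCT FROM cur.bucket_le
WHERE cur.pid = pg_backend_pid();
```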

File: neon--1.5--1.4.sql

@@ -0,0 +1,4 @@
DROP VIEW IF EXISTS neon_perf_counters;
DROP VIEW IF EXISTS neon_backend_perf_counters;
DROP FUNCTION IF EXISTS get_perf_counters();
DROP FUNCTION IF EXISTS get_backend_perf_counters();

File: neon.control

@@ -1,5 +1,7 @@
# neon extension
comment = 'cloud storage for PostgreSQL'
# TODO: bump default version to 1.5, after we are certain that we don't
# need to roll back the compute image
default_version = '1.4'
module_pathname = '$libdir/neon'
relocatable = true

File: neon_perf_counters.c

@@ -0,0 +1,261 @@
/*-------------------------------------------------------------------------
*
* neon_perf_counters.c
* Collect statistics about Neon I/O
*
* Each backend has its own set of counters in shared memory.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "utils/builtins.h"
#include "neon_perf_counters.h"
#include "neon_pgversioncompat.h"
neon_per_backend_counters *neon_per_backend_counters_shared;
Size
NeonPerfCountersShmemSize(void)
{
Size size = 0;
size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters)));
return size;
}
bool
NeonPerfCountersShmemInit(void)
{
bool found;
neon_per_backend_counters_shared =
ShmemInitStruct("Neon perf counters",
mul_size(MaxBackends,
sizeof(neon_per_backend_counters)),
&found);
Assert(found == IsUnderPostmaster);
if (!found)
{
/* shared memory is initialized to zeros, so nothing to do here */
}
return found;
}
/*
* Count a GetPage wait operation.
*/
void
inc_getpage_wait(uint64 latency_us)
{
int lo = 0;
int hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
/* Find the right bucket with binary search */
while (lo < hi)
{
int mid = (lo + hi) / 2;
if (latency_us < getpage_wait_bucket_thresholds[mid])
hi = mid;
else
lo = mid + 1;
}
MyNeonCounters->getpage_wait_us_bucket[lo]++;
MyNeonCounters->getpage_wait_us_sum += latency_us;
MyNeonCounters->getpage_wait_us_count++;
}
/*
* Support functions for the views, neon_backend_perf_counters and
* neon_perf_counters.
*/
typedef struct
{
char *name;
bool is_bucket;
double bucket_le;
double value;
} metric_t;
static metric_t *
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
{
#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
uint64 bucket_accum;
int i = 0;
Datum getpage_wait_str;
metrics[i].name = "getpage_wait_seconds_count";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_wait_us_count;
i++;
metrics[i].name = "getpage_wait_seconds_sum";
metrics[i].is_bucket = false;
metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
i++;
bucket_accum = 0;
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
{
uint64 threshold = getpage_wait_bucket_thresholds[bucketno];
bucket_accum += counters->getpage_wait_us_bucket[bucketno];
metrics[i].name = "getpage_wait_seconds_bucket";
metrics[i].is_bucket = true;
metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
metrics[i].value = (double) bucket_accum;
i++;
}
metrics[i].name = "getpage_prefetch_requests_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_prefetch_requests_total;
i++;
metrics[i].name = "getpage_sync_requests_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_sync_requests_total;
i++;
metrics[i].name = "getpage_prefetch_misses_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_prefetch_misses_total;
i++;
metrics[i].name = "getpage_prefetch_discards_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->getpage_prefetch_discards_total;
i++;
metrics[i].name = "pageserver_requests_sent_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->pageserver_requests_sent_total;
i++;
metrics[i].name = "pageserver_requests_disconnects_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->pageserver_disconnects_total;
i++;
metrics[i].name = "pageserver_send_flushes_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->pageserver_send_flushes_total;
i++;
metrics[i].name = "file_cache_hits_total";
metrics[i].is_bucket = false;
metrics[i].value = (double) counters->file_cache_hits_total;
i++;
Assert(i == NUM_METRICS);
/* NULL entry marks end of array */
metrics[i].name = NULL;
metrics[i].value = 0;
return metrics;
}
/*
* Write metric to three output Datums
*/
static void
metric_to_datums(metric_t *m, Datum *values, bool *nulls)
{
values[0] = CStringGetTextDatum(m->name);
nulls[0] = false;
if (m->is_bucket)
{
values[1] = Float8GetDatum(m->bucket_le);
nulls[1] = false;
}
else
{
values[1] = (Datum) 0;
nulls[1] = true;
}
values[2] = Float8GetDatum(m->value);
nulls[2] = false;
}
PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);
Datum
neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
{
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Datum values[5];
bool nulls[5];
/* We put all the tuples into a tuplestore in one go. */
InitMaterializedSRF(fcinfo, 0);
for (int procno = 0; procno < MaxBackends; procno++)
{
PGPROC *proc = GetPGProcByNumber(procno);
int pid = proc->pid;
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
metric_t *metrics = neon_perf_counters_to_metrics(counters);
values[0] = Int32GetDatum(procno);
nulls[0] = false;
values[1] = Int32GetDatum(pid);
nulls[1] = false;
for (int i = 0; metrics[i].name != NULL; i++)
{
metric_to_datums(&metrics[i], &values[2], &nulls[2]);
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
}
pfree(metrics);
}
return (Datum) 0;
}
PG_FUNCTION_INFO_V1(neon_get_perf_counters);
Datum
neon_get_perf_counters(PG_FUNCTION_ARGS)
{
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Datum values[3];
bool nulls[3];
Datum getpage_wait_str;
neon_per_backend_counters totals = {0};
metric_t *metrics;
/* We put all the tuples into a tuplestore in one go. */
InitMaterializedSRF(fcinfo, 0);
/* Aggregate the counters across all backends */
for (int procno = 0; procno < MaxBackends; procno++)
{
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
totals.getpage_wait_us_count += counters->getpage_wait_us_count;
totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
totals.file_cache_hits_total += counters->file_cache_hits_total;
}
metrics = neon_perf_counters_to_metrics(&totals);
for (int i = 0; metrics[i].name != NULL; i++)
{
metric_to_datums(&metrics[i], &values[0], &nulls[0]);
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
}
pfree(metrics);
return (Datum) 0;
}
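
Since the per-backend function also returns each backend's procno and pid, the
counters can be attributed to sessions by joining against pg_stat_activity. A
small sketch (the metric and columns picked here are arbitrary, purely for
illustration):

```
SELECT a.pid, a.application_name, a.state, c.value AS getpage_sync_requests
FROM neon_backend_perf_counters AS c
JOIN pg_stat_activity AS a ON a.pid = c.pid
WHERE c.metric = 'getpage_sync_requests_total'
ORDER BY c.value DESC;
```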

File: neon_perf_counters.h

@@ -0,0 +1,111 @@
/*-------------------------------------------------------------------------
*
* neon_perf_counters.h
* Performance counters for neon storage requests
*-------------------------------------------------------------------------
*/
#ifndef NEON_PERF_COUNTERS_H
#define NEON_PERF_COUNTERS_H
#if PG_VERSION_NUM >= 170000
#include "storage/procnumber.h"
#else
#include "storage/backendid.h"
#include "storage/proc.h"
#endif
static const uint64 getpage_wait_bucket_thresholds[] = {
20, 30, 60, 100, /* 0 - 100 us */
200, 300, 600, 1000, /* 100 us - 1 ms */
2000, 3000, 6000, 10000, /* 1 ms - 10 ms */
20000, 30000, 60000, 100000, /* 10 ms - 100 ms */
200000, 300000, 600000, 1000000, /* 100 ms - 1 s */
2000000, 3000000, 6000000, 10000000, /* 1 s - 10 s */
20000000, 30000000, 60000000, 100000000, /* 10 s - 100 s */
UINT64_MAX,
};
#define NUM_GETPAGE_WAIT_BUCKETS (lengthof(getpage_wait_bucket_thresholds))
typedef struct
{
/*
* Histogram of how long an smgrread() request needs to wait for a response
* from the pageserver. When prefetching is effective, these wait times can be
* lower than the network latency to the pageserver, or even zero, if the
* page has already been prefetched by the time we need to read it.
*
* Note: we accumulate these in microseconds, because that's convenient in
* the backend, but the 'neon_backend_perf_counters' view will convert
* them to seconds, to make them more idiomatic as prometheus metrics.
*/
uint64 getpage_wait_us_count;
uint64 getpage_wait_us_sum;
uint64 getpage_wait_us_bucket[NUM_GETPAGE_WAIT_BUCKETS];
/*
* Total number of speculative prefetch GetPage requests and synchronous
* GetPage requests sent.
*/
uint64 getpage_prefetch_requests_total;
uint64 getpage_sync_requests_total;
/* XXX: It's not clear to me when these misses happen. */
uint64 getpage_prefetch_misses_total;
/*
* Number of prefetched responses that were discarded because the
* prefetched page was not needed or because it was concurrently fetched /
* modified by another backend.
*/
uint64 getpage_prefetch_discards_total;
/*
* Total number of requests sent to the pageserver. (prefetch_requests_total
* and sync_requests_total count only GetPage requests; this counts all
* request types.)
*/
uint64 pageserver_requests_sent_total;
/*
* Number of times the connection to the pageserver was lost and the
* backend had to reconnect. Note that this doesn't count the first
* connection in each backend, only reconnects.
*/
uint64 pageserver_disconnects_total;
/*
* Number of network flushes to the pageserver. Synchronous requests are
* flushed immediately, but prefetch requests may be sent in batches, so
* this can be smaller than pageserver_requests_sent_total.
*/
uint64 pageserver_send_flushes_total;
/*
* Number of requests satisfied from the LFC.
*
* This is redundant with the server-wide file_cache_hits, but this gives
* per-backend granularity, and it's handy to have this in the same place
* as counters for requests that went to the pageserver. Maybe move all
* the LFC stats to this struct in the future?
*/
uint64 file_cache_hits_total;
} neon_per_backend_counters;
/* Pointer to the shared memory array of neon_per_backend_counters structs */
extern neon_per_backend_counters *neon_per_backend_counters_shared;
#if PG_VERSION_NUM >= 170000
#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])
#else
#define MyNeonCounters (&neon_per_backend_counters_shared[MyProc->pgprocno])
#endif
extern void inc_getpage_wait(uint64 latency);
extern Size NeonPerfCountersShmemSize(void);
extern bool NeonPerfCountersShmemInit(void);
#endif /* NEON_PERF_COUNTERS_H */
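
The comments above note that waits are accumulated in microseconds but exposed
in seconds, and that pageserver_send_flushes_total can be lower than
pageserver_requests_sent_total when prefetch requests are batched. Both
relationships can be checked directly from the aggregated view; a sketch:

```
SELECT
  max(value) FILTER (WHERE metric = 'getpage_wait_seconds_sum')
    / nullif(max(value) FILTER (WHERE metric = 'getpage_wait_seconds_count'), 0)
    AS avg_getpage_wait_seconds,
  max(value) FILTER (WHERE metric = 'pageserver_requests_sent_total')
    / nullif(max(value) FILTER (WHERE metric = 'pageserver_send_flushes_total'), 0)
    AS requests_per_flush
FROM neon_perf_counters;
```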

File: neon_pgversioncompat.c

@@ -0,0 +1,44 @@
/*
* Support functions for the compatibility macros in neon_pgversioncompat.h
*/
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/tuplestore.h"
#include "neon_pgversioncompat.h"
#if PG_MAJORVERSION_NUM < 15
void
InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
{
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Tuplestorestate *tupstore;
MemoryContext old_context,
per_query_ctx;
TupleDesc stored_tupdesc;
/* check to see if caller supports returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
/*
* Store the tuplestore and the tuple descriptor in ReturnSetInfo. This
* must be done in the per-query memory context.
*/
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
old_context = MemoryContextSwitchTo(per_query_ctx);
if (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
tupstore = tuplestore_begin_heap(false, false, work_mem);
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setResult = tupstore;
rsinfo->setDesc = stored_tupdesc;
MemoryContextSwitchTo(old_context);
}
#endif

File: neon_pgversioncompat.h

@@ -6,6 +6,8 @@
#ifndef NEON_PGVERSIONCOMPAT_H
#define NEON_PGVERSIONCOMPAT_H
#include "fmgr.h"
#if PG_MAJORVERSION_NUM < 17
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
#else
@@ -123,4 +125,8 @@
#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
#endif
#if PG_MAJORVERSION_NUM < 15
extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
#endif
#endif /* NEON_PGVERSIONCOMPAT_H */

File: pagestore_smgr.c

@@ -66,6 +66,7 @@
#include "storage/md.h"
#include "storage/smgr.h"
#include "neon_perf_counters.h"
#include "pagestore_client.h"
#include "bitmap.h"
@@ -289,7 +290,6 @@ static PrefetchState *MyPState;
static bool compact_prefetch_buffers(void);
static void consume_prefetch_responses(void);
static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns);
static bool prefetch_read(PrefetchRequest *slot);
static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
static bool prefetch_wait_for(uint64 ring_index);
@@ -780,21 +780,27 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
}
/*
* prefetch_register_buffer() - register and prefetch buffer
* prefetch_register_bufferv() - register and prefetch buffers
*
* Register that we may want the contents of BufferTag in the near future.
* This is used when issuing a speculative prefetch request, but also when
* performing a synchronous request and we need the buffer right now.
*
* If force_request_lsns is not NULL, those values are sent to the
* pageserver. If NULL, we utilize the lastWrittenLsn infrastructure
* to calculate the LSNs to send.
*
* When performing a prefetch rather than a synchronous request, pass
* is_prefetch==true. Currently, it only affects how the request is accounted
* for in the perf counters.
*
* NOTE: this function may indirectly update MyPState->pfs_hash; which
* invalidates any active pointers into the hash table.
*/
static uint64
prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
BlockNumber nblocks, const bits8 *mask)
BlockNumber nblocks, const bits8 *mask,
bool is_prefetch)
{
uint64 min_ring_index;
PrefetchRequest req;
@@ -815,6 +821,7 @@ Retry:
PrfHashEntry *entry = NULL;
uint64 ring_index;
neon_request_lsns *lsns;
if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
continue;
@@ -858,6 +865,7 @@ Retry:
prefetch_set_unused(ring_index);
entry = NULL;
slot = NULL;
MyNeonCounters->getpage_prefetch_discards_total++;
}
}
@@ -972,6 +980,11 @@ Retry:
min_ring_index = Min(min_ring_index, ring_index);
if (is_prefetch)
MyNeonCounters->getpage_prefetch_requests_total++;
else
MyNeonCounters->getpage_sync_requests_total++;
prefetch_do_request(slot, lsns);
}
@@ -1000,13 +1013,6 @@ Retry:
}
static uint64
prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns)
{
return prefetch_register_bufferv(tag, force_request_lsns, 1, NULL);
}
/*
* Note: this function can get canceled and use a long jump to the next catch
* context. Take care.
@@ -2612,7 +2618,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
lfc_present[i] = ~(lfc_present[i]);
ring_index = prefetch_register_bufferv(tag, NULL, iterblocks,
lfc_present);
lfc_present, true);
nblocks -= iterblocks;
blocknum += iterblocks;
@@ -2656,7 +2662,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));
ring_index = prefetch_register_buffer(tag, NULL);
ring_index = prefetch_register_bufferv(tag, NULL, 1, NULL, true);
Assert(ring_index < MyPState->ring_unused &&
MyPState->ring_last <= ring_index);
@@ -2747,17 +2753,20 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* weren't for the behaviour of the LwLsn cache that uses the highest
* value of the LwLsn cache when the entry is not found.
*/
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask);
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
for (int i = 0; i < nblocks; i++)
{
void *buffer = buffers[i];
BlockNumber blockno = base_blockno + i;
neon_request_lsns *reqlsns = &request_lsns[i];
TimestampTz start_ts, end_ts;
if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
continue;
start_ts = GetCurrentTimestamp();
if (RecoveryInProgress() && MyBackendType != B_STARTUP)
XLogWaitForReplayOf(reqlsns[0].request_lsn);
@@ -2794,6 +2803,7 @@ Retry:
/* drop caches */
prefetch_set_unused(slot->my_ring_index);
pgBufferUsage.prefetch.expired += 1;
MyNeonCounters->getpage_prefetch_discards_total++;
/* make it look like a prefetch cache miss */
entry = NULL;
}
@@ -2804,8 +2814,9 @@ Retry:
if (entry == NULL)
{
pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL);
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
Assert(ring_index != UINT64_MAX);
slot = GetPrfSlot(ring_index);
}
@@ -2860,6 +2871,9 @@ Retry:
/* buffer was used, clean up for later reuse */
prefetch_set_unused(ring_index);
prefetch_cleanup_trailing_unused();
end_ts = GetCurrentTimestamp();
inc_getpage_wait(end_ts >= start_ts ? (end_ts - start_ts) : 0);
}
}
@@ -2913,6 +2927,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
/* Try to read from local file cache */
if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
{
MyNeonCounters->file_cache_hits_total++;
return;
}
@@ -3097,7 +3112,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
/* assume heap */
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
{
neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
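
With the counters wired into the read and prefetch paths above, prefetch
effectiveness can be eyeballed from the aggregated view, e.g. how many GetPage
requests were speculative and how many prefetched responses were wasted. A
sketch:

```
SELECT metric, value
FROM neon_perf_counters
WHERE metric IN ('getpage_prefetch_requests_total',
                 'getpage_sync_requests_total',
                 'getpage_prefetch_misses_total',
                 'getpage_prefetch_discards_total',
                 'file_cache_hits_total');
```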

File: test_compute_metrics.py

@@ -0,0 +1,21 @@
from fixtures.neon_fixtures import NeonEnv
def test_compute_metrics(neon_simple_env: NeonEnv):
"""
Test compute metrics, exposed in the neon_backend_perf_counters and
neon_perf_counters views
"""
env = neon_simple_env
endpoint = env.endpoints.create_start("main")
conn = endpoint.connect()
cur = conn.cursor()
# We don't check that the values make sense, this is just a very
# basic check that the server doesn't crash or something like that.
#
# 1.5 is the minimum version to contain these views.
cur.execute("CREATE EXTENSION neon VERSION '1.5'")
cur.execute("SELECT * FROM neon_perf_counters")
cur.execute("SELECT * FROM neon_backend_perf_counters")

File: test_neon_extension.py

@@ -50,8 +50,8 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
# Ensure that the default version is also updated in the neon.control file
assert cur.fetchone() == ("1.4",)
cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
all_versions = ["1.4", "1.3", "1.2", "1.1", "1.0"]
current_version = "1.4"
all_versions = ["1.5", "1.4", "1.3", "1.2", "1.1", "1.0"]
current_version = "1.5"
for idx, begin_version in enumerate(all_versions):
for target_version in all_versions[idx + 1 :]:
if current_version != begin_version: