From 5e3cb2ab070c77d8f836fa0f0008f8dd08f25875 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 29 Jul 2025 16:12:44 +0300 Subject: [PATCH] Refactor LFC stats functions (#12696) Split the functions into two parts: an internal function in file_cache.c which returns an array of structs representing the result set, and another function in neon.c with the glue code to expose it as a SQL function. This is in preparation for the new communicator, which needs to implement the same SQL functions, but getting the information from a different place. In the glue code, use the more modern Postgres way of building a result set using a tuplestore. --- pgxn/neon/file_cache.c | 386 +++++++++++------------------------------ pgxn/neon/file_cache.h | 20 +++ pgxn/neon/neon.c | 74 ++++++++ 3 files changed, 194 insertions(+), 286 deletions(-) diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 88086689c8..3c680eab86 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -1832,125 +1832,46 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, LWLockRelease(lfc_lock); } -typedef struct +/* + * Return metrics about the LFC. + * + * The return format is a palloc'd array of LfcStatsEntrys. The size + * of the returned array is returned in *num_entries. + */ +LfcStatsEntry * +lfc_get_stats(size_t *num_entries) { - TupleDesc tupdesc; -} NeonGetStatsCtx; + LfcStatsEntry *entries; + size_t n = 0; -#define NUM_NEON_GET_STATS_COLS 2 +#define MAX_ENTRIES 10 + entries = palloc(sizeof(LfcStatsEntry) * MAX_ENTRIES); -PG_FUNCTION_INFO_V1(neon_get_lfc_stats); -Datum -neon_get_lfc_stats(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - NeonGetStatsCtx *fctx; - MemoryContext oldcontext; - TupleDesc tupledesc; - Datum result; - HeapTuple tuple; - char const *key; - uint64 value = 0; - Datum values[NUM_NEON_GET_STATS_COLS]; - bool nulls[NUM_NEON_GET_STATS_COLS]; + entries[n++] = (LfcStatsEntry) {"file_cache_chunk_size_pages", lfc_ctl == NULL, + lfc_ctl ? lfc_blocks_per_chunk : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_misses", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->misses : 0}; + entries[n++] = (LfcStatsEntry) {"file_cache_hits", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->hits : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_used", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->used : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_writes", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->writes : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_size", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->size : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_used_pages", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->used_pages : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_evicted_pages", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->evicted_pages : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_limit", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->limit : 0 }; + entries[n++] = (LfcStatsEntry) {"file_cache_chunks_pinned", lfc_ctl == NULL, + lfc_ctl ? lfc_ctl->pinned : 0 }; + Assert(n <= MAX_ENTRIES); +#undef MAX_ENTRIES - if (SRF_IS_FIRSTCALL()) - { - funcctx = SRF_FIRSTCALL_INIT(); - - /* Switch context when allocating stuff to be used in later calls */ - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* Create a user function context for cross-call persistence */ - fctx = (NeonGetStatsCtx *) palloc(sizeof(NeonGetStatsCtx)); - - /* Construct a tuple descriptor for the result rows. */ - tupledesc = CreateTemplateTupleDesc(NUM_NEON_GET_STATS_COLS); - - TupleDescInitEntry(tupledesc, (AttrNumber) 1, "lfc_key", - TEXTOID, -1, 0); - TupleDescInitEntry(tupledesc, (AttrNumber) 2, "lfc_value", - INT8OID, -1, 0); - - fctx->tupdesc = BlessTupleDesc(tupledesc); - funcctx->user_fctx = fctx; - - /* Return to original context when allocating transient memory */ - MemoryContextSwitchTo(oldcontext); - } - - funcctx = SRF_PERCALL_SETUP(); - - /* Get the saved state */ - fctx = (NeonGetStatsCtx *) funcctx->user_fctx; - - switch (funcctx->call_cntr) - { - case 0: - key = "file_cache_misses"; - if (lfc_ctl) - value = lfc_ctl->misses; - break; - case 1: - key = "file_cache_hits"; - if (lfc_ctl) - value = lfc_ctl->hits; - break; - case 2: - key = "file_cache_used"; - if (lfc_ctl) - value = lfc_ctl->used; - break; - case 3: - key = "file_cache_writes"; - if (lfc_ctl) - value = lfc_ctl->writes; - break; - case 4: - key = "file_cache_size"; - if (lfc_ctl) - value = lfc_ctl->size; - break; - case 5: - key = "file_cache_used_pages"; - if (lfc_ctl) - value = lfc_ctl->used_pages; - break; - case 6: - key = "file_cache_evicted_pages"; - if (lfc_ctl) - value = lfc_ctl->evicted_pages; - break; - case 7: - key = "file_cache_limit"; - if (lfc_ctl) - value = lfc_ctl->limit; - break; - case 8: - key = "file_cache_chunk_size_pages"; - value = lfc_blocks_per_chunk; - break; - case 9: - key = "file_cache_chunks_pinned"; - if (lfc_ctl) - value = lfc_ctl->pinned; - break; - default: - SRF_RETURN_DONE(funcctx); - } - values[0] = PointerGetDatum(cstring_to_text(key)); - nulls[0] = false; - if (lfc_ctl) - { - nulls[1] = false; - values[1] = Int64GetDatum(value); - } - else - nulls[1] = true; - - tuple = heap_form_tuple(fctx->tupdesc, values, nulls); - result = HeapTupleGetDatum(tuple); - SRF_RETURN_NEXT(funcctx, result); + *num_entries = n; + return entries; } @@ -1958,193 +1879,86 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS) * Function returning data from the local file cache * relation node/tablespace/database/blocknum and access_counter */ -PG_FUNCTION_INFO_V1(local_cache_pages); - -/* - * Record structure holding the to be exposed cache data. - */ -typedef struct +LocalCachePagesRec * +lfc_local_cache_pages(size_t *num_entries) { - uint32 pageoffs; - Oid relfilenode; - Oid reltablespace; - Oid reldatabase; - ForkNumber forknum; - BlockNumber blocknum; - uint16 accesscount; -} LocalCachePagesRec; + HASH_SEQ_STATUS status; + FileCacheEntry *entry; + size_t n_pages; + size_t n; + LocalCachePagesRec *result; -/* - * Function context for data persisting over repeated calls. - */ -typedef struct -{ - TupleDesc tupdesc; - LocalCachePagesRec *record; -} LocalCachePagesContext; - - -#define NUM_LOCALCACHE_PAGES_ELEM 7 - -Datum -local_cache_pages(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - Datum result; - MemoryContext oldcontext; - LocalCachePagesContext *fctx; /* User function context. */ - TupleDesc tupledesc; - TupleDesc expected_tupledesc; - HeapTuple tuple; - - if (SRF_IS_FIRSTCALL()) + if (!lfc_ctl) { - HASH_SEQ_STATUS status; - FileCacheEntry *entry; - uint32 n_pages = 0; + *num_entries = 0; + return NULL; + } - funcctx = SRF_FIRSTCALL_INIT(); + LWLockAcquire(lfc_lock, LW_SHARED); + if (!LFC_ENABLED()) + { + LWLockRelease(lfc_lock); + *num_entries = 0; + return NULL; + } - /* Switch context when allocating stuff to be used in later calls */ - oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* Create a user function context for cross-call persistence */ - fctx = (LocalCachePagesContext *) palloc(sizeof(LocalCachePagesContext)); - - /* - * To smoothly support upgrades from version 1.0 of this extension - * transparently handle the (non-)existence of the pinning_backends - * column. We unfortunately have to get the result type for that... - - * we can't use the result type determined by the function definition - * without potentially crashing when somebody uses the old (or even - * wrong) function definition though. - */ - if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE) - neon_log(ERROR, "return type must be a row type"); - - if (expected_tupledesc->natts != NUM_LOCALCACHE_PAGES_ELEM) - neon_log(ERROR, "incorrect number of output arguments"); - - /* Construct a tuple descriptor for the result rows. */ - tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts); - TupleDescInitEntry(tupledesc, (AttrNumber) 1, "pageoffs", - INT8OID, -1, 0); -#if PG_MAJORVERSION_NUM < 16 - TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", - OIDOID, -1, 0); -#else - TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenumber", - OIDOID, -1, 0); -#endif - TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", - OIDOID, -1, 0); - TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase", - OIDOID, -1, 0); - TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber", - INT2OID, -1, 0); - TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber", - INT8OID, -1, 0); - TupleDescInitEntry(tupledesc, (AttrNumber) 7, "accesscount", - INT4OID, -1, 0); - - fctx->tupdesc = BlessTupleDesc(tupledesc); - - if (lfc_ctl) + /* Count the pages first */ + n_pages = 0; + hash_seq_init(&status, lfc_hash); + while ((entry = hash_seq_search(&status)) != NULL) + { + /* Skip hole tags */ + if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0) { - LWLockAcquire(lfc_lock, LW_SHARED); + for (int i = 0; i < lfc_blocks_per_chunk; i++) + n_pages += GET_STATE(entry, i) == AVAILABLE; + } + } - if (LFC_ENABLED()) + if (n_pages == 0) + { + LWLockRelease(lfc_lock); + *num_entries = 0; + return NULL; + } + + result = (LocalCachePagesRec *) + MemoryContextAllocHuge(CurrentMemoryContext, + sizeof(LocalCachePagesRec) * n_pages); + + /* + * Scan through all the cache entries, saving the relevant fields + * in the result structure. + */ + n = 0; + hash_seq_init(&status, lfc_hash); + while ((entry = hash_seq_search(&status)) != NULL) + { + for (int i = 0; i < lfc_blocks_per_chunk; i++) + { + if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0) { - hash_seq_init(&status, lfc_hash); - while ((entry = hash_seq_search(&status)) != NULL) + if (GET_STATE(entry, i) == AVAILABLE) { - /* Skip hole tags */ - if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0) - { - for (int i = 0; i < lfc_blocks_per_chunk; i++) - n_pages += GET_STATE(entry, i) == AVAILABLE; - } + result[n].pageoffs = entry->offset * lfc_blocks_per_chunk + i; + result[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)); + result[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key)); + result[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key)); + result[n].forknum = entry->key.forkNum; + result[n].blocknum = entry->key.blockNum + i; + result[n].accesscount = entry->access_count; + n += 1; } } } - fctx->record = (LocalCachePagesRec *) - MemoryContextAllocHuge(CurrentMemoryContext, - sizeof(LocalCachePagesRec) * n_pages); - - /* Set max calls and remember the user function context. */ - funcctx->max_calls = n_pages; - funcctx->user_fctx = fctx; - - /* Return to original context when allocating transient memory */ - MemoryContextSwitchTo(oldcontext); - - if (n_pages != 0) - { - /* - * Scan through all the cache entries, saving the relevant fields - * in the fctx->record structure. - */ - uint32 n = 0; - - hash_seq_init(&status, lfc_hash); - while ((entry = hash_seq_search(&status)) != NULL) - { - for (int i = 0; i < lfc_blocks_per_chunk; i++) - { - if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0) - { - if (GET_STATE(entry, i) == AVAILABLE) - { - fctx->record[n].pageoffs = entry->offset * lfc_blocks_per_chunk + i; - fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)); - fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key)); - fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key)); - fctx->record[n].forknum = entry->key.forkNum; - fctx->record[n].blocknum = entry->key.blockNum + i; - fctx->record[n].accesscount = entry->access_count; - n += 1; - } - } - } - } - Assert(n_pages == n); - } - if (lfc_ctl) - LWLockRelease(lfc_lock); } + Assert(n_pages == n); + LWLockRelease(lfc_lock); - funcctx = SRF_PERCALL_SETUP(); - - /* Get the saved state */ - fctx = funcctx->user_fctx; - - if (funcctx->call_cntr < funcctx->max_calls) - { - uint32 i = funcctx->call_cntr; - Datum values[NUM_LOCALCACHE_PAGES_ELEM]; - bool nulls[NUM_LOCALCACHE_PAGES_ELEM] = { - false, false, false, false, false, false, false - }; - - values[0] = Int64GetDatum((int64) fctx->record[i].pageoffs); - values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode); - values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace); - values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase); - values[4] = ObjectIdGetDatum(fctx->record[i].forknum); - values[5] = Int64GetDatum((int64) fctx->record[i].blocknum); - values[6] = Int32GetDatum(fctx->record[i].accesscount); - - /* Build and return the tuple. */ - tuple = heap_form_tuple(fctx->tupdesc, values, nulls); - result = HeapTupleGetDatum(tuple); - - SRF_RETURN_NEXT(funcctx, result); - } - else - SRF_RETURN_DONE(funcctx); + *num_entries = n_pages; + return result; } - /* * Internal implementation of the approximate_working_set_size_seconds() * function. diff --git a/pgxn/neon/file_cache.h b/pgxn/neon/file_cache.h index 14e5d4f753..4145327942 100644 --- a/pgxn/neon/file_cache.h +++ b/pgxn/neon/file_cache.h @@ -47,6 +47,26 @@ extern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blk extern FileCacheState* lfc_get_state(size_t max_entries); extern void lfc_prewarm(FileCacheState* fcs, uint32 n_workers); +typedef struct LfcStatsEntry +{ + const char *metric_name; + bool isnull; + uint64 value; +} LfcStatsEntry; +extern LfcStatsEntry *lfc_get_stats(size_t *num_entries); + +typedef struct +{ + uint32 pageoffs; + Oid relfilenode; + Oid reltablespace; + Oid reldatabase; + ForkNumber forknum; + BlockNumber blocknum; + uint16 accesscount; +} LocalCachePagesRec; +extern LocalCachePagesRec *lfc_local_cache_pages(size_t *num_entries); + extern int32 lfc_approximate_working_set_size_seconds(time_t duration, bool reset); diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index 6cd21cce39..07de696d7b 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -625,11 +625,15 @@ _PG_init(void) ExecutorEnd_hook = neon_ExecutorEnd; } +/* Various functions exposed at SQL level */ + PG_FUNCTION_INFO_V1(pg_cluster_size); PG_FUNCTION_INFO_V1(backpressure_lsns); PG_FUNCTION_INFO_V1(backpressure_throttling_time); PG_FUNCTION_INFO_V1(approximate_working_set_size_seconds); PG_FUNCTION_INFO_V1(approximate_working_set_size); +PG_FUNCTION_INFO_V1(neon_get_lfc_stats); +PG_FUNCTION_INFO_V1(local_cache_pages); Datum pg_cluster_size(PG_FUNCTION_ARGS) @@ -704,6 +708,76 @@ approximate_working_set_size(PG_FUNCTION_ARGS) PG_RETURN_INT32(dc); } +Datum +neon_get_lfc_stats(PG_FUNCTION_ARGS) +{ +#define NUM_NEON_GET_STATS_COLS 2 + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + LfcStatsEntry *entries; + size_t num_entries; + + InitMaterializedSRF(fcinfo, 0); + + /* lfc_get_stats() does all the heavy lifting */ + entries = lfc_get_stats(&num_entries); + + /* Convert the LfcStatsEntrys to a result set */ + for (size_t i = 0; i < num_entries; i++) + { + LfcStatsEntry *entry = &entries[i]; + Datum values[NUM_NEON_GET_STATS_COLS]; + bool nulls[NUM_NEON_GET_STATS_COLS]; + + values[0] = CStringGetTextDatum(entry->metric_name); + nulls[0] = false; + values[1] = Int64GetDatum(entry->isnull ? 0 : entry->value); + nulls[1] = entry->isnull; + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + PG_RETURN_VOID(); + +#undef NUM_NEON_GET_STATS_COLS +} + +Datum +local_cache_pages(PG_FUNCTION_ARGS) +{ +#define NUM_LOCALCACHE_PAGES_COLS 7 + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + LocalCachePagesRec *entries; + size_t num_entries; + + InitMaterializedSRF(fcinfo, 0); + + /* lfc_local_cache_pages() does all the heavy lifting */ + entries = lfc_local_cache_pages(&num_entries); + + /* Convert the LocalCachePagesRec structs to a result set */ + for (size_t i = 0; i < num_entries; i++) + { + LocalCachePagesRec *entry = &entries[i]; + Datum values[NUM_LOCALCACHE_PAGES_COLS]; + bool nulls[NUM_LOCALCACHE_PAGES_COLS] = { + false, false, false, false, false, false, false + }; + + values[0] = Int64GetDatum((int64) entry->pageoffs); + values[1] = ObjectIdGetDatum(entry->relfilenode); + values[2] = ObjectIdGetDatum(entry->reltablespace); + values[3] = ObjectIdGetDatum(entry->reldatabase); + values[4] = ObjectIdGetDatum(entry->forknum); + values[5] = Int64GetDatum((int64) entry->blocknum); + values[6] = Int32GetDatum(entry->accesscount); + + /* Build and return the tuple. */ + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); + } + + PG_RETURN_VOID(); + +#undef NUM_LOCALCACHE_PAGES_COLS +} + /* * Initialization stage 2: make requests for the amount of shared memory we * will need.