From 284d7b4da69420a4fb19bd47326f48ce9506aeee Mon Sep 17 00:00:00 2001
From: Konstantin Knizhnik
Date: Thu, 24 Oct 2024 22:14:43 +0300
Subject: [PATCH] Report prewarm progress

Track LFC prewarm progress in FileCacheControl (prewarm_total_chunks,
prewarm_curr_chunk, prewarmed_pages, skipped_pages) and expose the four
counters through a new SQL function get_prewarm_info(), added to the
1.5 -> 1.6 extension upgrade script.

The prewarm background worker is now registered with
BgWorkerStart_ConsistentState instead of BgWorkerStart_RecoveryFinished.

test_lfc_prewarm polls get_prewarm_info() and waits until curr_chunk
reaches total_chunks instead of waiting for a fixed number of used pages.

---
Review notes (ignored by git am):
 * get_prewarm_info() returned NULL when lfc_size_limit != 0; the check is
   now lfc_size_limit == 0.
 * Removed a stray ':' after an assert in test_lfc_prewarm.py and fixed the
   "Prewrm" typo in its log message.
 * Indentation of context lines was reconstructed from a whitespace-collapsed
   copy of this patch; if a hunk does not apply, retry with
   `git apply --ignore-whitespace`.

 pgxn/neon/file_cache.c                  | 72 +++++++++++++++++--------
 pgxn/neon/neon--1.5--1.6.sql            |  8 +++
 test_runner/regress/test_lfc_prewarm.py | 14 +++--
 3 files changed, 69 insertions(+), 25 deletions(-)

diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c
index f3f8450937..5b39a95112 100644
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -43,6 +43,10 @@
 #include "utils/dynahash.h"
 #include "utils/guc.h"
 
+#if PG_VERSION_NUM >= 150000
+#include "access/xlogrecovery.h"
+#endif
+
 #include "hll.h"
 #include "bitmap.h"
 #include "neon.h"
@@ -123,6 +127,10 @@ typedef struct FileCacheControl
 	uint64		writes;			/* number of writes issued */
 	uint64		time_read;		/* time spent reading (us) */
 	uint64		time_write;		/* time spent writing (us) */
+	uint32		prewarm_total_chunks;	/* number of chunks prewarm will try to load */
+	uint32		prewarm_curr_chunk;	/* chunk of the last received prewarm response; set to the chunk count on completion */
+	uint32		prewarmed_pages;	/* pages stored in LFC by prewarm */
+	uint32		skipped_pages;	/* prewarm responses that were not stored in LFC */
 	dlist_head	lru;			/* double linked list for LRU replacement
 								 * algorithm */
 	dlist_head	holes;          /* double linked list of punched holes */
@@ -313,14 +321,7 @@ lfc_shmem_startup(void)
 								 n_chunks + 1, n_chunks + 1,
 								 &info,
 								 HASH_ELEM | HASH_BLOBS);
-		lfc_ctl->generation = 0;
-		lfc_ctl->size = 0;
-		lfc_ctl->used = 0;
-		lfc_ctl->hits = 0;
-		lfc_ctl->misses = 0;
-		lfc_ctl->writes = 0;
-		lfc_ctl->time_read = 0;
-		lfc_ctl->time_write = 0;
+		memset(lfc_ctl, 0, sizeof *lfc_ctl);	/* zero every counter, including the prewarm ones */
 		dlist_init(&lfc_ctl->lru);
 		dlist_init(&lfc_ctl->holes);
 
@@ -533,19 +534,8 @@ lfc_init(void)
 	BackgroundWorker bgw;
 	memset(&bgw, 0, sizeof(bgw));
 	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
-	/*
-	 * Prewarming LFC at replica is problematic and doubtful.
-	 * 1. It has not so much sense because replica is skipping all WAL records which target pages is not present
-	 * in shared buffers and invalidate LFC in this case. And as far as size of shared buffers is very small,
-	 * there is really no sense to try to prewarm LFC which will be invalidated in any case.
-	 * 2. Unlike primary,it is not possible to retrieve most recent version of the page. We should follow current apply LSN.
-	 * It significantly complicates prewarming.
-	 *
-	 * BgWorkerStart_RecoveryFinished means we won't ever get started on a hot_standby see
-	 * https://www.postgresql.org/docs/10/static/bgworker.html as it's not
-	 * documented in bgworker.c.
-	 */
-	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+
+	bgw.bgw_start_time = BgWorkerStart_ConsistentState;
 	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon");
 	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "LfcPrewarmMain");
 	snprintf(bgw.bgw_name, BGW_MAXLEN, "LFC prewarm");
@@ -635,6 +625,7 @@ lfc_init_prewarm(void)
 
 	/* Do not try to load more than fits in LFC */
 	max_entries = Min(rc / sizeof(FileCacheStateEntry), lfc_ctl->limit);
+	lfc_ctl->prewarm_total_chunks = max_entries;
 	elog(LOG, "LFC: read state with %lu entries", (long)(rc / sizeof(FileCacheStateEntry)));
 
 	for (i = 0; i < max_entries; i++)
@@ -748,6 +739,7 @@ lfc_load_pages(void)
 
 			shard_no = get_shard_number(&fs[chunk_no].key);
 			resp = page_server->receive(shard_no);
+			lfc_ctl->prewarm_curr_chunk = chunk_no;
 
 			if (resp->tag != T_NeonGetPageResponse)
 			{
@@ -792,6 +784,11 @@ lfc_load_pages(void)
 				{
 					lfc_ctl->used_pages += 1 - ((entry->bitmap[offs_in_chunk >> 5] >> (offs_in_chunk & 31)) & 1);
 					entry->bitmap[offs_in_chunk >> 5] |= 1 << (offs_in_chunk & 31);
+					lfc_ctl->prewarmed_pages += 1;
+				}
+				else
+				{
+					lfc_ctl->skipped_pages += 1;
 				}
 				Assert(entry->prewarm_started);
 				entry->prewarm_started = false;
@@ -803,6 +800,7 @@ lfc_load_pages(void)
 			else
 			{
 				Assert(!entry || !entry->prewarm_started);
+				lfc_ctl->skipped_pages += 1;
 				LWLockRelease(lfc_lock);
 			}
 
@@ -812,6 +810,7 @@ lfc_load_pages(void)
 			}
 		}
 	}
+	lfc_ctl->prewarm_curr_chunk = max_entries;	/* mark completion: curr == total */
 	free(fs);
 	elog(LOG, "LFC: complete prewarming: loaded %ld pages", (long)n_received);
 }
@@ -1719,7 +1718,6 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 
 PG_FUNCTION_INFO_V1(save_local_cache_state);
 
-
 Datum
 save_local_cache_state(PG_FUNCTION_ARGS)
 {
@@ -1727,3 +1725,33 @@ save_local_cache_state(PG_FUNCTION_ARGS)
 	PG_RETURN_NULL();
 }
 
+PG_FUNCTION_INFO_V1(get_prewarm_info);
+
+Datum
+get_prewarm_info(PG_FUNCTION_ARGS)
+{
+	Datum		values[4];
+	bool		nulls[4];
+	TupleDesc	tupdesc;
+
+	if (lfc_size_limit == 0)	/* zero size limit: presumably LFC is disabled, nothing to report */
+		PG_RETURN_NULL();
+
+	tupdesc = CreateTemplateTupleDesc(4);	/* must match the OUT columns declared in neon--1.5--1.6.sql */
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "total_chunks", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "curr_chunk", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prewarmed_pages", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "skipped_pages", INT4OID, -1, 0);
+	tupdesc = BlessTupleDesc(tupdesc);
+
+	MemSet(nulls, 0, sizeof(nulls));
+	LWLockAcquire(lfc_lock, LW_SHARED);	/* read all four counters under a single lock hold */
+	values[0] = Int32GetDatum(lfc_ctl->prewarm_total_chunks);
+	values[1] = Int32GetDatum(lfc_ctl->prewarm_curr_chunk);
+	values[2] = Int32GetDatum(lfc_ctl->prewarmed_pages);
+	values[3] = Int32GetDatum(lfc_ctl->skipped_pages);
+	LWLockRelease(lfc_lock);
+
+	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
diff --git a/pgxn/neon/neon--1.5--1.6.sql b/pgxn/neon/neon--1.5--1.6.sql
index 8f0d59ee86..501e4c4e12 100644
--- a/pgxn/neon/neon--1.5--1.6.sql
+++ b/pgxn/neon/neon--1.5--1.6.sql
@@ -6,3 +6,11 @@ AS 'MODULE_PATHNAME', 'save_local_cache_state'
 LANGUAGE C STRICT
 PARALLEL UNSAFE;
 
+CREATE FUNCTION get_prewarm_info(out total_chunks integer, out curr_chunk integer, out prewarmed_pages integer, out skipped_pages integer)
+RETURNS record
+AS 'MODULE_PATHNAME', 'get_prewarm_info'
+LANGUAGE C STRICT
+PARALLEL SAFE;
+
+
+
diff --git a/test_runner/regress/test_lfc_prewarm.py b/test_runner/regress/test_lfc_prewarm.py
index 6b682a27a2..fd8dd5096a 100644
--- a/test_runner/regress/test_lfc_prewarm.py
+++ b/test_runner/regress/test_lfc_prewarm.py
@@ -20,7 +20,7 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv):
     )
     conn = endpoint.connect()
     cur = conn.cursor()
-    cur.execute("create extension neon")
+    cur.execute("create extension neon version '1.6'")
     cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
     cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
 
@@ -35,10 +35,18 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv):
         cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_used_pages'")
         lfc_used_pages = cur.fetchall()[0][0]
         log.info(f"Used LFC size: {lfc_used_pages}")
-        if lfc_used_pages > 10000:
-            break
+        cur.execute("select * from get_prewarm_info()")
+        prewarm_info = cur.fetchall()[0]  # (total_chunks, curr_chunk, prewarmed_pages, skipped_pages)
+        log.info(f"Prewarm info: {prewarm_info}")
+        if prewarm_info[0] > 0:
+            log.info(f"Prewarm progress: {prewarm_info[1]*100//prewarm_info[0]}%")
+            if prewarm_info[0] == prewarm_info[1]:  # curr_chunk reached total_chunks
+                break
 
     assert lfc_used_pages > 10000
+    assert prewarm_info[0] > 0 and prewarm_info[0] == prewarm_info[1]
 
     cur.execute("select sum(pk) from t")
     assert cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
+
+    assert prewarm_info[1] > 0