From 3eb83a0ebbae56acad54190bc71085c7b424fb13 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 29 Feb 2024 15:54:58 +0200 Subject: [PATCH] Provide appoximation of working set using hyper-log-log algorithm in LFC (#6935) ## Summary of changes Calculate number of unique page accesses at compute. It can be used to estimate working set size and adjust cache size (shared_buffers or local file cache). Approximation is made using HyperLogLog algorithm. It is performed by local file cache and so is available only when local file cache is enabled. This calculation doesn't take in account access to the pages present in shared buffers, but includes pages available in local file cache. This information can be retrieved using approximate_working_set_size(reset bool) function from neon extension. reset parameter can be used to reset statistic and so collect unique accesses for the particular interval. Below is an example of estimating working set size after pgbench -c 10 -S -T 100 -s 10: ``` postgres=# select approximate_working_set_size(false); approximate_working_set_size ------------------------------ 19052 (1 row) postgres=# select pg_table_size('pgbench_accounts')/8192; ?column? ---------- 16402 (1 row) ``` ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon/Makefile | 2 +- pgxn/neon/file_cache.c | 36 ++++++++++++++++++++++ pgxn/neon/neon--1.2--1.3.sql | 9 ++++++ pgxn/neon/neon.control | 2 +- test_runner/regress/test_neon_extension.py | 2 +- 5 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 pgxn/neon/neon--1.2--1.3.sql diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile index ef0a79a50c..7ea767ec74 100644 --- a/pgxn/neon/Makefile +++ b/pgxn/neon/Makefile @@ -21,7 +21,7 @@ SHLIB_LINK_INTERNAL = $(libpq) SHLIB_LINK = -lcurl EXTENSION = neon -DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql +DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql PGFILEDESC = "neon - cloud storage for PostgreSQL" EXTRA_CLEAN = \ diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 11d6f6aec5..25275ef31f 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -25,6 +25,8 @@ #include "funcapi.h" #include "miscadmin.h" #include "pagestore_client.h" +#include "common/hashfn.h" +#include "lib/hyperloglog.h" #include "pgstat.h" #include "postmaster/bgworker.h" #include RELFILEINFO_HDR @@ -60,6 +62,7 @@ #define BLOCKS_PER_CHUNK 128 /* 1Mb chunk */ #define MB ((uint64)1024*1024) +#define HYPER_LOG_LOG_BIT_WIDTH 10 #define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK)) typedef struct FileCacheEntry @@ -84,6 +87,8 @@ typedef struct FileCacheControl uint64 writes; dlist_head lru; /* double linked list for LRU replacement * algorithm */ + hyperLogLogState wss_estimation; /* estimation of wroking set size */ + uint8_t hyperloglog_hashes[(1 << HYPER_LOG_LOG_BIT_WIDTH) + 1]; } FileCacheControl; static HTAB *lfc_hash; @@ -232,6 +237,14 @@ lfc_shmem_startup(void) lfc_ctl->writes = 0; dlist_init(&lfc_ctl->lru); + /* Initialize hyper-log-log structure for estimating working set size */ + initHyperLogLog(&lfc_ctl->wss_estimation, HYPER_LOG_LOG_BIT_WIDTH); + + /* We need hashes in shared memory */ + pfree(lfc_ctl->wss_estimation.hashesArr); + memset(lfc_ctl->hyperloglog_hashes, 0, sizeof lfc_ctl->hyperloglog_hashes); + lfc_ctl->wss_estimation.hashesArr = lfc_ctl->hyperloglog_hashes; + /* Recreate file cache on restart */ fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC); if (fd < 0) @@ -529,6 +542,11 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, } entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL); + + /* Approximate working set */ + tag.blockNum = blkno; + addHyperLogLog(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag))); + if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0) { /* Page is not cached */ @@ -967,3 +985,21 @@ local_cache_pages(PG_FUNCTION_ARGS) else SRF_RETURN_DONE(funcctx); } + +PG_FUNCTION_INFO_V1(approximate_working_set_size); + +Datum +approximate_working_set_size(PG_FUNCTION_ARGS) +{ + int32 dc = -1; + if (lfc_size_limit != 0) + { + bool reset = PG_GETARG_BOOL(0); + LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED); + dc = (int32) estimateHyperLogLog(&lfc_ctl->wss_estimation); + if (reset) + memset(lfc_ctl->hyperloglog_hashes, 0, sizeof lfc_ctl->hyperloglog_hashes); + LWLockRelease(lfc_lock); + } + PG_RETURN_INT32(dc); +} diff --git a/pgxn/neon/neon--1.2--1.3.sql b/pgxn/neon/neon--1.2--1.3.sql new file mode 100644 index 0000000000..9583008777 --- /dev/null +++ b/pgxn/neon/neon--1.2--1.3.sql @@ -0,0 +1,9 @@ +\echo Use "ALTER EXTENSION neon UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION approximate_working_set_size(reset bool) +RETURNS integer +AS 'MODULE_PATHNAME', 'approximate_working_set_size' +LANGUAGE C PARALLEL SAFE; + +GRANT EXECUTE ON FUNCTION approximate_working_set_size(bool) TO pg_monitor; + diff --git a/pgxn/neon/neon.control b/pgxn/neon/neon.control index 599b54b2ff..cee2f336f2 100644 --- a/pgxn/neon/neon.control +++ b/pgxn/neon/neon.control @@ -1,6 +1,6 @@ # neon extension comment = 'cloud storage for PostgreSQL' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/neon' relocatable = true trusted = true diff --git a/test_runner/regress/test_neon_extension.py b/test_runner/regress/test_neon_extension.py index 672f2b495d..1179a3afe9 100644 --- a/test_runner/regress/test_neon_extension.py +++ b/test_runner/regress/test_neon_extension.py @@ -23,7 +23,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder): # IMPORTANT: # If the version has changed, the test should be updated. # Ensure that the default version is also updated in the neon.control file - assert cur.fetchone() == ("1.2",) + assert cur.fetchone() == ("1.3",) cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE") res = cur.fetchall() log.info(res)