Moving LastWrittenLSNCache to Neon Extension (#11031)

## Problem

We currently have the last-written LSN cache code duplicated across the
different PostgreSQL versions we patch. Moving it to an extension reduces
duplication and simplifies maintenance.

## Summary of changes

Move the LastWrittenLSN cache code out of the per-version PostgreSQL
patches into the Neon extension, wiring it back into core via hooks.

Related Postgres PR: https://github.com/neondatabase/postgres/pull/590

Closes: https://github.com/neondatabase/neon/issues/10973
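The wiring follows the usual PostgreSQL hook pattern: the patched core declares `set_lwlsn_*` function-pointer hooks that default to NULL, the extension installs its implementations at load time, and every call site guards the call with a NULL check. A minimal sketch of the pattern (the exact typedefs live in the linked Postgres PR; the names below mirror the diff, the scaffolding is illustrative only):

/* In patched core: hook pointer, NULL unless an extension installs it.
 * (On PG 16+ trees the rlocator parameter would be a RelFileLocator.) */
typedef XLogRecPtr (*set_lwlsn_block_hook_type) (XLogRecPtr lsn, RelFileNode rlocator,
                                                 ForkNumber forknum, BlockNumber blkno);
set_lwlsn_block_hook_type set_lwlsn_block_hook = NULL;

/* In the extension: remember any previously installed hook, install ours. */
static set_lwlsn_block_hook_type prev_set_lwlsn_block_hook = NULL;
prev_set_lwlsn_block_hook = set_lwlsn_block_hook;
set_lwlsn_block_hook = neon_set_lwlsn_block;   /* implemented in neon_lwlsncache.c */

/* At a call site in core or a vendored patch: call only if installed. */
if (set_lwlsn_block_hook)
    set_lwlsn_block_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM, blkno);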

---------

Author: Suhas Thalanki
Co-authored-by: Tristan Partin <tristan@neon.tech>
Committed: 2025-03-19 13:29:40 -04:00 (via GitHub)
Parent: 019a29748d
Commit: 5589efb6de
15 changed files with 586 additions and 61 deletions

pgvector.patch

@@ -15,7 +15,7 @@ index 7a4b88c..56678af 100644
 HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
 
 diff --git a/src/hnswbuild.c b/src/hnswbuild.c
-index b667478..fc1897c 100644
+index b667478..dc95d89 100644
 --- a/src/hnswbuild.c
 +++ b/src/hnswbuild.c
 @@ -843,9 +843,17 @@ HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc)
@@ -36,7 +36,7 @@ index b667478..fc1897c 100644
  /* Close relations within worker */
  index_close(indexRel, indexLockmode);
  table_close(heapRel, heapLockmode);
-@@ -1100,12 +1108,38 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
+@@ -1100,12 +1108,39 @@ BuildIndex(Relation heap, Relation index, IndexInfo *indexInfo,
  SeedRandom(42);
  #endif
@@ -62,10 +62,11 @@ index b667478..fc1897c 100644
 +#else
 +	RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
 +#endif
-+
-+	SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator,
++	if (set_lwlsn_block_range_hook)
++		set_lwlsn_block_range_hook(XactLastRecEnd, rlocator,
 +		MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
-+	SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
++	if (set_lwlsn_relation_hook)
++		set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
 +	}
 +#endif
 +	}
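Both vendored patches use the same shape: the relation's storage identity is a RelFileLocator on PG 16+ and a RelFileNode on older versions (hence the #if/#else), and the hook calls are simply skipped when the neon extension is not loaded. Condensed into a hypothetical helper for clarity (the PG 16+ branch is assumed here; the hunks above only show the pre-16 side):

static void
mark_index_written(Relation index)
{
#if PG_VERSION_NUM >= 160000
	RelFileLocator rlocator = RelationGetSmgr(index)->smgr_rlocator.locator;
#else
	RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
#endif
	/* The hooks stay NULL unless the neon extension installed them. */
	if (set_lwlsn_block_range_hook)
		set_lwlsn_block_range_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM,
								   0, RelationGetNumberOfBlocks(index));
	if (set_lwlsn_relation_hook)
		set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
}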

rum.patch

@@ -1,11 +1,5 @@
-commit 68f3b3b0d594f08aacc4a082ee210749ed5677eb
-Author: Anastasia Lubennikova <anastasia@neon.tech>
-Date:   Mon Jul 15 12:31:56 2024 +0100
-
-    Neon: fix unlogged index build patch
-
 diff --git a/src/ruminsert.c b/src/ruminsert.c
-index e8b209d..e89bf2a 100644
+index 255e616..7a2240f 100644
 --- a/src/ruminsert.c
 +++ b/src/ruminsert.c
 @@ -628,6 +628,10 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
@@ -30,7 +24,7 @@ index e8b209d..e89bf2a 100644
 /*
  * Write index to xlog
  */
-@@ -713,6 +721,21 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
+@@ -713,6 +721,22 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo)
 UnlockReleaseBuffer(buffer);
 }
@@ -41,9 +35,10 @@ index e8b209d..e89bf2a 100644
 +#else
 +	RelFileNode rlocator = RelationGetSmgr(index)->smgr_rnode.node;
 +#endif
-+
-+	SetLastWrittenLSNForBlockRange(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
-+	SetLastWrittenLSNForRelation(XactLastRecEnd, rlocator, MAIN_FORKNUM);
++	if (set_lwlsn_block_range_hook)
++		set_lwlsn_block_range_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM, 0, RelationGetNumberOfBlocks(index));
++	if (set_lwlsn_relation_hook)
++		set_lwlsn_relation_hook(XactLastRecEnd, rlocator, MAIN_FORKNUM);
 +
 +	smgr_end_unlogged_build(index->rd_smgr);
 +	}

pgxn/neon/Makefile

@@ -10,6 +10,7 @@ OBJS = \
 	libpagestore.o \
 	logical_replication_monitor.o \
 	neon.o \
+	neon_lwlsncache.o \
 	neon_pgversioncompat.o \
 	neon_perf_counters.o \
 	neon_utils.o \

pgxn/neon/file_cache.c

@@ -48,6 +48,7 @@
 #include "hll.h"
 #include "bitmap.h"
 #include "neon.h"
+#include "neon_lwlsncache.h"
 #include "neon_perf_counters.h"
 
 #define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "LFC: assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
@@ -999,7 +1000,9 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		LWLockRelease(lfc_lock);
 		return false;
 	}
-	lwlsn = GetLastWrittenLSN(rinfo, forknum, blkno);
+	lwlsn = neon_get_lwlsn(rinfo, forknum, blkno);
 	if (lwlsn > lsn)
 	{
 		elog(DEBUG1, "Skip LFC write for %d because LwLSN=%X/%X is greater than not_nodified_since LSN %X/%X",

pgxn/neon/neon.c

@@ -33,6 +33,7 @@
 #include "extension_server.h"
 #include "neon.h"
+#include "neon_lwlsncache.h"
 #include "control_plane_connector.h"
 #include "logical_replication_monitor.h"
 #include "unstable_extensions.h"
@@ -437,6 +438,8 @@ _PG_init(void)
 	pg_init_libpagestore();
 	pg_init_walproposer();
+	init_lwlsncache();
 	pagestore_smgr_init();
 	Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;

pgxn/neon/neon_lwlsncache.c Normal file (508 lines)

@@ -0,0 +1,508 @@
#include "postgres.h"
#include "neon_lwlsncache.h"
#include "miscadmin.h"
#include "access/xlog.h"
#include "storage/ipc.h"
#include "storage/shmem.h"
#include "storage/buf_internals.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
typedef struct LastWrittenLsnCacheEntry
{
BufferTag key;
XLogRecPtr lsn;
/* double linked list for LRU replacement algorithm */
dlist_node lru_node;
} LastWrittenLsnCacheEntry;
typedef struct LwLsnCacheCtl {
int lastWrittenLsnCacheSize;
/*
* Maximal last written LSN for pages not present in lastWrittenLsnCache
*/
XLogRecPtr maxLastWrittenLsn;
/*
* Double linked list to implement LRU replacement policy for last written LSN cache.
* Access to this list as well as to last written LSN cache is protected by 'LastWrittenLsnLock'.
*/
dlist_head lastWrittenLsnLRU;
} LwLsnCacheCtl;
/*
 * Cache of the last written LSN for each relation page.
 * To also provide a request LSN for smgrnblocks and smgrexists, there is a
 * pseudokey (blockno = InvalidBlockId) which stores the LSN of the last
 * relation metadata update.
 * The size of the cache is limited by the GUC "neon.last_written_lsn_cache_size";
 * pages are replaced using an LRU algorithm, based on a doubly linked list.
 * Access to this cache is protected by 'LastWrittenLsnLock'.
 */
static HTAB *lastWrittenLsnCache;
LwLsnCacheCtl* LwLsnCache;
static int lwlsn_cache_size = (128 * 1024);
static void
lwlc_register_gucs(void)
{
DefineCustomIntVariable("neon.last_written_lsn_cache_size",
"Size of last written LSN cache used by Neon",
NULL,
&lwlsn_cache_size,
(128*1024), 1024, INT_MAX,
PGC_POSTMASTER,
0, /* plain units */
NULL, NULL, NULL);
}
static XLogRecPtr SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,
NRelFileInfo rlocator,
ForkNumber forknum,
BlockNumber from,
BlockNumber n_blocks);
/* All the necessary hooks are defined here */
/* These hold the set_lwlsn_* hooks which were installed before ours, if any */
static set_lwlsn_block_range_hook_type prev_set_lwlsn_block_range_hook = NULL;
static set_lwlsn_block_v_hook_type prev_set_lwlsn_block_v_hook = NULL;
static set_lwlsn_block_hook_type prev_set_lwlsn_block_hook = NULL;
static set_max_lwlsn_hook_type prev_set_max_lwlsn_hook = NULL;
static set_lwlsn_relation_hook_type prev_set_lwlsn_relation_hook = NULL;
static set_lwlsn_db_hook_type prev_set_lwlsn_db_hook = NULL;
static shmem_startup_hook_type prev_shmem_startup_hook;
#if PG_VERSION_NUM >= 150000
static shmem_request_hook_type prev_shmem_request_hook;
#endif
static void shmemrequest(void);
static void shmeminit(void);
static void neon_set_max_lwlsn(XLogRecPtr lsn);
void
init_lwlsncache(void)
{
if (!process_shared_preload_libraries_in_progress)
ereport(ERROR, errcode(ERRCODE_INTERNAL_ERROR), errmsg("Loading of shared preload libraries is not in progress. Exiting"));
lwlc_register_gucs();
prev_shmem_startup_hook = shmem_startup_hook;
shmem_startup_hook = shmeminit;
#if PG_VERSION_NUM >= 150000
prev_shmem_request_hook = shmem_request_hook;
shmem_request_hook = shmemrequest;
#else
shmemrequest();
#endif
prev_set_lwlsn_block_range_hook = set_lwlsn_block_range_hook;
set_lwlsn_block_range_hook = neon_set_lwlsn_block_range;
prev_set_lwlsn_block_v_hook = set_lwlsn_block_v_hook;
set_lwlsn_block_v_hook = neon_set_lwlsn_block_v;
prev_set_lwlsn_block_hook = set_lwlsn_block_hook;
set_lwlsn_block_hook = neon_set_lwlsn_block;
prev_set_max_lwlsn_hook = set_max_lwlsn_hook;
set_max_lwlsn_hook = neon_set_max_lwlsn;
prev_set_lwlsn_relation_hook = set_lwlsn_relation_hook;
set_lwlsn_relation_hook = neon_set_lwlsn_relation;
prev_set_lwlsn_db_hook = set_lwlsn_db_hook;
set_lwlsn_db_hook = neon_set_lwlsn_db;
}
static void shmemrequest(void) {
Size requested_size = sizeof(LwLsnCacheCtl);
requested_size += hash_estimate_size(lwlsn_cache_size, sizeof(LastWrittenLsnCacheEntry));
RequestAddinShmemSpace(requested_size);
#if PG_VERSION_NUM >= 150000
if (prev_shmem_request_hook)
prev_shmem_request_hook();
#endif
}
static void shmeminit(void) {
	static HASHCTL info;
	bool found;
	if (lwlsn_cache_size > 0)
	{
		info.keysize = sizeof(BufferTag);
		info.entrysize = sizeof(LastWrittenLsnCacheEntry);
		lastWrittenLsnCache = ShmemInitHash("last_written_lsn_cache",
			lwlsn_cache_size, lwlsn_cache_size,
			&info,
			HASH_ELEM | HASH_BLOBS);
		LwLsnCache = ShmemInitStruct("neon/LwLsnCacheCtl", sizeof(LwLsnCacheCtl), &found);
		/* Now set the size in the struct */
		LwLsnCache->lastWrittenLsnCacheSize = lwlsn_cache_size;
		if (!found)
		{
			/* First process to attach initializes the LRU list and max LSN */
			dlist_init(&LwLsnCache->lastWrittenLsnLRU);
			LwLsnCache->maxLastWrittenLsn = GetRedoRecPtr();
		}
	}
	if (prev_shmem_startup_hook) {
		prev_shmem_startup_hook();
	}
}
/*
 * neon_get_lwlsn -- Returns the maximal LSN of a written page.
 * It returns an upper bound for the last written LSN of a given page,
 * either from a cached last written LSN or the global maximum last written LSN.
 * If rlocator is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
 * If the cache is large, iterating through all hash items may be rather expensive,
 * but neon_get_lwlsn(InvalidOid) is used only by neon_dbsize, which is not performance-critical.
 */
XLogRecPtr
neon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)
{
XLogRecPtr lsn;
LastWrittenLsnCacheEntry* entry;
Assert(LwLsnCache->lastWrittenLsnCacheSize != 0);
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
/* Maximal last written LSN among all non-cached pages */
lsn = LwLsnCache->maxLastWrittenLsn;
if (NInfoGetRelNumber(rlocator) != InvalidOid)
{
BufferTag key;
Oid spcOid = NInfoGetSpcOid(rlocator);
Oid dbOid = NInfoGetDbOid(rlocator);
Oid relNumber = NInfoGetRelNumber(rlocator);
BufTagInit(key, relNumber, forknum, blkno, spcOid, dbOid);
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
if (entry != NULL)
lsn = entry->lsn;
else
{
LWLockRelease(LastWrittenLsnLock);
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
/*
 * In the case of statements like CREATE TABLE AS SELECT ... or INSERT FROM SELECT ...,
 * we are fetching data from a source table and storing it in a destination table.
 * This causes problems for prefetch: no last-written LSN is known for the pages of
 * the source table (which, for example, happens after a compute restart). In that
 * case we fall back to the global last-written LSN, which changes frequently while
 * we are writing pages of the destination table. As a result, the request LSN used
 * for the prefetch and the request LSN used when the page is actually needed differ,
 * and the prefetch request expires, which effectively disarms prefetch.
 * To prevent that, we re-insert the page with the latest LSN, so that it's
 * less likely the LSN for this page will get evicted from the LwLsnCache
 * before the page is read.
 */
lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, blkno, 1);
}
}
else
{
HASH_SEQ_STATUS seq;
/* Find maximum of all cached LSNs */
hash_seq_init(&seq, lastWrittenLsnCache);
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
{
if (entry->lsn > lsn)
lsn = entry->lsn;
}
}
LWLockRelease(LastWrittenLsnLock);
return lsn;
}
static void neon_set_max_lwlsn(XLogRecPtr lsn) {
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
LwLsnCache->maxLastWrittenLsn = lsn;
LWLockRelease(LastWrittenLsnLock);
}
/*
 * neon_get_lwlsn_v -- Vectorized version of neon_get_lwlsn.
 * Fills lsns[] with an upper bound for the last written LSN of each of the
 * nblocks pages starting at blkno, either from a cached last written LSN or
 * the global maximum last written LSN.
 */
void
neon_get_lwlsn_v(NRelFileInfo relfilenode, ForkNumber forknum,
BlockNumber blkno, int nblocks, XLogRecPtr *lsns)
{
LastWrittenLsnCacheEntry* entry;
XLogRecPtr lsn;
Assert(LwLsnCache->lastWrittenLsnCacheSize != 0);
Assert(nblocks > 0);
Assert(PointerIsValid(lsns));
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
if (NInfoGetRelNumber(relfilenode) != InvalidOid)
{
BufferTag key;
bool missed_keys = false;
Oid spcOid = NInfoGetSpcOid(relfilenode);
Oid dbOid = NInfoGetDbOid(relfilenode);
Oid relNumber = NInfoGetRelNumber(relfilenode);
BufTagInit(key, relNumber, forknum, blkno, spcOid, dbOid);
for (int i = 0; i < nblocks; i++)
{
/* Maximal last written LSN among all non-cached pages */
key.blockNum = blkno + i;
entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);
if (entry != NULL)
{
lsns[i] = entry->lsn;
}
else
{
/* Mark this block's LSN as missing - we'll update the LwLSN for missing blocks in bulk later */
lsns[i] = InvalidXLogRecPtr;
missed_keys = true;
}
}
/*
* If we had any missing LwLSN entries, we add the missing ones now.
* By doing the insertions in one batch, we decrease lock contention.
*/
if (missed_keys)
{
LWLockRelease(LastWrittenLsnLock);
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
lsn = LwLsnCache->maxLastWrittenLsn;
for (int i = 0; i < nblocks; i++)
{
if (lsns[i] == InvalidXLogRecPtr)
{
lsns[i] = lsn;
SetLastWrittenLSNForBlockRangeInternal(lsn, relfilenode, forknum, blkno + i, 1);
}
}
}
}
else
{
HASH_SEQ_STATUS seq;
lsn = LwLsnCache->maxLastWrittenLsn;
/* Find maximum of all cached LSNs */
hash_seq_init(&seq, lastWrittenLsnCache);
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
{
if (entry->lsn > lsn)
lsn = entry->lsn;
}
for (int i = 0; i < nblocks; i++)
lsns[i] = lsn;
}
LWLockRelease(LastWrittenLsnLock);
}
/*
 * Guts of neon_set_lwlsn_block_range.
 * Caller must ensure LastWrittenLsnLock is held in exclusive mode.
 */
static XLogRecPtr
SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,
NRelFileInfo rlocator,
ForkNumber forknum,
BlockNumber from,
BlockNumber n_blocks)
{
if (NInfoGetRelNumber(rlocator) == InvalidOid)
{
if (lsn > LwLsnCache->maxLastWrittenLsn)
LwLsnCache->maxLastWrittenLsn = lsn;
else
lsn = LwLsnCache->maxLastWrittenLsn;
}
else
{
LastWrittenLsnCacheEntry* entry;
BufferTag key;
bool found;
BlockNumber i;
Oid spcOid = NInfoGetSpcOid(rlocator);
Oid dbOid = NInfoGetDbOid(rlocator);
Oid relNumber = NInfoGetRelNumber(rlocator);
BufTagInit(key, relNumber, forknum, from, spcOid, dbOid);
for (i = 0; i < n_blocks; i++)
{
key.blockNum = from + i;
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
if (found)
{
if (lsn > entry->lsn)
entry->lsn = lsn;
else
lsn = entry->lsn;
/* Unlink from LRU list */
dlist_delete(&entry->lru_node);
}
else
{
entry->lsn = lsn;
if (hash_get_num_entries(lastWrittenLsnCache) > LwLsnCache->lastWrittenLsnCacheSize)
{
/* Replace least recently used entry */
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&LwLsnCache->lastWrittenLsnLRU));
/* Adjust max LSN for not cached relations/chunks if needed */
if (victim->lsn > LwLsnCache->maxLastWrittenLsn)
LwLsnCache->maxLastWrittenLsn = victim->lsn;
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
}
}
/* Link to the end of LRU list */
dlist_push_tail(&LwLsnCache->lastWrittenLsnLRU, &entry->lru_node);
}
}
return lsn;
}
/*
 * neon_set_lwlsn_block_range -- Set maximal LSN of a range of written pages.
 * We maintain a cache of last written LSNs with limited size and an LRU replacement
 * policy. Keeping the last written LSN for each page allows us to use an old LSN when
 * requesting pages of unchanged or append-only relations. It is also critical for
 * efficient prefetching during massive update operations (like VACUUM or DELETE).
 *
 * rlocator.relNumber can be InvalidOid; in this case maxLastWrittenLsn is updated.
 * Setting the last written LSN with a dummy rlocator is used by the createdb and
 * dbase_redo functions.
 */
XLogRecPtr
neon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks)
{
if (lsn == InvalidXLogRecPtr || n_blocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0)
return lsn;
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, from, n_blocks);
LWLockRelease(LastWrittenLsnLock);
return lsn;
}
/*
 * neon_set_lwlsn_block_v -- Set the maximal LSN of each page to its respective LSN.
 *
 * We maintain a cache of last written LSNs with limited size and an LRU replacement
 * policy. Keeping the last written LSN for each page allows us to use an old LSN when
 * requesting pages of unchanged or append-only relations. It is also critical for
 * efficient prefetching during massive update operations (like VACUUM or DELETE).
 *
 * Note: This differs from neon_set_lwlsn_block_range[Internal] in that it
 * specifies per-block LSNs rather than a single LSN for the whole range.
 */
XLogRecPtr
neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
ForkNumber forknum, BlockNumber blockno,
int nblocks)
{
LastWrittenLsnCacheEntry* entry;
BufferTag key;
bool found;
XLogRecPtr max = InvalidXLogRecPtr;
Oid spcOid = NInfoGetSpcOid(relfilenode);
Oid dbOid = NInfoGetDbOid(relfilenode);
Oid relNumber = NInfoGetRelNumber(relfilenode);
if (lsns == NULL || nblocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0 ||
NInfoGetRelNumber(relfilenode) == InvalidOid)
return InvalidXLogRecPtr;
BufTagInit(key, relNumber, forknum, blockno, spcOid, dbOid);
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
for (int i = 0; i < nblocks; i++)
{
XLogRecPtr lsn = lsns[i];
key.blockNum = blockno + i;
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
if (found)
{
if (lsn > entry->lsn)
entry->lsn = lsn;
else
lsn = entry->lsn;
/* Unlink from LRU list */
dlist_delete(&entry->lru_node);
}
else
{
entry->lsn = lsn;
if (hash_get_num_entries(lastWrittenLsnCache) > LwLsnCache->lastWrittenLsnCacheSize)
{
/* Replace least recently used entry */
LastWrittenLsnCacheEntry* victim = dlist_container(LastWrittenLsnCacheEntry, lru_node, dlist_pop_head_node(&LwLsnCache->lastWrittenLsnLRU));
/* Adjust max LSN for not cached relations/chunks if needed */
if (victim->lsn > LwLsnCache->maxLastWrittenLsn)
LwLsnCache->maxLastWrittenLsn = victim->lsn;
hash_search(lastWrittenLsnCache, victim, HASH_REMOVE, NULL);
}
}
/* Link to the end of LRU list */
dlist_push_tail(&LwLsnCache->lastWrittenLsnLRU, &entry->lru_node);
max = Max(max, lsn);
}
LWLockRelease(LastWrittenLsnLock);
return max;
}
/*
 * neon_set_lwlsn_block -- Set maximal LSN for a single block
 */
XLogRecPtr
neon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno)
{
return neon_set_lwlsn_block_range(lsn, rlocator, forknum, blkno, 1);
}
/*
* neon_set_lwlsn_relation -- Set maximal LSN for relation metadata
*/
XLogRecPtr
neon_set_lwlsn_relation(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum)
{
return neon_set_lwlsn_block(lsn, rlocator, forknum, REL_METADATA_PSEUDO_BLOCKNO);
}
/*
* neon_set_lwlsn_db -- Set maximal LSN for the whole database
*/
XLogRecPtr
neon_set_lwlsn_db(XLogRecPtr lsn)
{
NRelFileInfo dummyNode = {InvalidOid, InvalidOid, InvalidOid};
return neon_set_lwlsn_block(lsn, dummyNode, MAIN_FORKNUM, 0);
}
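Operationally, the cache is now sized by the extension GUC registered in lwlc_register_gucs() rather than by a core variable. A hypothetical compute configuration, with values taken from the DefineCustomIntVariable call above (default 128k entries, minimum 1024, PGC_POSTMASTER so it is settable only at server start, and the extension must be preloaded):

shared_preload_libraries = 'neon'
neon.last_written_lsn_cache_size = 131072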

pgxn/neon/neon_lwlsncache.h Normal file

@@ -0,0 +1,17 @@
#ifndef NEON_LWLSNCACHE_H
#define NEON_LWLSNCACHE_H
#include "neon_pgversioncompat.h"
void init_lwlsncache(void);
/* Hooks */
XLogRecPtr neon_get_lwlsn(NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno);
void neon_get_lwlsn_v(NRelFileInfo relfilenode, ForkNumber forknum, BlockNumber blkno, int nblocks, XLogRecPtr *lsns);
XLogRecPtr neon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks);
XLogRecPtr neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode, ForkNumber forknum, BlockNumber blockno, int nblocks);
XLogRecPtr neon_set_lwlsn_block(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum, BlockNumber blkno);
XLogRecPtr neon_set_lwlsn_relation(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber forknum);
XLogRecPtr neon_set_lwlsn_db(XLogRecPtr lsn);
#endif /* NEON_LWLSNCACHE_H */
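These prototypes are what init_lwlsncache() plugs into the core hooks. A short usage sketch, mirroring the call sites in pagestore_smgr.c below (rinfo and blkno are assumed to be an NRelFileInfo and a BlockNumber already in scope):

XLogRecPtr lsns[4];
/* Upper-bound LwLSNs for 4 consecutive blocks, filled in one call. */
neon_get_lwlsn_v(rinfo, MAIN_FORKNUM, blkno, 4, lsns);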

pgxn/neon/neon_pgversioncompat.h

@@ -76,6 +76,14 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
 #define BufTagGetRelNumber(tagp) ((tagp)->rnode.relNode)
 
+#define BufTagInit(tag, relNumber, forknum, blkno, spcOid, dbOid) \
+	do { \
+		RelFileNode rnode = { .spcNode = spcOid, .dbNode = dbOid, .relNode = relNumber}; \
+		(tag).forkNum = forknum; \
+		(tag).blockNum = blkno; \
+		(tag).rnode = rnode; \
+	} while (false)
+
 #define InvalidRelFileNumber InvalidOid
 
 #define SMgrRelGetRelInfo(reln) \
@@ -125,6 +133,15 @@ InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
 		.relNumber = (tag).relNumber, \
 	})
 
+#define BufTagInit(tag, relNumber, forknum, blkno, spcOid, dbOid) \
+	do { \
+		(tag).forkNum = forknum; \
+		(tag).blockNum = blkno; \
+		(tag).spcOid = spcOid; \
+		(tag).dbOid = dbOid; \
+		(tag).relNumber = relNumber; \
+	} while (false)
+
 #define SMgrRelGetRelInfo(reln) \
 	((reln)->smgr_rlocator)
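The two definitions give call sites a single spelling of BufTagInit regardless of whether the tree stores a BufferTag as a RelFileNode (before PG 16) or as individual spcOid/dbOid/relNumber fields (PG 16+). Usage, as in neon_lwlsncache.c above (identifiers assumed in scope):

BufferTag key;
/* The macro fills whichever fields this PostgreSQL version uses. */
BufTagInit(key, relNumber, forknum, blkno, spcOid, dbOid);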

pgxn/neon/pagestore_smgr.c

@@ -69,6 +69,7 @@
 #include "bitmap.h"
 #include "neon.h"
+#include "neon_lwlsncache.h"
 #include "neon_perf_counters.h"
 #include "pagestore_client.h"
@@ -340,11 +341,6 @@ static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_
 static bool prefetch_wait_for(uint64 ring_index);
 static void prefetch_cleanup_trailing_unused(void);
 static inline void prefetch_set_unused(uint64 ring_index);
-#if PG_MAJORVERSION_NUM < 17
-static void
-GetLastWrittenLSNv(NRelFileInfo relfilenode, ForkNumber forknum,
-		BlockNumber blkno, int nblocks, XLogRecPtr *lsns);
-#endif
 static void
 neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum,
@@ -864,7 +860,7 @@ prefetch_on_ps_disconnect(void)
 	/*
 	 * We can have gone into retry due to network error, so update stats with
 	 * the latest available
 	 */
 	MyNeonCounters->pageserver_open_requests =
 		MyPState->n_requests_inflight;
@@ -1105,7 +1101,7 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
 Retry:
 	/*
 	 * We can have gone into retry due to network error, so update stats with
 	 * the latest available
 	 */
 	MyNeonCounters->pageserver_open_requests =
 		MyPState->ring_unused - MyPState->ring_receive;
@@ -1997,7 +1993,7 @@ neon_wallog_pagev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		if (batch_size >= BLOCK_BATCH_SIZE)
 		{
-			SetLastWrittenLSNForBlockv(lsns, InfoFromSMgrRel(reln), forknum,
+			neon_set_lwlsn_block_v(lsns, InfoFromSMgrRel(reln), forknum,
 					batch_blockno,
 					batch_size);
 			batch_blockno += batch_size;
@@ -2007,7 +2003,7 @@ neon_wallog_pagev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	if (batch_size != 0)
 	{
-		SetLastWrittenLSNForBlockv(lsns, InfoFromSMgrRel(reln), forknum,
+		neon_set_lwlsn_block_v(lsns, InfoFromSMgrRel(reln), forknum,
 				batch_blockno,
 				batch_size);
 	}
@@ -2134,7 +2130,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
 	 * Remember the LSN on this page. When we read the page again, we must
 	 * read the same or newer version of it.
 	 */
-	SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forknum, blocknum);
+	neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forknum, blocknum);
 }
 /*
@@ -2217,19 +2213,6 @@ nm_adjust_lsn(XLogRecPtr lsn)
 }
-/*
- * Since PG17 we use vetorized version,
- * so add compatibility function for older versions
- */
-#if PG_MAJORVERSION_NUM < 17
-static void
-GetLastWrittenLSNv(NRelFileInfo relfilenode, ForkNumber forknum,
-		BlockNumber blkno, int nblocks, XLogRecPtr *lsns)
-{
-	lsns[0] = GetLastWrittenLSN(relfilenode, forknum, blkno);
-}
-#endif
 /*
  * Return LSN for requesting pages and number of blocks from page server
  */
@@ -2241,7 +2224,7 @@ neon_get_request_lsns(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 	Assert(nblocks <= PG_IOV_MAX);
-	GetLastWrittenLSNv(rinfo, forknum, blkno, (int) nblocks, last_written_lsns);
+	neon_get_lwlsn_v(rinfo, forknum, blkno, (int) nblocks, last_written_lsns);
 	for (int i = 0; i < nblocks; i++)
 	{
@@ -2844,9 +2827,9 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 	if (lsn == InvalidXLogRecPtr)
 	{
 		lsn = GetXLogInsertRecPtr();
-		SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
+		neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum, blkno);
 	}
-	SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
 }
 #if PG_MAJORVERSION_NUM >= 16
@@ -2941,7 +2924,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 	for (int i = 0; i < count; i++)
 	{
 		lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
-		SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum,
+		neon_set_lwlsn_block(lsn, InfoFromSMgrRel(reln), forkNum,
				blocknum + i);
 	}
@@ -2951,7 +2934,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 	Assert(lsn != 0);
-	SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forkNum);
 	set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
 }
 #endif
@@ -4052,7 +4035,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, Blo
 	 * for the extended pages, so there's no harm in leaving behind obsolete
 	 * entries for the truncated chunks.
 	 */
-	SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forknum);
+	neon_set_lwlsn_relation(lsn, InfoFromSMgrRel(reln), forknum);
 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -4510,7 +4493,7 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		if (relsize < blkno + 1)
 		{
 			update_cached_relsize(rinfo, forknum, blkno + 1);
-			SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
+			neon_set_lwlsn_relation(end_recptr, rinfo, forknum);
 		}
 	}
 	else
@@ -4543,7 +4526,7 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		relsize = Max(nbresponse->n_blocks, blkno + 1);
 		set_cached_relsize(rinfo, forknum, relsize);
-		SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
+		neon_set_lwlsn_relation(end_recptr, rinfo, forknum);
 		neon_log(SmgrTrace, "Set length to %d", relsize);
 	}
@@ -4674,7 +4657,7 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
 	 */
 	if (no_redo_needed)
 	{
-		SetLastWrittenLSNForBlock(end_recptr, rinfo, forknum, blkno);
+		neon_set_lwlsn_block(end_recptr, rinfo, forknum, blkno);
 		/*
 		 * Redo changes if page exists in LFC.
 		 * We should perform this check after assigning LwLSN to prevent

pgxn/neon_walredo/walredoproc.c

@@ -286,9 +286,6 @@ WalRedoMain(int argc, char *argv[])
 	max_wal_senders = 0;
 	InitializeMaxBackends();
-	/* Disable lastWrittenLsnCache */
-	lastWrittenLsnCacheSize = 0;
 #if PG_VERSION_NUM >= 150000
 	process_shmem_requests();
 	InitializeShmemGUCs();

vendor/revisions.json

@@ -1,18 +1,18 @@
 {
 	"v17": [
 		"17.4",
-		"e5e87b9f52d0eaeb83f3e2517bb9727aac37729b"
+		"22533c63fc42cdc1dbe138650ba1eca10a70c5d7"
 	],
 	"v16": [
 		"16.8",
-		"512856aaa8bedbaa8f06811449518dcb0c2e5d8f"
+		"473f68210d52ff8508f71c15b0c77c01296f4ace"
 	],
 	"v15": [
 		"15.12",
-		"ee794ba767eef9b10260ef67d3a58084f1dabd6f"
+		"6cea02e23caa950d5f06932491a91b6af8f54360"
 	],
 	"v14": [
 		"14.17",
-		"7b7592e74059f795b64f06860cea97673418f35e"
+		"35bc1b0cba55680e3b37abce4e67a46bb15f3315"
 	]
 }