Fix unlogged build

This commit is contained in:
Konstantin Knizhnik
2024-06-01 10:27:59 +03:00
parent 520101170f
commit 947f8c59dd
3 changed files with 186 additions and 45 deletions

View File

@@ -295,12 +295,13 @@ extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
/* utils for neon relsize cache */
extern void relsize_hash_init(void);
extern bool get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size);
extern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern bool set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size, BlockNumber* old_size);
extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
extern bool start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern bool is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
extern bool start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize);
extern bool is_unlogged_build_extend(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize);
extern bool is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize);
extern bool stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
extern void resume_unlogged_build(void);

View File

@@ -97,6 +97,8 @@ const int SmgrTrace = DEBUG5;
page_server_api *page_server;
const PGAlignedBlock zero_buffer;
static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
@@ -1391,6 +1393,17 @@ PageIsEmptyHeapPage(char *buffer)
* A page is being evicted from the shared buffer cache. Update the
* last-written LSN of the page, and WAL-log it if needed.
*/
/*
 * Grow the local (md) file of a relation so that it covers new_relsize
 * blocks, using zero pages for the added range.
 *
 * old_relsize is the size (in blocks) the caller believes the local file
 * currently has; new_relsize is the requested size (in blocks).
 * The call is a no-op when new_relsize does not exceed old_relsize.
 */
static void
unlogged_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber old_relsize, BlockNumber new_relsize)
{
	/*
	 * Nothing to add. Without this check the unsigned difference below
	 * would be zero or wrap around, and the pre-16 branch would append a
	 * block nobody asked for.
	 */
	if (new_relsize <= old_relsize)
		return;

#if PG_MAJORVERSION_NUM < 16
	/*
	 * mdextend() writes a single page at the given block number, so the
	 * last block of a relation of new_relsize blocks is new_relsize - 1.
	 */
	mdextend(reln, forknum, new_relsize - 1, (char *) zero_buffer.data, true);
#else
	mdzeroextend(reln, forknum, old_relsize, new_relsize - old_relsize, true);
#endif
}
static void
#if PG_MAJORVERSION_NUM < 16
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
@@ -1398,6 +1411,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const char *buffer, bool force)
#endif
{
BlockNumber relsize;
XLogRecPtr lsn = PageGetLSN((Page) buffer);
bool log_page;
@@ -1474,12 +1488,16 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
}
else if (forknum != FSM_FORKNUM && forknum != VISIBILITYMAP_FORKNUM)
{
if (start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum+1))
if (start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
{
mdcreate(reln, forknum, true);
resume_unlogged_build();
}
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, true);
resume_unlogged_build();
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
@@ -1493,12 +1511,16 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
}
else if (lsn < FirstNormalUnloggedLSN)
{
if (start_unlogged_build(InfoFromSMgrRel(reln),forknum, blocknum+1))
if (start_unlogged_build(InfoFromSMgrRel(reln),forknum, blocknum, &relsize))
{
mdcreate(reln, forknum, true);
resume_unlogged_build();
}
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, true);
resume_unlogged_build();
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
@@ -1508,10 +1530,15 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
}
else
{
if (is_unlogged_build(InfoFromSMgrRel(reln), forknum))
if (is_unlogged_build_extend(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
{
resume_unlogged_build();
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, true);
resume_unlogged_build();
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u with LSN=%X/%X of relation %u/%u/%u.%u is saved locally.",
blocknum,
@@ -2054,7 +2081,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
&reln->smgr_cached_nblocks[forkNum]);
}
else
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0, NULL);
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2114,6 +2141,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
#endif
{
XLogRecPtr lsn;
BlockNumber old_relsize;
BlockNumber n_blocks = 0;
switch (reln->smgr_relpersistence)
@@ -2165,8 +2193,12 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);
neon_wallog_page(reln, forkNum, blkno, buffer, false);
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
if (set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1, &old_relsize))
{
unlogged_extend(reln, forkNum, old_relsize, blkno + 1);
resume_unlogged_build();
}
lsn = PageGetLSN((Page) buffer);
neon_log(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
@@ -2200,8 +2232,8 @@ void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync)
{
const PGAlignedBlock buffer = {0};
int remblocks = nblocks;
BlockNumber old_relsize;
BlockNumber remblocks = nblocks;
XLogRecPtr lsn = 0;
switch (reln->smgr_relpersistence)
@@ -2251,11 +2283,29 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
if (!XLogInsertAllowed())
return;
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum + nblocks);
if (set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum + nblocks, &old_relsize))
{
unlogged_extend(reln, forkNum, old_relsize, blocknum + nblocks);
resume_unlogged_build();
}
if (forkNum != MAIN_FORKNUM) /* no need to wal-log zero pages except VM/FSM forks */
{
/* ensure we have enough xlog buffers to log max-sized records */
XLogEnsureRecordSpace(Min(remblocks, (XLR_MAX_BLOCK_ID - 1)), 0);
}
else
{
/*
* smgr_extend is often called with an all-zeroes page, so
* lsn==InvalidXLogRecPtr. An smgr_write() call will come for the buffer
* later, after it has been initialized with the real page contents, and
* it is eventually evicted from the buffer cache. But we need a valid LSN
* to the relation metadata update now.
*/
lsn = GetXLogInsertRecPtr();
}
#if 0
/* ensure we have enough xlog buffers to log max-sized records */
XLogEnsureRecordSpace(Min(remblocks, (XLR_MAX_BLOCK_ID - 1)), 0);
/*
* Iterate over all the pages. They are collected into batches of
* XLR_MAX_BLOCK_ID pages, and a single WAL-record is written for each
@@ -2265,17 +2315,19 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
{
int count = Min(remblocks, XLR_MAX_BLOCK_ID);
XLogBeginInsert();
if (forkNum != MAIN_FORKNUM) /* no need to wal-log zero pages except VM/FSM forks */
{
XLogBeginInsert();
for (int i = 0; i < count; i++)
XLogRegisterBlock(i, &InfoFromSMgrRel(reln), forkNum, blocknum + i,
(char *) buffer.data, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
lsn = XLogInsert(RM_XLOG_ID, XLOG_FPI);
for (int i = 0; i < count; i++)
XLogRegisterBlock(i, &InfoFromSMgrRel(reln), forkNum, blocknum + i,
(char *) zero_buffer.data, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
lsn = XLogInsert(RM_XLOG_ID, XLOG_FPI);
}
for (int i = 0; i < count; i++)
{
lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, buffer.data);
lfc_write(InfoFromSMgrRel(reln), forkNum, blocknum + i, zero_buffer.data);
SetLastWrittenLSNForBlock(lsn, InfoFromSMgrRel(reln), forkNum,
blocknum + i);
}
@@ -2287,8 +2339,6 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
Assert(lsn != 0);
SetLastWrittenLSNForRelation(lsn, InfoFromSMgrRel(reln), forkNum);
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum);
#endif
}
#endif
@@ -2555,6 +2605,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
#endif
{
neon_request_lsns request_lsns;
BlockNumber relsize;
switch (reln->smgr_relpersistence)
{
@@ -2581,9 +2632,9 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
if (is_unlogged_build(InfoFromSMgrRel(reln), forkNum))
if (is_unlogged_build(InfoFromSMgrRel(reln), forkNum, &relsize))
{
if (blkno >= mdnblocks(reln, forkNum))
if (blkno >= relsize)
{
elog(SmgrTrace, "Get empty local page %d of relation %u/%u/%u.%u",
blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum);
@@ -2711,11 +2762,22 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *bu
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
#endif
{
BlockNumber relsize;
XLogRecPtr lsn;
switch (reln->smgr_relpersistence)
{
case 0:
if (is_unlogged_build_extend(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
{
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, skipFsync);
resume_unlogged_build();
return;
}
/* This is a bit tricky. Check if the relation exists locally */
if (mdexists(reln, forknum))
{
@@ -2920,7 +2982,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks, NULL);
/*
* Truncating a relation drops all its buffers from the buffer cache
@@ -3176,7 +3238,7 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
relsize = Max(nbresponse->n_blocks, blkno + 1);
set_cached_relsize(rinfo, forknum, relsize);
set_cached_relsize(rinfo, forknum, relsize, NULL);
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
neon_log(SmgrTrace, "Set length to %d", relsize);

View File

@@ -134,9 +134,15 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
return found;
}
void
set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
/*
* Cache relation size.
* Returns true if it happens during unlogged build.
* In this case the lock is not released.
*/
bool
set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size, BlockNumber* old_size)
{
bool unlogged = false;
if (relsize_hash_size > 0)
{
RelTag tag;
@@ -164,7 +170,11 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
relsize_ctl->size -= 1;
}
}
entry->size = size;
if (old_size)
{
*old_size = found ? entry->size : 0;
}
entry->size = new_size;
if (!found)
{
entry->unlogged = false;
@@ -190,17 +200,27 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
relsize_ctl->size += 1;
}
}
else if (!entry->unlogged) /* entries of relation involved in unlogged build are pinned */
else if (entry->unlogged) /* entries of relation involved in unlogged build are pinned */
{
dlist_delete(&entry->lru_node);
}
if (!entry->unlogged) /* entries of relation involved in unlogged build are pinned */
{
dlist_push_tail(&relsize_ctl->lru, &entry->lru_node);
}
else
{
Assert(old_size);
unlogged = true;
}
relsize_ctl->writes += 1;
LWLockRelease(relsize_lock);
if (!unlogged)
{
LWLockRelease(relsize_lock);
}
}
return unlogged;
}
void
@@ -292,7 +312,7 @@ forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
* in critical section, for example right now it creates the relation on the disk using mdcreate
*/
bool
start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize)
{
bool start = false;
if (relsize_hash_size > 0)
@@ -306,7 +326,8 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);
if (!found) {
entry->size = size;
*relsize = 0;
entry->size = blocknum + 1;
start = true;
if (relsize_ctl->size+1 == relsize_hash_size)
@@ -330,8 +351,11 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
{
start = !entry->unlogged;
if (entry->size < size)
entry->size = size;
*relsize = entry->size;
if (entry->size <= blocknum)
{
entry->size = blocknum + 1;
}
if (start)
{
@@ -346,12 +370,9 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
* We are not putting entry in LRU list to prevent it from eviction until the end of unlogged build
*/
if (!start)
LWLockRelease(relsize_lock);
else
if (start)
elog(LOG, "Start unlogged build for %u/%u/%u.%u",
RelFileInfoFmt(rinfo), forknum);
}
return start;
}
@@ -363,7 +384,7 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
* It allows to read page from local file without risk that it is removed by stop_unlogged_build by some other backend.
*/
bool
is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize)
{
bool unlogged = false;
@@ -379,6 +400,62 @@ is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
if (entry != NULL)
{
unlogged = entry->unlogged;
*relsize = entry->size;
relsize_ctl->hits += 1;
}
else
{
relsize_ctl->misses += 1;
}
if (!unlogged)
LWLockRelease(relsize_lock);
}
return unlogged;
}
/*
* Check if relation is extended during unlogged build.
* If it is unlogged, true is returned and the lock on relsize cache is held.
* It should be later released by the caller using resume_unlogged_build().
* It allows to atomically extend the local file.
*/
bool
is_unlogged_build_extend(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize)
{
bool unlogged = false;
if (relsize_hash_size > 0)
{
RelTag tag;
RelSizeEntry *entry;
tag.rinfo = rinfo;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_SHARED);
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
if (entry != NULL)
{
if (entry->size <= blocknum)
{
/* Very rare case: it can happen only if relation is thrown away from relcache before unlogged build is detected */
/* Repeat search under exclusive lock */
LWLockRelease(relsize_lock);
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
if (entry == NULL)
{
relsize_ctl->misses += 1;
LWLockRelease(relsize_lock);
return false;
}
}
unlogged = entry->unlogged;
*relsize = entry->size;
if (entry->size <= blocknum)
{
entry->size = blocknum + 1;
}
relsize_ctl->hits += 1;
}
else
@@ -436,7 +513,8 @@ stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
void
resume_unlogged_build(void)
{
	/*
	 * relsize_lock is only taken when the relsize cache is enabled
	 * (relsize_hash_size > 0), so it must be released exactly once and
	 * only in that case.
	 */
	if (relsize_hash_size > 0)
	{
		LWLockRelease(relsize_lock);
	}
}