Do not write pages to the local disk during unlogged build

This commit is contained in:
Konstantin Knizhnik
2024-06-04 09:20:51 +03:00
parent 0c9dee9d06
commit f9416ebf2b
3 changed files with 57 additions and 250 deletions

View File

@@ -295,15 +295,13 @@ extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
/* utils for neon relsize cache */
extern void relsize_hash_init(void);
extern bool get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size);
extern bool set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size, BlockNumber* old_size);
extern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size);
extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
extern bool start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize);
extern bool is_unlogged_build_extend(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize);
extern bool is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize);
extern bool stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
extern void resume_unlogged_build(void);
extern void start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum);
extern bool is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
extern void stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);

View File

@@ -1389,21 +1389,6 @@ PageIsEmptyHeapPage(char *buffer)
return memcmp(buffer, empty_page.data, BLCKSZ) == 0;
}
/*
* A page is being evicted from the shared buffer cache. Update the
* last-written LSN of the page, and WAL-log it if needed.
*/
static void
unlogged_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber old_relsize, BlockNumber new_relsize)
{
#if PG_MAJORVERSION_NUM < 16
mdextend(reln, forknum, new_relsize, (char *) zero_buffer.data, true);
#else
mdzeroextend(reln, forknum, old_relsize, new_relsize - old_relsize, true);
#endif
}
static void
#if PG_MAJORVERSION_NUM < 16
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
@@ -1428,13 +1413,28 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
Assert(XLogInsertAllowed());
log_page = true;
}
else if (XLogInsertAllowed() &&
!ShutdownRequestPending &&
(forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM))
else if (XLogInsertAllowed() && !ShutdownRequestPending)
{
log_page = true;
if (forknum == MAIN_FORKNUM)
{
if (!PageIsNew((Page) buffer))
{
if (lsn < FirstNormalUnloggedLSN)
{
start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum);
log_page = true;
}
else if (is_unlogged_build(InfoFromSMgrRel(reln), forknum))
{
log_page = true;
}
}
}
else
{
log_page = true;
}
}
if (log_page)
{
XLogRecPtr recptr;
@@ -1486,21 +1486,19 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
}
else if (forknum != FSM_FORKNUM && forknum != VISIBILITYMAP_FORKNUM)
else
{
if (start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
{
mdcreate(reln, forknum, true);
}
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, true);
resume_unlogged_build();
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
/*
* It's a bad sign if there is a page with zero LSN in the buffer
* cache in a standby, too. However, PANICing seems like a cure
* worse than the disease, as the damage has likely already been
* done in the primary. So in a standby, make this an assertion,
* and in a release build just LOG the error and soldier on. We
* update the last-written LSN of the page with a conservative
* value in that case, which is the last replayed LSN.
*/
ereport(RecoveryInProgress() ? LOG : PANIC,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1509,51 +1507,6 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
lsn = GetXLogReplayRecPtr(NULL); /* in standby mode, soldier on */
}
}
else if (lsn < FirstNormalUnloggedLSN)
{
if (start_unlogged_build(InfoFromSMgrRel(reln),forknum, blocknum, &relsize))
{
mdcreate(reln, forknum, true);
}
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, true);
resume_unlogged_build();
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
}
else
{
if (is_unlogged_build_extend(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
{
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, true);
resume_unlogged_build();
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u with LSN=%X/%X of relation %u/%u/%u.%u is saved locally.",
blocknum,
LSN_FORMAT_ARGS(lsn),
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
}
else
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is already wal-logged at lsn=%X/%X",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum, LSN_FORMAT_ARGS(lsn)
)));
}
/*
* Remember the LSN on this page. When we read the page again, we must
@@ -1571,17 +1524,10 @@ static void
neon_log_newpage_range_callback(Relation rel, ForkNumber forknum)
{
SMgrRelation smgr = RelationGetSmgr(rel);
if (stop_unlogged_build(InfoFromSMgrRel(smgr), forknum))
{
mdclose(smgr, forknum);
/* use isRedo == true, so that we drop it immediately */
mdunlink(InfoBFromSMgrRel(smgr), forknum, true);
}
stop_unlogged_build(InfoFromSMgrRel(smgr), forknum);
}
/*
* neon_init() -- Initialize private state
*/
@@ -2081,7 +2027,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
&reln->smgr_cached_nblocks[forkNum]);
}
else
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0, NULL);
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
#ifdef DEBUG_COMPARE_LOCAL
if (IS_LOCAL_REL(reln))
@@ -2141,7 +2087,6 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
#endif
{
XLogRecPtr lsn;
BlockNumber old_relsize;
BlockNumber n_blocks = 0;
switch (reln->smgr_relpersistence)
@@ -2194,11 +2139,8 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
neon_wallog_page(reln, forkNum, blkno, buffer, false);
if (set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1, &old_relsize))
{
unlogged_extend(reln, forkNum, old_relsize, blkno + 1);
resume_unlogged_build();
}
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
lsn = PageGetLSN((Page) buffer);
neon_log(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
RelFileInfoFmt(InfoFromSMgrRel(reln)),
@@ -2232,7 +2174,6 @@ void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync)
{
BlockNumber old_relsize;
BlockNumber remblocks = nblocks;
XLogRecPtr lsn = 0;
@@ -2283,11 +2224,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
if (!XLogInsertAllowed())
return;
if (set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum + nblocks, &old_relsize))
{
unlogged_extend(reln, forkNum, old_relsize, blocknum + nblocks);
resume_unlogged_build();
}
set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum + nblocks);
if (forkNum != MAIN_FORKNUM) /* no need to wal-log zero pages except VM/FSM forks */
{
@@ -2632,27 +2569,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
if (is_unlogged_build(InfoFromSMgrRel(reln), forkNum, &relsize))
{
if (blkno >= relsize)
{
elog(SmgrTrace, "Get empty local page %d of relation %u/%u/%u.%u",
blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum);
memset(buffer, 0, BLCKSZ);
}
else
{
elog(SmgrTrace, "Read local page %d of relation %u/%u/%u.%u",
blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum);
mdread(reln, forkNum, blkno, buffer);
}
resume_unlogged_build();
}
else
{
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
}
#ifdef DEBUG_COMPARE_LOCAL
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
{
@@ -2762,22 +2679,11 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *bu
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
#endif
{
BlockNumber relsize;
XLogRecPtr lsn;
switch (reln->smgr_relpersistence)
{
case 0:
if (is_unlogged_build_extend(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
{
if (blocknum >= relsize)
{
unlogged_extend(reln, forknum, relsize, blocknum+1);
}
mdwrite(reln, forknum, blocknum, buffer, skipFsync);
resume_unlogged_build();
return;
}
/* This is a bit tricky. Check if the relation exists locally */
if (mdexists(reln, forknum))
{
@@ -2982,7 +2888,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
}
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks, NULL);
set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
/*
* Truncating a relation drops all its buffers from the buffer cache
@@ -3238,7 +3144,7 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
relsize = Max(nbresponse->n_blocks, blkno + 1);
set_cached_relsize(rinfo, forknum, relsize, NULL);
set_cached_relsize(rinfo, forknum, relsize);
SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
neon_log(SmgrTrace, "Set length to %d", relsize);

View File

@@ -136,13 +136,10 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
/*
* Cache relation size.
* Returns true if it happens during unlogged build.
* In this case the lock is not released.
*/
bool
set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size, BlockNumber* old_size)
void
set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
{
bool unlogged = false;
if (relsize_hash_size > 0)
{
RelTag tag;
@@ -170,11 +167,7 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size,
relsize_ctl->size -= 1;
}
}
if (old_size)
{
*old_size = found ? entry->size : 0;
}
entry->size = new_size;
entry->size = size;
if (!found)
{
entry->unlogged = false;
@@ -209,18 +202,9 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size,
{
dlist_push_tail(&relsize_ctl->lru, &entry->lru_node);
}
else
{
Assert(old_size);
unlogged = true;
}
relsize_ctl->writes += 1;
if (!unlogged)
{
LWLockRelease(relsize_lock);
}
LWLockRelease(relsize_lock);
}
return unlogged;
}
void
@@ -305,28 +289,22 @@ forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
* The criterion for starting an unlogged build is writing a page without a normal LSN.
* It can happen in any backend when page is evicted from shared buffers.
* Or can not happen at all if index fits in shared buffers.
*
* If this function really starts an unlogged build, it returns true, removes the entry from the LRU list
* (protecting it from eviction until the end of the unlogged build), and keeps the lock on the relsize hash.
* This lock should later be released using resume_unlogged_build(). That allows the caller to perform some actions
* in the critical section; for example, right now it creates the relation on disk using mdcreate.
*/
bool
start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize)
void
start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum)
{
bool start = false;
if (relsize_hash_size > 0)
{
RelTag tag;
RelSizeEntry *entry;
bool found;
bool start = false;
tag.rinfo = rinfo;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);
if (!found) {
*relsize = 0;
entry->size = blocknum + 1;
start = true;
@@ -351,7 +329,6 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknu
{
start = !entry->unlogged;
*relsize = entry->size;
if (entry->size <= blocknum)
{
entry->size = blocknum + 1;
@@ -373,18 +350,15 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknu
if (start)
elog(LOG, "Start unlogged build for %u/%u/%u.%u",
RelFileInfoFmt(rinfo), forknum);
LWLockRelease(relsize_lock);
}
return start;
}
/*
* Check if unlogged build is in progress.
* If so, true is returned and the lock on the relsize cache is held.
* It should later be released by the caller using resume_unlogged_build().
* This allows reading a page from the local file without the risk that it is removed by stop_unlogged_build in some other backend.
*/
bool
is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize)
is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
{
bool unlogged = false;
@@ -400,82 +374,23 @@ is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize)
if (entry != NULL)
{
unlogged = entry->unlogged;
*relsize = entry->size;
relsize_ctl->hits += 1;
}
else
{
relsize_ctl->misses += 1;
}
if (!unlogged)
LWLockRelease(relsize_lock);
LWLockRelease(relsize_lock);
}
return unlogged;
}
/*
* Check if the relation is extended during an unlogged build.
* If it is unlogged, true is returned and the lock on the relsize cache is held.
* It should later be released by the caller using resume_unlogged_build().
* This allows the local file to be extended atomically.
* Clear the unlogged-build flag if it was set.
*/
bool
is_unlogged_build_extend(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize)
{
bool unlogged = false;
if (relsize_hash_size > 0)
{
RelTag tag;
RelSizeEntry *entry;
tag.rinfo = rinfo;
tag.forknum = forknum;
LWLockAcquire(relsize_lock, LW_SHARED);
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
if (entry != NULL)
{
if (entry->size <= blocknum)
{
/* Very rare case: it can happen only if relation is thrown away from relcache before unlogged build is detected */
/* Repeat search under exclusive lock */
LWLockRelease(relsize_lock);
LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
if (entry == NULL)
{
relsize_ctl->misses += 1;
LWLockRelease(relsize_lock);
return false;
}
}
unlogged = entry->unlogged;
*relsize = entry->size;
if (entry->size <= blocknum)
{
entry->size = blocknum + 1;
}
relsize_ctl->hits += 1;
}
else
{
relsize_ctl->misses += 1;
}
if (!unlogged)
LWLockRelease(relsize_lock);
}
return unlogged;
}
/*
* Check if an unlogged build is in progress and, if so, clear the flag, return the entry to the LRU list, and return true.
*/
bool
void
stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
{
bool unlogged = false;
if (relsize_hash_size > 0)
{
RelTag tag;
@@ -487,7 +402,7 @@ stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
if (entry != NULL)
{
unlogged = entry->unlogged;
bool unlogged = entry->unlogged;
entry->unlogged = false;
relsize_ctl->hits += 1;
if (unlogged)
@@ -504,20 +419,8 @@ stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
}
LWLockRelease(relsize_lock);
}
return unlogged;
}
/*
* Release the lock obtained by the start_unlogged_build or is_unlogged_build functions.
*/
void
resume_unlogged_build(void)
{
if (relsize_hash_size > 0)
LWLockRelease(relsize_lock);
}
void
relsize_hash_init(void)
{