diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h
index 94389521d2..08f1eff9e2 100644
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -295,15 +295,13 @@ extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
 /* utils for neon relsize cache */
 extern void relsize_hash_init(void);
 extern bool get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size);
-extern bool set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size, BlockNumber* old_size);
+extern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size);
 extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
 extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
-extern bool start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize);
-extern bool is_unlogged_build_extend(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize);
-extern bool is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize);
-extern bool stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
-extern void resume_unlogged_build(void);
+extern void start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum);
+extern bool is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
+extern void stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum);
 
diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c
index c318c18bc2..d06eebbf66 100644
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -1389,21 +1389,6 @@ PageIsEmptyHeapPage(char *buffer)
 	return memcmp(buffer, empty_page.data, BLCKSZ) == 0;
 }
 
-/*
- * A page is being evicted from the shared buffer cache. Update the
- * last-written LSN of the page, and WAL-log it if needed.
- */
-static void
-unlogged_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber old_relsize, BlockNumber new_relsize)
-{
-#if PG_MAJORVERSION_NUM < 16
-	mdextend(reln, forknum, new_relsize, (char *) zero_buffer.data, true);
-#else
-	mdzeroextend(reln, forknum, old_relsize, new_relsize - old_relsize, true);
-#endif
-}
-
-
 static void
 #if PG_MAJORVERSION_NUM < 16
 neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
@@ -1428,13 +1413,28 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
 		Assert(XLogInsertAllowed());
 		log_page = true;
 	}
-	else if (XLogInsertAllowed() &&
-			 !ShutdownRequestPending &&
-			 (forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM))
+	else if (XLogInsertAllowed() && !ShutdownRequestPending)
 	{
-		log_page = true;
+		if (forknum == MAIN_FORKNUM)
+		{
+			if (!PageIsNew((Page) buffer))
+			{
+				if (lsn < FirstNormalUnloggedLSN)
+				{
+					start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum);
+					log_page = true;
+				}
+				else if (is_unlogged_build(InfoFromSMgrRel(reln), forknum))
+				{
+					log_page = true;
+				}
+			}
+		}
+		else
+		{
+			log_page = true;
+		}
 	}
-
 	if (log_page)
 	{
 		XLogRecPtr	recptr;
@@ -1486,21 +1486,19 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
 							RelFileInfoFmt(InfoFromSMgrRel(reln)),
 							forknum)));
 		}
-		else if (forknum != FSM_FORKNUM && forknum != VISIBILITYMAP_FORKNUM)
+		else
 		{
-			if (start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
-			{
-				mdcreate(reln, forknum, true);
-			}
-			if (blocknum >= relsize)
-			{
-				unlogged_extend(reln, forknum, relsize, blocknum+1);
-			}
-			mdwrite(reln, forknum, blocknum, buffer, true);
-			resume_unlogged_build();
-
-			ereport(SmgrTrace,
-					(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
+			/*
+			 * Its a bad sign if there is a page with zero LSN in the buffer
+			 * cache in a standby, too. However, PANICing seems like a cure
+			 * worse than the disease, as the damage has likely already been
+			 * done in the primary. So in a standby, make this an assertion,
+			 * and in a release build just LOG the error and soldier on. We
+			 * update the last-written LSN of the page with a conservative
+			 * value in that case, which is the last replayed LSN.
+			 */
+			ereport(RecoveryInProgress() ? LOG : PANIC,
+					(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
 							blocknum,
 							RelFileInfoFmt(InfoFromSMgrRel(reln)),
 							forknum)));
@@ -1509,51 +1507,6 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
 			lsn = GetXLogReplayRecPtr(NULL); /* in standby mode, soldier on */
 		}
 	}
-	else if (lsn < FirstNormalUnloggedLSN)
-	{
-		if (start_unlogged_build(InfoFromSMgrRel(reln),forknum, blocknum, &relsize))
-		{
-			mdcreate(reln, forknum, true);
-		}
-		if (blocknum >= relsize)
-		{
-			unlogged_extend(reln, forknum, relsize, blocknum+1);
-		}
-		mdwrite(reln, forknum, blocknum, buffer, true);
-		resume_unlogged_build();
-
-		ereport(SmgrTrace,
-				(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
-						blocknum,
-						RelFileInfoFmt(InfoFromSMgrRel(reln)),
-						forknum)));
-	}
-	else
-	{
-		if (is_unlogged_build_extend(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
-		{
-			if (blocknum >= relsize)
-			{
-				unlogged_extend(reln, forknum, relsize, blocknum+1);
-			}
-			mdwrite(reln, forknum, blocknum, buffer, true);
-			resume_unlogged_build();
-
-			ereport(SmgrTrace,
-					(errmsg(NEON_TAG "Page %u with LSN=%X/%X of relation %u/%u/%u.%u is saved locally.",
-							blocknum,
-							LSN_FORMAT_ARGS(lsn),
-							RelFileInfoFmt(InfoFromSMgrRel(reln)),
-							forknum)));
-		}
-		else
-			ereport(SmgrTrace,
-					(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is already wal-logged at lsn=%X/%X",
-							blocknum,
-							RelFileInfoFmt(InfoFromSMgrRel(reln)),
-							forknum, LSN_FORMAT_ARGS(lsn)
-							)));
-	}
 
 	/*
 	 * Remember the LSN on this page. When we read the page again, we must
@@ -1571,17 +1524,10 @@ static void
 neon_log_newpage_range_callback(Relation rel, ForkNumber forknum)
 {
 	SMgrRelation smgr = RelationGetSmgr(rel);
 
-	if (stop_unlogged_build(InfoFromSMgrRel(smgr), forknum))
-	{
-		mdclose(smgr, forknum);
-		/* use isRedo == true, so that we drop it immediately */
-		mdunlink(InfoBFromSMgrRel(smgr), forknum, true);
-	}
+	stop_unlogged_build(InfoFromSMgrRel(smgr), forknum);
 }
 
-
-
 /*
  * neon_init() -- Initialize private state
  */
@@ -2081,7 +2027,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 								  &reln->smgr_cached_nblocks[forkNum]);
 	}
 	else
-		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0, NULL);
+		set_cached_relsize(InfoFromSMgrRel(reln), forkNum, 0);
 
 #ifdef DEBUG_COMPARE_LOCAL
 	if (IS_LOCAL_REL(reln))
@@ -2141,7 +2087,6 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 #endif
 {
 	XLogRecPtr	lsn;
-	BlockNumber old_relsize;
 	BlockNumber n_blocks = 0;
 
 	switch (reln->smgr_relpersistence)
@@ -2194,11 +2139,8 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 
 	neon_wallog_page(reln, forkNum, blkno, buffer, false);
 
-	if (set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1, &old_relsize))
-	{
-		unlogged_extend(reln, forkNum, old_relsize, blkno + 1);
-		resume_unlogged_build();
-	}
+	set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blkno + 1);
+
 	lsn = PageGetLSN((Page) buffer);
 	neon_log(SmgrTrace, "smgrextend called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
 			 RelFileInfoFmt(InfoFromSMgrRel(reln)),
@@ -2232,7 +2174,6 @@ void
 neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 				int nblocks, bool skipFsync)
 {
-	BlockNumber old_relsize;
 	BlockNumber remblocks = nblocks;
 	XLogRecPtr	lsn = 0;
 
@@ -2283,11 +2224,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 	if (!XLogInsertAllowed())
 		return;
 
-	if (set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum + nblocks, &old_relsize))
-	{
-		unlogged_extend(reln, forkNum, old_relsize, blocknum + nblocks);
-		resume_unlogged_build();
-	}
+	set_cached_relsize(InfoFromSMgrRel(reln), forkNum, blocknum + nblocks);
 
 	if (forkNum != MAIN_FORKNUM)	/* no need to wal-log zero pages except VM/FSM forks */
 	{
@@ -2632,27 +2569,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 	request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
 	neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
 
-	if (is_unlogged_build(InfoFromSMgrRel(reln), forkNum, &relsize))
-	{
-		if (blkno >= relsize)
-		{
-			elog(SmgrTrace, "Get empty local page %d of relation %u/%u/%u.%u",
-				 blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum);
-			memset(buffer, 0, BLCKSZ);
-		}
-		else
-		{
-			elog(SmgrTrace, "Read local page %d of relation %u/%u/%u.%u",
-				 blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum);
-			mdread(reln, forkNum, blkno, buffer);
-		}
-		resume_unlogged_build();
-	}
-	else
-	{
-		request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
-		neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
-	}
+
 #ifdef DEBUG_COMPARE_LOCAL
 	if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
 	{
@@ -2762,22 +2679,11 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *bu
 neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
 #endif
 {
-	BlockNumber relsize;
 	XLogRecPtr	lsn;
 
 	switch (reln->smgr_relpersistence)
 	{
 		case 0:
-			if (is_unlogged_build_extend(InfoFromSMgrRel(reln), forknum, blocknum, &relsize))
-			{
-				if (blocknum >= relsize)
-				{
-					unlogged_extend(reln, forknum, relsize, blocknum+1);
-				}
-				mdwrite(reln, forknum, blocknum, buffer, skipFsync);
-				resume_unlogged_build();
-				return;
-			}
 			/* This is a bit tricky. Check if the relation exists locally */
 			if (mdexists(reln, forknum))
 			{
@@ -2982,7 +2888,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 			neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
 	}
 
-	set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks, NULL);
+	set_cached_relsize(InfoFromSMgrRel(reln), forknum, nblocks);
 
 	/*
 	 * Truncating a relation drops all its buffers from the buffer cache
@@ -3238,7 +3144,7 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
 		relsize = Max(nbresponse->n_blocks, blkno + 1);
 
-		set_cached_relsize(rinfo, forknum, relsize, NULL);
+		set_cached_relsize(rinfo, forknum, relsize);
 		SetLastWrittenLSNForRelation(end_recptr, rinfo, forknum);
 
 		neon_log(SmgrTrace, "Set length to %d", relsize);
diff --git a/pgxn/neon/relsize_cache.c b/pgxn/neon/relsize_cache.c
index 47a382d9b1..1313010864 100644
--- a/pgxn/neon/relsize_cache.c
+++ b/pgxn/neon/relsize_cache.c
@@ -136,13 +136,10 @@ get_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber *size)
 
 /*
  * Cache relation size.
- * Returns true if it happens during unlogged build.
- * In thids case lock isnot released.
 */
-bool
-set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size, BlockNumber* old_size)
+void
+set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size)
{
-	bool unlogged = false;
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
@@ -170,11 +167,7 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size,
 				relsize_ctl->size -= 1;
 			}
 		}
-		if (old_size)
-		{
-			*old_size = found ? entry->size : 0;
-		}
-		entry->size = new_size;
+		entry->size = size;
 		if (!found)
 		{
 			entry->unlogged = false;
@@ -209,18 +202,9 @@ set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber new_size,
 		{
 			dlist_push_tail(&relsize_ctl->lru, &entry->lru_node);
 		}
-		else
-		{
-			Assert(old_size);
-			unlogged = true;
-		}
 		relsize_ctl->writes += 1;
-		if (!unlogged)
-		{
-			LWLockRelease(relsize_lock);
-		}
+		LWLockRelease(relsize_lock);
 	}
-	return unlogged;
 }
 
 void
@@ -305,28 +289,22 @@ forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum)
  * The criteria for starting iunlogged build is writing page without normal LSN.
 * It can happen in any backend when page is evicted from shared buffers.
 * Or can not happen at all if index fits in shared buffers.
- *
- * If this function really starts unlogged build, then it returns true, remove entry from LRU list
- * (protecting it from eviction until the end of unlogged build) and keeps lock on relsize hash.
- * This lock should be later released using resume_unlogged_build(). It allows caller to perform some actions
- * in critical section, for example right now it create relation on the disk using mdcreate
 */
-bool
-start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize)
+void
+start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum)
{
-	bool start = false;
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
 		RelSizeEntry *entry;
 		bool		found;
+		bool		start = false;
 
 		tag.rinfo = rinfo;
 		tag.forknum = forknum;
 		LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
 		entry = hash_search(relsize_hash, &tag, HASH_ENTER, &found);
 		if (!found)
 		{
-			*relsize = 0;
 			entry->size = blocknum + 1;
 			start = true;
@@ -351,7 +329,6 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknu
 		{
 			start = !entry->unlogged;
-			*relsize = entry->size;
 			if (entry->size <= blocknum)
 			{
 				entry->size = blocknum + 1;
@@ -373,18 +350,15 @@ start_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknu
 		if (start)
 			elog(LOG, "Start unlogged build for %u/%u/%u.%u",
 				 RelFileInfoFmt(rinfo), forknum);
+		LWLockRelease(relsize_lock);
 	}
-	return start;
 }
 
 /*
  * Check if unlogged build is in progress.
- * If so, true is returns and lock on relsize cache is hold.
- * It should be later released by called using resume_unlogged_build().
- * It allows to read page from local file without risk that it is removed by stop_unlogged_build by some other backend.
 */
 bool
-is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize)
+is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
{
 	bool		unlogged = false;
 
@@ -400,82 +374,23 @@ is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber* relsize)
 		if (entry != NULL)
 		{
 			unlogged = entry->unlogged;
-			*relsize = entry->size;
 			relsize_ctl->hits += 1;
 		}
 		else
 		{
 			relsize_ctl->misses += 1;
 		}
-		if (!unlogged)
-			LWLockRelease(relsize_lock);
+		LWLockRelease(relsize_lock);
 	}
 	return unlogged;
 }
 
 /*
- * Check if releation is extended during unlogged build.
- * If it is unlogged, true is returns and lock on relsize cache is hold.
- * It should be later released by called using resume_unlogged_build().
- * It allows to atomocally extend local file.
+ * Clear unlogged build if it was set.
 */
-bool
-is_unlogged_build_extend(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, BlockNumber* relsize)
-{
-	bool unlogged = false;
-
-	if (relsize_hash_size > 0)
-	{
-		RelTag		tag;
-		RelSizeEntry *entry;
-
-		tag.rinfo = rinfo;
-		tag.forknum = forknum;
-
-		LWLockAcquire(relsize_lock, LW_SHARED);
-		entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
-		if (entry != NULL)
-		{
-			if (entry->size <= blocknum)
-			{
-				/* Very rare case: it can happen only if relation is thrown away from relcache before unlogged build is detected */
-				/* Repeat search under exclusive lock */
-				LWLockRelease(relsize_lock);
-				LWLockAcquire(relsize_lock, LW_EXCLUSIVE);
-				entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
-				if (entry == NULL)
-				{
-					relsize_ctl->misses += 1;
-					LWLockRelease(relsize_lock);
-					return false;
-				}
-			}
-			unlogged = entry->unlogged;
-			*relsize = entry->size;
-			if (entry->size <= blocknum)
-			{
-				entry->size = blocknum + 1;
-			}
-			relsize_ctl->hits += 1;
-		}
-		else
-		{
-			relsize_ctl->misses += 1;
-		}
-		if (!unlogged)
-			LWLockRelease(relsize_lock);
-	}
-	return unlogged;
-}
-
-/*
- * Check if unlogged build is in progress and if so, clear th flag, return entry to LRU list and return true.
- */
-bool
+void
 stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
 {
-	bool unlogged = false;
-
 	if (relsize_hash_size > 0)
 	{
 		RelTag		tag;
@@ -487,7 +402,7 @@ stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
 		entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
 		if (entry != NULL)
 		{
-			unlogged = entry->unlogged;
+			bool		unlogged = entry->unlogged;
 			entry->unlogged = false;
 			relsize_ctl->hits += 1;
 			if (unlogged)
@@ -504,20 +419,8 @@ stop_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
 		}
 		LWLockRelease(relsize_lock);
 	}
-	return unlogged;
 }
 
-/*
- * Release lock obtained by start_unlogged_build or is_unlogged-build functions
- */
-void
-resume_unlogged_build(void)
-{
-	if (relsize_hash_size > 0)
-		LWLockRelease(relsize_lock);
-}
-
-
 void
 relsize_hash_init(void)
 {
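
Reviewer note: the hunk in neon_wallog_page() above is the heart of the change. Condensed into a single predicate, the new eviction-time decision looks roughly like the sketch below. The wrapper function must_wallog_evicted_page() is hypothetical (no such function exists in the patch); the types and helpers it uses (SMgrRelation, ForkNumber, Page, XLogInsertAllowed, ShutdownRequestPending, PageIsNew, FirstNormalUnloggedLSN, InfoFromSMgrRel, start_unlogged_build, is_unlogged_build) come from PostgreSQL and the neon extension headers.

/*
 * Hypothetical condensation of the patched decision logic: should a page
 * being evicted from shared buffers be WAL-logged?
 */
static bool
must_wallog_evicted_page(SMgrRelation reln, ForkNumber forknum,
						 BlockNumber blocknum, Page page, XLogRecPtr lsn)
{
	if (!XLogInsertAllowed() || ShutdownRequestPending)
		return false;

	/* Non-main forks (FSM, visibility map) are always logged on eviction. */
	if (forknum != MAIN_FORKNUM)
		return true;

	/* An all-zero page carries nothing worth logging. */
	if (PageIsNew(page))
		return false;

	if (lsn < FirstNormalUnloggedLSN)
	{
		/*
		 * First eviction of a page that was written without WAL: record
		 * that an unlogged build is in progress for this relation and log
		 * the page so the pageserver receives its contents.
		 */
		start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum);
		return true;
	}

	/* While a build is in progress, keep logging every evicted page. */
	return is_unlogged_build(InfoFromSMgrRel(reln), forknum);
}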
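
Reviewer note: with is_unlogged_build_extend() and resume_unlogged_build() removed, pagestore_smgr.c only touches the unlogged-build state through three calls. The walk-through below is hypothetical (the patch contains no such function); it just strings the remaining calls together in the order they now occur, as assembled from the hunks above.

/*
 * Hypothetical lifecycle walk-through of the reduced unlogged-build API.
 */
static void
unlogged_build_lifecycle(SMgrRelation reln, ForkNumber forknum,
						 BlockNumber blocknum)
{
	/*
	 * 1. Evicting a main-fork page whose LSN is below
	 *    FirstNormalUnloggedLSN marks the relation as being in an
	 *    unlogged build (see neon_wallog_page).
	 */
	start_unlogged_build(InfoFromSMgrRel(reln), forknum, blocknum);

	/*
	 * 2. Later evictions ask whether the build is still in progress and,
	 *    if so, keep WAL-logging the evicted pages.
	 */
	if (is_unlogged_build(InfoFromSMgrRel(reln), forknum))
	{
		/* neon_wallog_page sets log_page = true in this case */
	}

	/*
	 * 3. When the build finishes and log_newpage_range() WAL-logs the
	 *    whole relation, neon_log_newpage_range_callback clears the flag.
	 */
	stop_unlogged_build(InfoFromSMgrRel(reln), forknum);
}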
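
Reviewer note: the relsize_cache.c hunks also change the locking contract. Before this patch, set_cached_relsize(), start_unlogged_build(), is_unlogged_build() and is_unlogged_build_extend() could return with relsize_lock still held, to be released later by the caller via resume_unlogged_build(); now every entry point releases the lock before returning. A minimal sketch of the new pattern, with the hit/miss counters omitted (RelTag, RelSizeEntry, relsize_hash, relsize_lock and relsize_hash_size are the extension's own definitions):

bool
is_unlogged_build(NRelFileInfo rinfo, ForkNumber forknum)
{
	bool		unlogged = false;

	if (relsize_hash_size > 0)
	{
		RelTag		tag;
		RelSizeEntry *entry;

		tag.rinfo = rinfo;
		tag.forknum = forknum;

		LWLockAcquire(relsize_lock, LW_SHARED);
		entry = hash_search(relsize_hash, &tag, HASH_FIND, NULL);
		if (entry != NULL)
			unlogged = entry->unlogged;
		/* Always released before returning: no cross-call lock handoff. */
		LWLockRelease(relsize_lock);
	}
	return unlogged;
}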