Support unlogged build in Neon extension

This commit is contained in:
Konstantin Knizhnik
2024-04-20 09:20:10 +03:00
parent acf0a11fea
commit fcd7d7008f
2 changed files with 30 additions and 214 deletions

View File

@@ -10,10 +10,6 @@
* Temporary and unlogged tables are stored locally, by md.c. The functions
* here just pass the calls through to corresponding md.c functions.
*
* Index build operations that use the buffer cache are also handled locally,
* just like unlogged tables. Such operations must be marked by calling
* smgr_start_unlogged_build() and friends.
*
* In order to know what relations are permanent and which ones are not, we
* have added a 'smgr_relpersistence' field to SmgrRelationData, and it is set
* by smgropen() callers, when they have the relcache entry at hand. However,
@@ -100,18 +96,6 @@ const int SmgrTrace = DEBUG5;
page_server_api *page_server;
/*
 * Unlogged relation build states.
 *
 * At most one unlogged build is tracked per backend: unlogged_build_rel is
 * the relation being built and unlogged_build_phase says how far the build
 * has progressed.
 */
typedef enum
{
/* No build is being tracked; this is the initial state. */
UNLOGGED_BUILD_NOT_IN_PROGRESS = 0,
/* Entered by neon_start_unlogged_build() for a permanent relation. */
UNLOGGED_BUILD_PHASE_1,
/* Entered by neon_finish_unlogged_build_phase_1() outside parallel workers. */
UNLOGGED_BUILD_PHASE_2,
/* Entered by neon_start_unlogged_build() for a temp or unlogged relation. */
UNLOGGED_BUILD_NOT_PERMANENT
} UnloggedBuildPhase;
/* Relation of the build currently in progress, or NULL when there is none. */
static SMgrRelation unlogged_build_rel = NULL;
/* Current phase of the build tracked in unlogged_build_rel. */
static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
@@ -1473,6 +1457,8 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
*/
if (PageIsNew((Page) buffer))
{
static PGAlignedBlock empty_page;
Assert(memcmp(buffer, empty_page.data, BLCKSZ) == 0);
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is all-zeros",
blocknum,
@@ -1489,17 +1475,11 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
}
else if (forknum != FSM_FORKNUM && forknum != VISIBILITYMAP_FORKNUM)
{
/*
* It's a bad sign if there is a page with zero LSN in the buffer
* cache in a standby, too. However, PANICing seems like a cure
* worse than the disease, as the damage has likely already been
* done in the primary. So in a standby, make this an assertion,
* and in a release build just LOG the error and soldier on. We
* update the last-written LSN of the page with a conservative
* value in that case, which is the last replayed LSN.
*/
ereport(RecoveryInProgress() ? LOG : PANIC,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is evicted with zero LSN",
mdcreate(reln, forknum, true);
mdwrite(reln, forknum, blocknum, buffer, true);
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
@@ -1508,6 +1488,17 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
lsn = GetXLogReplayRecPtr(NULL); /* in standby mode, soldier on */
}
}
else if (lsn < FirstNormalUnloggedLSN)
{
mdcreate(reln, forknum, true);
mdwrite(reln, forknum, blocknum, buffer, true);
ereport(SmgrTrace,
(errmsg(NEON_TAG "Page %u of relation %u/%u/%u.%u is saved locally.",
blocknum,
RelFileInfoFmt(InfoFromSMgrRel(reln)),
forknum)));
}
else
{
ereport(SmgrTrace,
@@ -2545,6 +2536,15 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
request_lsns = neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno);
neon_read_at_lsn(InfoFromSMgrRel(reln), forkNum, blkno, request_lsns, buffer);
if (forkNum == MAIN_FORKNUM && PageIsNew((Page)buffer) && mdexists(reln, forkNum))
{
elog(LOG, "Read local page %d of relation %u/%u/%u.%u",
blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum);
if (blkno >= mdnblocks(reln, forkNum))
memset(buffer, 0, BLCKSZ);
else
mdread(reln, forkNum, blkno, buffer);
}
#ifdef DEBUG_COMPARE_LOCAL
if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln))
@@ -2939,150 +2939,6 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
#endif
}
/*
 *	neon_start_unlogged_build() -- Begin a build operation on a relation.
 *
 * Certain index builds run in two steps: the relation is first filled with
 * ordinary inserts that go through the shared buffer cache without being
 * WAL-logged, and only afterwards is the whole relation WAL-logged. Because
 * Neon reconstructs pages from WAL, the page server cannot serve the
 * relation during the first, unlogged step.
 *
 * Temp and unlogged relations are only recorded as "not permanent"; for a
 * permanent relation the build enters phase 1 and the relation is made to
 * look unlogged until the build ends.
 */
static void
neon_start_unlogged_build(SMgrRelation reln)
{
	/*
	 * Only a single unlogged build may be tracked at any moment; the
	 * existing callers never need more than that.
	 */
	if (unlogged_build_phase != UNLOGGED_BUILD_NOT_IN_PROGRESS)
	{
		neon_log(ERROR, "unlogged relation build is already in progress");
	}
	Assert(unlogged_build_rel == NULL);

	ereport(SmgrTrace,
			(errmsg(NEON_TAG "starting unlogged build of relation %u/%u/%u",
					RelFileInfoFmt(InfoFromSMgrRel(reln)))));

	/* Non-permanent relations need no special handling beyond bookkeeping. */
	if (reln->smgr_relpersistence == RELPERSISTENCE_TEMP ||
		reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED)
	{
		unlogged_build_rel = reln;
		unlogged_build_phase = UNLOGGED_BUILD_NOT_PERMANENT;
		return;
	}

	/* Anything other than a permanent relation is rejected from here on. */
	if (reln->smgr_relpersistence == 0)
	{
		neon_log(ERROR, "cannot call smgr_start_unlogged_build() on rel with unknown persistence");
	}
	else if (reln->smgr_relpersistence != RELPERSISTENCE_PERMANENT)
	{
		neon_log(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
	}

	/* The build must start from an empty main fork. */
	if (smgrnblocks(reln, MAIN_FORKNUM) != 0)
	{
		neon_log(ERROR, "cannot perform unlogged index build, index is not empty ");
	}

	unlogged_build_rel = reln;
	unlogged_build_phase = UNLOGGED_BUILD_PHASE_1;

	/* Disguise the relation as unlogged for the duration of the build. */
	reln->smgr_relpersistence = RELPERSISTENCE_UNLOGGED;

	/*
	 * The local file is created by the leader, which is expected to get
	 * here first in a parallel build; workers have nothing more to do.
	 *
	 * FIXME: would passing isRedo = true be needed so that a missing
	 * tablespace directory gets created?
	 */
	if (IsParallelWorker())
		return;

	mdcreate(reln, MAIN_FORKNUM, false);
}
/*
 *	neon_finish_unlogged_build_phase_1()
 *
 * To be called once the relation has been fully populated in unlogged mode
 * and before WAL-logging of its contents begins.
 */
static void
neon_finish_unlogged_build_phase_1(SMgrRelation reln)
{
	Assert(unlogged_build_rel == reln);

	ereport(SmgrTrace,
			(errmsg(NEON_TAG "finishing phase 1 of unlogged build of relation %u/%u/%u",
					RelFileInfoFmt(InfoFromSMgrRel(reln)))));

	/* Builds of temp/unlogged relations have no phases to advance. */
	if (unlogged_build_phase == UNLOGGED_BUILD_NOT_PERMANENT)
		return;

	Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_1);
	Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);

	/* The leader (or a non-parallel backend) carries on with phase 2. */
	if (!IsParallelWorker())
	{
		unlogged_build_phase = UNLOGGED_BUILD_PHASE_2;
		return;
	}

	/*
	 * Phase 2 is performed by the leader alone, so a parallel worker is
	 * finished with the build at this point.
	 */
	unlogged_build_rel = NULL;
	unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
}
/*
 *	neon_end_unlogged_build() -- Complete an unlogged relation build.
 *
 * To be called after a relation that was first populated without
 * WAL-logging has been WAL-logged.
 *
 * The local copy of the relation is dropped here, because by now it has
 * been fully WAL-logged and is available from the page server.
 */
static void
neon_end_unlogged_build(SMgrRelation reln)
{
	NRelFileInfoBackend rinfob = InfoBFromSMgrRel(reln);

	Assert(unlogged_build_rel == reln);

	ereport(SmgrTrace,
			(errmsg(NEON_TAG "ending unlogged build of relation %u/%u/%u",
					RelFileInfoFmt(InfoFromNInfoB(rinfob)))));

	if (unlogged_build_phase != UNLOGGED_BUILD_NOT_PERMANENT)
	{
		int			fork = 0;

		Assert(unlogged_build_phase == UNLOGGED_BUILD_PHASE_2);
		Assert(reln->smgr_relpersistence == RELPERSISTENCE_UNLOGGED);

		/* Restore the relation's permanent appearance */
		reln->smgr_relpersistence = RELPERSISTENCE_PERMANENT;

		/* Drop the local copy, one fork at a time */
		rinfob = InfoBFromSMgrRel(reln);
		while (fork <= MAX_FORKNUM)
		{
			neon_log(SmgrTrace, "forgetting cached relsize for %u/%u/%u.%u",
					 RelFileInfoFmt(InfoFromNInfoB(rinfob)),
					 fork);
			forget_cached_relsize(InfoFromNInfoB(rinfob), fork);
			mdclose(reln, fork);
			/* isRedo == true makes the unlink happen immediately */
			mdunlink(rinfob, fork, true);
			fork++;
		}
	}

	/* No build is tracked any longer. */
	unlogged_build_rel = NULL;
	unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
}
#define STRPREFIX(str, prefix) (strncmp(str, prefix, strlen(prefix)) == 0)
static int
@@ -3176,40 +3032,6 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
return n_blocks;
}
/*
 * AtEOXact_neon() -- transaction callback that clears unlogged build state.
 *
 * On abort the tracked build is simply forgotten. On any commit- or
 * prepare-type event a build that is still being tracked is forgotten as
 * well, and an error is raised because it was never finished.
 */
static void
AtEOXact_neon(XactEvent event, void *arg)
{
	switch (event)
	{
		case XACT_EVENT_PRE_COMMIT:
		case XACT_EVENT_PARALLEL_PRE_COMMIT:
		case XACT_EVENT_PRE_PREPARE:
		case XACT_EVENT_COMMIT:
		case XACT_EVENT_PARALLEL_COMMIT:
		case XACT_EVENT_PREPARE:
			/* Nothing to complain about unless a build is still open. */
			if (unlogged_build_phase == UNLOGGED_BUILD_NOT_IN_PROGRESS)
				return;

			unlogged_build_rel = NULL;
			unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
			ereport(ERROR,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 (errmsg(NEON_TAG "unlogged index build was not properly finished"))));
			return;

		case XACT_EVENT_ABORT:
		case XACT_EVENT_PARALLEL_ABORT:

			/*
			 * Drop whatever build may have been underway; unlinking the
			 * local file is left to smgrDoPendingDeletes().
			 */
			unlogged_build_rel = NULL;
			unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
			return;
	}
}
static const struct f_smgr neon_smgr =
{
.smgr_init = neon_init,
@@ -3231,10 +3053,6 @@ static const struct f_smgr neon_smgr =
.smgr_truncate = neon_truncate,
.smgr_immedsync = neon_immedsync,
.smgr_start_unlogged_build = neon_start_unlogged_build,
.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,
.smgr_end_unlogged_build = neon_end_unlogged_build,
.smgr_read_slru_segment = neon_read_slru_segment,
};
@@ -3252,8 +3070,6 @@ smgr_neon(BackendId backend, NRelFileInfo rinfo)
/*
 * smgr_init_neon() -- initialization entry point for the Neon storage manager.
 *
 * Registers AtEOXact_neon as a transaction callback, then runs the standard
 * storage manager initialization followed by neon_init().
 */
void
smgr_init_neon(void)
{
/* AtEOXact_neon resets the unlogged build state at transaction events. */
RegisterXactCallback(AtEOXact_neon, NULL);
smgr_init_standard();
neon_init();
}

View File

@@ -1,5 +1,5 @@
{
"v16": ["16.3", "b228f20372ebcabfd7946647cb7adbd38bacb14a"],
"v15": ["15.7", "c2c3d40534db97d83dd7e185d1971e707fa2f445"],
"v14": ["14.12", "17e0f5ff4e1905691aa40e1e08f9b79b14c99652"]
"postgres-v16": "ed0c41966dc85ab21c09f787a475bdc5cc2cbf47",
"postgres-v15": "6b1b8c2ba07966f7377a76c73993ca6efdaa699c",
"postgres-v14": "417dce7bb986134213598c299b14e40d0f916345"
}