mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-16 01:42:55 +00:00
Fix cases where pageserver gets stuck waiting for LSN
The compute might make a request with an LSN that it hasn't even flushed yet.
This commit is contained in:
@@ -158,7 +158,6 @@ pub extern "C" fn bcomm_finish_cache_read(bs: &mut CommunicatorBackendStruct) ->
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Check if the local file cache contians the given block
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn bcomm_cache_contains(
|
||||
@@ -176,11 +175,10 @@ pub extern "C" fn bcomm_cache_contains(
|
||||
relnode: rel_number,
|
||||
forknum: fork_number,
|
||||
},
|
||||
block_number
|
||||
block_number,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
impl<'t> CommunicatorBackendStruct<'t> {
|
||||
/// Send a wakeup to the communicator process
|
||||
fn submit_request(self: &CommunicatorBackendStruct<'t>, request_idx: i32) {
|
||||
|
||||
@@ -239,12 +239,26 @@ impl<'t> CommunicatorWorkerProcessStruct<'t> {
|
||||
// Is it possible that the last-written LSN is ahead of last flush LSN? Generally not, we
|
||||
// shouldn't evict a page from the buffer cache before all its modifications have been
|
||||
// safely flushed. That's the "WAL before data" rule. However, such case does exist at index
|
||||
// building, _bt_blwritepage logs the full page without flushing WAL before smgrextend
|
||||
// building: _bt_blwritepage logs the full page without flushing WAL before smgrextend
|
||||
// (files are fsynced before build ends).
|
||||
//
|
||||
// FIXME: I'm seeing some other cases of this too in the regression tests.
|
||||
// Maybe it's OK? Would be nice to dig a little deeper.
|
||||
// See the old logic in neon_get_request_lsns() C function
|
||||
// XXX: If we make a request LSN greater than the current WAL flush LSN, the pageserver would
|
||||
// block waiting for the WAL arrive, until we flush it and it propagates through the
|
||||
// safekeepers to the pageserver. If there's nothing that forces the WAL to be flushed,
|
||||
// the pageserver would get stuck waiting forever. To avoid that, all the write-
|
||||
// functions in communicator_new.c call XLogSetAsyncXactLSN(). That nudges the WAL writer to
|
||||
// perform the flush relatively soon.
|
||||
//
|
||||
// It would perhaps be nicer to do the WAL flush here, but it's tricky to call back into
|
||||
// Postgres code to do that from here. That's why we rely on communicator_new.c to do the
|
||||
// calls "pre-emptively".
|
||||
//
|
||||
// FIXME: Because of the above, it can still happen that the flush LSN is ahead of
|
||||
// not_modified_since, if the WAL writer hasn't done the flush yet. It would be nice to know
|
||||
// if there are other cases like that that we have mised, but unfortunately we cannot turn
|
||||
// this into an assertion because of that legit case.
|
||||
//
|
||||
// See also the old logic in neon_get_request_lsns() C function
|
||||
if not_modified_since_lsn > request_lsn {
|
||||
tracing::info!(
|
||||
"not_modified_since_lsn {} is ahead of last flushed LSN {}",
|
||||
|
||||
@@ -847,6 +847,9 @@ communicator_new_write_page(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber
|
||||
};
|
||||
NeonIOResult result;
|
||||
|
||||
/* FIXME: see `request_lsns` in main_loop.rs for why this is needed */
|
||||
XLogSetAsyncXactLSN(lsn);
|
||||
|
||||
perform_request(&request, &result);
|
||||
switch (result.tag)
|
||||
{
|
||||
@@ -883,6 +886,9 @@ communicator_new_rel_extend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber
|
||||
};
|
||||
NeonIOResult result;
|
||||
|
||||
/* FIXME: see `request_lsns` in main_loop.rs for why this is needed */
|
||||
XLogSetAsyncXactLSN(lsn);
|
||||
|
||||
perform_request(&request, &result);
|
||||
switch (result.tag)
|
||||
{
|
||||
@@ -918,6 +924,9 @@ communicator_new_rel_zeroextend(NRelFileInfo rinfo, ForkNumber forkNum, BlockNum
|
||||
};
|
||||
NeonIOResult result;
|
||||
|
||||
/* FIXME: see `request_lsns` in main_loop.rs for why this is needed */
|
||||
XLogSetAsyncXactLSN(lsn);
|
||||
|
||||
perform_request(&request, &result);
|
||||
switch (result.tag)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user