create the test case to reproduce the issue

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z
2024-11-18 15:57:53 -05:00
parent ada84400b7
commit 45f6111ad9
3 changed files with 45 additions and 18 deletions

View File

@@ -10,6 +10,7 @@ use remote_storage::GenericRemoteStorage;
use remote_storage::RemotePath;
use remote_storage::TimeoutOrCancel;
use remote_storage::MAX_KEYS_PER_DELETE;
use utils::pausable_failpoint;
use std::time::Duration;
use tokio_util::sync::CancellationToken;
use tracing::info;
@@ -90,6 +91,7 @@ impl Deleter {
/// Block until everything in accumulator has been executed
async fn flush(&mut self) -> Result<(), DeletionQueueError> {
while !self.accumulator.is_empty() && !self.cancel.is_cancelled() {
pausable_failpoint!("deletion-queue-before-execute-pause");
match self.remote_delete().await {
Ok(()) => {
// Note: we assume that the remote storage layer returns Ok(()) if some

View File

@@ -2608,7 +2608,9 @@ impl Timeline {
// See https://github.com/neondatabase/neon/issues/5878
//
// NB: generation numbers naturally protect against this because they disambiguate
// (1) and (4)
// (1) and (4) ONLY IF the generation number gets bumped. There are some cases where
// we load a tenant without bumping the generation number (e.g., detach ancestor
// and timeline offload/un-offload). In those cases, we need to rely on the barrier.
self.remote_client.schedule_barrier()?;
// Tenant::create_timeline will wait for these uploads to happen before returning, or
// on retry.