diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs
index 37fa300467..e74c8ecf5a 100644
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -1144,18 +1144,24 @@ pub(crate) mod mock {
         rx: tokio::sync::mpsc::UnboundedReceiver<ListWriterQueueMessage>,
         executor_rx: tokio::sync::mpsc::Receiver<DeleterMessage>,
         cancel: CancellationToken,
+        executed: Arc<AtomicUsize>,
     }

     impl ConsumerState {
-        async fn consume(&mut self, remote_storage: &GenericRemoteStorage) -> usize {
-            let mut executed = 0;
-
+        async fn consume(&mut self, remote_storage: &GenericRemoteStorage) {
             info!("Executing all pending deletions");

             // Transform all executor messages to generic frontend messages
-            while let Ok(msg) = self.executor_rx.try_recv() {
+            loop {
+                use either::Either;
+                let msg = tokio::select! {
+                    left = self.executor_rx.recv() => Either::Left(left),
+                    right = self.rx.recv() => Either::Right(right),
+                };
                 match msg {
-                    DeleterMessage::Delete(objects) => {
+                    Either::Left(None) => break,
+                    Either::Right(None) => break,
+                    Either::Left(Some(DeleterMessage::Delete(objects))) => {
                         for path in objects {
                             match remote_storage.delete(&path, &self.cancel).await {
                                 Ok(_) => {
@@ -1165,18 +1171,13 @@ pub(crate) mod mock {
                                     error!("Failed to delete {path}, leaking object! ({e})");
                                 }
                             }
-                            executed += 1;
+                            self.executed.fetch_add(1, Ordering::Relaxed);
                         }
                     }
-                    DeleterMessage::Flush(flush_op) => {
+                    Either::Left(Some(DeleterMessage::Flush(flush_op))) => {
                         flush_op.notify();
                     }
-                }
-            }
-
-            while let Ok(msg) = self.rx.try_recv() {
-                match msg {
-                    ListWriterQueueMessage::Delete(op) => {
+                    Either::Right(Some(ListWriterQueueMessage::Delete(op))) => {
                         let mut objects = op.objects;
                         for (layer, meta) in op.layers {
                             objects.push(remote_layer_path(
@@ -1198,33 +1199,27 @@ pub(crate) mod mock {
                                     error!("Failed to delete {path}, leaking object! ({e})");
                                 }
                             }
-                            executed += 1;
+                            self.executed.fetch_add(1, Ordering::Relaxed);
                         }
                     }
-                    ListWriterQueueMessage::Flush(op) => {
+                    Either::Right(Some(ListWriterQueueMessage::Flush(op))) => {
                         op.notify();
                     }
-                    ListWriterQueueMessage::FlushExecute(op) => {
+                    Either::Right(Some(ListWriterQueueMessage::FlushExecute(op))) => {
                         // We have already executed all prior deletions because mock does them inline
                         op.notify();
                     }
-                    ListWriterQueueMessage::Recover(_) => {
+                    Either::Right(Some(ListWriterQueueMessage::Recover(_))) => {
                         // no-op in mock
                     }
                 }
-                info!("All pending deletions have been executed");
             }
-
-            executed
         }
     }

     pub struct MockDeletionQueue {
         tx: tokio::sync::mpsc::UnboundedSender<ListWriterQueueMessage>,
         executor_tx: tokio::sync::mpsc::Sender<DeleterMessage>,
-        executed: Arc<AtomicUsize>,
-        remote_storage: Option<GenericRemoteStorage>,
-        consumer: std::sync::Mutex<ConsumerState>,
         lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
     }

@@ -1235,29 +1230,34 @@ pub(crate) mod mock {

             let executed = Arc::new(AtomicUsize::new(0));

+            let mut consumer = ConsumerState {
+                rx,
+                executor_rx,
+                cancel: CancellationToken::new(),
+                executed: executed.clone(),
+            };
+
+            tokio::spawn(async move {
+                if let Some(remote_storage) = &remote_storage {
+                    consumer.consume(remote_storage).await;
+                }
+            });
+
             Self {
                 tx,
                 executor_tx,
-                executed,
-                remote_storage,
-                consumer: std::sync::Mutex::new(ConsumerState {
-                    rx,
-                    executor_rx,
-                    cancel: CancellationToken::new(),
-                }),
                 lsn_table: Arc::new(std::sync::RwLock::new(VisibleLsnUpdates::new())),
             }
         }

         #[allow(clippy::await_holding_lock)]
         pub async fn pump(&self) {
-            if let Some(remote_storage) = &self.remote_storage {
-                // Permit holding mutex across await, because this is only ever
-                // called once at a time in tests.
-                let mut locked = self.consumer.lock().unwrap();
-                let count = locked.consume(remote_storage).await;
-                self.executed.fetch_add(count, Ordering::Relaxed);
-            }
+            let (tx, rx) = tokio::sync::oneshot::channel();
+            self.executor_tx
+                .send(DeleterMessage::Flush(FlushOp { tx }))
+                .await
+                .expect("Failed to send flush message");
+            rx.await.ok();
         }

         pub(crate) fn new_client(&self) -> DeletionQueueClient {
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index bddcb534a1..339a3ca1bb 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3215,6 +3215,18 @@ impl Tenant {
             }
         }

+        if let ShutdownMode::Reload = shutdown_mode {
+            tracing::info!("Flushing deletion queue");
+            if let Err(e) = self.deletion_queue_client.flush().await {
+                match e {
+                    DeletionQueueError::ShuttingDown => {
+                        // This is the only error we expect for now. In the future, if more error
+                        // variants are added, we should handle them here.
+                    }
+                }
+            }
+        }
+
         // We cancel the Tenant's cancellation token _after_ the timelines have all shut down. This permits
         // them to continue to do work during their shutdown methods, e.g. flushing data.
         tracing::debug!("Cancelling CancellationToken");
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 92b2200542..eb8191e43e 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -1960,7 +1960,7 @@ impl TenantManager {
         attempt.before_reset_tenant();

         let (_guard, progress) = utils::completion::channel();
-        match tenant.shutdown(progress, ShutdownMode::Flush).await {
+        match tenant.shutdown(progress, ShutdownMode::Reload).await {
             Ok(()) => {
                 slot_guard.drop_old_value().expect("it was just shutdown");
             }
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index afd4664d01..730477a7f4 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -894,10 +894,11 @@ pub(crate) enum ShutdownMode {
     /// While we are flushing, we continue to accept read I/O for LSNs ingested before
     /// the call to [`Timeline::shutdown`].
     FreezeAndFlush,
-    /// Only flush the layers to the remote storage without freezing any open layers. This is the
-    /// mode used by ancestor detach and any other operations that reloads a tenant but not increasing
-    /// the generation number.
-    Flush,
+    /// Flush the layers to the remote storage without freezing any open layers, and flush the
+    /// deletion queue. This is the mode used by ancestor detach and any other operation that
+    /// reloads a tenant without increasing the generation number. Note that this mode cannot be
+    /// used at tenant shutdown, as flushing the deletion queue at that point hits shutting-down errors.
+    Reload,
     /// Shut down immediately, without waiting for any open layers to flush.
     Hard,
 }
@@ -1818,7 +1819,7 @@ impl Timeline {
             }
         }

-        if let ShutdownMode::Flush = mode {
+        if let ShutdownMode::Reload = mode {
             // drain the upload queue
             self.remote_client.shutdown().await;
             if !self.remote_client.no_pending_work() {
diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs
index 3595d743bc..3bfbfb5061 100644
--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -58,7 +58,7 @@ pub(crate) async fn offload_timeline(
     }

     // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
-    timeline.shutdown(super::ShutdownMode::Flush).await;
+    timeline.shutdown(super::ShutdownMode::Reload).await;

     // TODO extend guard mechanism above with method
     // to make deletions possible while offloading is in progress
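
The reworked mock consumer and `pump()` rely on a common tokio pattern: a single spawned task
drains two mpsc channels with `tokio::select!`, wraps each result in `either::Either` so one
`match` covers both queues, and callers synchronize by sending a flush message that carries a
oneshot sender and awaiting the reply. Below is a minimal, self-contained sketch of that pattern
only; the message types and names are illustrative stand-ins, not the pageserver's
`DeleterMessage`/`ListWriterQueueMessage` types, and it assumes the `tokio` and `either` crates.

    use either::Either;
    use tokio::sync::{mpsc, oneshot};

    enum FrontendMsg {
        Delete(String),
        Flush(oneshot::Sender<()>),
    }

    enum ExecutorMsg {
        Flush(oneshot::Sender<()>),
    }

    // Drain both channels in one task: `select!` waits on whichever queue has a
    // message, `Either` tags the source so a single `match` covers every case,
    // and a `None` from either side (sender dropped) terminates the loop.
    async fn consume(
        mut rx: mpsc::UnboundedReceiver<FrontendMsg>,
        mut executor_rx: mpsc::Receiver<ExecutorMsg>,
    ) {
        loop {
            let msg = tokio::select! {
                left = executor_rx.recv() => Either::Left(left),
                right = rx.recv() => Either::Right(right),
            };
            match msg {
                Either::Left(None) | Either::Right(None) => break,
                Either::Left(Some(ExecutorMsg::Flush(done))) => {
                    let _ = done.send(());
                }
                Either::Right(Some(FrontendMsg::Delete(path))) => {
                    println!("would delete {path}");
                }
                Either::Right(Some(FrontendMsg::Flush(done))) => {
                    let _ = done.send(());
                }
            }
        }
    }

    #[tokio::main]
    async fn main() {
        let (tx, rx) = mpsc::unbounded_channel();
        let (executor_tx, executor_rx) = mpsc::channel(16);
        tokio::spawn(consume(rx, executor_rx));

        tx.send(FrontendMsg::Delete("layer-0001".into())).unwrap();

        // The pump()-style handshake: send a Flush carrying a oneshot sender
        // and wait for the consumer task to acknowledge it.
        let (done_tx, done_rx) = oneshot::channel();
        executor_tx.send(ExecutorMsg::Flush(done_tx)).await.unwrap();
        done_rx.await.ok();
    }

In the diff above, the same handshake is what `MockDeletionQueue::pump()` now performs through
`executor_tx`, replacing the old mutex-guarded inline `consume()` call.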