diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 049a6d445d..773e5fc051 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -36,12 +36,11 @@ use bytes::{Bytes, BytesMut}; use pageserver_api::key::key_to_rel_block; use pageserver_api::models::WalRedoManagerStatus; use pageserver_api::shard::TenantShardId; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use std::time::Duration; use std::time::Instant; use tracing::*; use utils::lsn::Lsn; -use utils::sync::heavier_once_cell; /// /// This is the real implementation that uses a Postgres process to @@ -54,7 +53,7 @@ pub struct PostgresRedoManager { tenant_shard_id: TenantShardId, conf: &'static PageServerConf, last_redo_at: std::sync::Mutex>, - redo_process: heavier_once_cell::OnceCell>, + redo_process: RwLock>>, } /// @@ -102,7 +101,6 @@ impl PostgresRedoManager { self.conf.wal_redo_timeout, pg_version, ) - .await }; img = Some(result?); @@ -123,7 +121,6 @@ impl PostgresRedoManager { self.conf.wal_redo_timeout, pg_version, ) - .await } } @@ -137,7 +134,7 @@ impl PostgresRedoManager { chrono::Utc::now().checked_sub_signed(chrono::Duration::from_std(age).ok()?) }) }, - pid: self.redo_process.get().map(|p| p.id()), + pid: self.redo_process.read().unwrap().as_ref().map(|p| p.id()), }) } } @@ -155,7 +152,7 @@ impl PostgresRedoManager { tenant_shard_id, conf, last_redo_at: std::sync::Mutex::default(), - redo_process: heavier_once_cell::OnceCell::default(), + redo_process: RwLock::new(None), } } @@ -167,11 +164,8 @@ impl PostgresRedoManager { if let Some(last_redo_at) = *g { if last_redo_at.elapsed() >= idle_timeout { drop(g); - drop( - self.redo_process - .get() - .map(|mut guard| guard.take_and_deinit()), - ); + let mut guard = self.redo_process.write().unwrap(); + *guard = None; } } } @@ -181,7 +175,7 @@ impl PostgresRedoManager { /// Process one request for WAL redo using wal-redo postgres /// #[allow(clippy::too_many_arguments)] - async fn apply_batch_postgres( + fn apply_batch_postgres( &self, key: Key, lsn: Lsn, @@ -198,28 +192,41 @@ impl PostgresRedoManager { let mut n_attempts = 0u32; loop { // launch the WAL redo process on first use - let proc = self - .redo_process - .get_or_init(|init_permit| async move { - let start = Instant::now(); - let proc = Arc::new( - process::WalRedoProcess::launch( - self.conf, - self.tenant_shard_id, - pg_version, - ) - .context("launch walredo process")?, - ); - let duration = start.elapsed(); - WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64()); - info!( - duration_ms = duration.as_millis(), - pid = proc.id(), - "launched walredo process" - ); - anyhow::Ok((proc, init_permit)) - }) - .await?; + let proc: Arc = { + let proc_guard = self.redo_process.read().unwrap(); + match &*proc_guard { + None => { + // "upgrade" to write lock to launch the process + drop(proc_guard); + let mut proc_guard = self.redo_process.write().unwrap(); + match &*proc_guard { + None => { + let start = Instant::now(); + let proc = Arc::new( + process::WalRedoProcess::launch( + self.conf, + self.tenant_shard_id, + pg_version, + ) + .context("launch walredo process")?, + ); + let duration = start.elapsed(); + WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM + .observe(duration.as_secs_f64()); + info!( + duration_ms = duration.as_millis(), + pid = proc.id(), + "launched walredo process" + ); + *proc_guard = Some(Arc::clone(&proc)); + proc + } + Some(proc) => Arc::clone(proc), + } + } + Some(proc) => Arc::clone(proc), + } + }; let started_at = std::time::Instant::now(); @@ -268,11 +275,12 @@ impl PostgresRedoManager { // Avoid concurrent callers hitting the same issue. // We can't prevent it from happening because we want to enable parallelism. { - match self.redo_process.get() { - Some(mut guard) => { - if Arc::ptr_eq(&*guard, &proc) { + let mut guard = self.redo_process.write().unwrap(); + match &*guard { + Some(current_field_value) => { + if Arc::ptr_eq(current_field_value, &proc) { // We're the first to observe an error from `proc`, it's our job to take it out of rotation. - drop(guard.take_and_deinit()); + *guard = None; } } None => {