This commit is contained in:
Christian Schwarz
2024-02-02 11:50:43 +00:00
parent aa0e9fdaef
commit 014147a644
9 changed files with 189 additions and 23 deletions

View File

@@ -40,6 +40,7 @@ license = "Apache-2.0"
[workspace.dependencies]
anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-channel = "2.1.1"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
azure_core = "0.18"
azure_identity = "0.18"

View File

@@ -12,6 +12,7 @@ testing = ["fail/failpoints"]
[dependencies]
arc-swap.workspace = true
async-channel.workspace = true
sentry.workspace = true
async-trait.workspace = true
anyhow.workspace = true

View File

@@ -87,6 +87,8 @@ pub mod failpoint_support;
pub mod yielding_loop;
pub mod pre_spawned_pool;
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
///
/// we have several cases:

View File

@@ -0,0 +1,138 @@
use std::{
collections::VecDeque,
num::NonZeroUsize,
sync::{Arc, RwLock},
thread::JoinHandle,
};
use tokio::sync::{mpsc, OwnedSemaphorePermit};
use tokio_util::sync::CancellationToken;
use tracing::{debug, instrument};
/// Handle to a pre-spawned pool started by [`Pool::launch`].
///
/// Items are taken out with [`Client::get`]; the number of slots is adjusted with
/// [`Client::set_slot_count_nowait`].
pub struct Client<T> {
// Sends `Command`s to the pool task; the task returns once this channel is closed.
cmds_tx: mpsc::UnboundedSender<Command>,
// Receives the items that the pool task has created ahead of time.
items_rx: async_channel::Receiver<CreatedItem<T>>,
}
/// Factory for pool items; the pool task calls [`Launcher::create`] whenever it fills a slot.
pub trait Launcher<T> {
/// Creates one new item, or reports why it could not be created.
fn create(&self) -> anyhow::Result<T>;
}
/// Control messages sent from a [`Client`] to the pool task.
#[derive(Debug)]
enum Command {
/// Set the configured number of slots to the given value.
SetSlotCount(usize),
}
/// Error returned by [`Client::get`].
///
/// Public because it appears in the signature of the public `Client::get`; implements
/// `std::error::Error` so callers can propagate it with `?` into `anyhow::Result`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GetError {
    /// The item channel is closed, so no further items will be handed out.
    ShuttingDown,
}

impl std::fmt::Display for GetError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            GetError::ShuttingDown => f.write_str("pre-spawned pool is shutting down"),
        }
    }
}

impl std::error::Error for GetError {}
impl<T> Client<T> {
pub async fn get(&self) -> Result<T, GetError> {
self.items_rx
.recv()
.await
.map_err(|_| GetError::ShuttingDown)
.map(|CreatedItem { permit, item }| {
drop(permit); // allow a new one to be pre-spanwed
item
})
}
pub fn set_slot_count_nowait(&self, count: usize) {
self.cmds_tx.send(Command::SetSlotCount(count));
}
}
/// State owned by the background task that keeps items pre-created.
///
/// Constructed by [`Pool::launch`], which moves it into a spawned task and hands a
/// [`Client`] back to the caller.
pub struct Pool<T, L>
where
T: Send + 'static,
L: Send + Launcher<T> + 'static,
{
// Creates new items when a slot is free.
launcher: L,
// Receives `Command`s from the `Client`.
cmds_rx: mpsc::UnboundedReceiver<Command>,
// Hands created items over to the `Client`.
items_tx: async_channel::Sender<CreatedItem<T>>,
}
/// A queued item together with the semaphore permit for the slot it occupies.
struct CreatedItem<T> {
// Dropped by `Client::get` when the item is handed out, which frees the slot.
permit: OwnedSemaphorePermit,
item: T,
}
impl<T, L> Pool<T, L>
where
    T: Send + 'static,
    L: Send + Launcher<T> + 'static,
{
    /// Spawns the pool task with `tokio::spawn` and returns the [`Client`] that controls it.
    ///
    /// The pool starts with zero slots: nothing is created until the slot count is raised
    /// through [`Client::set_slot_count_nowait`].
    pub async fn launch(launcher: L) -> Client<T> {
        let (cmds_tx, cmds_rx) = mpsc::unbounded_channel(); // callers are limited to mgmt api
        let (items_tx, items_rx) = async_channel::unbounded(); // task() limits pending items itself

        // The task stops by itself once the client side of either channel is gone.
        tokio::spawn(
            Self {
                launcher,
                cmds_rx,
                items_tx,
            }
            .task(),
        );
        Client { cmds_tx, items_rx }
    }

    /// Main loop of the pool task.
    ///
    /// Invariant: the semaphore's total permit count (available permits plus permits held by
    /// queued items) equals `configured + need_forget`. Every queued item holds one permit,
    /// so at most `configured` items are kept ready once `need_forget` has drained to zero.
    #[instrument(skip_all)]
    async fn task(mut self) {
        // Pause after a failed `Launcher::create` so that a persistent failure does not turn
        // into a busy loop.
        const CREATE_RETRY_DELAY: std::time::Duration = std::time::Duration::from_secs(1);

        let initial = 0;
        let mut configured = initial;
        let pending_items = Arc::new(tokio::sync::Semaphore::new(initial));
        // Number of permits that still have to be removed to reach a lowered slot count.
        let mut need_forget: usize = 0;
        loop {
            debug!(
                configured,
                need_forget,
                available = pending_items.available_permits(),
                "iteration"
            );
            let cmd = tokio::select! {
                res = self.cmds_rx.recv() => {
                    match res {
                        Some(cmd) => cmd,
                        None => return, // dropping tx acts as cancellation
                    }
                }
                permit = Arc::clone(&pending_items).acquire_owned() => {
                    let permit = permit.expect("we never close this semaphore");
                    if need_forget > 0 {
                        debug!("fogetting permit to reduce semaphore count");
                        need_forget -= 1;
                        permit.forget();
                        continue;
                    }
                    debug!("creating item");
                    // NOTE(review): `create` is a synchronous call made from the async task;
                    // if it can block for long, consider moving it off the executor thread.
                    let created = self.launcher.create();
                    let item = match created {
                        Ok(item) => item,
                        Err(err) => {
                            // Keep the pool alive: return the slot and retry after a pause
                            // instead of panicking the task.
                            tracing::error!("failed to create item, will retry: {err:#}");
                            drop(permit);
                            tokio::time::sleep(CREATE_RETRY_DELAY).await;
                            continue;
                        }
                    };
                    match self.items_tx.send(CreatedItem { permit, item }).await {
                        Ok(()) => continue,
                        Err(_) => {
                            debug!("stopping, client has gone away");
                            return;
                        }
                    }
                }
            };
            debug!(?cmd, "received command");
            match cmd {
                Command::SetSlotCount(desired) => {
                    if desired > configured {
                        let grow: usize = desired - configured;
                        // Permits that are still scheduled for removal can be kept instead
                        // of adding new ones only to forget them right away.
                        let cancelled = grow.min(need_forget);
                        need_forget -= cancelled;
                        pending_items.add_permits(grow - cancelled);
                    } else if desired < configured {
                        need_forget += configured - desired;
                    }
                    configured = desired;
                }
            }
        }
    }
}

View File

@@ -614,10 +614,7 @@ impl Tenant {
mode: SpawnMode,
ctx: &RequestContext,
) -> anyhow::Result<Arc<Tenant>> {
let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new(
conf,
tenant_shard_id,
)));
let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new(conf)));
let TenantSharedResources {
broker_client,

View File

@@ -21,6 +21,8 @@
/// Process lifecycle and abstraction for the IPC protocol.
mod process;
mod process_pool;
/// Code to apply [`NeonWalRecord`]s.
mod apply_neon;
@@ -146,6 +148,7 @@ impl PostgresRedoManager {
pub fn new(
conf: &'static PageServerConf,
tenant_shard_id: TenantShardId,
pool: process_pool::Pool,
) -> PostgresRedoManager {
// The actual process is launched lazily, on first request.
PostgresRedoManager {

View File

@@ -52,12 +52,8 @@ impl WalRedoProcess {
//
// Start postgres binary in special WAL redo mode.
//
#[instrument(skip_all,fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), pg_version=pg_version))]
pub(crate) fn launch(
conf: &'static PageServerConf,
tenant_shard_id: TenantShardId,
pg_version: u32,
) -> anyhow::Result<Self> {
#[instrument(skip_all,fields(pg_version=pg_version))]
pub(crate) fn launch(conf: &'static PageServerConf, pg_version: u32) -> anyhow::Result<Self> {
let pg_bin_dir_path = conf.pg_bin_dir(pg_version).context("pg_bin_dir")?; // TODO these should be infallible.
let pg_lib_dir_path = conf.pg_lib_dir(pg_version).context("pg_lib_dir")?;
@@ -66,9 +62,6 @@ impl WalRedoProcess {
let child = Command::new(pg_bin_dir_path.join("postgres"))
// the first arg must be --wal-redo so the child process enters into walredo mode
.arg("--wal-redo")
// the child doesn't process this arg, but, having it in the argv helps indentify the
// walredo process for a particular tenant when debugging a pagserver
.args(["--tenant-shard-id", &format!("{tenant_shard_id}")])
.stdin(Stdio::piped())
.stderr(Stdio::piped())
.stdout(Stdio::piped())
@@ -83,7 +76,7 @@ impl WalRedoProcess {
// the files it opens, and
// 2. to use seccomp to sandbox itself before processing the first
// walredo request.
.spawn_no_leak_child(tenant_shard_id)
.spawn_no_leak_child()
.context("spawn process")?;
WAL_REDO_PROCESS_COUNTERS.started.inc();
let mut child = scopeguard::guard(child, |child| {
@@ -144,12 +137,11 @@ impl WalRedoProcess {
error!(error=?e, "failed to read from walredo stderr");
}
}
}.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %pg_version))
}.instrument(tracing::info_span!(parent: None, "wal-redo-postgres-stderr", pid = child.id(), %pg_version))
);
Ok(Self {
conf,
tenant_shard_id,
child: Some(child),
stdin: Mutex::new(ProcessInput {
stdin,
@@ -175,7 +167,7 @@ impl WalRedoProcess {
// Apply given WAL records ('records') over an old page image. Returns
// new page image.
//
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), pid=%self.id()))]
#[instrument(skip_all, fields(pid=%self.id()))]
pub(crate) fn apply_wal_records(
&self,
rel: RelTag,

View File

@@ -20,7 +20,6 @@ use pageserver_api::shard::TenantShardId;
/// Wrapper type around `std::process::Child` which guarantees that the child
/// will be killed and waited-for by this process before being dropped.
pub(crate) struct NoLeakChild {
pub(crate) tenant_id: TenantShardId,
pub(crate) child: Option<Child>,
}
@@ -39,12 +38,9 @@ impl DerefMut for NoLeakChild {
}
impl NoLeakChild {
pub(crate) fn spawn(tenant_id: TenantShardId, command: &mut Command) -> io::Result<Self> {
pub(crate) fn spawn(command: &mut Command) -> io::Result<Self> {
let child = command.spawn()?;
Ok(NoLeakChild {
tenant_id,
child: Some(child),
})
Ok(NoLeakChild { child: Some(child) })
}
pub(crate) fn kill_and_wait(mut self, cause: WalRedoKillCause) {

View File

@@ -0,0 +1,36 @@
use std::sync::Arc;
use utils::pre_spawned_pool;
use crate::config::PageServerConf;
use super::process::WalRedoProcess;
/// Pre-spawned walredo processes, one pool per Postgres version handled by `Pool::get`.
pub struct Pool {
    // `pre_spawned_pool::Pool::launch` moves the pool itself into a background task and
    // returns a `Client`. The `Client` is the handle that offers `get`, so that is what has
    // to be stored here.
    v14: pre_spawned_pool::Client<Arc<WalRedoProcess>>,
    v15: pre_spawned_pool::Client<Arc<WalRedoProcess>>,
    v16: pre_spawned_pool::Client<Arc<WalRedoProcess>>,
}
/// Arguments passed to `WalRedoProcess::launch` each time a process is created for a pool.
struct Launcher {
// Postgres version whose binaries the launched process uses.
pg_version: u32,
conf: &'static PageServerConf,
}
impl utils::pre_spawned_pool::Launcher<Arc<WalRedoProcess>> for Launcher {
    /// Launches a walredo process for this launcher's Postgres version.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `WalRedoProcess::launch`.
    fn create(&self) -> anyhow::Result<Arc<WalRedoProcess>> {
        // `launch` yields a bare `WalRedoProcess`; the pool's item type is the `Arc`.
        WalRedoProcess::launch(self.conf, self.pg_version).map(Arc::new)
    }
}
impl Pool {
/// Selects the pool for `pg_version` (14, 15 or 16) and asks it for a process.
///
/// Returns an error for any other version.
pub fn get(&self, pg_version: usize) -> anyhow::Result<Arc<WalRedoProcess>> {
let pool = match pg_version {
14 => &self.v14,
15 => &self.v15,
16 => &self.v16,
x => anyhow::bail!("unknown pg version: {x}"),
};
// NOTE(review): in `utils::pre_spawned_pool`, `get` is an `async fn` on `Client` that
// returns `Result<T, GetError>`; this synchronous call returning `anyhow::Result` does not
// line up with that yet -- confirm how the two are meant to be reconciled.
// NOTE(review): `pg_version` is `usize` here but `u32` in `Launcher` -- confirm the
// intended type.
pool.get()
}
}