feat: support lazy, queued tenant attaches (#6907)

Add off-by-default support for lazy queued tenant activation on attach.
This should be useful on bulk migrations as some tenants will be
activated faster due to operations or endpoint startup. Eventually all
tenants will get activated by reusing the same mechanism we have at
startup (`PageserverConf::concurrent_tenant_warmup`).

The difference between lazily attached tenants and startup ones is that
we leave their initial logical size calculation to be triggered by
WalReceiver or consumption metrics.

Fixes: #6315

Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
This commit is contained in:
Joonas Koivunen
2024-02-29 13:26:29 +02:00
committed by GitHub
parent d04af08567
commit 4d426f6fbe
8 changed files with 255 additions and 73 deletions

View File

@@ -227,7 +227,11 @@ pub(crate) struct TenantPreload {
/// When we spawn a tenant, there is a special mode for tenant creation that
/// avoids trying to read anything from remote storage.
pub(crate) enum SpawnMode {
Normal,
/// Activate as soon as possible
Eager,
/// Lazy activation in the background, with the option to skip the queue if the need comes up
Lazy,
/// Tenant has been created during the lifetime of this process
Create,
}
@@ -700,41 +704,37 @@ impl Tenant {
.and_then(|x| x.initial_tenant_load_remote.take());
enum AttachType<'a> {
// During pageserver startup, we are attaching this tenant lazily in the background
Warmup(tokio::sync::SemaphorePermit<'a>),
// During pageserver startup, we are attaching this tenant as soon as we can,
// because a client tried to access it.
/// We are attaching this tenant lazily in the background.
Warmup {
_permit: tokio::sync::SemaphorePermit<'a>,
during_startup: bool
},
/// We are attaching this tenant as soon as we can, because for example an
/// endpoint tried to access it.
OnDemand,
// During normal operations after startup, we are attaching a tenant.
/// During normal operations after startup, we are attaching a tenant, and
/// eager attach was requested.
Normal,
}
// Before doing any I/O, wait for either or:
// - A client to attempt to access to this tenant (on-demand loading)
// - A permit to become available in the warmup semaphore (background warmup)
//
// Some-ness of init_order is how we know if we're attaching during startup or later
// in process lifetime.
let attach_type = if init_order.is_some() {
let attach_type = if matches!(mode, SpawnMode::Lazy) {
// Before doing any I/O, wait for at least one of:
// - A client attempting to access this tenant (on-demand loading)
// - A permit becoming available in the warmup semaphore (background warmup)
tokio::select!(
_ = tenant_clone.activate_now_sem.acquire() => {
permit = tenant_clone.activate_now_sem.acquire() => {
let _ = permit.expect("activate_now_sem is never closed");
tracing::info!("Activating tenant (on-demand)");
AttachType::OnDemand
},
permit_result = conf.concurrent_tenant_warmup.inner().acquire() => {
match permit_result {
Ok(p) => {
tracing::info!("Activating tenant (warmup)");
AttachType::Warmup(p)
}
Err(_) => {
// This is unexpected: the warmup semaphore should stay alive
// for the lifetime of init_order. Log a warning and proceed.
tracing::warn!("warmup_limit semaphore unexpectedly closed");
AttachType::Normal
}
permit = conf.concurrent_tenant_warmup.inner().acquire() => {
let _permit = permit.expect("concurrent_tenant_warmup semaphore is never closed");
tracing::info!("Activating tenant (warmup)");
AttachType::Warmup {
_permit,
during_startup: init_order.is_some()
}
}
_ = tenant_clone.cancel.cancelled() => {
// This is safe, but should be pretty rare: it is interesting if a tenant
@@ -749,6 +749,8 @@ impl Tenant {
},
)
} else {
// SpawnMode::{Create,Eager} always cause jumping ahead of the
// concurrent_tenant_warmup queue
AttachType::Normal
};
@@ -756,7 +758,7 @@ impl Tenant {
(SpawnMode::Create, _) => {
None
},
(SpawnMode::Normal, Some(remote_storage)) => {
(SpawnMode::Eager | SpawnMode::Lazy, Some(remote_storage)) => {
let _preload_timer = TENANT.preload.start_timer();
let res = tenant_clone
.preload(remote_storage, task_mgr::shutdown_token())
@@ -769,7 +771,7 @@ impl Tenant {
}
}
}
(SpawnMode::Normal, None) => {
(_, None) => {
let _preload_timer = TENANT.preload.start_timer();
None
}
@@ -828,7 +830,7 @@ impl Tenant {
let attached = {
let _attach_timer = match mode {
SpawnMode::Create => None,
SpawnMode::Normal => {Some(TENANT.attach.start_timer())}
SpawnMode::Eager | SpawnMode::Lazy => Some(TENANT.attach.start_timer()),
};
tenant_clone.attach(preload, mode, &ctx).await
};
@@ -850,7 +852,7 @@ impl Tenant {
// It also prevents the warmup process competing with the concurrency limit on
// logical size calculations: if logical size calculation semaphore is saturated,
// then warmup will wait for that before proceeding to the next tenant.
if let AttachType::Warmup(_permit) = attach_type {
if matches!(attach_type, AttachType::Warmup { during_startup: true, .. }) {
let mut futs: FuturesUnordered<_> = tenant_clone.timelines.lock().unwrap().values().cloned().map(|t| t.await_initial_logical_size()).collect();
tracing::info!("Waiting for initial logical sizes while warming up...");
while futs.next().await.is_some() {}
@@ -923,7 +925,7 @@ impl Tenant {
deleting: false,
timelines: HashMap::new(),
},
(None, SpawnMode::Normal) => {
(None, _) => {
anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624");
}
};
@@ -3769,7 +3771,7 @@ pub(crate) mod harness {
let preload = tenant
.preload(&self.remote_storage, CancellationToken::new())
.await?;
tenant.attach(Some(preload), SpawnMode::Normal, ctx).await?;
tenant.attach(Some(preload), SpawnMode::Eager, ctx).await?;
tenant.state.send_replace(TenantState::Active);
for timeline in tenant.timelines.lock().unwrap().values() {