mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
fix: delay eviction_task as well (#4397)
As seen on deployment of 2023-06-01 release, times were improving but there were some outliers caused by: - timelines `eviction_task` starting while activating and running imitation - timelines `initial logical size` calculation This PR fixes it so that `eviction_task` is delayed like other background tasks fixing an oversight from earlier #4372. After this PR activation will be two phases: 1. load and activate tenants AND calculate some initial logical sizes 2. rest of initial logical sizes AND background tasks - compaction, gc, disk usage based eviction, timelines `eviction_task`, consumption metrics
This commit is contained in:
@@ -335,9 +335,13 @@ fn start_pageserver(
|
||||
// Set up remote storage client
|
||||
let remote_storage = create_remote_storage_client(conf)?;
|
||||
|
||||
// All tenant load operations carry this while they are ongoing; it will be dropped once those
|
||||
// operations finish either successfully or in some other manner. However, the initial load
|
||||
// will be then done, and we can start the global background tasks.
|
||||
// Startup staging or optimizing:
|
||||
//
|
||||
// (init_done_tx, init_done_rx) are used to control when do background loops start. This is to
|
||||
// avoid starving out the BACKGROUND_RUNTIME async worker threads doing heavy work, like
|
||||
// initial repartitioning while we still have Loading tenants.
|
||||
//
|
||||
// init_done_rx is a barrier which stops waiting once all init_done_tx clones are dropped.
|
||||
let (init_done_tx, init_done_rx) = utils::completion::channel();
|
||||
|
||||
// Scan the local 'tenants/' directory and start loading the tenants
|
||||
|
||||
@@ -267,7 +267,7 @@ impl UninitializedTimeline<'_> {
|
||||
// updated it for the layers that we created during the import.
|
||||
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
|
||||
let tl = self.initialize_with_lock(ctx, &mut timelines, false)?;
|
||||
tl.activate(broker_client, ctx);
|
||||
tl.activate(broker_client, None, ctx);
|
||||
Ok(tl)
|
||||
}
|
||||
|
||||
@@ -879,7 +879,6 @@ impl Tenant {
|
||||
))
|
||||
}
|
||||
|
||||
///
|
||||
/// Load a tenant that's available on local disk
|
||||
///
|
||||
/// This is used at pageserver startup, to rebuild the in-memory
|
||||
@@ -890,6 +889,8 @@ impl Tenant {
|
||||
/// If the loading fails for some reason, the Tenant will go into Broken
|
||||
/// state.
|
||||
///
|
||||
/// `init_done` is an optional channel used during initial load to delay background task
|
||||
/// start. It is not used later.
|
||||
#[instrument(skip_all, fields(tenant_id=%tenant_id))]
|
||||
pub fn spawn_load(
|
||||
conf: &'static PageServerConf,
|
||||
@@ -1358,7 +1359,7 @@ impl Tenant {
|
||||
}
|
||||
};
|
||||
|
||||
loaded_timeline.activate(broker_client, ctx);
|
||||
loaded_timeline.activate(broker_client, None, ctx);
|
||||
|
||||
if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
|
||||
// Wait for the upload of the 'index_part.json` file to finish, so that when we return
|
||||
@@ -1682,6 +1683,9 @@ impl Tenant {
|
||||
}
|
||||
|
||||
/// Changes tenant status to active, unless shutdown was already requested.
|
||||
///
|
||||
/// `init_done` is an optional channel used during initial load to delay background task
|
||||
/// start. It is not used later.
|
||||
fn activate(
|
||||
self: &Arc<Self>,
|
||||
broker_client: BrokerClientChannel,
|
||||
@@ -1723,7 +1727,7 @@ impl Tenant {
|
||||
let mut activated_timelines = 0;
|
||||
|
||||
for timeline in not_broken_timelines {
|
||||
timeline.activate(broker_client.clone(), ctx);
|
||||
timeline.activate(broker_client.clone(), init_done, ctx);
|
||||
activated_timelines += 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -155,6 +155,8 @@ pub async fn init_tenant_mgr(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `init_done` is an optional channel used during initial load to delay background task
|
||||
/// start. It is not used later.
|
||||
pub fn schedule_local_tenant_processing(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_path: &Path,
|
||||
|
||||
@@ -14,6 +14,10 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::completion;
|
||||
|
||||
/// Start per tenant background loops: compaction and gc.
|
||||
///
|
||||
/// `init_done` is an optional channel used during initial load to delay background task
|
||||
/// start. It is not used later.
|
||||
pub fn start_background_loops(tenant: &Arc<Tenant>, init_done: Option<&completion::Barrier>) {
|
||||
let tenant_id = tenant.tenant_id;
|
||||
task_mgr::spawn(
|
||||
|
||||
@@ -57,6 +57,7 @@ use pageserver_api::reltag::RelTag;
|
||||
use postgres_connection::PgConnectionConfig;
|
||||
use postgres_ffi::to_pg_timestamp;
|
||||
use utils::{
|
||||
completion,
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::{AtomicLsn, Lsn, RecordLsn},
|
||||
seqwait::SeqWait,
|
||||
@@ -928,10 +929,15 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn activate(self: &Arc<Self>, broker_client: BrokerClientChannel, ctx: &RequestContext) {
|
||||
pub fn activate(
|
||||
self: &Arc<Self>,
|
||||
broker_client: BrokerClientChannel,
|
||||
init_done: Option<&completion::Barrier>,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
self.launch_wal_receiver(ctx, broker_client);
|
||||
self.set_state(TimelineState::Active);
|
||||
self.launch_eviction_task();
|
||||
self.launch_eviction_task(init_done);
|
||||
}
|
||||
|
||||
pub fn set_state(&self, new_state: TimelineState) {
|
||||
|
||||
@@ -34,6 +34,8 @@ use crate::{
|
||||
},
|
||||
};
|
||||
|
||||
use utils::completion;
|
||||
|
||||
use super::Timeline;
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -47,8 +49,9 @@ pub struct EvictionTaskTenantState {
|
||||
}
|
||||
|
||||
impl Timeline {
|
||||
pub(super) fn launch_eviction_task(self: &Arc<Self>) {
|
||||
pub(super) fn launch_eviction_task(self: &Arc<Self>, init_done: Option<&completion::Barrier>) {
|
||||
let self_clone = Arc::clone(self);
|
||||
let init_done = init_done.cloned();
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::Eviction,
|
||||
@@ -57,6 +60,7 @@ impl Timeline {
|
||||
&format!("layer eviction for {}/{}", self.tenant_id, self.timeline_id),
|
||||
false,
|
||||
async move {
|
||||
completion::Barrier::maybe_wait(init_done).await;
|
||||
self_clone.eviction_task(task_mgr::shutdown_token()).await;
|
||||
info!("eviction task finishing");
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user