remove init_order delay for init logical size calculations

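The priority mechanism introduced earlier should take care of it, making
the `initial_logical_size_can_start` barrier and the
`initial_logical_size_attempt` completion in `InitializationOrder`
unnecessary.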
2026-07-06 13:40:37 +00:00 · 2023-11-30 15:06:52 +00:00
parent bc98824858
commit 5f83acb35b
6 changed files with 13 additions and 112 deletions
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -402,15 +402,11 @@ fn start_pageserver(
    let (init_remote_done_tx, init_remote_done_rx) = utils::completion::channel();
    let (init_done_tx, init_done_rx) = utils::completion::channel();

-    let (init_logical_size_done_tx, init_logical_size_done_rx) = utils::completion::channel();
-
    let (background_jobs_can_start, background_jobs_barrier) = utils::completion::channel();

    let order = pageserver::InitializationOrder {
        initial_tenant_load_remote: Some(init_done_tx),
        initial_tenant_load: Some(init_remote_done_tx),
-        initial_logical_size_can_start: init_done_rx.clone(),
-        initial_logical_size_attempt: Some(init_logical_size_done_tx),
        background_jobs_can_start: background_jobs_barrier.clone(),
    };

@@ -464,7 +460,7 @@ fn start_pageserver(
            });

            let WaitForPhaseResult {
-                timeout_remaining: timeout,
+                timeout_remaining: _timeout,
                skipped: init_load_skipped,
            } = wait_for_phase("initial_tenant_load", init_load_done, timeout).await;

@@ -476,20 +472,6 @@ fn start_pageserver(
                tracing::info!("Cancelled before initial logical sizes completed")
            });

-            let logical_sizes_done = std::pin::pin!(async {
-                init_logical_size_done_rx.wait().await;
-                startup_checkpoint(
-                    started_startup_at,
-                    "initial_logical_sizes",
-                    "Initial logical sizes completed",
-                );
-            });
-
-            let WaitForPhaseResult {
-                timeout_remaining: _,
-                skipped: logical_sizes_skipped,
-            } = wait_for_phase("initial_logical_sizes", logical_sizes_done, timeout).await;
-
            scopeguard::ScopeGuard::into_inner(guard);

            // allow background jobs to start: we either completed prior stages, or they reached timeout
@@ -514,9 +496,6 @@ fn start_pageserver(
            if let Some(f) = init_load_skipped {
                f.await;
            }
-            if let Some(f) = logical_sizes_skipped {
-                f.await;
-            }
            scopeguard::ScopeGuard::into_inner(guard);

            startup_checkpoint(started_startup_at, "complete", "Startup complete");
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -186,13 +186,6 @@ pub struct InitializationOrder {
    /// Each initial tenant load task carries this until completion.
    pub initial_tenant_load: Option<utils::completion::Completion>,

-    /// Barrier for when we can start initial logical size calculations.
-    pub initial_logical_size_can_start: utils::completion::Barrier,
-
-    /// Each timeline owns a clone of this to be consumed on the initial logical size calculation
-    /// attempt. It is important to drop this once the attempt has completed.
-    pub initial_logical_size_attempt: Option<utils::completion::Completion>,
-
    /// Barrier for when we can start any background jobs.
    ///
    /// This can be broken up later on, but right now there is just one class of a background job.
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -463,7 +463,6 @@ impl Tenant {
        index_part: Option<IndexPart>,
        metadata: TimelineMetadata,
        ancestor: Option<Arc<Timeline>>,
-        init_order: Option<&InitializationOrder>,
        _ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        let tenant_id = self.tenant_id;
@@ -473,7 +472,6 @@ impl Tenant {
            &metadata,
            ancestor.clone(),
            resources,
-            init_order,
            CreateTimelineCause::Load,
        )?;
        let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
@@ -672,10 +670,6 @@ impl Tenant {
                    // as we are no longer loading, signal completion by dropping
                    // the completion while we resume deletion
                    drop(_completion);
-// do not hold to initial_logical_size_attempt as it will prevent loading from proceeding without timeout
-                    let _ = init_order
-                        .as_mut()
-                        .and_then(|x| x.initial_logical_size_attempt.take());
                    let background_jobs_can_start =
                        init_order.as_ref().map(|x| &x.background_jobs_can_start);
                    if let Some(background) = background_jobs_can_start {
@@ -689,7 +683,6 @@ impl Tenant {
                        &tenant_clone,
                        preload,
                        tenants,
-init_order,
                        &ctx,
                    )
                    .await
@@ -702,7 +695,7 @@ init_order,
                    }
                }

-                match tenant_clone.attach(init_order, preload, &ctx).await {
+                match tenant_clone.attach(preload, &ctx).await {
                    Ok(()) => {
                        info!("attach finished, activating");
                        tenant_clone.activate(broker_client, None, &ctx);
@@ -765,7 +758,6 @@ init_order,
    ///
    async fn attach(
        self: &Arc<Tenant>,
-        init_order: Option<InitializationOrder>,
        preload: Option<TenantPreload>,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
@@ -778,7 +770,7 @@ init_order,
            None => {
                // Deprecated dev mode: load from local disk state instead of remote storage
                // https://github.com/neondatabase/neon/issues/5624
-                return self.load_local(init_order, ctx).await;
+                return self.load_local(ctx).await;
            }
        };

@@ -862,7 +854,6 @@ init_order,
                &index_part.metadata,
                Some(remote_timeline_client),
                self.deletion_queue_client.clone(),
-                None,
            )
            .await
            .context("resume_deletion")
@@ -987,10 +978,6 @@ init_order,
            None
        };

-        // we can load remote timelines during init, but they are assumed to be so rare that
-        // initialization order is not passed to here.
-        let init_order = None;
-
        // timeline loading after attach expects to find metadata file for each metadata
        save_metadata(self.conf, &self.tenant_id, &timeline_id, &remote_metadata)
            .await
@@ -1003,7 +990,6 @@ init_order,
            Some(index_part),
            remote_metadata,
            ancestor,
-            init_order,
            ctx,
        )
        .await
@@ -1243,11 +1229,7 @@ init_order,
    /// files on disk. Used at pageserver startup.
    ///
    /// No background tasks are started as part of this routine.
-    async fn load_local(
-        self: &Arc<Tenant>,
-        init_order: Option<InitializationOrder>,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    async fn load_local(self: &Arc<Tenant>, ctx: &RequestContext) -> anyhow::Result<()> {
        span::debug_assert_current_span_has_tenant_id();

        debug!("loading tenant task");
@@ -1273,7 +1255,7 @@ init_order,
        // Process loadable timelines first
        for (timeline_id, local_metadata) in scan.sorted_timelines_to_load {
            if let Err(e) = self
-                .load_local_timeline(timeline_id, local_metadata, init_order.as_ref(), ctx, false)
+                .load_local_timeline(timeline_id, local_metadata, ctx, false)
                .await
            {
                match e {
@@ -1307,13 +1289,7 @@ init_order,
                }
                Some(local_metadata) => {
                    if let Err(e) = self
-                        .load_local_timeline(
-                            timeline_id,
-                            local_metadata,
-                            init_order.as_ref(),
-                            ctx,
-                            true,
-                        )
+                        .load_local_timeline(timeline_id, local_metadata, ctx, true)
                        .await
                    {
                        match e {
@@ -1341,12 +1317,11 @@ init_order,
    /// Subroutine of `load_tenant`, to load an individual timeline
    ///
    /// NB: The parent is assumed to be already loaded!
-    #[instrument(skip(self, local_metadata, init_order, ctx))]
+    #[instrument(skip(self, local_metadata, ctx))]
    async fn load_local_timeline(
        self: &Arc<Self>,
        timeline_id: TimelineId,
        local_metadata: TimelineMetadata,
-        init_order: Option<&InitializationOrder>,
        ctx: &RequestContext,
        found_delete_mark: bool,
    ) -> Result<(), LoadLocalTimelineError> {
@@ -1363,7 +1338,6 @@ init_order,
                &local_metadata,
                None,
                self.deletion_queue_client.clone(),
-                init_order,
            )
            .await
            .context("resume deletion")
@@ -1380,17 +1354,9 @@ init_order,
            None
        };

-        self.timeline_init_and_sync(
-            timeline_id,
-            resources,
-            None,
-            local_metadata,
-            ancestor,
-            init_order,
-            ctx,
-        )
-        .await
-        .map_err(LoadLocalTimelineError::Load)
+        self.timeline_init_and_sync(timeline_id, resources, None, local_metadata, ancestor, ctx)
+            .await
+            .map_err(LoadLocalTimelineError::Load)
    }

    pub fn tenant_id(&self) -> TenantId {
@@ -2281,7 +2247,6 @@ impl Tenant {
        new_metadata: &TimelineMetadata,
        ancestor: Option<Arc<Timeline>>,
        resources: TimelineResources,
-        init_order: Option<&InitializationOrder>,
        cause: CreateTimelineCause,
    ) -> anyhow::Result<Arc<Timeline>> {
        let state = match cause {
@@ -2296,9 +2261,6 @@ impl Tenant {
            CreateTimelineCause::Delete => TimelineState::Stopping,
        };

-        let initial_logical_size_can_start = init_order.map(|x| &x.initial_logical_size_can_start);
-        let initial_logical_size_attempt = init_order.map(|x| &x.initial_logical_size_attempt);
-
        let pg_version = new_metadata.pg_version();

        let timeline = Timeline::new(
@@ -2312,8 +2274,6 @@ impl Tenant {
            Arc::clone(&self.walredo_mgr),
            resources,
            pg_version,
-            initial_logical_size_can_start.cloned(),
-            initial_logical_size_attempt.cloned().flatten(),
            state,
            self.cancel.child_token(),
        );
@@ -3104,7 +3064,6 @@ impl Tenant {
                new_metadata,
                ancestor,
                resources,
-                None,
                CreateTimelineCause::Load,
            )
            .context("Failed to create timeline data structure")?;
@@ -3703,7 +3662,7 @@ pub(crate) mod harness {
            match mode {
                LoadMode::Local => {
                    tenant
-                        .load_local(None, ctx)
+                        .load_local(ctx)
                        .instrument(info_span!("try_load", tenant_id=%self.tenant_id))
                        .await?;
                }
@@ -3713,7 +3672,7 @@ pub(crate) mod harness {
                        .instrument(info_span!("try_load_preload", tenant_id=%self.tenant_id))
                        .await?;
                    tenant
-                        .attach(None, Some(preload), ctx)
+                        .attach(Some(preload), ctx)
                        .instrument(info_span!("try_load", tenant_id=%self.tenant_id))
                        .await?;
                }
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -18,7 +18,6 @@ use crate::{
    context::RequestContext,
    task_mgr::{self, TaskKind},
    tenant::mgr::{TenantSlot, TenantsMapRemoveResult},
-    InitializationOrder,
 };

 use super::{
@@ -391,7 +390,6 @@ impl DeleteTenantFlow {
        tenant: &Arc<Tenant>,
        preload: Option<TenantPreload>,
        tenants: &'static std::sync::RwLock<TenantsMap>,
-        init_order: Option<InitializationOrder>,
        ctx: &RequestContext,
    ) -> Result<(), DeleteTenantError> {
        let (_, progress) = completion::channel();
@@ -401,10 +399,7 @@ impl DeleteTenantFlow {
            .await
            .expect("cant be stopping or broken");

-        tenant
-            .attach(init_order, preload, ctx)
-            .await
-            .context("attach")?;
+        tenant.attach(preload, ctx).await.context("attach")?;

        Self::background(
            guard,
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -298,13 +298,6 @@ pub struct Timeline {

    eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,

-    /// Barrier to wait before doing initial logical size calculation. Used only during startup.
-    initial_logical_size_can_start: Option<completion::Barrier>,
-
-    /// Completion shared between all timelines loaded during startup; used to delay heavier
-    /// background tasks until some logical sizes have been calculated.
-    initial_logical_size_attempt: Mutex<Option<completion::Completion>>,
-
    /// Load or creation time information about the disk_consistent_lsn and when the loading
    /// happened. Used for consumption metrics.
    pub(crate) loaded_at: (Lsn, SystemTime),
@@ -1012,17 +1005,6 @@ impl Timeline {
                error!("Not activating a Stopping timeline");
            }
            (_, new_state) => {
-                if matches!(
-                    new_state,
-                    TimelineState::Stopping | TimelineState::Broken { .. }
-                ) {
-                    // drop the completion guard, if any; it might be holding off the completion
-                    // forever needlessly
-                    self.initial_logical_size_attempt
-                        .lock()
-                        .unwrap_or_else(|e| e.into_inner())
-                        .take();
-                }
                self.state.send_replace(new_state);
            }
        }
@@ -1340,8 +1322,6 @@ impl Timeline {
        walredo_mgr: Arc<super::WalRedoManager>,
        resources: TimelineResources,
        pg_version: u32,
-        initial_logical_size_can_start: Option<completion::Barrier>,
-        initial_logical_size_attempt: Option<completion::Completion>,
        state: TimelineState,
        cancel: CancellationToken,
    ) -> Arc<Self> {
@@ -1441,8 +1421,6 @@ impl Timeline {
                ),
                delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTimelineFlow::default())),

-                initial_logical_size_can_start,
-                initial_logical_size_attempt: Mutex::new(initial_logical_size_attempt),
                cancel,
                gate: Gate::new(format!("Timeline<{tenant_id}/{timeline_id}>")),

--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -24,7 +24,6 @@ use crate::{
        },
        CreateTimelineCause, DeleteTimelineError, Tenant,
    },
-    InitializationOrder,
 };

 use super::{Timeline, TimelineResources};
@@ -405,7 +404,6 @@ impl DeleteTimelineFlow {
        local_metadata: &TimelineMetadata,
        remote_client: Option<RemoteTimelineClient>,
        deletion_queue_client: DeletionQueueClient,
-        init_order: Option<&InitializationOrder>,
    ) -> anyhow::Result<()> {
        // Note: here we even skip populating layer map. Timeline is essentially uninitialized.
        // RemoteTimelineClient is the only functioning part.
@@ -418,7 +416,6 @@ impl DeleteTimelineFlow {
                    remote_client,
                    deletion_queue_client,
                },
-                init_order,
                // Important. We dont pass ancestor above because it can be missing.
                // Thus we need to skip the validation here.
                CreateTimelineCause::Delete,