From c66444ea1538349d13ab5e87bca880394434004b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 11 Apr 2025 16:10:27 +0200 Subject: [PATCH] Add timeline_import http endpoint (#11484) The added `timeline_import` endpoint allows us to migrate safekeeper timelines from control plane managed to storcon managed. Part of #9011 --- libs/pageserver_api/src/controller_api.rs | 12 +++++- storage_controller/src/http.rs | 42 +++++++++++++++++++ .../src/service/safekeeper_service.rs | 30 ++++++++++++- 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 3cb62f9d18..91f9c03ba4 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -7,7 +7,8 @@ use std::time::{Duration, Instant}; /// API (`/control/v1` prefix). Implemented by the server /// in [`storage_controller::http`] use serde::{Deserialize, Serialize}; -use utils::id::{NodeId, TenantId}; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; use crate::models::{PageserverUtilization, ShardParameters, TenantConfig}; use crate::shard::{ShardStripeSize, TenantShardId}; @@ -499,6 +500,15 @@ pub struct SafekeeperSchedulingPolicyRequest { pub scheduling_policy: SkSchedulingPolicy, } +/// Import request for safekeeper timelines. 
+#[derive(Serialize, Deserialize, Clone)] +pub struct TimelineImportRequest { + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub start_lsn: Lsn, + pub sk_set: Vec, +} + #[cfg(test)] mod test { use serde_json; diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 4f3613b687..fb4530d0d2 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -22,6 +22,7 @@ use pageserver_api::controller_api::{ MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse, NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, SafekeeperSchedulingPolicyRequest, ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest, + TimelineImportRequest, }; use pageserver_api::models::{ DetachBehavior, LsnLeaseRequest, TenantConfigPatchRequest, TenantConfigRequest, @@ -1286,6 +1287,37 @@ async fn handle_tenant_import(req: Request) -> Result, ApiE ) } +async fn handle_timeline_import(req: Request) -> Result, ApiError> { + let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; + let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; + check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; + + let mut req = match maybe_forward(req).await { + ForwardOutcome::Forwarded(res) => { + return res; + } + ForwardOutcome::NotForwarded(req) => req, + }; + + let import_req = json_request::(&mut req).await?; + + let state = get_state(&req); + + if import_req.tenant_id != tenant_id || import_req.timeline_id != timeline_id { + return Err(ApiError::BadRequest(anyhow::anyhow!( + "tenant id or timeline id mismatch: url={tenant_id}/{timeline_id}, body={}/{}", + import_req.tenant_id, + import_req.timeline_id + ))); + } + + json_response( + StatusCode::OK, + state.service.timeline_import(import_req).await?, + ) +} + async fn handle_tenants_dump(req: Request) -> Result, ApiError> { check_permissions(&req, 
Scope::Admin)?; @@ -1959,6 +1991,16 @@ pub fn make_router( RequestName("debug_v1_tenant_locate"), ) }) + .post( + "/debug/v1/tenant/:tenant_id/timeline/:timeline_id/import", + |r| { + named_request_span( + r, + handle_timeline_import, + RequestName("debug_v1_timeline_import"), + ) + }, + ) .get("/debug/v1/scheduler", |r| { named_request_span(r, handle_scheduler_dump, RequestName("debug_v1_scheduler")) }) diff --git a/storage_controller/src/service/safekeeper_service.rs b/storage_controller/src/service/safekeeper_service.rs index 099d0305ba..a23b9a4a02 100644 --- a/storage_controller/src/service/safekeeper_service.rs +++ b/storage_controller/src/service/safekeeper_service.rs @@ -12,13 +12,16 @@ use crate::persistence::{ use crate::safekeeper::Safekeeper; use anyhow::Context; use http_utils::error::ApiError; -use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy}; +use pageserver_api::controller_api::{ + SafekeeperDescribeResponse, SkSchedulingPolicy, TimelineImportRequest, +}; use pageserver_api::models::{self, SafekeeperInfo, SafekeepersInfo, TimelineInfo}; use safekeeper_api::membership::{MemberSet, SafekeeperId}; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use utils::id::{NodeId, TenantId, TimelineId}; use utils::logging::SecretString; +use utils::lsn::Lsn; use super::Service; @@ -298,6 +301,31 @@ impl Service { timeline_id, }) } + + /// Directly insert the timeline into the database without reconciling it with safekeepers. + /// + /// Useful if the timeline already exists on the specified safekeepers, + /// but we want to make it storage controller managed. 
+ pub(crate) async fn timeline_import(&self, req: TimelineImportRequest) -> Result<(), ApiError> { + let persistence = TimelinePersistence { + tenant_id: req.tenant_id.to_string(), + timeline_id: req.timeline_id.to_string(), + start_lsn: Lsn::INVALID.into(), + generation: 1, + sk_set: req.sk_set.iter().map(|sk_id| sk_id.0 as i64).collect(), + new_sk_set: None, + cplane_notified_generation: 1, + deleted_at: None, + }; + let inserted = self.persistence.insert_timeline(persistence).await?; + if inserted { + tracing::info!("imported timeline into db"); + } else { + tracing::info!("didn't import timeline into db, as it is already present in db"); + } + Ok(()) + } + /// Perform timeline deletion on safekeepers. Will return success: we persist the deletion into the reconciler. pub(super) async fn tenant_timeline_delete_safekeepers( self: &Arc,