From 47d47000dfd5cdbe4425eaae43f955974f1595d7 Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Mon, 31 Mar 2025 15:16:42 -0400
Subject: [PATCH] fix(pageserver): passthrough lsn lease in storcon API
 (#11386)

## Problem

Part of https://github.com/neondatabase/cloud/issues/23667

## Summary of changes

The lsn_lease API can currently only be called on pageservers directly. This
patch adds a passthrough endpoint on the storage controller (storcon) that
forwards the lease request to all attached shards of the tenant.

Signed-off-by: Alex Chi Z
---
 storage_controller/src/http.rs              | 43 +++++++++++-
 storage_controller/src/pageserver_client.rs | 19 +++++-
 storage_controller/src/service.rs           | 75 ++++++++++++++++++++-
 test_runner/regress/test_tenant_size.py     | 25 +++++++
 4 files changed, 156 insertions(+), 6 deletions(-)

diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs
index a5e00e18e8..79332ea304 100644
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -24,9 +24,9 @@ use pageserver_api::controller_api::{
     ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest,
 };
 use pageserver_api::models::{
-    DetachBehavior, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
-    TenantShardSplitRequest, TenantTimeTravelRequest, TimelineArchivalConfigRequest,
-    TimelineCreateRequest,
+    DetachBehavior, LsnLeaseRequest, TenantConfigPatchRequest, TenantConfigRequest,
+    TenantLocationConfigRequest, TenantShardSplitRequest, TenantTimeTravelRequest,
+    TimelineArchivalConfigRequest, TimelineCreateRequest,
 };
 use pageserver_api::shard::TenantShardId;
 use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
@@ -582,6 +582,32 @@ async fn handle_tenant_timeline_download_heatmap_layers(
     json_response(StatusCode::OK, ())
 }
 
+async fn handle_tenant_timeline_lsn_lease(
+    service: Arc<Service>,
+    req: Request<Body>,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+
+    check_permissions(&req, Scope::PageServerApi)?;
+    maybe_rate_limit(&req, tenant_id).await;
+
+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
+    let lsn_lease_request = json_request::<LsnLeaseRequest>(&mut req).await?;
+
+    service
+        .tenant_timeline_lsn_lease(tenant_id, timeline_id, lsn_lease_request.lsn)
+        .await?;
+
+    json_response(StatusCode::OK, ())
+}
+
 // For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
 // and tenant/timeline IDs. Since we are proxying to arbitrary paths, we don't have routing templates to
 // compare to, so we can just filter out our well known ID format with regexes.
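The handler above parses an `LsnLeaseRequest` body, hands the lease to the
service layer, and replies `200 OK` with an empty body. As a minimal sketch of
how a client might exercise the passthrough endpoint — the host, port, token,
and helper name below are hypothetical; only the path and the `lsn` field come
from this patch:

```python
import requests

# Hypothetical storage controller address and auth token; adjust for your deployment.
STORCON = "http://127.0.0.1:1234"
HEADERS = {"Authorization": "Bearer <jwt-with-pageserverapi-scope>"}

def request_lsn_lease(tenant_id: str, timeline_id: str, lsn: str) -> None:
    # The body mirrors LsnLeaseRequest; an LSN is written in its usual
    # "hi/lo" hex text form, e.g. "0/15FA2F8".
    resp = requests.post(
        f"{STORCON}/v1/tenant/{tenant_id}/timeline/{timeline_id}/lsn_lease",
        json={"lsn": lsn},
        headers=HEADERS,
        timeout=30,
    )
    resp.raise_for_status()
```

The route itself is registered in the next hunk.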
@@ -2192,6 +2218,17 @@ pub fn make_router(
                 )
             },
         )
+        // LSN lease passthrough to all shards
+        .post(
+            "/v1/tenant/:tenant_id/timeline/:timeline_id/lsn_lease",
+            |r| {
+                tenant_service_handler(
+                    r,
+                    handle_tenant_timeline_lsn_lease,
+                    RequestName("v1_tenant_timeline_lsn_lease"),
+                )
+            },
+        )
         // Tenant detail GET passthrough to shard zero:
         .get("/v1/tenant/:tenant_id", |r| {
             tenant_service_handler(
diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs
index c6c21107f1..d14fc35b39 100644
--- a/storage_controller/src/pageserver_client.rs
+++ b/storage_controller/src/pageserver_client.rs
@@ -1,6 +1,6 @@
 use pageserver_api::models::detach_ancestor::AncestorDetached;
 use pageserver_api::models::{
-    DetachBehavior, LocationConfig, LocationConfigListResponse, PageserverUtilization,
+    DetachBehavior, LocationConfig, LocationConfigListResponse, LsnLease, PageserverUtilization,
     SecondaryProgress, TenantScanRemoteStorageResponse, TenantShardSplitRequest,
     TenantShardSplitResponse, TenantWaitLsnRequest, TimelineArchivalConfigRequest,
     TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
@@ -10,6 +10,7 @@ use pageserver_client::BlockUnblock;
 use pageserver_client::mgmt_api::{Client, Result};
 use reqwest::StatusCode;
 use utils::id::{NodeId, TenantId, TimelineId};
+use utils::lsn::Lsn;
 
 /// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
 /// controller to collect metrics in a non-intrusive manner.
@@ -195,6 +196,22 @@ impl PageserverClient {
         )
     }
 
+    pub(crate) async fn timeline_lease_lsn(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+        lsn: Lsn,
+    ) -> Result<LsnLease> {
+        measured_request!(
+            "timeline_lease_lsn",
+            crate::metrics::Method::Post,
+            &self.node_id_label,
+            self.inner
+                .timeline_init_lsn_lease(tenant_shard_id, timeline_id, lsn)
+                .await
+        )
+    }
+
     pub(crate) async fn tenant_shard_split(
         &self,
         tenant_shard_id: TenantShardId,
diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs
index d3c8cad0bd..9f308d9a0b 100644
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -12,7 +12,7 @@ use std::ops::{Deref, DerefMut};
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;
-use std::time::{Duration, Instant};
+use std::time::{Duration, Instant, SystemTime};
 
 use anyhow::Context;
 use context_iterator::TenantShardContextIterator;
@@ -34,7 +34,7 @@ use pageserver_api::controller_api::{
     TenantShardMigrateRequest, TenantShardMigrateResponse,
 };
 use pageserver_api::models::{
-    self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode,
+    self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,
     PageserverUtilization, SecondaryProgress, ShardParameters, TenantConfig,
     TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
     TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest,
@@ -60,6 +60,7 @@ use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
 use utils::completion::Barrier;
 use utils::generation::Generation;
 use utils::id::{NodeId, TenantId, TimelineId};
+use utils::lsn::Lsn;
 use utils::sync::gate::Gate;
 use utils::{failpoint_support, pausable_failpoint};
 
@@ -152,6 +153,7 @@ enum TenantOperations {
     TimelineGcBlockUnblock,
     DropDetached,
     DownloadHeatmapLayers,
+    TimelineLsnLease,
 }
 
 #[derive(Clone, strum_macros::Display)]
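The service method added in the next hunk fans the lease request out to every
attached shard and folds the per-shard responses into a single lease, keeping
the earliest `valid_until`: the tenant-wide lease is only as strong as the
shard lease that expires first. A standalone sketch of that fold, using plain
datetimes as simplified stand-in types (this is not the storcon code itself):

```python
from datetime import datetime, timedelta, timezone

def combine_lease_expiries(per_shard: list[datetime]) -> datetime:
    # min() mirrors the fold below: once any one shard's lease lapses, GC on
    # that shard may advance past the leased LSN, so the earliest expiry
    # bounds the lease for the tenant as a whole.
    return min(per_shard)

now = datetime.now(timezone.utc)
print(combine_lease_expiries([now + timedelta(minutes=10), now + timedelta(minutes=3)]))
# -> the 3-minute expiry wins
```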
@@ -3987,6 +3989,75 @@ impl Service {
         Ok(())
     }
 
+    pub(crate) async fn tenant_timeline_lsn_lease(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        lsn: Lsn,
+    ) -> Result<LsnLease, ApiError> {
+        let _tenant_lock = trace_shared_lock(
+            &self.tenant_op_locks,
+            tenant_id,
+            TenantOperations::TimelineLsnLease,
+        )
+        .await;
+
+        let targets = {
+            let locked = self.inner.read().unwrap();
+            let mut targets = Vec::new();
+
+            // If the request got an unsharded tenant id, then apply
+            // the operation to all shards. Otherwise, apply it to a specific shard.
+            let shards_range = TenantShardId::tenant_range(tenant_id);
+
+            for (tenant_shard_id, shard) in locked.tenants.range(shards_range) {
+                if let Some(node_id) = shard.intent.get_attached() {
+                    let node = locked
+                        .nodes
+                        .get(node_id)
+                        .expect("Pageservers may not be deleted while referenced");
+
+                    targets.push((*tenant_shard_id, node.clone()));
+                }
+            }
+            targets
+        };
+
+        let res = self
+            .tenant_for_shards_api(
+                targets,
+                |tenant_shard_id, client| async move {
+                    client
+                        .timeline_lease_lsn(tenant_shard_id, timeline_id, lsn)
+                        .await
+                },
+                1,
+                1,
+                SHORT_RECONCILE_TIMEOUT,
+                &self.cancel,
+            )
+            .await;
+
+        let mut valid_until = None;
+        for r in res {
+            match r {
+                Ok(lease) => {
+                    if let Some(ref mut valid_until) = valid_until {
+                        *valid_until = std::cmp::min(*valid_until, lease.valid_until);
+                    } else {
+                        valid_until = Some(lease.valid_until);
+                    }
+                }
+                Err(e) => {
+                    return Err(ApiError::InternalServerError(anyhow::anyhow!(e)));
+                }
+            }
+        }
+        Ok(LsnLease {
+            valid_until: valid_until.unwrap_or_else(SystemTime::now),
+        })
+    }
+
     pub(crate) async fn tenant_timeline_download_heatmap_layers(
         &self,
         tenant_shard_id: TenantShardId,
diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py
index a50a1beed6..a9df5f2d49 100644
--- a/test_runner/regress/test_tenant_size.py
+++ b/test_runner/regress/test_tenant_size.py
@@ -757,6 +757,31 @@ def test_lsn_lease_size(neon_env_builder: NeonEnvBuilder, test_output_dir: Path,
     env.stop(immediate=True)
 
 
+def test_lsn_lease_storcon(neon_env_builder: NeonEnvBuilder):
+    conf = {
+        "pitr_interval": "0s",
+        "gc_period": "0s",
+        "compaction_period": "0s",
+    }
+    env = neon_env_builder.init_start(initial_tenant_conf=conf)
+    with env.endpoints.create_start(
+        "main",
+    ) as ep:
+        with ep.cursor() as cur:
+            cur.execute(
+                "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)"
+            )
+        last_flush_lsn = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
+    env.storage_controller.pageserver_api().timeline_lsn_lease(
+        env.initial_tenant, env.initial_timeline, last_flush_lsn
+    )
+    env.storage_controller.tenant_shard_split(env.initial_tenant, 8)
+    # TODO: do we preserve LSN leases across shard splits?
+    env.storage_controller.pageserver_api().timeline_lsn_lease(
+        env.initial_tenant, env.initial_timeline, last_flush_lsn
+    )
+
+
 def insert_with_action(
     env: NeonEnv,
     tenant: TenantId,
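One usage note: the HTTP handler returns an empty `200 OK` rather than the
combined lease, so a caller that needs to hold an LSN across a long-running
operation would simply re-request the lease periodically, well inside the
configured lease length. A sketch of such a renewal loop, reusing the
hypothetical `request_lsn_lease` helper from the earlier sketch (the renewal
interval is an assumption, not something this patch prescribes):

```python
import time

def hold_lsn_lease(tenant_id: str, timeline_id: str, lsn: str, renew_every_s: float = 60.0) -> None:
    # Each request re-acquires the lease on every attached shard via the
    # storcon passthrough; renewing faster than the lease duration keeps
    # the LSN pinned for as long as the loop runs.
    while True:
        request_lsn_lease(tenant_id, timeline_id, lsn)
        time.sleep(renew_every_s)
```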