fix(pageserver): passthrough lsn lease in storcon API (#11386)

## Problem

part of https://github.com/neondatabase/cloud/issues/23667

## Summary of changes

lsn_lease API can only be used on pageservers. This patch enables
storcon passthrough.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-03-31 15:16:42 -04:00
committed by GitHub
parent e5b95bc9dc
commit 47d47000df
4 changed files with 156 additions and 6 deletions

View File

@@ -24,9 +24,9 @@ use pageserver_api::controller_api::{
ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest,
};
use pageserver_api::models::{
DetachBehavior, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
TenantShardSplitRequest, TenantTimeTravelRequest, TimelineArchivalConfigRequest,
TimelineCreateRequest,
DetachBehavior, LsnLeaseRequest, TenantConfigPatchRequest, TenantConfigRequest,
TenantLocationConfigRequest, TenantShardSplitRequest, TenantTimeTravelRequest,
TimelineArchivalConfigRequest, TimelineCreateRequest,
};
use pageserver_api::shard::TenantShardId;
use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
@@ -582,6 +582,32 @@ async fn handle_tenant_timeline_download_heatmap_layers(
json_response(StatusCode::OK, ())
}
async fn handle_tenant_timeline_lsn_lease(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
return res;
}
ForwardOutcome::NotForwarded(req) => req,
};
let lsn_lease_request = json_request::<LsnLeaseRequest>(&mut req).await?;
service
.tenant_timeline_lsn_lease(tenant_id, timeline_id, lsn_lease_request.lsn)
.await?;
json_response(StatusCode::OK, ())
}
// For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
// and tenant/timeline IDs. Since we are proxying to arbitrary paths, we don't have routing templates to
// compare to, so we can just filter out our well known ID format with regexes.
@@ -2192,6 +2218,17 @@ pub fn make_router(
)
},
)
// LSN lease passthrough to all shards
.post(
"/v1/tenant/:tenant_id/timeline/:timeline_id/lsn_lease",
|r| {
tenant_service_handler(
r,
handle_tenant_timeline_lsn_lease,
RequestName("v1_tenant_timeline_lsn_lease"),
)
},
)
// Tenant detail GET passthrough to shard zero:
.get("/v1/tenant/:tenant_id", |r| {
tenant_service_handler(

View File

@@ -1,6 +1,6 @@
use pageserver_api::models::detach_ancestor::AncestorDetached;
use pageserver_api::models::{
DetachBehavior, LocationConfig, LocationConfigListResponse, PageserverUtilization,
DetachBehavior, LocationConfig, LocationConfigListResponse, LsnLease, PageserverUtilization,
SecondaryProgress, TenantScanRemoteStorageResponse, TenantShardSplitRequest,
TenantShardSplitResponse, TenantWaitLsnRequest, TimelineArchivalConfigRequest,
TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
@@ -10,6 +10,7 @@ use pageserver_client::BlockUnblock;
use pageserver_client::mgmt_api::{Client, Result};
use reqwest::StatusCode;
use utils::id::{NodeId, TenantId, TimelineId};
use utils::lsn::Lsn;
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
/// controller to collect metrics in a non-intrusive manner.
@@ -195,6 +196,22 @@ impl PageserverClient {
)
}
pub(crate) async fn timeline_lease_lsn(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<LsnLease> {
measured_request!(
"timeline_lease_lsn",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner
.timeline_init_lsn_lease(tenant_shard_id, timeline_id, lsn)
.await
)
}
pub(crate) async fn tenant_shard_split(
&self,
tenant_shard_id: TenantShardId,

View File

@@ -12,7 +12,7 @@ use std::ops::{Deref, DerefMut};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::time::{Duration, Instant, SystemTime};
use anyhow::Context;
use context_iterator::TenantShardContextIterator;
@@ -34,7 +34,7 @@ use pageserver_api::controller_api::{
TenantShardMigrateRequest, TenantShardMigrateResponse,
};
use pageserver_api::models::{
self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode,
self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,
PageserverUtilization, SecondaryProgress, ShardParameters, TenantConfig,
TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest,
TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest,
@@ -60,6 +60,7 @@ use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
use utils::completion::Barrier;
use utils::generation::Generation;
use utils::id::{NodeId, TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::sync::gate::Gate;
use utils::{failpoint_support, pausable_failpoint};
@@ -152,6 +153,7 @@ enum TenantOperations {
TimelineGcBlockUnblock,
DropDetached,
DownloadHeatmapLayers,
TimelineLsnLease,
}
#[derive(Clone, strum_macros::Display)]
@@ -3987,6 +3989,75 @@ impl Service {
Ok(())
}
pub(crate) async fn tenant_timeline_lsn_lease(
&self,
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<LsnLease, ApiError> {
let _tenant_lock = trace_shared_lock(
&self.tenant_op_locks,
tenant_id,
TenantOperations::TimelineLsnLease,
)
.await;
let targets = {
let locked = self.inner.read().unwrap();
let mut targets = Vec::new();
// If the request got an unsharded tenant id, then apply
// the operation to all shards. Otherwise, apply it to a specific shard.
let shards_range = TenantShardId::tenant_range(tenant_id);
for (tenant_shard_id, shard) in locked.tenants.range(shards_range) {
if let Some(node_id) = shard.intent.get_attached() {
let node = locked
.nodes
.get(node_id)
.expect("Pageservers may not be deleted while referenced");
targets.push((*tenant_shard_id, node.clone()));
}
}
targets
};
let res = self
.tenant_for_shards_api(
targets,
|tenant_shard_id, client| async move {
client
.timeline_lease_lsn(tenant_shard_id, timeline_id, lsn)
.await
},
1,
1,
SHORT_RECONCILE_TIMEOUT,
&self.cancel,
)
.await;
let mut valid_until = None;
for r in res {
match r {
Ok(lease) => {
if let Some(ref mut valid_until) = valid_until {
*valid_until = std::cmp::min(*valid_until, lease.valid_until);
} else {
valid_until = Some(lease.valid_until);
}
}
Err(e) => {
return Err(ApiError::InternalServerError(anyhow::anyhow!(e)));
}
}
}
Ok(LsnLease {
valid_until: valid_until.unwrap_or_else(SystemTime::now),
})
}
pub(crate) async fn tenant_timeline_download_heatmap_layers(
&self,
tenant_shard_id: TenantShardId,