mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-23 08:00:37 +00:00
Merge remote-tracking branch 'origin/main' into HEAD
This commit is contained in:
@@ -850,6 +850,31 @@ async fn handle_tenant_describe(
|
||||
json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
async fn handle_tenant_timeline_describe(
|
||||
service: Arc<Service>,
|
||||
req: Request<Body>,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permissions(&req, Scope::Scrubber)?;
|
||||
|
||||
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
|
||||
match maybe_forward(req).await {
|
||||
ForwardOutcome::Forwarded(res) => {
|
||||
return res;
|
||||
}
|
||||
ForwardOutcome::NotForwarded(_req) => {}
|
||||
};
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
service
|
||||
.tenant_timeline_describe(tenant_id, timeline_id)
|
||||
.await?,
|
||||
)
|
||||
}
|
||||
/* END_HADRON */
|
||||
|
||||
async fn handle_tenant_list(
|
||||
service: Arc<Service>,
|
||||
req: Request<Body>,
|
||||
@@ -2480,6 +2505,13 @@ pub fn make_router(
|
||||
)
|
||||
})
|
||||
// Timeline operations
|
||||
.get("/control/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
tenant_service_handler(
|
||||
r,
|
||||
handle_tenant_timeline_describe,
|
||||
RequestName("v1_tenant_timeline_describe"),
|
||||
)
|
||||
})
|
||||
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
tenant_service_handler(
|
||||
r,
|
||||
|
||||
@@ -222,6 +222,9 @@ struct Cli {
|
||||
/// Primarily useful for testing to reduce test execution time.
|
||||
#[arg(long, default_value = "false", action=ArgAction::Set)]
|
||||
kick_secondary_downloads: bool,
|
||||
|
||||
#[arg(long)]
|
||||
shard_split_request_timeout: Option<humantime::Duration>,
|
||||
}
|
||||
|
||||
enum StrictMode {
|
||||
@@ -470,6 +473,10 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
timeline_safekeeper_count: args.timeline_safekeeper_count,
|
||||
posthog_config: posthog_config.clone(),
|
||||
kick_secondary_downloads: args.kick_secondary_downloads,
|
||||
shard_split_request_timeout: args
|
||||
.shard_split_request_timeout
|
||||
.map(humantime::Duration::into)
|
||||
.unwrap_or(Duration::MAX),
|
||||
};
|
||||
|
||||
// Validate that we can connect to the database
|
||||
|
||||
@@ -86,6 +86,23 @@ impl PageserverClient {
|
||||
)
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
pub(crate) async fn tenant_timeline_describe(
|
||||
&self,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
) -> Result<TimelineInfo> {
|
||||
measured_request!(
|
||||
"tenant_timeline_describe",
|
||||
crate::metrics::Method::Get,
|
||||
&self.node_id_label,
|
||||
self.inner
|
||||
.tenant_timeline_describe(tenant_shard_id, timeline_id,)
|
||||
.await
|
||||
)
|
||||
}
|
||||
/* END_HADRON */
|
||||
|
||||
pub(crate) async fn tenant_scan_remote_storage(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
|
||||
@@ -32,7 +32,7 @@ use pageserver_api::controller_api::{
|
||||
ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
|
||||
SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse, TenantCreateResponseShard,
|
||||
TenantDescribeResponse, TenantDescribeResponseShard, TenantLocateResponse, TenantPolicyRequest,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||
TenantShardMigrateRequest, TenantShardMigrateResponse, TenantTimelineDescribeResponse,
|
||||
};
|
||||
use pageserver_api::models::{
|
||||
self, DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease,
|
||||
@@ -60,6 +60,7 @@ use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
|
||||
use utils::completion::Barrier;
|
||||
use utils::env;
|
||||
use utils::generation::Generation;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
@@ -483,6 +484,9 @@ pub struct Config {
|
||||
|
||||
/// When set, actively checks and initiates heatmap downloads/uploads.
|
||||
pub kick_secondary_downloads: bool,
|
||||
|
||||
/// Timeout used for HTTP client of split requests. [`Duration::MAX`] if None.
|
||||
pub shard_split_request_timeout: Duration,
|
||||
}
|
||||
|
||||
impl From<DatabaseError> for ApiError {
|
||||
@@ -5206,6 +5210,9 @@ impl Service {
|
||||
match res {
|
||||
Ok(ok) => Ok(ok),
|
||||
Err(mgmt_api::Error::ApiError(StatusCode::CONFLICT, _)) => Ok(StatusCode::CONFLICT),
|
||||
Err(mgmt_api::Error::ApiError(StatusCode::PRECONDITION_FAILED, msg)) if msg.contains("Requested tenant is missing") => {
|
||||
Err(ApiError::ResourceUnavailable("Tenant migration in progress".into()))
|
||||
},
|
||||
Err(mgmt_api::Error::ApiError(StatusCode::SERVICE_UNAVAILABLE, msg)) => Err(ApiError::ResourceUnavailable(msg.into())),
|
||||
Err(e) => {
|
||||
Err(
|
||||
@@ -5486,6 +5493,92 @@ impl Service {
|
||||
.ok_or_else(|| ApiError::NotFound(anyhow::anyhow!("Tenant {tenant_id} not found").into()))
|
||||
}
|
||||
|
||||
/* BEGIN_HADRON */
|
||||
pub(crate) async fn tenant_timeline_describe(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
) -> Result<TenantTimelineDescribeResponse, ApiError> {
|
||||
self.tenant_remote_mutation(tenant_id, |locations| async move {
|
||||
if locations.0.is_empty() {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Tenant not found").into(),
|
||||
));
|
||||
};
|
||||
|
||||
let locations: Vec<(TenantShardId, Node)> = locations
|
||||
.0
|
||||
.iter()
|
||||
.map(|t| (*t.0, t.1.latest.node.clone()))
|
||||
.collect();
|
||||
let mut futs = FuturesUnordered::new();
|
||||
|
||||
for (shard_id, node) in locations {
|
||||
futs.push({
|
||||
async move {
|
||||
let result = node
|
||||
.with_client_retries(
|
||||
|client| async move {
|
||||
client
|
||||
.tenant_timeline_describe(&shard_id, &timeline_id)
|
||||
.await
|
||||
},
|
||||
&self.http_client,
|
||||
&self.config.pageserver_jwt_token,
|
||||
3,
|
||||
3,
|
||||
Duration::from_secs(30),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
(result, shard_id, node.get_id())
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let mut results: Vec<TimelineInfo> = Vec::new();
|
||||
while let Some((result, tenant_shard_id, node_id)) = futs.next().await {
|
||||
match result {
|
||||
Some(Ok(timeline_info)) => results.push(timeline_info),
|
||||
Some(Err(e)) => {
|
||||
tracing::warn!(
|
||||
"Failed to describe tenant {} timeline {} for pageserver {}: {e}",
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
node_id,
|
||||
);
|
||||
return Err(ApiError::ResourceUnavailable(format!("{e}").into()));
|
||||
}
|
||||
None => return Err(ApiError::Cancelled),
|
||||
}
|
||||
}
|
||||
let mut image_consistent_lsn: Option<Lsn> = Some(Lsn::MAX);
|
||||
for timeline_info in &results {
|
||||
if let Some(tline_image_consistent_lsn) = timeline_info.image_consistent_lsn {
|
||||
image_consistent_lsn = Some(std::cmp::min(
|
||||
image_consistent_lsn.unwrap(),
|
||||
tline_image_consistent_lsn,
|
||||
));
|
||||
} else {
|
||||
tracing::warn!(
|
||||
"Timeline {} on shard {} does not have image consistent lsn",
|
||||
timeline_info.timeline_id,
|
||||
timeline_info.tenant_id
|
||||
);
|
||||
image_consistent_lsn = None;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(TenantTimelineDescribeResponse {
|
||||
shards: results,
|
||||
image_consistent_lsn,
|
||||
})
|
||||
})
|
||||
.await?
|
||||
}
|
||||
/* END_HADRON */
|
||||
|
||||
/// limit & offset are pagination parameters. Since we are walking an in-memory HashMap, `offset` does not
|
||||
/// avoid traversing data, it just avoid returning it. This is suitable for our purposes, since our in memory
|
||||
/// maps are small enough to traverse fast, our pagination is just to avoid serializing huge JSON responses
|
||||
@@ -6317,18 +6410,39 @@ impl Service {
|
||||
// TODO: issue split calls concurrently (this only matters once we're splitting
|
||||
// N>1 shards into M shards -- initially we're usually splitting 1 shard into N).
|
||||
|
||||
// HADRON: set a timeout for splitting individual shards on page servers.
|
||||
// Currently we do not perform any retry because it's not clear if page server can handle
|
||||
// partially split shards correctly.
|
||||
let shard_split_timeout =
|
||||
if let Some(env::DeploymentMode::Local) = env::get_deployment_mode() {
|
||||
Duration::from_secs(30)
|
||||
} else {
|
||||
self.config.shard_split_request_timeout
|
||||
};
|
||||
let mut http_client_builder = reqwest::ClientBuilder::new()
|
||||
.pool_max_idle_per_host(0)
|
||||
.timeout(shard_split_timeout);
|
||||
|
||||
for ssl_ca_cert in &self.config.ssl_ca_certs {
|
||||
http_client_builder = http_client_builder.add_root_certificate(ssl_ca_cert.clone());
|
||||
}
|
||||
let http_client = http_client_builder
|
||||
.build()
|
||||
.expect("Failed to construct HTTP client");
|
||||
for target in &targets {
|
||||
let ShardSplitTarget {
|
||||
parent_id,
|
||||
node,
|
||||
child_ids,
|
||||
} = target;
|
||||
|
||||
let client = PageserverClient::new(
|
||||
node.get_id(),
|
||||
self.http_client.clone(),
|
||||
http_client.clone(),
|
||||
node.base_url(),
|
||||
self.config.pageserver_jwt_token.as_deref(),
|
||||
);
|
||||
|
||||
let response = client
|
||||
.tenant_shard_split(
|
||||
*parent_id,
|
||||
|
||||
@@ -25,7 +25,8 @@ use pageserver_api::models::{SafekeeperInfo, SafekeepersInfo, TimelineInfo};
|
||||
use safekeeper_api::PgVersionId;
|
||||
use safekeeper_api::membership::{self, MemberSet, SafekeeperGeneration};
|
||||
use safekeeper_api::models::{
|
||||
PullTimelineRequest, TimelineMembershipSwitchRequest, TimelineMembershipSwitchResponse,
|
||||
PullTimelineRequest, TimelineLocateResponse, TimelineMembershipSwitchRequest,
|
||||
TimelineMembershipSwitchResponse,
|
||||
};
|
||||
use safekeeper_api::{INITIAL_TERM, Term};
|
||||
use safekeeper_client::mgmt_api;
|
||||
@@ -37,21 +38,14 @@ use utils::lsn::Lsn;
|
||||
|
||||
use super::Service;
|
||||
|
||||
#[derive(serde::Serialize, serde::Deserialize, Clone)]
|
||||
pub struct TimelineLocateResponse {
|
||||
pub generation: SafekeeperGeneration,
|
||||
pub sk_set: Vec<NodeId>,
|
||||
pub new_sk_set: Option<Vec<NodeId>>,
|
||||
}
|
||||
|
||||
impl Service {
|
||||
fn make_member_set(safekeepers: &[Safekeeper]) -> Result<MemberSet, ApiError> {
|
||||
fn make_member_set(safekeepers: &[Safekeeper]) -> Result<MemberSet, anyhow::Error> {
|
||||
let members = safekeepers
|
||||
.iter()
|
||||
.map(|sk| sk.get_safekeeper_id())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
MemberSet::new(members).map_err(ApiError::InternalServerError)
|
||||
MemberSet::new(members)
|
||||
}
|
||||
|
||||
fn get_safekeepers(&self, ids: &[i64]) -> Result<Vec<Safekeeper>, ApiError> {
|
||||
@@ -86,7 +80,7 @@ impl Service {
|
||||
) -> Result<Vec<NodeId>, ApiError> {
|
||||
let safekeepers = self.get_safekeepers(&timeline_persistence.sk_set)?;
|
||||
|
||||
let mset = Self::make_member_set(&safekeepers)?;
|
||||
let mset = Self::make_member_set(&safekeepers).map_err(ApiError::InternalServerError)?;
|
||||
let mconf = safekeeper_api::membership::Configuration::new(mset);
|
||||
|
||||
let req = safekeeper_api::models::TimelineCreateRequest {
|
||||
@@ -1111,6 +1105,26 @@ impl Service {
|
||||
}
|
||||
}
|
||||
|
||||
if new_sk_set.is_empty() {
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"new safekeeper set is empty"
|
||||
)));
|
||||
}
|
||||
|
||||
if new_sk_set.len() < self.config.timeline_safekeeper_count {
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"new safekeeper set must have at least {} safekeepers",
|
||||
self.config.timeline_safekeeper_count
|
||||
)));
|
||||
}
|
||||
|
||||
let new_sk_set_i64 = new_sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();
|
||||
let new_safekeepers = self.get_safekeepers(&new_sk_set_i64)?;
|
||||
// Construct new member set in advance to validate it.
|
||||
// E.g. validates that there is no duplicate safekeepers.
|
||||
let new_sk_member_set =
|
||||
Self::make_member_set(&new_safekeepers).map_err(ApiError::BadRequest)?;
|
||||
|
||||
// TODO(diko): per-tenant lock is too wide. Consider introducing per-timeline locks.
|
||||
let _tenant_lock = trace_shared_lock(
|
||||
&self.tenant_op_locks,
|
||||
@@ -1141,6 +1155,18 @@ impl Service {
|
||||
.map(|&id| NodeId(id as u64))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Validate that we are not migrating to a decomissioned safekeeper.
|
||||
for sk in new_safekeepers.iter() {
|
||||
if !cur_sk_set.contains(&sk.get_id())
|
||||
&& sk.scheduling_policy() == SkSchedulingPolicy::Decomissioned
|
||||
{
|
||||
return Err(ApiError::BadRequest(anyhow::anyhow!(
|
||||
"safekeeper {} is decomissioned",
|
||||
sk.get_id()
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
?cur_sk_set,
|
||||
?new_sk_set,
|
||||
@@ -1183,11 +1209,8 @@ impl Service {
|
||||
}
|
||||
|
||||
let cur_safekeepers = self.get_safekeepers(&timeline.sk_set)?;
|
||||
let cur_sk_member_set = Self::make_member_set(&cur_safekeepers)?;
|
||||
|
||||
let new_sk_set_i64 = new_sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();
|
||||
let new_safekeepers = self.get_safekeepers(&new_sk_set_i64)?;
|
||||
let new_sk_member_set = Self::make_member_set(&new_safekeepers)?;
|
||||
let cur_sk_member_set =
|
||||
Self::make_member_set(&cur_safekeepers).map_err(ApiError::InternalServerError)?;
|
||||
|
||||
let joint_config = membership::Configuration {
|
||||
generation,
|
||||
|
||||
Reference in New Issue
Block a user