pageserver: API updates for sharding (#6330)

The changes in this PR are superficial struct/API changes that serve as
enablers for #6251.

This is a spinoff from #6251:
- Various APIs + clients thereof take TenantShardId rather than TenantId
- The creation API gets a ShardParameters member, which may be used to
configure shard count and stripe size. This enables the attachment
service to present a "virtual pageserver" creation endpoint that creates
multiple shards.
- The attachment service will use tenant size information to drive shard
splitting. Make a version of `TenantHistorySize` that is usable for
decoding these API responses.
- ComputeSpec includes a shard stripe size.
This commit is contained in:
John Spray
2024-01-16 09:21:00 +00:00
committed by GitHub
parent 3f2187eb92
commit df9e9de541
14 changed files with 191 additions and 44 deletions

View File

@@ -288,6 +288,21 @@ async fn handle_inspect(mut req: Request<Body>) -> Result<Response<Body>, ApiErr
)
}
/// Handler registered for `POST /tenant/:tenant_id`.
///
/// NOTE(review): despite its name, this handler does not create anything. It decodes the
/// request body as an `InspectRequest`, looks the tenant up in the in-memory state, and
/// replies with an `InspectResponse` carrying the tenant's `(generation, pageserver)` pair
/// when the tenant is known and has a pageserver. The `:tenant_id` path parameter is not
/// read. Presumably a placeholder for the real creation logic; confirm.
///
/// # Errors
///
/// Propagates the error from `json_request` when the body cannot be decoded, and whatever
/// `json_response` returns when the reply cannot be built.
async fn handle_tenant_create(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    let inspect_req = json_request::<InspectRequest>(&mut req).await?;

    let state = get_state(&req).inner.clone();
    // Only a lookup is performed, so a shared lock is sufficient: taking the write lock
    // here would needlessly serialize this handler against every other reader.
    let locked = state.read().await;
    let attachment = locked
        .tenants
        .get(&inspect_req.tenant_id)
        .and_then(|s| s.pageserver.map(|ps| (s.generation, ps)));

    json_response(StatusCode::OK, InspectResponse { attachment })
}
fn make_router(persistent_state: PersistentState) -> RouterBuilder<hyper::Body, ApiError> {
endpoint::make_router()
.data(Arc::new(State::new(persistent_state)))
@@ -295,6 +310,9 @@ fn make_router(persistent_state: PersistentState) -> RouterBuilder<hyper::Body,
.post("/validate", |r| request_span(r, handle_validate))
.post("/attach-hook", |r| request_span(r, handle_attach_hook))
.post("/inspect", |r| request_span(r, handle_inspect))
.post("/tenant/:tenant_id", |r| {
request_span(r, handle_tenant_create)
})
}
#[tokio::main]

View File

@@ -16,6 +16,7 @@ use control_plane::safekeeper::SafekeeperNode;
use control_plane::tenant_migration::migrate_tenant;
use control_plane::{broker, local_env};
use pageserver_api::models::TimelineInfo;
use pageserver_api::shard::TenantShardId;
use pageserver_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
@@ -279,7 +280,7 @@ async fn get_timeline_infos(
tenant_id: &TenantId,
) -> Result<HashMap<TimelineId, TimelineInfo>> {
Ok(get_default_pageserver(env)
.timeline_list(tenant_id)
.timeline_list(&TenantShardId::unsharded(*tenant_id))
.await?
.into_iter()
.map(|timeline_info| (timeline_info.timeline_id, timeline_info))
@@ -490,7 +491,9 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
match timeline_match.subcommand() {
Some(("list", list_match)) => {
let tenant_id = get_tenant_id(list_match, env)?;
let timelines = pageserver.timeline_list(&tenant_id).await?;
let timelines = pageserver
.timeline_list(&TenantShardId::unsharded(tenant_id))
.await?;
print_timelines_tree(timelines, env.timeline_name_mappings())?;
}
Some(("create", create_match)) => {

View File

@@ -48,6 +48,7 @@ use anyhow::{anyhow, bail, Context, Result};
use compute_api::spec::RemoteExtSpec;
use nix::sys::signal::kill;
use nix::sys::signal::Signal;
use pageserver_api::models::ShardParameters;
use serde::{Deserialize, Serialize};
use utils::id::{NodeId, TenantId, TimelineId};
@@ -543,6 +544,7 @@ impl Endpoint {
storage_auth_token: auth_token.clone(),
remote_extensions,
pgbouncer_settings: None,
shard_stripe_size: Some(ShardParameters::DEFAULT_STRIPE_SIZE.0 as usize),
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;

View File

@@ -17,7 +17,7 @@ use std::time::Duration;
use anyhow::{bail, Context};
use camino::Utf8PathBuf;
use futures::SinkExt;
use pageserver_api::models::{self, LocationConfig, TenantInfo, TimelineInfo};
use pageserver_api::models::{self, LocationConfig, ShardParameters, TenantInfo, TimelineInfo};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api;
use postgres_backend::AuthType;
@@ -376,6 +376,7 @@ impl PageServerNode {
new_tenant_id: TenantShardId::unsharded(new_tenant_id),
generation,
config,
shard_parameters: ShardParameters::default(),
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -471,18 +472,21 @@ impl PageServerNode {
pub async fn location_config(
&self,
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
config: LocationConfig,
flush_ms: Option<Duration>,
) -> anyhow::Result<()> {
Ok(self
.http_client
.location_config(tenant_id, config, flush_ms)
.location_config(tenant_shard_id, config, flush_ms)
.await?)
}
pub async fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
Ok(self.http_client.list_timelines(*tenant_id).await?)
/// Lists the timelines of one tenant shard by calling `list_timelines` on the HTTP client.
///
/// # Errors
///
/// Any error returned by the HTTP client is converted into an `anyhow` error.
pub async fn timeline_list(
    &self,
    tenant_shard_id: &TenantShardId,
) -> anyhow::Result<Vec<TimelineInfo>> {
    let shard = *tenant_shard_id;
    let timelines = self.http_client.list_timelines(shard).await?;
    Ok(timelines)
}
pub async fn tenant_secondary_download(&self, tenant_id: &TenantShardId) -> anyhow::Result<()> {

View File

@@ -24,7 +24,9 @@ async fn get_lsns(
tenant_id: TenantId,
pageserver: &PageServerNode,
) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
let timelines = pageserver.timeline_list(&tenant_id).await?;
let timelines = pageserver
.timeline_list(&TenantShardId::unsharded(tenant_id))
.await?;
Ok(timelines
.into_iter()
.map(|t| (t.timeline_id, t.last_record_lsn))
@@ -120,7 +122,9 @@ pub async fn migrate_tenant(
.attach_hook(tenant_id, dest_ps.conf.id)
.await?;
let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
dest_ps.location_config(tenant_id, dest_conf, None).await?;
dest_ps
.location_config(TenantShardId::unsharded(tenant_id), dest_conf, None)
.await?;
println!("✅ Migration complete");
return Ok(());
}
@@ -130,7 +134,11 @@ pub async fn migrate_tenant(
let stale_conf =
build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None);
origin_ps
.location_config(tenant_id, stale_conf, Some(Duration::from_secs(10)))
.location_config(
TenantShardId::unsharded(tenant_id),
stale_conf,
Some(Duration::from_secs(10)),
)
.await?;
baseline_lsns = Some(get_lsns(tenant_id, &origin_ps).await?);
@@ -156,7 +164,9 @@ pub async fn migrate_tenant(
let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None);
println!("🔁 Attaching to pageserver {}", dest_ps.conf.id);
dest_ps.location_config(tenant_id, dest_conf, None).await?;
dest_ps
.location_config(TenantShardId::unsharded(tenant_id), dest_conf, None)
.await?;
if let Some(baseline) = baseline_lsns {
println!("🕑 Waiting for LSN to catch up...");
@@ -203,7 +213,7 @@ pub async fn migrate_tenant(
other_ps.conf.id
);
other_ps
.location_config(tenant_id, secondary_conf, None)
.location_config(TenantShardId::unsharded(tenant_id), secondary_conf, None)
.await?;
}
@@ -212,7 +222,9 @@ pub async fn migrate_tenant(
dest_ps.conf.id
);
let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
dest_ps.location_config(tenant_id, dest_conf, None).await?;
dest_ps
.location_config(TenantShardId::unsharded(tenant_id), dest_conf, None)
.await?;
println!("✅ Migration complete");

View File

@@ -75,6 +75,10 @@ pub struct ComputeSpec {
pub remote_extensions: Option<RemoteExtSpec>,
pub pgbouncer_settings: Option<HashMap<String, String>>,
// Stripe size for pageserver sharding, in pages
#[serde(default)]
pub shard_stripe_size: Option<usize>,
}
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.

View File

@@ -18,7 +18,10 @@ use utils::{
lsn::Lsn,
};
use crate::{reltag::RelTag, shard::TenantShardId};
use crate::{
reltag::RelTag,
shard::{ShardCount, ShardStripeSize, TenantShardId},
};
use anyhow::bail;
use bytes::{Buf, BufMut, Bytes, BytesMut};
@@ -188,6 +191,31 @@ pub struct TimelineCreateRequest {
pub pg_version: Option<u32>,
}
/// Parameters that apply to all shards in a tenant. Used during tenant creation.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct ShardParameters {
/// Number of shards in the tenant. `ShardCount(0)` is the value that
/// `ShardParameters::is_unsharded` treats as "unsharded".
pub count: ShardCount,
/// Stripe size shared by all shards.
/// NOTE(review): presumably expressed in pages; confirm against `ShardStripeSize`.
pub stripe_size: ShardStripeSize,
}
impl ShardParameters {
    /// Stripe size used by `ShardParameters::default()`; evaluates to `ShardStripeSize(32768)`.
    pub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);

    /// Returns `true` when `count` equals `ShardCount(0)`, i.e. the tenant is unsharded.
    pub fn is_unsharded(&self) -> bool {
        let unsharded = ShardCount(0);
        unsharded == self.count
    }
}
impl Default for ShardParameters {
    /// The default describes an unsharded tenant: a shard count of `ShardCount(0)` combined
    /// with `ShardParameters::DEFAULT_STRIPE_SIZE`.
    fn default() -> Self {
        let count = ShardCount(0);
        let stripe_size = Self::DEFAULT_STRIPE_SIZE;
        Self { count, stripe_size }
    }
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantCreateRequest {
@@ -195,6 +223,12 @@ pub struct TenantCreateRequest {
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub generation: Option<u32>,
// If omitted, create a single shard with TenantShardId::unsharded()
#[serde(default)]
#[serde(skip_serializing_if = "ShardParameters::is_unsharded")]
pub shard_parameters: ShardParameters,
#[serde(flatten)]
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
}
@@ -297,7 +331,7 @@ pub struct StatusResponse {
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantLocationConfigRequest {
pub tenant_id: TenantId,
pub tenant_id: TenantShardId,
#[serde(flatten)]
pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
}
@@ -660,6 +694,17 @@ pub struct PagestreamDbSizeResponse {
pub db_size: i64,
}
/// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
/// that require pageserver-internal types. It is sufficient to get the total size.
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantHistorySize {
/// Identifier of the tenant this size report describes.
pub id: TenantId,
/// Size is a mixture of WAL and logical size, so the unit is bytes.
///
/// Will be `None` if `?inputs_only=true` was given.
pub size: Option<u64>,
}
impl PagestreamFeMessage {
pub fn serialize(&self) -> Bytes {
let mut bytes = BytesMut::new();

View File

@@ -1,6 +1,9 @@
use std::{ops::RangeInclusive, str::FromStr};
use crate::key::{is_rel_block_key, Key};
use crate::{
key::{is_rel_block_key, Key},
models::ShardParameters,
};
use hex::FromHex;
use serde::{Deserialize, Serialize};
use thiserror;
@@ -403,6 +406,17 @@ impl ShardIdentity {
}
}
/// For use when creating ShardIdentity instances for new shards, where a creation request
/// specifies the ShardParameters that apply to all shards.
pub fn from_params(number: ShardNumber, params: &ShardParameters) -> Self {
Self {
number,
count: params.count,
layout: LAYOUT_V1,
stripe_size: params.stripe_size,
}
}
/// Returns `true` if this identity's layout is `LAYOUT_BROKEN`.
fn is_broken(&self) -> bool {
self.layout == LAYOUT_BROKEN
}

View File

@@ -1,5 +1,5 @@
use pageserver_api::{models::*, shard::TenantShardId};
use reqwest::{IntoUrl, Method};
use reqwest::{IntoUrl, Method, StatusCode};
use utils::{
http::error::HttpErrorBody,
id::{TenantId, TimelineId},
@@ -22,8 +22,8 @@ pub enum Error {
#[error("receive error body: {0}")]
ReceiveErrorBody(String),
#[error("pageserver API: {0}")]
ApiError(String),
#[error("pageserver API: {1}")]
ApiError(StatusCode, String),
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -41,7 +41,7 @@ impl ResponseErrorMessageExt for reqwest::Response {
let url = self.url().to_owned();
Err(match self.json::<HttpErrorBody>().await {
Ok(HttpErrorBody { msg }) => Error::ApiError(msg),
Ok(HttpErrorBody { msg }) => Error::ApiError(status, msg),
Err(_) => {
Error::ReceiveErrorBody(format!("Http error ({}) at {}.", status.as_u16(), url))
}
@@ -71,9 +71,9 @@ impl Client {
pub async fn tenant_details(
&self,
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
) -> Result<pageserver_api::models::TenantDetails> {
let uri = format!("{}/v1/tenant/{tenant_id}", self.mgmt_api_endpoint);
let uri = format!("{}/v1/tenant/{tenant_shard_id}", self.mgmt_api_endpoint);
self.get(uri)
.await?
.json()
@@ -83,9 +83,12 @@ impl Client {
pub async fn list_timelines(
&self,
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
) -> Result<Vec<pageserver_api::models::TimelineInfo>> {
let uri = format!("{}/v1/tenant/{tenant_id}/timeline", self.mgmt_api_endpoint);
let uri = format!(
"{}/v1/tenant/{tenant_shard_id}/timeline",
self.mgmt_api_endpoint
);
self.get(&uri)
.await?
.json()
@@ -179,23 +182,23 @@ impl Client {
"{}/v1/tenant/{}/secondary/download",
self.mgmt_api_endpoint, tenant_id
);
self.request(Method::POST, &uri, ())
.await?
.error_for_status()
.map(|_| ())
.map_err(|e| Error::ApiError(format!("{}", e)))
self.request(Method::POST, &uri, ()).await?;
Ok(())
}
pub async fn location_config(
&self,
tenant_id: TenantId,
tenant_shard_id: TenantShardId,
config: LocationConfig,
flush_ms: Option<std::time::Duration>,
) -> Result<()> {
let req_body = TenantLocationConfigRequest { tenant_id, config };
let req_body = TenantLocationConfigRequest {
tenant_id: tenant_shard_id,
config,
};
let path = format!(
"{}/v1/tenant/{}/location_config",
self.mgmt_api_endpoint, tenant_id
self.mgmt_api_endpoint, tenant_shard_id
);
let path = if let Some(flush_ms) = flush_ms {
format!("{}?flush_ms={}", path, flush_ms.as_millis())
@@ -233,4 +236,19 @@ impl Client {
.await
.map_err(Error::ReceiveBody)
}
pub async fn tenant_synthetic_size(
&self,
tenant_shard_id: TenantShardId,
) -> Result<TenantHistorySize> {
let uri = format!(
"{}/v1/tenant/{}/synthetic_size",
self.mgmt_api_endpoint, tenant_shard_id
);
self.get(&uri)
.await?
.json()
.await
.map_err(Error::ReceiveBody)
}
}

View File

@@ -2,6 +2,7 @@
use std::sync::Arc;
use pageserver_api::shard::TenantShardId;
use tokio::task::JoinSet;
use utils::id::{TenantId, TenantTimelineId};
@@ -31,7 +32,10 @@ pub async fn get_pageserver_tenant_timelines_unsharded(
async move {
(
tenant_id,
mgmt_api_client.tenant_details(tenant_id).await.unwrap(),
mgmt_api_client
.tenant_details(TenantShardId::unsharded(tenant_id))
.await
.unwrap(),
)
}
});

View File

@@ -14,6 +14,7 @@ use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantState;
use pageserver_api::models::{
@@ -265,7 +266,7 @@ impl From<SetNewTenantConfigError> for ApiError {
SetNewTenantConfigError::GetTenant(tid) => {
ApiError::NotFound(anyhow!("tenant {}", tid).into())
}
e @ SetNewTenantConfigError::Persist(_) => {
e @ (SetNewTenantConfigError::Persist(_) | SetNewTenantConfigError::Other(_)) => {
ApiError::InternalServerError(anyhow::Error::new(e))
}
}
@@ -704,7 +705,9 @@ async fn tenant_attach_handler(
}
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
let location_conf = LocationConf::attached_single(tenant_conf, generation);
let shard_params = ShardParameters::default();
let location_conf = LocationConf::attached_single(tenant_conf, generation, &shard_params);
let tenant = state
.tenant_manager
.upsert_location(
@@ -1194,7 +1197,8 @@ async fn tenant_create_handler(
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let location_conf = LocationConf::attached_single(tenant_conf, generation);
let location_conf =
LocationConf::attached_single(tenant_conf, generation, &request_data.shard_parameters);
let new_tenant = state
.tenant_manager
@@ -1213,7 +1217,6 @@ async fn tenant_create_handler(
"Upsert succeeded but didn't return tenant!"
)));
};
// We created the tenant. Existing API semantics are that the tenant
// is Active when this function returns.
if let res @ Err(_) = new_tenant

View File

@@ -18,6 +18,7 @@ use enumset::EnumSet;
use futures::stream::FuturesUnordered;
use futures::FutureExt;
use futures::StreamExt;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TimelineState;
use pageserver_api::shard::ShardIdentity;
use pageserver_api::shard::TenantShardId;
@@ -2674,10 +2675,11 @@ impl Tenant {
}
}
// Legacy configs are implicitly in attached state
// Legacy configs are implicitly in attached state, and do not support sharding
Ok(LocationConf::attached_single(
tenant_conf,
Generation::none(),
&ShardParameters::default(),
))
} else {
// FIXME If the config file is not found, assume that we're attaching
@@ -3962,6 +3964,7 @@ pub(crate) mod harness {
AttachedTenantConf::try_from(LocationConf::attached_single(
TenantConfOpt::from(self.tenant_conf),
self.generation,
&ShardParameters::default(),
))
.unwrap(),
// This is a legacy/test code path: sharding isn't supported here.

View File

@@ -9,7 +9,7 @@
//! may lead to a data loss.
//!
use anyhow::bail;
use pageserver_api::models;
use pageserver_api::models::{self, ShardParameters};
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
use serde::de::IntoDeserializer;
use serde::{Deserialize, Serialize};
@@ -167,14 +167,17 @@ impl LocationConf {
/// For use when loading from a legacy configuration: presence of a tenant
/// implies it is in AttachmentMode::Single, which used to be the only
/// possible state. This function should eventually be removed.
pub(crate) fn attached_single(tenant_conf: TenantConfOpt, generation: Generation) -> Self {
pub(crate) fn attached_single(
tenant_conf: TenantConfOpt,
generation: Generation,
shard_params: &ShardParameters,
) -> Self {
Self {
mode: LocationMode::Attached(AttachedLocationConfig {
generation,
attach_mode: AttachmentMode::Single,
}),
// Legacy configuration loads are always from tenants created before sharding existed.
shard: ShardIdentity::unsharded(),
shard: ShardIdentity::from_params(ShardNumber(0), shard_params),
tenant_conf,
}
}

View File

@@ -3,7 +3,8 @@
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use pageserver_api::key::Key;
use pageserver_api::shard::{ShardIdentity, ShardNumber, TenantShardId};
use pageserver_api::models::ShardParameters;
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, TenantShardId};
use rand::{distributions::Alphanumeric, Rng};
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
@@ -760,6 +761,8 @@ pub(crate) enum SetNewTenantConfigError {
GetTenant(#[from] GetTenantError),
#[error(transparent)]
Persist(anyhow::Error),
#[error(transparent)]
Other(anyhow::Error),
}
pub(crate) async fn set_new_tenant_config(
@@ -773,10 +776,21 @@ pub(crate) async fn set_new_tenant_config(
info!("configuring tenant {tenant_id}");
let tenant = get_tenant(tenant_shard_id, true)?;
if tenant.tenant_shard_id().shard_count > ShardCount(0) {
// Note that we use ShardParameters::default below.
return Err(SetNewTenantConfigError::Other(anyhow::anyhow!(
"This API may only be used on single-sharded tenants, use the /location_config API for sharded tenants"
)));
}
// This is a legacy API that only operates on attached tenants: the preferred
// API to use is the location_config/ endpoint, which lets the caller provide
// the full LocationConf.
let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
let location_conf = LocationConf::attached_single(
new_tenant_conf,
tenant.generation,
&ShardParameters::default(),
);
Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
.await