From 1ab0cfc8cb4800e30024a2e271f90f08823d0507 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 28 Nov 2023 13:14:51 +0000 Subject: [PATCH] pageserver: add sharding metadata to `LocationConf` (#5932) ## Problem The TenantShardId in API URLs is sufficient to uniquely identify a tenant shard, but not for it to function: it also needs to know its full sharding configuration (stripe size, layout version) in order to map keys to shards. ## Summary of changes - Introduce ShardIdentity: this is the superset of ShardIndex (#5924) that is required for translating keys to shard numbers. - Include ShardIdentity as an optional attribute of LocationConf - Extend the public `LocationConfig` API structure with a flat representation of shard attributes. The net result is that at the point we construct a `Tenant`, we have a `ShardIdentity` (inside LocationConf). This enables the next steps to actually use the ShardIdentity to split WAL and validate that page service requests are reaching the correct shard. 
--- Cargo.lock | 1 + control_plane/src/tenant_migration.rs | 57 +++++++------- libs/pageserver_api/Cargo.toml | 1 + libs/pageserver_api/src/models.rs | 12 ++- libs/pageserver_api/src/shard.rs | 106 ++++++++++++++++++++++++++ pageserver/src/tenant/config.rs | 29 ++++++- 6 files changed, 172 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 16fcd0c4c9..551866e34a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3011,6 +3011,7 @@ dependencies = [ "serde_with", "strum", "strum_macros", + "thiserror", "utils", "workspace_hack", ] diff --git a/control_plane/src/tenant_migration.rs b/control_plane/src/tenant_migration.rs index d28d1f9fe8..42780db85c 100644 --- a/control_plane/src/tenant_migration.rs +++ b/control_plane/src/tenant_migration.rs @@ -14,7 +14,6 @@ use pageserver_api::models::{ use std::collections::HashMap; use std::time::Duration; use utils::{ - generation::Generation, id::{TenantId, TimelineId}, lsn::Lsn, }; @@ -93,6 +92,22 @@ pub fn migrate_tenant( // Get a new generation let attachment_service = AttachmentService::from_env(env); + fn build_location_config( + mode: LocationConfigMode, + generation: Option, + secondary_conf: Option, + ) -> LocationConfig { + LocationConfig { + mode, + generation, + secondary_conf, + tenant_conf: TenantConfig::default(), + shard_number: 0, + shard_count: 0, + shard_stripe_size: 0, + } + } + let previous = attachment_service.inspect(tenant_id)?; let mut baseline_lsns = None; if let Some((generation, origin_ps_id)) = &previous { @@ -101,12 +116,7 @@ pub fn migrate_tenant( if origin_ps_id == &dest_ps.conf.id { println!("🔁 Already attached to {origin_ps_id}, freshening..."); let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?; - let dest_conf = LocationConfig { - mode: LocationConfigMode::AttachedSingle, - generation: gen.map(Generation::new), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; + let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, 
None); dest_ps.location_config(tenant_id, dest_conf)?; println!("✅ Migration complete"); return Ok(()); @@ -114,24 +124,15 @@ pub fn migrate_tenant( println!("🔁 Switching origin pageserver {origin_ps_id} to stale mode"); - let stale_conf = LocationConfig { - mode: LocationConfigMode::AttachedStale, - generation: Some(Generation::new(*generation)), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; + let stale_conf = + build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None); origin_ps.location_config(tenant_id, stale_conf)?; baseline_lsns = Some(get_lsns(tenant_id, &origin_ps)?); } let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?; - let dest_conf = LocationConfig { - mode: LocationConfigMode::AttachedMulti, - generation: gen.map(Generation::new), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; + let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None); println!("🔁 Attaching to pageserver {}", dest_ps.conf.id); dest_ps.location_config(tenant_id, dest_conf)?; @@ -170,12 +171,11 @@ pub fn migrate_tenant( } // Downgrade to a secondary location - let secondary_conf = LocationConfig { - mode: LocationConfigMode::Secondary, - generation: None, - secondary_conf: Some(LocationConfigSecondary { warm: true }), - tenant_conf: TenantConfig::default(), - }; + let secondary_conf = build_location_config( + LocationConfigMode::Secondary, + None, + Some(LocationConfigSecondary { warm: true }), + ); println!( "💤 Switching to secondary mode on pageserver {}", @@ -188,12 +188,7 @@ pub fn migrate_tenant( "🔁 Switching to AttachedSingle mode on pageserver {}", dest_ps.conf.id ); - let dest_conf = LocationConfig { - mode: LocationConfigMode::AttachedSingle, - generation: gen.map(Generation::new), - secondary_conf: None, - tenant_conf: TenantConfig::default(), - }; + let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None); 
dest_ps.location_config(tenant_id, dest_conf)?; println!("✅ Migration complete"); diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index df9796b039..4d08d78e87 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -18,6 +18,7 @@ enum-map.workspace = true strum.workspace = true strum_macros.workspace = true hex.workspace = true +thiserror.workspace = true workspace_hack.workspace = true diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 71e32e479f..c792a5eff7 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -10,7 +10,6 @@ use serde_with::serde_as; use strum_macros; use utils::{ completion, - generation::Generation, history_buffer::HistoryBufferWithDropCounter, id::{NodeId, TenantId, TimelineId}, lsn::Lsn, @@ -262,10 +261,19 @@ pub struct LocationConfig { pub mode: LocationConfigMode, /// If attaching, in what generation? #[serde(default)] - pub generation: Option, + pub generation: Option, #[serde(default)] pub secondary_conf: Option, + // Shard parameters: if shard_count is nonzero, then other shard_* fields + // must be set accurately. + #[serde(default)] + pub shard_number: u8, + #[serde(default)] + pub shard_count: u8, + #[serde(default)] + pub shard_stripe_size: u32, + // If requesting mode `Secondary`, configuration for that. 
// Custom storage configuration for the tenant, if any pub tenant_conf: TenantConfig, diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index 16f2d13770..688b911425 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -2,6 +2,7 @@ use std::{ops::RangeInclusive, str::FromStr}; use hex::FromHex; use serde::{Deserialize, Serialize}; +use thiserror; use utils::id::TenantId; #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug)] @@ -292,6 +293,80 @@ impl<'de> Deserialize<'de> for TenantShardId { } } +/// Stripe size in number of pages +#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct ShardStripeSize(pub u32); + +/// Layout version: for future upgrades where we might change how the key->shard mapping works +#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct ShardLayout(u8); + +const LAYOUT_V1: ShardLayout = ShardLayout(1); + +/// Default stripe size in pages: 256MiB divided by 8kiB page size. +const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8); + +/// The ShardIdentity contains the information needed for one member of map +/// to resolve a key to a shard, and then check whether that shard is ==self. +#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] +pub struct ShardIdentity { + pub layout: ShardLayout, + pub number: ShardNumber, + pub count: ShardCount, + pub stripe_size: ShardStripeSize, +} + +#[derive(thiserror::Error, Debug, PartialEq, Eq)] +pub enum ShardConfigError { + #[error("Invalid shard count")] + InvalidCount, + #[error("Invalid shard number")] + InvalidNumber, + #[error("Invalid stripe size")] + InvalidStripeSize, +} + +impl ShardIdentity { + /// An identity with number=0 count=0 is a "none" identity, which represents legacy + /// tenants. Modern single-shard tenants should not use this: they should + /// have number=0 count=1. 
+ pub fn unsharded() -> Self { + Self { + number: ShardNumber(0), + count: ShardCount(0), + layout: LAYOUT_V1, + stripe_size: DEFAULT_STRIPE_SIZE, + } + } + + pub fn is_unsharded(&self) -> bool { + self.number == ShardNumber(0) && self.count == ShardCount(0) + } + + /// Count must be nonzero, and number must be < count. To construct + /// the legacy case (count==0), use Self::unsharded instead. + pub fn new( + number: ShardNumber, + count: ShardCount, + stripe_size: ShardStripeSize, + ) -> Result { + if count.0 == 0 { + Err(ShardConfigError::InvalidCount) + } else if number.0 > count.0 - 1 { + Err(ShardConfigError::InvalidNumber) + } else if stripe_size.0 == 0 { + Err(ShardConfigError::InvalidStripeSize) + } else { + Ok(Self { + number, + count, + layout: LAYOUT_V1, + stripe_size, + }) + } + } +} + impl Serialize for ShardIndex { fn serialize(&self, serializer: S) -> Result where @@ -473,6 +548,37 @@ mod tests { Ok(()) } + #[test] + fn shard_identity_validation() -> Result<(), ShardConfigError> { + // Happy cases + ShardIdentity::new(ShardNumber(0), ShardCount(1), DEFAULT_STRIPE_SIZE)?; + ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(1))?; + ShardIdentity::new(ShardNumber(254), ShardCount(255), ShardStripeSize(1))?; + + assert_eq!( + ShardIdentity::new(ShardNumber(0), ShardCount(0), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidCount) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(10), ShardCount(10), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidNumber) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(11), ShardCount(10), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidNumber) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(255), ShardCount(255), DEFAULT_STRIPE_SIZE), + Err(ShardConfigError::InvalidNumber) + ); + assert_eq!( + ShardIdentity::new(ShardNumber(0), ShardCount(1), ShardStripeSize(0)), + Err(ShardConfigError::InvalidStripeSize) + ); + + Ok(()) + } + #[test] fn shard_index_human_encoding() -> Result<(), 
hex::FromHexError> { let example = ShardIndex { diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 5f8c7f6c59..4ad6a71f67 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -10,6 +10,7 @@ //! use anyhow::Context; use pageserver_api::models; +use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; use serde::{Deserialize, Serialize}; use std::num::NonZeroU64; use std::time::Duration; @@ -88,6 +89,14 @@ pub(crate) struct LocationConf { /// The location-specific part of the configuration, describes the operating /// mode of this pageserver for this tenant. pub(crate) mode: LocationMode, + + /// The detailed shard identity. This structure is already scoped within + /// a TenantShardId, but we need the full ShardIdentity to enable calculating + /// key->shard mappings. + #[serde(default = "ShardIdentity::unsharded")] + #[serde(skip_serializing_if = "ShardIdentity::is_unsharded")] + pub(crate) shard: ShardIdentity, + /// The pan-cluster tenant configuration, the same on all locations pub(crate) tenant_conf: TenantConfOpt, } @@ -160,6 +169,8 @@ impl LocationConf { generation, attach_mode: AttachmentMode::Single, }), + // Legacy configuration loads are always from tenants created before sharding existed. + shard: ShardIdentity::unsharded(), tenant_conf, } } @@ -187,6 +198,7 @@ impl LocationConf { fn get_generation(conf: &'_ models::LocationConfig) -> Result { conf.generation + .map(Generation::new) .ok_or_else(|| anyhow::anyhow!("Generation must be set when attaching")) } @@ -226,7 +238,21 @@ impl LocationConf { } }; - Ok(Self { mode, tenant_conf }) + let shard = if conf.shard_count == 0 { + ShardIdentity::unsharded() + } else { + ShardIdentity::new( + ShardNumber(conf.shard_number), + ShardCount(conf.shard_count), + ShardStripeSize(conf.shard_stripe_size), + )? 
+ }; + + Ok(Self { + shard, + mode, + tenant_conf, + }) } } @@ -241,6 +267,7 @@ impl Default for LocationConf { attach_mode: AttachmentMode::Single, }), tenant_conf: TenantConfOpt::default(), + shard: ShardIdentity::unsharded(), } } }