From 4cec95ba13cfa9cad5bfe112f749216f118af2bb Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 17 Jan 2024 13:34:51 +0000 Subject: [PATCH] pageserver: add list API for LocationConf (#6329) ## Problem The `/v1/tenant` listing API only applies to attached tenants. For an external service to implement a global reconciliation of its list of shards vs. what's on the pageserver, we need a full view of what's in TenantManager, including secondary tenant locations, and InProgress locations. Dependency of https://github.com/neondatabase/neon/pull/6251 ## Summary of changes - Add methods to Tenant and SecondaryTenant to reconstruct the LocationConf used to create them. - Add `GET /v1/location_config` API --- Cargo.lock | 1 + libs/pageserver_api/Cargo.toml | 1 + libs/pageserver_api/src/models.rs | 44 ++++++++++--- libs/pageserver_api/src/shard.rs | 2 +- pageserver/client/src/mgmt_api.rs | 9 +++ pageserver/src/config.rs | 3 +- pageserver/src/http/routes.rs | 28 ++++++++- pageserver/src/tenant.rs | 31 +++++++++- pageserver/src/tenant/config.rs | 61 +++++++++++-------- pageserver/src/tenant/mgr.rs | 23 ++++++- pageserver/src/tenant/secondary.rs | 46 +++++++++++++- pageserver/src/tenant/timeline.rs | 13 ++-- .../src/tenant/timeline/eviction_task.rs | 6 +- 13 files changed, 215 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c5c2523e29..d4ebdd3cc4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3340,6 +3340,7 @@ dependencies = [ "const_format", "enum-map", "hex", + "humantime-serde", "postgres_ffi", "rand 0.8.5", "serde", diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index 4146597d8d..96c6c10d3e 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -19,6 +19,7 @@ strum.workspace = true strum_macros.workspace = true hex.workspace = true thiserror.workspace = true +humantime-serde.workspace = true workspace_hack.workspace = true diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 6c43399179..d423888c8b 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -4,7 +4,7 @@ use std::{ collections::HashMap, io::{BufRead, Read}, num::{NonZeroU64, NonZeroUsize}, - time::SystemTime, + time::{Duration, SystemTime}, }; use byteorder::{BigEndian, ReadBytesExt}; @@ -266,17 +266,37 @@ pub struct TenantConfig { pub lagging_wal_timeout: Option, pub max_lsn_wal_lag: Option, pub trace_read_requests: Option, - // We defer the parsing of the eviction_policy field to the request handler. - // Otherwise we'd have to move the types for eviction policy into this package. - // We might do that once the eviction feature has stabilizied. - // For now, this field is not even documented in the openapi_spec.yml. - pub eviction_policy: Option, + pub eviction_policy: Option, pub min_resident_size_override: Option, pub evictions_low_residence_duration_metric_threshold: Option, pub gc_feedback: Option, pub heatmap_period: Option, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind")] +pub enum EvictionPolicy { + NoEviction, + LayerAccessThreshold(EvictionPolicyLayerAccessThreshold), +} + +impl EvictionPolicy { + pub fn discriminant_str(&self) -> &'static str { + match self { + EvictionPolicy::NoEviction => "NoEviction", + EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct EvictionPolicyLayerAccessThreshold { + #[serde(with = "humantime_serde")] + pub period: Duration, + #[serde(with = "humantime_serde")] + pub threshold: Duration, +} + /// A flattened analog of a `pagesever::tenant::LocationMode`, which /// lists out all possible states (and the virtual "Detached" state) /// in a flat form rather than using rust-style enums. @@ -302,6 +322,8 @@ pub struct LocationConfig { /// If attaching, in what generation? #[serde(default)] pub generation: Option, + + // If requesting mode `Secondary`, configuration for that. #[serde(default)] pub secondary_conf: Option, @@ -314,11 +336,17 @@ pub struct LocationConfig { #[serde(default)] pub shard_stripe_size: u32, - // If requesting mode `Secondary`, configuration for that. - // Custom storage configuration for the tenant, if any + // This configuration only affects attached mode, but should be provided irrespective + // of the mode, as a secondary location might transition on startup if the response + // to the `/re-attach` control plane API requests it. pub tenant_conf: TenantConfig, } +#[derive(Serialize, Deserialize)] +pub struct LocationConfigListResponse { + pub tenant_shards: Vec<(TenantShardId, Option)>, +} + #[derive(Serialize, Deserialize)] #[serde(transparent)] pub struct TenantCreateResponse(pub TenantId); diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index ec4c6cd2fc..e27aad8156 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -342,7 +342,7 @@ const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8); pub struct ShardIdentity { pub number: ShardNumber, pub count: ShardCount, - stripe_size: ShardStripeSize, + pub stripe_size: ShardStripeSize, layout: ShardLayout, } diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index 2b7c26e509..a0b934e48d 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -209,6 +209,15 @@ impl Client { Ok(()) } + pub async fn list_location_config(&self) -> Result { + let path = format!("{}/v1/location_config", self.mgmt_api_endpoint); + self.request(Method::GET, &path, ()) + .await? + .json() + .await + .map_err(Error::ReceiveBody) + } + pub async fn timeline_create( &self, tenant_id: TenantId, diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 7c03dc1bdd..52277d7f24 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -1126,11 +1126,12 @@ mod tests { }; use camino_tempfile::{tempdir, Utf8TempDir}; + use pageserver_api::models::EvictionPolicy; use remote_storage::{RemoteStorageKind, S3Config}; use utils::serde_percent::Percent; use super::*; - use crate::{tenant::config::EvictionPolicy, DEFAULT_PG_VERSION}; + use crate::DEFAULT_PG_VERSION; const ALL_BASE_VALUES_TOML: &str = r#" # Initial configuration file created by 'pageserver --init' diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 07b0671537..0a18ac4af1 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -14,6 +14,7 @@ use hyper::header; use hyper::StatusCode; use hyper::{Body, Request, Response, Uri}; use metrics::launch_timestamp::LaunchTimestamp; +use pageserver_api::models::LocationConfigListResponse; use pageserver_api::models::ShardParameters; use pageserver_api::models::TenantDetails; use pageserver_api::models::TenantState; @@ -39,11 +40,11 @@ use crate::pgdatadir_mapping::LsnForTimestamp; use crate::task_mgr::TaskKind; use crate::tenant::config::{LocationConf, TenantConfOpt}; use crate::tenant::mgr::GetActiveTenantError; -use crate::tenant::mgr::UpsertLocationError; use crate::tenant::mgr::{ GetTenantError, SetNewTenantConfigError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError, TenantSlotUpsertError, TenantStateError, }; +use crate::tenant::mgr::{TenantSlot, UpsertLocationError}; use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::LayerAccessStatsReset; @@ -1354,6 +1355,28 @@ async fn put_tenant_location_config_handler( json_response(StatusCode::OK, ()) } +async fn list_location_config_handler( + request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { + let state = get_state(&request); + let slots = state.tenant_manager.list(); + let result = LocationConfigListResponse { + tenant_shards: slots + .into_iter() + .map(|(tenant_shard_id, slot)| { + let v = match slot { + TenantSlot::Attached(t) => Some(t.get_location_conf()), + TenantSlot::Secondary(s) => Some(s.get_location_conf()), + TenantSlot::InProgress(_) => None, + }; + (tenant_shard_id, v) + }) + .collect(), + }; + json_response(StatusCode::OK, result) +} + /// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`]. async fn handle_tenant_break( r: Request, @@ -1896,6 +1919,9 @@ pub fn make_router( .put("/v1/tenant/:tenant_shard_id/location_config", |r| { api_handler(r, put_tenant_location_config_handler) }) + .get("/v1/location_config", |r| { + api_handler(r, list_location_config_handler) + }) .get("/v1/tenant/:tenant_shard_id/timeline", |r| { api_handler(r, timeline_list_handler) }) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 28e2426c45..5240cc217a 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -18,7 +18,7 @@ use enumset::EnumSet; use futures::stream::FuturesUnordered; use futures::FutureExt; use futures::StreamExt; -use pageserver_api::models::ShardParameters; +use pageserver_api::models; use pageserver_api::models::TimelineState; use pageserver_api::shard::ShardIdentity; use pageserver_api::shard::TenantShardId; @@ -2326,6 +2326,32 @@ impl Tenant { .clone() } + /// For API access: generate a LocationConfig equivalent to the one that would be used to + /// create a Tenant in the same state. Do not use this in hot paths: it's for relatively + /// rare external API calls, like a reconciliation at startup. + pub(crate) fn get_location_conf(&self) -> models::LocationConfig { + let conf = self.tenant_conf.read().unwrap(); + + let location_config_mode = match conf.location.attach_mode { + AttachmentMode::Single => models::LocationConfigMode::AttachedSingle, + AttachmentMode::Multi => models::LocationConfigMode::AttachedMulti, + AttachmentMode::Stale => models::LocationConfigMode::AttachedStale, + }; + + // We have a pageserver TenantConf, we need the API-facing TenantConfig. + let tenant_config: models::TenantConfig = conf.tenant_conf.into(); + + models::LocationConfig { + mode: location_config_mode, + generation: self.generation.into(), + secondary_conf: None, + shard_number: self.shard_identity.number.0, + shard_count: self.shard_identity.count.0, + shard_stripe_size: self.shard_identity.stripe_size.0, + tenant_conf: tenant_config, + } + } + pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId { &self.tenant_shard_id } @@ -2680,7 +2706,7 @@ impl Tenant { Ok(LocationConf::attached_single( tenant_conf, Generation::none(), - &ShardParameters::default(), + &models::ShardParameters::default(), )) } else { // FIXME If the config file is not found, assume that we're attaching @@ -3793,6 +3819,7 @@ pub(crate) mod harness { use bytes::{Bytes, BytesMut}; use camino::Utf8PathBuf; use once_cell::sync::OnceCell; + use pageserver_api::models::ShardParameters; use pageserver_api::shard::ShardIndex; use std::fs; use std::sync::Arc; diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index fa5e4af13e..c44164c12d 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -9,7 +9,8 @@ //! may lead to a data loss. //! use anyhow::bail; -use pageserver_api::models::{self, ShardParameters}; +use pageserver_api::models; +use pageserver_api::models::EvictionPolicy; use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; use serde::de::IntoDeserializer; use serde::{Deserialize, Serialize}; @@ -170,7 +171,7 @@ impl LocationConf { pub(crate) fn attached_single( tenant_conf: TenantConfOpt, generation: Generation, - shard_params: &ShardParameters, + shard_params: &models::ShardParameters, ) -> Self { Self { mode: LocationMode::Attached(AttachedLocationConfig { @@ -431,30 +432,6 @@ pub struct TenantConfOpt { pub heatmap_period: Option, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(tag = "kind")] -pub enum EvictionPolicy { - NoEviction, - LayerAccessThreshold(EvictionPolicyLayerAccessThreshold), -} - -impl EvictionPolicy { - pub fn discriminant_str(&self) -> &'static str { - match self { - EvictionPolicy::NoEviction => "NoEviction", - EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold", - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -pub struct EvictionPolicyLayerAccessThreshold { - #[serde(with = "humantime_serde")] - pub period: Duration, - #[serde(with = "humantime_serde")] - pub threshold: Duration, -} - impl TenantConfOpt { pub fn merge(&self, global_conf: TenantConf) -> TenantConf { TenantConf { @@ -579,6 +556,38 @@ impl TryFrom for TenantConfOpt { } } +/// This is a conversion from our internal tenant config object to the one used +/// in external APIs. +impl From for models::TenantConfig { + fn from(value: TenantConfOpt) -> Self { + fn humantime(d: Duration) -> String { + format!("{}s", d.as_secs()) + } + Self { + checkpoint_distance: value.checkpoint_distance, + checkpoint_timeout: value.checkpoint_timeout.map(humantime), + compaction_target_size: value.compaction_target_size, + compaction_period: value.compaction_period.map(humantime), + compaction_threshold: value.compaction_threshold, + gc_horizon: value.gc_horizon, + gc_period: value.gc_period.map(humantime), + image_creation_threshold: value.image_creation_threshold, + pitr_interval: value.pitr_interval.map(humantime), + walreceiver_connect_timeout: value.walreceiver_connect_timeout.map(humantime), + lagging_wal_timeout: value.lagging_wal_timeout.map(humantime), + max_lsn_wal_lag: value.max_lsn_wal_lag, + trace_read_requests: value.trace_read_requests, + eviction_policy: value.eviction_policy, + min_resident_size_override: value.min_resident_size_override, + evictions_low_residence_duration_metric_threshold: value + .evictions_low_residence_duration_metric_threshold + .map(humantime), + gc_feedback: value.gc_feedback, + heatmap_period: value.heatmap_period.map(humantime), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 997d76ff17..4b21fb9a9c 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -57,6 +57,7 @@ use super::TenantSharedResources; /// that way we avoid having to carefully switch a tenant's ingestion etc on and off during /// its lifetime, and we can preserve some important safety invariants like `Tenant` always /// having a properly acquired generation (Secondary doesn't need a generation) +#[derive(Clone)] pub(crate) enum TenantSlot { Attached(Arc), Secondary(Arc), @@ -477,6 +478,8 @@ pub async fn init_tenant_mgr( tenant_shard_id, TenantSlot::Secondary(SecondaryTenant::new( tenant_shard_id, + location_conf.shard, + location_conf.tenant_conf, secondary_config, )), ); @@ -921,6 +924,7 @@ impl TenantManager { Some(TenantSlot::Secondary(secondary_tenant)), ) => { secondary_tenant.set_config(secondary_conf); + secondary_tenant.set_tenant_conf(&new_location_config.tenant_conf); Some(FastPathModified::Secondary(secondary_tenant.clone())) } _ => { @@ -1053,7 +1057,13 @@ impl TenantManager { let new_slot = match &new_location_config.mode { LocationMode::Secondary(secondary_config) => { - TenantSlot::Secondary(SecondaryTenant::new(tenant_shard_id, secondary_config)) + let shard_identity = new_location_config.shard; + TenantSlot::Secondary(SecondaryTenant::new( + tenant_shard_id, + shard_identity, + new_location_config.tenant_conf, + secondary_config, + )) } LocationMode::Attached(_attach_config) => { let shard_identity = new_location_config.shard; @@ -1203,6 +1213,17 @@ impl TenantManager { } } + /// Total list of all tenant slots: this includes attached, secondary, and InProgress. + pub(crate) fn list(&self) -> Vec<(TenantShardId, TenantSlot)> { + let locked = self.tenants.read().unwrap(); + match &*locked { + TenantsMap::Initializing => Vec::new(), + TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => { + map.iter().map(|(k, v)| (*k, v.clone())).collect() + } + } + } + pub(crate) async fn delete_tenant( &self, tenant_shard_id: TenantShardId, diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 4a13814154..d00d901be6 100644 --- a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -18,11 +18,16 @@ use self::{ }; use super::{ - config::SecondaryLocationConfig, mgr::TenantManager, - span::debug_assert_current_span_has_tenant_id, storage_layer::LayerFileName, + config::{SecondaryLocationConfig, TenantConfOpt}, + mgr::TenantManager, + span::debug_assert_current_span_has_tenant_id, + storage_layer::LayerFileName, }; -use pageserver_api::shard::TenantShardId; +use pageserver_api::{ + models, + shard::{ShardIdentity, TenantShardId}, +}; use remote_storage::GenericRemoteStorage; use tokio_util::sync::CancellationToken; @@ -84,12 +89,20 @@ pub(crate) struct SecondaryTenant { pub(crate) gate: Gate, + // Secondary mode does not need the full shard identity or the TenantConfOpt. However, + // storing these enables us to report our full LocationConf, enabling convenient reconciliation + // by the control plane (see [`Self::get_location_conf`]) + shard_identity: ShardIdentity, + tenant_conf: std::sync::Mutex, + detail: std::sync::Mutex, } impl SecondaryTenant { pub(crate) fn new( tenant_shard_id: TenantShardId, + shard_identity: ShardIdentity, + tenant_conf: TenantConfOpt, config: &SecondaryLocationConfig, ) -> Arc { Arc::new(Self { @@ -101,6 +114,9 @@ impl SecondaryTenant { cancel: CancellationToken::new(), gate: Gate::new(format!("SecondaryTenant {tenant_shard_id}")), + shard_identity, + tenant_conf: std::sync::Mutex::new(tenant_conf), + detail: std::sync::Mutex::new(SecondaryDetail::new(config.clone())), }) } @@ -116,6 +132,30 @@ impl SecondaryTenant { self.detail.lock().unwrap().config = config.clone(); } + pub(crate) fn set_tenant_conf(&self, config: &TenantConfOpt) { + *(self.tenant_conf.lock().unwrap()) = *config; + } + + /// For API access: generate a LocationConfig equivalent to the one that would be used to + /// create a Tenant in the same state. Do not use this in hot paths: it's for relatively + /// rare external API calls, like a reconciliation at startup. + pub(crate) fn get_location_conf(&self) -> models::LocationConfig { + let conf = self.detail.lock().unwrap().config.clone(); + + let conf = models::LocationConfigSecondary { warm: conf.warm }; + + let tenant_conf = *self.tenant_conf.lock().unwrap(); + models::LocationConfig { + mode: models::LocationConfigMode::Secondary, + generation: None, + secondary_conf: Some(conf), + shard_number: self.tenant_shard_id.shard_number.0, + shard_count: self.tenant_shard_id.shard_count.0, + shard_stripe_size: self.shard_identity.stripe_size.0, + tenant_conf: tenant_conf.into(), + } + } + pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId { &self.tenant_shard_id } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 52b2de843d..6b1487259f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -15,9 +15,10 @@ use fail::fail_point; use itertools::Itertools; use pageserver_api::{ models::{ - DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, LayerMapInfo, - TimelineState, + DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, + LayerMapInfo, TimelineState, }, + reltag::BlockNumber, shard::{ShardIdentity, TenantShardId}, }; use rand::Rng; @@ -42,7 +43,6 @@ use std::{ ops::ControlFlow, }; -use crate::tenant::tasks::BackgroundLoopKind; use crate::tenant::timeline::logical_size::CurrentLogicalSize; use crate::tenant::{ layer_map::{LayerMap, SearchResult}, @@ -65,16 +65,17 @@ use crate::{ use crate::{ disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry, }; +use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind}; use crate::config::PageServerConf; use crate::keyspace::{KeyPartitioning, KeySpace, KeySpaceRandomAccum}; use crate::metrics::{ TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT, }; +use crate::pgdatadir_mapping::CalculateLogicalSizeError; use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key}; -use crate::pgdatadir_mapping::{CalculateLogicalSizeError, LsnForTimestamp}; -use crate::tenant::config::{EvictionPolicy, TenantConfOpt}; -use pageserver_api::reltag::{BlockNumber, RelTag}; +use crate::tenant::config::TenantConfOpt; +use pageserver_api::reltag::RelTag; use pageserver_api::shard::ShardIndex; use postgres_connection::PgConnectionConfig; diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 86e36189d2..01a5bfc32b 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -20,6 +20,7 @@ use std::{ time::{Duration, SystemTime}, }; +use pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold}; use tokio::time::Instant; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, info_span, instrument, warn, Instrument}; @@ -29,10 +30,7 @@ use crate::{ pgdatadir_mapping::CollectKeySpaceError, task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, tenant::{ - config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold}, - tasks::BackgroundLoopKind, - timeline::EvictionError, - LogicalSizeCalculationCause, Tenant, + tasks::BackgroundLoopKind, timeline::EvictionError, LogicalSizeCalculationCause, Tenant, }, };