From 89cf714890237862eb3fd52f473e4dbe15cd6e4a Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 12 Mar 2024 11:36:27 +0000 Subject: [PATCH] tests/neon_local: rename "attachment service" -> "storage controller" (#7087) Not a user-facing change, but can break any existing `.neon` directories created by neon_local, as the name of the database used by the storage controller changes. This PR changes all the locations apart from the path of `control_plane/attachment_service` (waiting for an opportune moment to do that one, because it's the most conflict-ish wrt ongoing PRs like #6676) --- Makefile | 2 +- control_plane/attachment_service/src/http.rs | 2 +- control_plane/attachment_service/src/main.rs | 6 - .../attachment_service/src/persistence.rs | 4 +- .../attachment_service/src/service.rs | 4 +- control_plane/src/bin/neon_local.rs | 86 +++++----- control_plane/src/endpoint.rs | 10 +- control_plane/src/lib.rs | 2 +- control_plane/src/local_env.rs | 12 +- control_plane/src/pageserver.rs | 8 +- ...hment_service.rs => storage_controller.rs} | 38 ++--- docs/authentication.md | 4 +- libs/pageserver_api/src/controller_api.rs | 2 - test_runner/fixtures/neon_fixtures.py | 108 ++++++------ .../fixtures/pageserver/many_tenants.py | 2 +- .../interactive/test_many_small_tenants.py | 2 +- .../pagebench/test_large_slru_basebackup.py | 2 +- ...er_max_throughput_getpage_at_latest_lsn.py | 2 +- test_runner/performance/test_bulk_insert.py | 4 +- .../regress/test_attach_tenant_config.py | 2 +- test_runner/regress/test_change_pageserver.py | 8 +- test_runner/regress/test_compatibility.py | 2 +- .../regress/test_layers_from_future.py | 2 +- test_runner/regress/test_neon_cli.py | 4 +- test_runner/regress/test_pageserver_api.py | 2 +- .../regress/test_pageserver_generations.py | 14 +- .../regress/test_pageserver_secondary.py | 10 +- test_runner/regress/test_remote_storage.py | 4 +- test_runner/regress/test_s3_restore.py | 4 +- test_runner/regress/test_sharding.py | 30 ++-- test_runner/regress/test_sharding_service.py | 156 +++++++++--------- test_runner/regress/test_timeline_size.py | 4 +- 32 files changed, 267 insertions(+), 275 deletions(-) rename control_plane/src/{attachment_service.rs => storage_controller.rs} (94%) diff --git a/Makefile b/Makefile index ea782cb369..f13f080f1a 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS)) CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+) # Force cargo not to print progress bar CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1 -# Set PQ_LIB_DIR to make sure `attachment_service` get linked with bundled libpq (through diesel) +# Set PQ_LIB_DIR to make sure `storage_controller` gets linked with bundled libpq (through diesel) CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib # diff --git a/control_plane/attachment_service/src/http.rs b/control_plane/attachment_service/src/http.rs index 7e4030b221..27ba5bdb65 100644 --- a/control_plane/attachment_service/src/http.rs +++ b/control_plane/attachment_service/src/http.rs @@ -30,7 +30,7 @@ use pageserver_api::controller_api::{ }; use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest}; -use control_plane::attachment_service::{AttachHookRequest, InspectRequest}; +use control_plane::storage_controller::{AttachHookRequest, InspectRequest}; /// State available to HTTP request handlers #[derive(Clone)] diff --git a/control_plane/attachment_service/src/main.rs b/control_plane/attachment_service/src/main.rs index d9acbc0abd..333c3911e3 100644 ---
a/control_plane/attachment_service/src/main.rs +++ b/control_plane/attachment_service/src/main.rs @@ -1,9 +1,3 @@ -/// The attachment service mimics the aspects of the control plane API -/// that are required for a pageserver to operate. -/// -/// This enables running & testing pageservers without a full-blown -/// deployment of the Neon cloud platform. -/// use anyhow::{anyhow, Context}; use attachment_service::http::make_router; use attachment_service::metrics::preinitialize_metrics; diff --git a/control_plane/attachment_service/src/persistence.rs b/control_plane/attachment_service/src/persistence.rs index d5c6d74ebe..aa08945834 100644 --- a/control_plane/attachment_service/src/persistence.rs +++ b/control_plane/attachment_service/src/persistence.rs @@ -20,7 +20,7 @@ use crate::node::Node; /// ## What do we store? /// -/// The attachment service does not store most of its state durably. +/// The storage controller does not store most of its state durably. /// /// The essential things to store durably are: /// - generation numbers, as these must always advance monotonically to ensure data safety. @@ -34,7 +34,7 @@ use crate::node::Node; /// /// ## Performance/efficiency /// -/// The attachment service does not go via the database for most things: there are +/// The storage controller does not go via the database for most things: there are /// a couple of places where we must, and where efficiency matters: /// - Incrementing generation numbers: the Reconciler has to wait for this to complete /// before it can attach a tenant, so this acts as a bound on how fast things like diff --git a/control_plane/attachment_service/src/service.rs b/control_plane/attachment_service/src/service.rs index f3d97c0dfb..3f245b5255 100644 --- a/control_plane/attachment_service/src/service.rs +++ b/control_plane/attachment_service/src/service.rs @@ -8,7 +8,7 @@ use std::{ }; use anyhow::Context; -use control_plane::attachment_service::{ +use control_plane::storage_controller::{ AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse, }; use diesel::result::DatabaseErrorKind; @@ -839,7 +839,7 @@ impl Service { tenant_state.generation = Some(new_generation); } else { // This is a detach notification. We must update placement policy to avoid re-attaching - // during background scheduling/reconciliation, or during attachment service restart. + // during background scheduling/reconciliation, or during storage controller restart.
assert!(attach_req.node_id.is_none()); tenant_state.policy = PlacementPolicy::Detached; } diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 27abcb182a..86b9c0085d 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -8,11 +8,11 @@ use anyhow::{anyhow, bail, Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum}; use compute_api::spec::ComputeMode; -use control_plane::attachment_service::AttachmentService; use control_plane::endpoint::ComputeControlPlane; use control_plane::local_env::{InitForceMode, LocalEnv}; use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR}; use control_plane::safekeeper::SafekeeperNode; +use control_plane::storage_controller::StorageController; use control_plane::{broker, local_env}; use pageserver_api::controller_api::{ NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy, @@ -138,7 +138,7 @@ fn main() -> Result<()> { "start" => rt.block_on(handle_start_all(sub_args, &env)), "stop" => rt.block_on(handle_stop_all(sub_args, &env)), "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)), - "attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)), + "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)), "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)), "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)), "mappings" => handle_mappings(sub_args, &mut env), @@ -445,14 +445,14 @@ async fn handle_tenant( // If tenant ID was not specified, generate one let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate); - // We must register the tenant with the attachment service, so + // We must register the tenant with the storage controller, so // that when the pageserver restarts, it will be re-attached. - let attachment_service = AttachmentService::from_env(env); - attachment_service + let storage_controller = StorageController::from_env(env); + storage_controller .tenant_create(TenantCreateRequest { // Note that ::unsharded here isn't actually because the tenant is unsharded, its because the - // attachment service expecfs a shard-naive tenant_id in this attribute, and the TenantCreateRequest - // type is used both in attachment service (for creating tenants) and in pageserver (for creating shards) + // storage controller expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest + // type is used both in the storage controller (for creating tenants) and in the pageserver (for creating shards) new_tenant_id: TenantShardId::unsharded(tenant_id), generation: None, shard_parameters: ShardParameters { @@ -476,9 +476,9 @@ async fn handle_tenant( .context("Failed to parse postgres version from the argument string")?; // FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have - // different shards picking different start lsns. Maybe we have to teach attachment service + // different shards picking different start lsns. Maybe we have to teach the storage controller // to let shard 0 branch first and then propagate the chosen LSN to other shards.
- attachment_service + storage_controller .tenant_timeline_create( tenant_id, TimelineCreateRequest { @@ -528,8 +528,8 @@ async fn handle_tenant( let new_pageserver = get_pageserver(env, matches)?; let new_pageserver_id = new_pageserver.conf.id; - let attachment_service = AttachmentService::from_env(env); - attachment_service + let storage_controller = StorageController::from_env(env); + storage_controller .tenant_migrate(tenant_shard_id, new_pageserver_id) .await?; @@ -543,8 +543,8 @@ async fn handle_tenant( let mut tenant_synthetic_size = None; - let attachment_service = AttachmentService::from_env(env); - for shard in attachment_service.tenant_locate(tenant_id).await?.shards { + let storage_controller = StorageController::from_env(env); + for shard in storage_controller.tenant_locate(tenant_id).await?.shards { let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(shard.node_id)?); @@ -586,8 +586,8 @@ async fn handle_tenant( let tenant_id = get_tenant_id(matches, env)?; let shard_count: u8 = matches.get_one::<u8>("shard-count").cloned().unwrap_or(0); - let attachment_service = AttachmentService::from_env(env); - let result = attachment_service + let storage_controller = StorageController::from_env(env); + let result = storage_controller .tenant_split(tenant_id, shard_count) .await?; println!( @@ -613,7 +613,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local match timeline_match.subcommand() { Some(("list", list_match)) => { - // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service + // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller // where shard 0 is attached, and query there. let tenant_shard_id = get_tenant_shard_id(list_match, env)?; let timelines = pageserver.timeline_list(&tenant_shard_id).await?; @@ -633,7 +633,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local let new_timeline_id_opt = parse_timeline_id(create_match)?; let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate()); - let attachment_service = AttachmentService::from_env(env); + let storage_controller = StorageController::from_env(env); let create_req = TimelineCreateRequest { new_timeline_id, ancestor_timeline_id: None, @@ -641,7 +641,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local ancestor_start_lsn: None, pg_version: Some(pg_version), }; - let timeline_info = attachment_service + let timeline_info = storage_controller .tenant_timeline_create(tenant_id, create_req) .await?; @@ -730,7 +730,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local .transpose() .context("Failed to parse ancestor start Lsn from the request")?; let new_timeline_id = TimelineId::generate(); - let attachment_service = AttachmentService::from_env(env); + let storage_controller = StorageController::from_env(env); let create_req = TimelineCreateRequest { new_timeline_id, ancestor_timeline_id: Some(ancestor_timeline_id), @@ -738,7 +738,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local ancestor_start_lsn: start_lsn, pg_version: None, }; - let timeline_info = attachment_service + let timeline_info = storage_controller .tenant_timeline_create(tenant_id, create_req) .await?; @@ -767,7 +767,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re match sub_name { "list" => { - // TODO(sharding): this command 
shouldn't have to specify a shard ID: we should ask the attachment service + // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller // where shard 0 is attached, and query there. let tenant_shard_id = get_tenant_shard_id(sub_args, env)?; let timeline_infos = get_timeline_infos(env, &tenant_shard_id) @@ -952,21 +952,21 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re ( vec![(parsed.0, parsed.1.unwrap_or(5432))], // If caller is telling us what pageserver to use, this is not a tenant which is - // full managed by attachment service, therefore not sharded. + // fully managed by the storage controller, therefore not sharded. ShardParameters::DEFAULT_STRIPE_SIZE, ) } else { // Look up the currently attached location of the tenant, and its striping metadata, // to pass these on to postgres. - let attachment_service = AttachmentService::from_env(env); - let locate_result = attachment_service.tenant_locate(endpoint.tenant_id).await?; + let storage_controller = StorageController::from_env(env); + let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?; let pageservers = locate_result .shards .into_iter() .map(|shard| { ( Host::parse(&shard.listen_pg_addr) - .expect("Attachment service reported bad hostname"), + .expect("Storage controller reported bad hostname"), shard.listen_pg_port, ) }) @@ -1015,8 +1015,8 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re pageserver.pg_connection_config.port(), )] } else { - let attachment_service = AttachmentService::from_env(env); - attachment_service + let storage_controller = StorageController::from_env(env); + storage_controller .tenant_locate(endpoint.tenant_id) .await? .shards .into_iter() .map(|shard| { ( Host::parse(&shard.listen_pg_addr) - .expect("Attachment service reported malformed host"), + .expect("Storage controller reported malformed host"), shard.listen_pg_port, ) }) @@ -1144,8 +1144,8 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> let scheduling = subcommand_args.get_one("scheduling"); let availability = subcommand_args.get_one("availability"); - let attachment_service = AttachmentService::from_env(env); - attachment_service + let storage_controller = StorageController::from_env(env); + storage_controller .node_configure(NodeConfigureRequest { node_id: pageserver.conf.id, scheduling: scheduling.cloned(), @@ -1170,11 +1170,11 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Ok(()) } -async fn handle_attachment_service( +async fn handle_storage_controller( sub_match: &ArgMatches, env: &local_env::LocalEnv, ) -> Result<()> { - let svc = AttachmentService::from_env(env); + let svc = StorageController::from_env(env); match sub_match.subcommand() { Some(("start", _start_match)) => { if let Err(e) = svc.start().await { @@ -1194,8 +1194,8 @@ async fn handle_attachment_service( exit(1); } } - Some((sub_name, _)) => bail!("Unexpected attachment_service subcommand '{}'", sub_name), - None => bail!("no attachment_service subcommand provided"), + Some((sub_name, _)) => bail!("Unexpected storage_controller subcommand '{}'", sub_name), + None => bail!("no storage_controller subcommand provided"), } Ok(()) } @@ -1280,11 +1280,11 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> broker::start_broker_process(env).await?; - // Only 
start the attachment service if the pageserver is configured to need it + // Only start the storage controller if the pageserver is configured to need it if env.control_plane_api.is_some() { - let attachment_service = AttachmentService::from_env(env); - if let Err(e) = attachment_service.start().await { - eprintln!("attachment_service start failed: {:#}", e); + let storage_controller = StorageController::from_env(env); + if let Err(e) = storage_controller.start().await { + eprintln!("storage_controller start failed: {:#}", e); try_stop_all(env, true).await; exit(1); } @@ -1356,9 +1356,9 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) { } if env.control_plane_api.is_some() { - let attachment_service = AttachmentService::from_env(env); - if let Err(e) = attachment_service.stop(immediate).await { - eprintln!("attachment service stop failed: {e:#}"); + let storage_controller = StorageController::from_env(env); + if let Err(e) = storage_controller.stop(immediate).await { + eprintln!("storage controller stop failed: {e:#}"); } } } @@ -1618,9 +1618,9 @@ fn cli() -> Command { ) ) .subcommand( - Command::new("attachment_service") + Command::new("storage_controller") .arg_required_else_help(true) - .about("Manage attachment_service") + .about("Manage storage_controller") .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone())) .subcommand(Command::new("stop").about("Stop local pageserver") .arg(stop_mode_arg.clone())) diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index ac0a8417ae..646bc2e8bc 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -57,9 +57,9 @@ use serde::{Deserialize, Serialize}; use url::Host; use utils::id::{NodeId, TenantId, TimelineId}; -use crate::attachment_service::AttachmentService; use crate::local_env::LocalEnv; use crate::postgresql_conf::PostgresConf; +use crate::storage_controller::StorageController; use compute_api::responses::{ComputeState, ComputeStatus}; use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec}; @@ -750,17 +750,17 @@ impl Endpoint { let postgresql_conf = self.read_postgresql_conf()?; spec.cluster.postgresql_conf = Some(postgresql_conf); - // If we weren't given explicit pageservers, query the attachment service + // If we weren't given explicit pageservers, query the storage controller if pageservers.is_empty() { - let attachment_service = AttachmentService::from_env(&self.env); - let locate_result = attachment_service.tenant_locate(self.tenant_id).await?; + let storage_controller = StorageController::from_env(&self.env); + let locate_result = storage_controller.tenant_locate(self.tenant_id).await?; pageservers = locate_result .shards .into_iter() .map(|shard| { ( Host::parse(&shard.listen_pg_addr) - .expect("Attachment service reported bad hostname"), + .expect("Storage controller reported bad hostname"), shard.listen_pg_port, ) }) diff --git a/control_plane/src/lib.rs b/control_plane/src/lib.rs index bb79d36bfc..2af272f388 100644 --- a/control_plane/src/lib.rs +++ b/control_plane/src/lib.rs @@ -6,7 +6,6 @@ //! local installations. 
#![deny(clippy::undocumented_unsafe_blocks)] -pub mod attachment_service; mod background_process; pub mod broker; pub mod endpoint; @@ -14,3 +13,4 @@ pub mod local_env; pub mod pageserver; pub mod postgresql_conf; pub mod safekeeper; +pub mod storage_controller; diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 03270723a6..2e64489432 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -72,13 +72,13 @@ pub struct LocalEnv { #[serde(default)] pub safekeepers: Vec<SafekeeperConf>, - // Control plane upcall API for pageserver: if None, we will not run attachment_service. If set, this will + // Control plane upcall API for pageserver: if None, we will not run storage_controller. If set, this will // be propagated into each pageserver's configuration. #[serde(default)] pub control_plane_api: Option<Url>, - // Control plane upcall API for attachment service. If set, this will be propagated into the - // attachment service's configuration. + // Control plane upcall API for storage controller. If set, this will be propagated into the + // storage controller's configuration. #[serde(default)] pub control_plane_compute_hook_api: Option<Url>, @@ -227,10 +227,10 @@ impl LocalEnv { self.neon_distrib_dir.join("pageserver") } - pub fn attachment_service_bin(&self) -> PathBuf { - // Irrespective of configuration, attachment service binary is always + pub fn storage_controller_bin(&self) -> PathBuf { + // Irrespective of configuration, the storage controller binary is always // run from the same location as neon_local. This means that for compatibility - // tests that run old pageserver/safekeeper, they still run latest attachment service. + // tests that run old pageserver/safekeeper, they still run the latest storage controller. let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned(); neon_local_bin_dir.join("storage_controller") } diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index ae1bd60c52..021b9aca34 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -31,8 +31,8 @@ use utils::{ lsn::Lsn, }; -use crate::attachment_service::AttachmentService; use crate::local_env::PageServerConf; +use crate::storage_controller::StorageController; use crate::{background_process, local_env::LocalEnv}; /// Directory within .neon which will be used by default for LocalFs remote storage. @@ -111,7 +111,7 @@ impl PageServerNode { control_plane_api.as_str() )); - // Attachment service uses the same auth as pageserver: if JWT is enabled + // Storage controller uses the same auth as pageserver: if JWT is enabled // for us, we will also need it to talk to them. if matches!(self.conf.http_auth_type, AuthType::NeonJWT) { let jwt_token = self @@ -214,12 +214,12 @@ impl PageServerNode { // Register the node with the storage controller before starting pageserver: pageserver must be registered to // successfully call /re-attach and finish starting up. 
if register { - let attachment_service = AttachmentService::from_env(&self.env); + let storage_controller = StorageController::from_env(&self.env); let (pg_host, pg_port) = parse_host_port(&self.conf.listen_pg_addr).expect("Unable to parse listen_pg_addr"); let (http_host, http_port) = parse_host_port(&self.conf.listen_http_addr) .expect("Unable to parse listen_http_addr"); - attachment_service + storage_controller .node_register(NodeRegisterRequest { node_id: self.conf.id, listen_pg_addr: pg_host.to_string(), diff --git a/control_plane/src/attachment_service.rs b/control_plane/src/storage_controller.rs similarity index 94% rename from control_plane/src/attachment_service.rs rename to control_plane/src/storage_controller.rs index 5c97561985..c505e67770 100644 --- a/control_plane/src/attachment_service.rs +++ b/control_plane/src/storage_controller.rs @@ -24,7 +24,7 @@ use utils::{ id::{NodeId, TenantId}, }; -pub struct AttachmentService { +pub struct StorageController { env: LocalEnv, listen: String, path: Utf8PathBuf, @@ -36,7 +36,7 @@ pub struct AttachmentService { const COMMAND: &str = "storage_controller"; -const ATTACHMENT_SERVICE_POSTGRES_VERSION: u32 = 16; +const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16; #[derive(Serialize, Deserialize)] pub struct AttachHookRequest { @@ -59,7 +59,7 @@ pub struct InspectResponse { pub attachment: Option<(u32, NodeId)>, } -impl AttachmentService { +impl StorageController { pub fn from_env(env: &LocalEnv) -> Self { let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone()) .unwrap() @@ -136,27 +136,27 @@ impl AttachmentService { } fn pid_file(&self) -> Utf8PathBuf { - Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("attachment_service.pid")) + Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("storage_controller.pid")) .expect("non-Unicode path") } - /// PIDFile for the postgres instance used to store attachment service state + /// PIDFile for the postgres instance used to store storage controller state fn postgres_pid_file(&self) -> Utf8PathBuf { Utf8PathBuf::from_path_buf( self.env .base_data_dir - .join("attachment_service_postgres.pid"), + .join("storage_controller_postgres.pid"), ) .expect("non-Unicode path") } /// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl` /// - /// This usually uses ATTACHMENT_SERVICE_POSTGRES_VERSION of postgres, but will fall back + /// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back /// to other versions if that one isn't found. Some automated tests create circumstances /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`. pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> { - let prefer_versions = [ATTACHMENT_SERVICE_POSTGRES_VERSION, 15, 14]; + let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14]; for v in prefer_versions { let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap(); @@ -189,7 +189,7 @@ impl AttachmentService { /// /// Returns the database url pub async fn setup_database(&self) -> anyhow::Result<String> { - const DB_NAME: &str = "attachment_service"; + const DB_NAME: &str = "storage_controller"; let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port); let pg_bin_dir = self.get_pg_bin_dir().await?; @@ -219,10 +219,10 @@ impl AttachmentService { } pub async fn start(&self) -> anyhow::Result<()> { - // Start a vanilla Postgres process used by the attachment service for persistence. 
+ // Start a vanilla Postgres process used by the storage controller for persistence. let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone()) .unwrap() - .join("attachment_service_db"); + .join("storage_controller_db"); let pg_bin_dir = self.get_pg_bin_dir().await?; let pg_log_path = pg_data_path.join("postgres.log"); @@ -245,7 +245,7 @@ impl AttachmentService { .await?; }; - println!("Starting attachment service database..."); + println!("Starting storage controller database..."); let db_start_args = [ "-w", "-D", @@ -256,7 +256,7 @@ impl AttachmentService { ]; background_process::start_process( - "attachment_service_db", + "storage_controller_db", &self.env.base_data_dir, pg_bin_dir.join("pg_ctl").as_std_path(), db_start_args, @@ -300,7 +300,7 @@ impl AttachmentService { background_process::start_process( COMMAND, &self.env.base_data_dir, - &self.env.attachment_service_bin(), + &self.env.storage_controller_bin(), args, [( "NEON_REPO_DIR".to_string(), @@ -322,10 +322,10 @@ impl AttachmentService { pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> { background_process::stop_process(immediate, COMMAND, &self.pid_file())?; - let pg_data_path = self.env.base_data_dir.join("attachment_service_db"); + let pg_data_path = self.env.base_data_dir.join("storage_controller_db"); let pg_bin_dir = self.get_pg_bin_dir().await?; - println!("Stopping attachment service database..."); + println!("Stopping storage controller database..."); let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"]; let stop_status = Command::new(pg_bin_dir.join("pg_ctl")) .args(pg_stop_args) @@ -344,10 +344,10 @@ impl AttachmentService { // fine that stop failed. Otherwise it is an error that stop failed. const PG_STATUS_NOT_RUNNING: i32 = 3; if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() { - println!("Attachment service data base is already stopped"); + println!("Storage controller database is already stopped"); return Ok(()); } else { - anyhow::bail!("Failed to stop attachment service database: {stop_status}") + anyhow::bail!("Failed to stop storage controller database: {stop_status}") } } @@ -368,7 +368,7 @@ impl AttachmentService { } } - /// Simple HTTP request wrapper for calling into attachment service + /// Simple HTTP request wrapper for calling into the storage controller async fn dispatch( &self, method: hyper::Method, diff --git a/docs/authentication.md b/docs/authentication.md index faac7aa28e..522c5481b4 100644 --- a/docs/authentication.md +++ b/docs/authentication.md @@ -70,9 +70,9 @@ Should only be used e.g. for status check/tenant creation/list. Should only be used e.g. for status check. Currently also used for connection from any pageserver to any safekeeper. -"generations_api": Provides access to the upcall APIs served by the attachment service or the control plane. +"generations_api": Provides access to the upcall APIs served by the storage controller or the control plane. -"admin": Provides access to the control plane and admin APIs of the attachment service. +"admin": Provides access to the control plane and admin APIs of the storage controller. 
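As a sketch of how these scopes are exercised in practice, the Python fixtures renamed in this patch mint scoped tokens via `env.auth_keys.generate_token(...)` and send them to the storage controller at `env.storage_controller_api`; the exact `Authorization: Bearer` header shape below is an assumption, not taken from this diff:

```python
import requests

def call_controller(env, scope, path):
    # generate_token(scope=...) is the fixture helper used elsewhere in this patch;
    # the Bearer header format is assumed (NeonStorageController.headers() builds the real one)
    token = env.auth_keys.generate_token(scope=scope)
    resp = requests.get(f"{env.storage_controller_api}{path}", headers={"Authorization": f"Bearer {token}"})
    resp.raise_for_status()
    return resp

# e.g. the "admin" scope gates the /control/v1 and /debug/v1 routes:
# call_controller(env, TokenScope.ADMIN, "/control/v1/node")
```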
### CLI CLI generates a key pair during call to `neon_local init` with the following commands: diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 38e61239c5..c172354e9f 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -88,8 +88,6 @@ impl FromStr for NodeAvailability { } } -/// FIXME: this is a duplicate of the type in the attachment_service crate, because the -/// type needs to be defined with diesel traits in there. #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)] pub enum NodeSchedulingPolicy { Active, diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 584d5fea48..234bfa8bf9 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1014,24 +1014,24 @@ class NeonEnv: self.initial_tenant = config.initial_tenant self.initial_timeline = config.initial_timeline - # Find two adjacent ports for attachment service and its postgres DB. This + # Find two adjacent ports for the storage controller and its postgres DB. This # loop would eventually throw from get_port() if we run out of ports (extremely # unlikely): usually we find two adjacent free ports on the first iteration. while True: - self.attachment_service_port = self.port_distributor.get_port() - attachment_service_pg_port = self.port_distributor.get_port() - if attachment_service_pg_port == self.attachment_service_port + 1: + self.storage_controller_port = self.port_distributor.get_port() + storage_controller_pg_port = self.port_distributor.get_port() + if storage_controller_pg_port == self.storage_controller_port + 1: break # The URL for the pageserver to use as its control_plane_api config - self.control_plane_api: str = f"http://127.0.0.1:{self.attachment_service_port}/upcall/v1" - # The base URL of the attachment service - self.attachment_service_api: str = f"http://127.0.0.1:{self.attachment_service_port}" + self.control_plane_api: str = f"http://127.0.0.1:{self.storage_controller_port}/upcall/v1" + # The base URL of the storage controller + self.storage_controller_api: str = f"http://127.0.0.1:{self.storage_controller_port}" # For testing this with a fake HTTP server, enable passing through a URL from config self.control_plane_compute_hook_api = config.control_plane_compute_hook_api - self.attachment_service: NeonAttachmentService = NeonAttachmentService( + self.storage_controller: NeonStorageController = NeonStorageController( self, config.auth_enabled ) @@ -1113,16 +1113,16 @@ class NeonEnv: self.neon_cli.init(cfg, force=config.config_init_force) def start(self): - # Attachment service starts first, so that pageserver /re-attach calls don't + # Storage controller starts first, so that pageserver /re-attach calls don't # bounce through retries on startup - self.attachment_service.start() + self.storage_controller.start() - def attachment_service_ready(): - assert self.attachment_service.ready() is True + def storage_controller_ready(): + assert self.storage_controller.ready() is True - # Wait for attachment service readiness to prevent unnecessary post start-up + # Wait for storage controller readiness to prevent unnecessary post start-up # reconcile. 
- wait_until(30, 1, attachment_service_ready) + wait_until(30, 1, storage_controller_ready) # Start up broker, pageserver and all safekeepers futs = [] @@ -1153,7 +1153,7 @@ class NeonEnv: if ps_assert_metric_no_errors: pageserver.assert_no_metric_errors() pageserver.stop(immediate=immediate) - self.attachment_service.stop(immediate=immediate) + self.storage_controller.stop(immediate=immediate) self.broker.stop(immediate=immediate) @property @@ -1188,9 +1188,9 @@ class NeonEnv: def get_tenant_pageserver(self, tenant_id: Union[TenantId, TenantShardId]): """ Get the NeonPageserver where this tenant shard is currently attached, according - to the attachment service. + to the storage controller. """ - meta = self.attachment_service.inspect(tenant_id) + meta = self.storage_controller.inspect(tenant_id) if meta is None: return None pageserver_id = meta[1] @@ -1697,12 +1697,12 @@ class NeonCli(AbstractNeonCli): res.check_returncode() return res - def attachment_service_start(self): - cmd = ["attachment_service", "start"] + def storage_controller_start(self): + cmd = ["storage_controller", "start"] return self.raw_cli(cmd) - def attachment_service_stop(self, immediate: bool): - cmd = ["attachment_service", "stop"] + def storage_controller_stop(self, immediate: bool): + cmd = ["storage_controller", "stop"] if immediate: cmd.extend(["-m", "immediate"]) return self.raw_cli(cmd) @@ -1942,14 +1942,14 @@ class Pagectl(AbstractNeonCli): return IndexPartDump.from_json(parsed) -class AttachmentServiceApiException(Exception): +class StorageControllerApiException(Exception): def __init__(self, message, status_code: int): super().__init__(message) self.message = message self.status_code = status_code -class NeonAttachmentService(MetricsGetter): +class NeonStorageController(MetricsGetter): def __init__(self, env: NeonEnv, auth_enabled: bool): self.env = env self.running = False @@ -1957,13 +1957,13 @@ class NeonAttachmentService(MetricsGetter): def start(self): assert not self.running - self.env.neon_cli.attachment_service_start() + self.env.neon_cli.storage_controller_start() self.running = True return self - def stop(self, immediate: bool = False) -> "NeonAttachmentService": + def stop(self, immediate: bool = False) -> "NeonStorageController": if self.running: - self.env.neon_cli.attachment_service_stop(immediate) + self.env.neon_cli.storage_controller_stop(immediate) self.running = False return self @@ -1976,22 +1976,22 @@ class NeonAttachmentService(MetricsGetter): msg = res.json()["msg"] except: # noqa: E722 msg = "" - raise AttachmentServiceApiException(msg, res.status_code) from e + raise StorageControllerApiException(msg, res.status_code) from e def pageserver_api(self) -> PageserverHttpClient: """ - The attachment service implements a subset of the pageserver REST API, for mapping + The storage controller implements a subset of the pageserver REST API, for mapping per-tenant actions into per-shard actions (e.g. timeline creation). Tests should invoke those functions via the HttpClient, as an implicit check that these APIs remain compatible. 
""" auth_token = None if self.auth_enabled: auth_token = self.env.auth_keys.generate_token(scope=TokenScope.PAGE_SERVER_API) - return PageserverHttpClient(self.env.attachment_service_port, lambda: True, auth_token) + return PageserverHttpClient(self.env.storage_controller_port, lambda: True, auth_token) def request(self, method, *args, **kwargs) -> requests.Response: resp = requests.request(method, *args, **kwargs) - NeonAttachmentService.raise_api_exception(resp) + NeonStorageController.raise_api_exception(resp) return resp @@ -2004,15 +2004,15 @@ class NeonAttachmentService(MetricsGetter): return headers def get_metrics(self) -> Metrics: - res = self.request("GET", f"{self.env.attachment_service_api}/metrics") + res = self.request("GET", f"{self.env.storage_controller_api}/metrics") return parse_metrics(res.text) def ready(self) -> bool: status = None try: - resp = self.request("GET", f"{self.env.attachment_service_api}/ready") + resp = self.request("GET", f"{self.env.storage_controller_api}/ready") status = resp.status_code - except AttachmentServiceApiException as e: + except StorageControllerApiException as e: status = e.status_code if status == 503: @@ -2027,7 +2027,7 @@ class NeonAttachmentService(MetricsGetter): ) -> int: response = self.request( "POST", - f"{self.env.attachment_service_api}/debug/v1/attach-hook", + f"{self.env.storage_controller_api}/debug/v1/attach-hook", json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id}, headers=self.headers(TokenScope.ADMIN), ) @@ -2038,7 +2038,7 @@ class NeonAttachmentService(MetricsGetter): def attach_hook_drop(self, tenant_shard_id: Union[TenantId, TenantShardId]): self.request( "POST", - f"{self.env.attachment_service_api}/debug/v1/attach-hook", + f"{self.env.storage_controller_api}/debug/v1/attach-hook", json={"tenant_shard_id": str(tenant_shard_id), "node_id": None}, headers=self.headers(TokenScope.ADMIN), ) @@ -2049,7 +2049,7 @@ class NeonAttachmentService(MetricsGetter): """ response = self.request( "POST", - f"{self.env.attachment_service_api}/debug/v1/inspect", + f"{self.env.storage_controller_api}/debug/v1/inspect", json={"tenant_shard_id": str(tenant_shard_id)}, headers=self.headers(TokenScope.ADMIN), ) @@ -2070,7 +2070,7 @@ class NeonAttachmentService(MetricsGetter): log.info(f"node_register({body})") self.request( "POST", - f"{self.env.attachment_service_api}/control/v1/node", + f"{self.env.storage_controller_api}/control/v1/node", json=body, headers=self.headers(TokenScope.ADMIN), ) @@ -2078,7 +2078,7 @@ class NeonAttachmentService(MetricsGetter): def node_list(self): response = self.request( "GET", - f"{self.env.attachment_service_api}/control/v1/node", + f"{self.env.storage_controller_api}/control/v1/node", headers=self.headers(TokenScope.ADMIN), ) return response.json() @@ -2088,7 +2088,7 @@ class NeonAttachmentService(MetricsGetter): body["node_id"] = node_id self.request( "PUT", - f"{self.env.attachment_service_api}/control/v1/node/{node_id}/config", + f"{self.env.storage_controller_api}/control/v1/node/{node_id}/config", json=body, headers=self.headers(TokenScope.ADMIN), ) @@ -2118,7 +2118,7 @@ class NeonAttachmentService(MetricsGetter): response = self.request( "POST", - f"{self.env.attachment_service_api}/v1/tenant", + f"{self.env.storage_controller_api}/v1/tenant", json=body, headers=self.headers(TokenScope.PAGE_SERVER_API), ) @@ -2130,7 +2130,7 @@ class NeonAttachmentService(MetricsGetter): """ response = self.request( "GET", - 
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate", + f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/locate", headers=self.headers(TokenScope.ADMIN), ) body = response.json() @@ -2140,7 +2140,7 @@ def tenant_shard_split(self, tenant_id: TenantId, shard_count: int) -> list[TenantShardId]: response = self.request( "PUT", - f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/shard_split", + f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/shard_split", json={"new_shard_count": shard_count}, headers=self.headers(TokenScope.ADMIN), ) @@ -2152,7 +2152,7 @@ def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int): self.request( "PUT", - f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_shard_id}/migrate", + f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_shard_id}/migrate", json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id}, headers=self.headers(TokenScope.ADMIN), ) @@ -2165,12 +2165,12 @@ """ self.request( "POST", - f"{self.env.attachment_service_api}/debug/v1/consistency_check", + f"{self.env.storage_controller_api}/debug/v1/consistency_check", headers=self.headers(TokenScope.ADMIN), ) - log.info("Attachment service passed consistency check") + log.info("Storage controller passed consistency check") - def __enter__(self) -> "NeonAttachmentService": + def __enter__(self) -> "NeonStorageController": return self def __exit__( @@ -2401,7 +2401,7 @@ class NeonPageserver(PgProtocol): """ client = self.http_client() if generation is None: - generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id) + generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id) return client.tenant_attach( tenant_id, config, @@ -2410,14 +2410,14 @@ class NeonPageserver(PgProtocol): ) def tenant_detach(self, tenant_id: TenantId): - self.env.attachment_service.attach_hook_drop(tenant_id) + self.env.storage_controller.attach_hook_drop(tenant_id) client = self.http_client() return client.tenant_detach(tenant_id) def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs): if config["mode"].startswith("Attached") and "generation" not in config: - config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id) + config["generation"] = self.env.storage_controller.attach_hook_issue(tenant_id, self.id) client = self.http_client() return client.tenant_location_conf(tenant_id, config, **kwargs) @@ -2441,14 +2441,14 @@ class NeonPageserver(PgProtocol): generation: Optional[int] = None, ) -> TenantId: if generation is None: - generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id) + generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id) client = self.http_client(auth_token=auth_token) return client.tenant_create(tenant_id, conf, generation=generation) def tenant_load(self, tenant_id: TenantId): client = self.http_client() return client.tenant_load( - tenant_id, generation=self.env.attachment_service.attach_hook_issue(tenant_id, self.id) + tenant_id, generation=self.env.storage_controller.attach_hook_issue(tenant_id, self.id) ) @@ -3907,7 +3907,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint psql_path = os.path.join(pg_bin.pg_bin_path, "psql") - pageserver_id = 
env.attachment_service.locate(endpoint.tenant_id)[0]["node_id"] + pageserver_id = env.storage_controller.locate(endpoint.tenant_id)[0]["node_id"] cmd = rf""" {psql_path} \ --no-psqlrc \ @@ -3994,7 +3994,7 @@ def tenant_get_shards( us to figure out the shards for a tenant. If the caller provides `pageserver_id`, it will be used for all shards, even - if the shard is indicated by attachment service to be on some other pageserver. + if the shard is indicated by the storage controller to be on some other pageserver. Caller should over the response to apply their per-pageserver action to each shard @@ -4010,7 +4010,7 @@ def tenant_get_shards( TenantShardId.parse(s["shard_id"]), override_pageserver or env.get_pageserver(s["node_id"]), ) - for s in env.attachment_service.locate(tenant_id) + for s in env.storage_controller.locate(tenant_id) ] else: # Assume an unsharded tenant diff --git a/test_runner/fixtures/pageserver/many_tenants.py b/test_runner/fixtures/pageserver/many_tenants.py index bbb4ccee5b..f47a3ea043 100644 --- a/test_runner/fixtures/pageserver/many_tenants.py +++ b/test_runner/fixtures/pageserver/many_tenants.py @@ -43,7 +43,7 @@ def single_timeline( log.info("detach template tenant form pageserver") env.pageserver.tenant_detach(template_tenant) env.pageserver.allowed_errors.append( - # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely + # tenant detach causes this because the underlying attach-hook removes the tenant from the storage controller entirely ".*Dropped remote consistent LSN updates.*", ) diff --git a/test_runner/performance/pageserver/interactive/test_many_small_tenants.py b/test_runner/performance/pageserver/interactive/test_many_small_tenants.py index 3fb28ace46..0ff9c8fdaa 100644 --- a/test_runner/performance/pageserver/interactive/test_many_small_tenants.py +++ b/test_runner/performance/pageserver/interactive/test_many_small_tenants.py @@ -56,7 +56,7 @@ def setup_env( template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True) env.pageserver.tenant_detach(template_tenant) env.pageserver.allowed_errors.append( - # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely + # tenant detach causes this because the underlying attach-hook removes the tenant from the storage controller entirely ".*Dropped remote consistent LSN updates.*", ) env.pageserver.tenant_attach(template_tenant, config) diff --git a/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py b/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py index 921b7c5b76..c98fa44b1a 100644 --- a/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py +++ b/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py @@ -92,7 +92,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int): template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True) env.pageserver.tenant_detach(template_tenant) env.pageserver.allowed_errors.append( - # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely + # tenant detach causes this because the underlying attach-hook removes the tenant from the storage controller entirely ".*Dropped remote consistent LSN updates.*", ) env.pageserver.tenant_attach(template_tenant, config) diff --git a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py 
b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py index 8cd3569ea5..1a0012397c 100644 --- a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py +++ b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py @@ -114,7 +114,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int): template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True) env.pageserver.tenant_detach(template_tenant) env.pageserver.allowed_errors.append( - # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely + # tenant detach causes this because the underlying attach-hook removes the tenant from the storage controller entirely ".*Dropped remote consistent LSN updates.*", ) env.pageserver.tenant_attach(template_tenant, config) diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 72173dc2a7..9e3f602237 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -56,12 +56,12 @@ def measure_recovery_time(env: NeonCompare): # Delete the Tenant in the pageserver: this will drop local and remote layers, such that # when we "create" the Tenant again, we will replay the WAL from the beginning. # - # This is a "weird" thing to do, and can confuse the attachment service as we're re-using + # This is a "weird" thing to do, and can confuse the storage controller as we're re-using # the same tenant ID for a tenant that is logically different from the pageserver's point # of view, but the same as far as the safekeeper/WAL is concerned. To work around that, # we will explicitly create the tenant in the same generation that it was previously # attached in. - attach_status = env.env.attachment_service.inspect(tenant_shard_id=env.tenant) + attach_status = env.env.storage_controller.inspect(tenant_shard_id=env.tenant) assert attach_status is not None (attach_gen, _) = attach_status diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 7fbce6a10c..3058926b25 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -137,7 +137,7 @@ def test_no_config(positive_env: NeonEnv, content_type: Optional[str]): ps_http.tenant_detach(tenant_id) assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()] - body = {"generation": env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)} + body = {"generation": env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)} ps_http.post( f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach", diff --git a/test_runner/regress/test_change_pageserver.py b/test_runner/regress/test_change_pageserver.py index adb67a579e..97ab69049d 100644 --- a/test_runner/regress/test_change_pageserver.py +++ b/test_runner/regress/test_change_pageserver.py @@ -85,9 +85,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder): # the endpoint. Whereas the previous reconfiguration was like a healthy migration, this # is more like what happens in an unexpected pageserver failure. 
# - # Since we're dual-attached, need to tip-off attachment service to treat the one we're + # Since we're dual-attached, need to tip off the storage controller to treat the one we're # about to start as the attached pageserver - env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[0].id) + env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[0].id) env.pageservers[0].start() env.pageservers[1].stop() @@ -97,9 +97,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder): assert fetchone() == (100000,) env.pageservers[0].stop() - # Since we're dual-attached, need to tip-off attachment service to treat the one we're + # Since we're dual-attached, need to tip off the storage controller to treat the one we're # about to start as the attached pageserver - env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[1].id) + env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[1].id) env.pageservers[1].start() # Test a (former) bug where a child process spins without updating its connection string diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 0ea76d447e..618ac63785 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -133,7 +133,7 @@ def test_create_snapshot( for sk in env.safekeepers: sk.stop() env.pageserver.stop() - env.attachment_service.stop() + env.storage_controller.stop() # Directory `compatibility_snapshot_dir` is uploaded to S3 in a workflow, keep the name in sync with it compatibility_snapshot_dir = ( diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py index abdebb6d79..ca4295c5cb 100644 --- a/test_runner/regress/test_layers_from_future.py +++ b/test_runner/regress/test_layers_from_future.py @@ -159,7 +159,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder): time.sleep(1.1) # so that we can use change in pre_stat.st_mtime to detect overwrites def get_generation_number(): - attachment = env.attachment_service.inspect(tenant_id) + attachment = env.storage_controller.inspect(tenant_id) assert attachment is not None return attachment[0] diff --git a/test_runner/regress/test_neon_cli.py b/test_runner/regress/test_neon_cli.py index 16d120e24a..cb69f0ef39 100644 --- a/test_runner/regress/test_neon_cli.py +++ b/test_runner/regress/test_neon_cli.py @@ -133,7 +133,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder): # Stop default ps/sk env.neon_cli.pageserver_stop(env.pageserver.id) env.neon_cli.safekeeper_stop() - env.neon_cli.attachment_service_stop(False) + env.neon_cli.storage_controller_stop(False) # Keep NeonEnv state up to date, it usually owns starting/stopping services env.pageserver.running = False @@ -175,7 +175,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder): env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 2) # Stop this to get out of the way of the following `start` - env.neon_cli.attachment_service_stop(False) + env.neon_cli.storage_controller_stop(False) # Default start res = env.neon_cli.raw_cli(["start"]) diff --git a/test_runner/regress/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py index e29db1e252..877deee08f 100644 --- a/test_runner/regress/test_pageserver_api.py +++ b/test_runner/regress/test_pageserver_api.py @@ -73,7 +73,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient): # create new tenant and check it is also there 
tenant_id = TenantId.generate() client.tenant_create( - tenant_id, generation=env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id) + tenant_id, generation=env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id) ) assert tenant_id in {TenantId(t["id"]) for t in client.tenant_list()} diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index 89fc48a49f..d1acb9817e 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -203,7 +203,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder): env.broker.try_start() for sk in env.safekeepers: sk.start() - env.attachment_service.start() + env.storage_controller.start() env.pageserver.start(overrides=('--pageserver-config-override=control_plane_api=""',)) @@ -285,7 +285,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_pageservers = 2 env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF) - attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"] + attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"] main_pageserver = env.get_pageserver(attached_to_id) other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0] @@ -310,7 +310,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder): # Now advance the generation in the control plane: subsequent validations # from the running pageserver will fail. No more deletions should happen. - env.attachment_service.attach_hook_issue(env.initial_tenant, other_pageserver.id) + env.storage_controller.attach_hook_issue(env.initial_tenant, other_pageserver.id) generate_uploads_and_deletions(env, init=False, pageserver=main_pageserver) assert_deletion_queue(ps_http, lambda n: n > 0) @@ -366,7 +366,7 @@ def test_deletion_queue_recovery( neon_env_builder.num_pageservers = 2 env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF) - attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"] + attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"] main_pageserver = env.get_pageserver(attached_to_id) other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0] @@ -428,7 +428,7 @@ def test_deletion_queue_recovery( if keep_attachment == KeepAttachment.LOSE: some_other_pageserver = other_pageserver.id - env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver) + env.storage_controller.attach_hook_issue(env.initial_tenant, some_other_pageserver) main_pageserver.start() @@ -494,7 +494,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): ) # Simulate a major incident: the control plane goes offline - env.attachment_service.stop() + env.storage_controller.stop() # Remember how many validations had happened before the control plane went offline validated = get_deletion_queue_validated(ps_http) @@ -525,7 +525,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): assert get_deletion_queue_executed(ps_http) == 0 # When the control plane comes back up, normal service should resume - env.attachment_service.start() + env.storage_controller.start() ps_http.deletion_queue_flush(execute=True) assert get_deletion_queue_depth(ps_http) == 0 diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 8ba9d767dd..79145f61b3 100644 --- 
a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -157,7 +157,7 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): workload.churn_rows(rng.randint(128, 256), pageserver.id) workload.validate(pageserver.id) elif last_state_ps[0].startswith("Attached"): - # The `attachment_service` will only re-attach on startup when a pageserver was the + # The `storage_controller` will only re-attach on startup when a pageserver was the # holder of the latest generation: otherwise the pageserver will revert to detached # state if it was running attached with a stale generation last_state[pageserver.id] = ("Detached", None) @@ -182,12 +182,12 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int): generation = last_state_ps[1] else: # Switch generations, while also jumping between attached states - generation = env.attachment_service.attach_hook_issue( + generation = env.storage_controller.attach_hook_issue( tenant_id, pageserver.id ) latest_attached = pageserver.id else: - generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id) + generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver.id) latest_attached = pageserver.id else: generation = None @@ -273,7 +273,7 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder): # Encourage the new location to download while still in secondary mode pageserver_b.http_client().tenant_secondary_download(tenant_id) - migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id) + migrated_generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver_b.id) log.info(f"Acquired generation {migrated_generation} for destination pageserver") assert migrated_generation == initial_generation + 1 @@ -436,7 +436,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): remote_storage_kind=RemoteStorageKind.MOCK_S3, ) env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF) - assert env.attachment_service is not None + assert env.storage_controller is not None assert isinstance(env.pageserver_remote_storage, S3Storage) # Satisfy linter tenant_id = env.initial_tenant diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 06c13cc07d..05f769b0e3 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -169,7 +169,7 @@ def test_remote_storage_backup_and_restore( # Ensure that even though the tenant is broken, retrying the attachment fails with pytest.raises(Exception, match="Tenant state is Broken"): # Use same generation as in previous attempt - gen_state = env.attachment_service.inspect(tenant_id) + gen_state = env.storage_controller.inspect(tenant_id) assert gen_state is not None generation = gen_state[0] env.pageserver.tenant_attach(tenant_id, generation=generation) @@ -355,7 +355,7 @@ def test_remote_storage_upload_queue_retries( env.pageserver.stop(immediate=True) env.endpoints.stop_all() - # We are about to forcibly drop local dirs. Attachment service will increment generation in re-attach before + # We are about to forcibly drop local dirs. The storage controller will increment generation in re-attach before # we later increment when actually attaching it again, leading to skipping a generation and potentially getting # these warnings if there was a durable but un-executed deletion list at time of restart. 
     env.pageserver.allowed_errors.extend(
diff --git a/test_runner/regress/test_s3_restore.py b/test_runner/regress/test_s3_restore.py
index aaa33f0bcb..611bd1c2a2 100644
--- a/test_runner/regress/test_s3_restore.py
+++ b/test_runner/regress/test_s3_restore.py
@@ -80,7 +80,7 @@ def test_tenant_s3_restore(
     assert (
         ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0
     ), "tenant removed before deletion was issued"
-    env.attachment_service.attach_hook_drop(tenant_id)
+    env.storage_controller.attach_hook_drop(tenant_id)

     tenant_path = env.pageserver.tenant_dir(tenant_id)
     assert not tenant_path.exists()
@@ -103,7 +103,7 @@ def test_tenant_s3_restore(
         tenant_id, timestamp=ts_before_deletion, done_if_after=ts_after_deletion
     )

-    generation = env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
+    generation = env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)
     ps_http.tenant_attach(tenant_id, generation=generation)
     env.pageserver.quiesce_tenants()
diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py
index 57c8d1f849..1b96cd6a80 100644
--- a/test_runner/regress/test_sharding.py
+++ b/test_runner/regress/test_sharding.py
@@ -43,7 +43,7 @@ def test_sharding_smoke(
     tenant_id = env.initial_tenant

     pageservers = dict((int(p.id), p) for p in env.pageservers)
-    shards = env.attachment_service.locate(tenant_id)
+    shards = env.storage_controller.locate(tenant_id)

     def get_sizes():
         sizes = {}
@@ -86,7 +86,7 @@ def test_sharding_smoke(
     )
     assert timelines == {env.initial_timeline, timeline_b}

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_split_unsharded(
@@ -102,7 +102,7 @@ def test_sharding_split_unsharded(

     # Check that we created with an unsharded TenantShardId: this is the default,
     # but check it in case we change the default in future
-    assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 0)) is not None
+    assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 0)) is not None

     workload = Workload(env, tenant_id, timeline_id, branch_name="main")
     workload.init()
@@ -110,15 +110,15 @@ def test_sharding_split_unsharded(
     workload.validate()

     # Split one shard into two
-    env.attachment_service.tenant_shard_split(tenant_id, shard_count=2)
+    env.storage_controller.tenant_shard_split(tenant_id, shard_count=2)

     # Check we got the shard IDs we expected
-    assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 2)) is not None
-    assert env.attachment_service.inspect(TenantShardId(tenant_id, 1, 2)) is not None
+    assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 2)) is not None
+    assert env.storage_controller.inspect(TenantShardId(tenant_id, 1, 2)) is not None

     workload.validate()

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_split_smoke(
@@ -161,7 +161,7 @@ def test_sharding_split_smoke(
     workload.write_rows(256)

     # Note which pageservers initially hold a shard after tenant creation
-    pre_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
+    pre_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]

     # For pageservers holding a shard, validate their ingest statistics
     # reflect a proper splitting of the WAL.
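
For readers skimming the rename: resolving shard placement through the controller's `locate()` API is the pattern these tests lean on throughout. A minimal sketch, assuming the `NeonEnv` fixture and the `locate()` response shape used in these hunks (one entry per shard, with `node_id` and `shard_id` keys); the helper name is illustrative, not part of the fixtures:

    from collections import defaultdict

    def shards_per_pageserver(env, tenant_id):
        # Ask the storage controller where each shard of the tenant is
        # attached, then count attachments per pageserver node id.
        counts = defaultdict(int)
        for loc in env.storage_controller.locate(tenant_id):
            counts[loc["node_id"]] += 1
        return dict(counts)

This is essentially what `get_node_shard_counts` in test_sharding_service.py below does across a set of tenants.
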
@@ -213,9 +213,9 @@ def test_sharding_split_smoke(
     # Before split, old shards exist
     assert shards_on_disk(old_shard_ids)

-    env.attachment_service.tenant_shard_split(tenant_id, shard_count=split_shard_count)
+    env.storage_controller.tenant_shard_split(tenant_id, shard_count=split_shard_count)

-    post_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
+    post_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]
     # We should have split into 8 shards, on the same 4 pageservers we started on.
     assert len(post_split_pageserver_ids) == split_shard_count
     assert len(set(post_split_pageserver_ids)) == shard_count
@@ -261,7 +261,7 @@ def test_sharding_split_smoke(
     # Check that we didn't do any spurious reconciliations.
     # Total number of reconciles should have been one per original shard, plus
     # one for each shard that was migrated.
-    reconcile_ok = env.attachment_service.get_metric_value(
+    reconcile_ok = env.storage_controller.get_metric_value(
         "storage_controller_reconcile_complete_total", filter={"status": "ok"}
     )
     assert reconcile_ok == shard_count + split_shard_count // 2
@@ -269,19 +269,19 @@ def test_sharding_split_smoke(
     # Check that no cancelled or errored reconciliations occurred: this test does no
     # failure injection and should run clean.
     assert (
-        env.attachment_service.get_metric_value(
+        env.storage_controller.get_metric_value(
             "storage_controller_reconcile_complete_total", filter={"status": "cancel"}
         )
         is None
     )
     assert (
-        env.attachment_service.get_metric_value(
+        env.storage_controller.get_metric_value(
             "storage_controller_reconcile_complete_total", filter={"status": "error"}
         )
         is None
     )

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

     # Validate pageserver state
     shards_exist: list[TenantShardId] = []
@@ -360,7 +360,7 @@ def test_sharding_ingest(
     huge_layer_count = 0

     # Inspect the resulting layer map, count how many layers are undersized.
-    for shard in env.attachment_service.locate(tenant_id):
+    for shard in env.storage_controller.locate(tenant_id):
         pageserver = env.get_pageserver(shard["node_id"])
         shard_id = shard["shard_id"]
         layer_map = pageserver.http_client().layer_map_info(shard_id, timeline_id)
diff --git a/test_runner/regress/test_sharding_service.py b/test_runner/regress/test_sharding_service.py
index aecc244a47..6b7cd9d829 100644
--- a/test_runner/regress/test_sharding_service.py
+++ b/test_runner/regress/test_sharding_service.py
@@ -6,10 +6,10 @@ from typing import Any, Dict, List, Union
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
-    AttachmentServiceApiException,
     NeonEnv,
     NeonEnvBuilder,
     PgBin,
+    StorageControllerApiException,
     TokenScope,
 )
 from fixtures.pageserver.http import PageserverHttpClient
@@ -36,7 +36,7 @@ from werkzeug.wrappers.response import Response
 def get_node_shard_counts(env: NeonEnv, tenant_ids):
     counts: defaultdict[str, int] = defaultdict(int)
     for tid in tenant_ids:
-        for shard in env.attachment_service.locate(tid):
+        for shard in env.storage_controller.locate(tid):
             counts[shard["node_id"]] += 1
     return counts

@@ -62,20 +62,20 @@ def test_sharding_service_smoke(
     # Start services by hand so that we can skip a pageserver (this will start + register later)
     env.broker.try_start()
-    env.attachment_service.start()
+    env.storage_controller.start()
     env.pageservers[0].start()
     env.pageservers[1].start()
     for sk in env.safekeepers:
         sk.start()

     # The pageservers we started should have registered with the sharding service on startup
-    nodes = env.attachment_service.node_list()
+    nodes = env.storage_controller.node_list()
     assert len(nodes) == 2
     assert set(n["id"] for n in nodes) == {env.pageservers[0].id, env.pageservers[1].id}

     # Starting an additional pageserver should register successfully
     env.pageservers[2].start()
-    nodes = env.attachment_service.node_list()
+    nodes = env.storage_controller.node_list()
     assert len(nodes) == 3
     assert set(n["id"] for n in nodes) == {ps.id for ps in env.pageservers}

@@ -99,22 +99,22 @@ def test_sharding_service_smoke(
     # Creating and deleting timelines should work, using identical API to pageserver
     timeline_crud_tenant = next(iter(tenant_ids))
     timeline_id = TimelineId.generate()
-    env.attachment_service.pageserver_api().timeline_create(
+    env.storage_controller.pageserver_api().timeline_create(
         pg_version=PgVersion.NOT_SET, tenant_id=timeline_crud_tenant, new_timeline_id=timeline_id
     )
-    timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
+    timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
     assert len(timelines) == 2
     assert timeline_id in set(TimelineId(t["timeline_id"]) for t in timelines)
     # virtual_ps_http.timeline_delete(tenant_id=timeline_crud_tenant, timeline_id=timeline_id)
     timeline_delete_wait_completed(
-        env.attachment_service.pageserver_api(), timeline_crud_tenant, timeline_id
+        env.storage_controller.pageserver_api(), timeline_crud_tenant, timeline_id
     )
-    timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
+    timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
     assert len(timelines) == 1
     assert timeline_id not in set(TimelineId(t["timeline_id"]) for t in timelines)

     # Marking a pageserver offline should migrate tenants away from it.
-    env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
+    env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})

     def node_evacuated(node_id: int) -> None:
         counts = get_node_shard_counts(env, tenant_ids)
@@ -124,7 +124,7 @@ def test_sharding_service_smoke(

     # Marking pageserver active should not migrate anything to it
     # immediately
-    env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Active"})
+    env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Active"})
     time.sleep(1)
     assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 0

@@ -144,13 +144,13 @@ def test_sharding_service_smoke(

     # Delete all the tenants
     for tid in tenant_ids:
-        tenant_delete_wait_completed(env.attachment_service.pageserver_api(), tid, 10)
+        tenant_delete_wait_completed(env.storage_controller.pageserver_api(), tid, 10)

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

     # Set a scheduling policy on one node, create all the tenants, observe
     # that the scheduling policy is respected.
-    env.attachment_service.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
+    env.storage_controller.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})

     # Create some fresh tenants
     tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
@@ -163,7 +163,7 @@ def test_sharding_service_smoke(
     assert counts[env.pageservers[0].id] == tenant_shard_count // 2
     assert counts[env.pageservers[2].id] == tenant_shard_count // 2

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_node_status_after_restart(
@@ -173,28 +173,28 @@ def test_node_status_after_restart(
     env = neon_env_builder.init_start()

     # Initially we have two online pageservers
-    nodes = env.attachment_service.node_list()
+    nodes = env.storage_controller.node_list()
     assert len(nodes) == 2

     env.pageservers[1].stop()
-    env.attachment_service.stop()
-    env.attachment_service.start()
+    env.storage_controller.stop()
+    env.storage_controller.start()

     def is_ready():
-        assert env.attachment_service.ready() is True
+        assert env.storage_controller.ready() is True

     wait_until(30, 1, is_ready)

     # We loaded nodes from database on restart
-    nodes = env.attachment_service.node_list()
+    nodes = env.storage_controller.node_list()
     assert len(nodes) == 2

     # We should still be able to create a tenant, because the pageserver which is still online
     # should have had its availability state set to Active.
-    env.attachment_service.tenant_create(TenantId.generate())
+    env.storage_controller.tenant_create(TenantId.generate())

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_service_passthrough(
@@ -208,9 +208,9 @@ def test_sharding_service_passthrough(
     neon_env_builder.num_pageservers = 2
     env = neon_env_builder.init_start()

-    # We will talk to attachment service as if it was a pageserver, using the pageserver
+    # We will talk to the storage controller as if it were a pageserver, using the pageserver
     # HTTP client
-    client = PageserverHttpClient(env.attachment_service_port, lambda: True)
+    client = PageserverHttpClient(env.storage_controller_port, lambda: True)
     timelines = client.timeline_list(tenant_id=env.initial_tenant)
     assert len(timelines) == 1

@@ -221,22 +221,22 @@ def test_sharding_service_passthrough(
     }
     assert status["state"]["slug"] == "Active"

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()
     tenant_a = env.initial_tenant
     tenant_b = TenantId.generate()
-    env.attachment_service.tenant_create(tenant_b)
+    env.storage_controller.tenant_create(tenant_b)
     env.pageserver.tenant_detach(tenant_a)

     # TODO: extend this test to use multiple pageservers, and check that locations don't move around
     # on restart.

-    # Attachment service restart
-    env.attachment_service.stop()
-    env.attachment_service.start()
+    # Storage controller restart
+    env.storage_controller.stop()
+    env.storage_controller.start()

     observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())

@@ -255,7 +255,7 @@ def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
     assert tenant_a not in observed
     assert tenant_b in observed

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 @pytest.mark.parametrize("warm_up", [True, False])
@@ -271,7 +271,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
     # Start services by hand so that we can skip registration on one of the pageservers
     env = neon_env_builder.init_configs()
     env.broker.try_start()
-    env.attachment_service.start()
+    env.storage_controller.start()

     # This is the pageserver where we'll initially create the tenant. Run it in emergency
     # mode so that it doesn't talk to storage controller, and do not register it.
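
A sketch of the onboarding flow that the following hunks rename, using only calls that appear in this test; `origin_ps` and `virtual_ps_http` are the test's own fixtures, and the `tenant_location_conf` payload here is abbreviated, with field names that are assumptions for illustration (the test body passes a fuller dict):

    tenant_id = TenantId.generate()
    generation = 123

    # Create the tenant directly on the origin pageserver, bypassing the
    # storage controller entirely.
    origin_ps.http_client().tenant_create(tenant_id, generation=generation)

    # Optionally pre-warm the destination through the controller's
    # pageserver-compatible API before cutting over.
    virtual_ps_http.tenant_secondary_download(tenant_id)

    # Onboard: hand the tenant to the storage controller, which takes over
    # generation management from here on.
    generation += 1
    virtual_ps_http.tenant_location_conf(
        tenant_id,
        {"mode": "AttachedSingle", "generation": generation, "tenant_conf": {}},
    )
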
@@ -286,12 +286,12 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
     # will be attached after onboarding
     env.pageservers[1].start(register=True)
     dest_ps = env.pageservers[1]
-    virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
+    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)

     for sk in env.safekeepers:
         sk.start()

-    # Create a tenant directly via pageserver HTTP API, skipping the attachment service
+    # Create a tenant directly via pageserver HTTP API, skipping the storage controller
     tenant_id = TenantId.generate()
     generation = 123
     origin_ps.http_client().tenant_create(tenant_id, generation=generation)
@@ -324,7 +324,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:

         virtual_ps_http.tenant_secondary_download(tenant_id)

-    # Call into attachment service to onboard the tenant
+    # Call into the storage controller to onboard the tenant
     generation += 1
     virtual_ps_http.tenant_location_conf(
         tenant_id,
@@ -347,7 +347,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
         },
     )

-    # As if doing a live migration, call into the attachment service to
+    # As if doing a live migration, call into the storage controller to
     # set it to AttachedSingle: this is a no-op, but we test it because the
     # cloud control plane may call this for symmetry with live migration to
     # an individual pageserver
@@ -375,8 +375,8 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
     assert dest_tenants[0]["generation"] == generation + 1

     # The onboarded tenant should survive a restart of sharding service
-    env.attachment_service.stop()
-    env.attachment_service.start()
+    env.storage_controller.stop()
+    env.storage_controller.start()

     # The onboarded tenant should survive a restart of pageserver
     dest_ps.stop()
     dest_ps.start()
@@ -407,7 +407,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
     dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
     assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_service_compute_hook(
@@ -419,7 +419,7 @@ def test_sharding_service_compute_hook(
     Test that the sharding service calls out to the configured HTTP endpoint on attachment changes
     """
-    # We will run two pageserver to migrate and check that the attachment service sends notifications
+    # We will run two pageservers to migrate and check that the storage controller sends notifications
     # when migrating.
     neon_env_builder.num_pageservers = 2
     (host, port) = httpserver_listen_address
@@ -450,7 +450,7 @@ def test_sharding_service_compute_hook(
     }
     assert notifications[0] == expect

-    env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
+    env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})

     def node_evacuated(node_id: int) -> None:
         counts = get_node_shard_counts(env, [env.initial_tenant])
@@ -473,8 +473,8 @@ def test_sharding_service_compute_hook(
     wait_until(20, 0.25, received_migration_notification)

     # When we restart, we should re-emit notifications for all tenants
-    env.attachment_service.stop()
-    env.attachment_service.start()
+    env.storage_controller.stop()
+    env.storage_controller.start()

     def received_restart_notification():
         assert len(notifications) == 3
@@ -483,7 +483,7 @@ def test_sharding_service_compute_hook(
     wait_until(10, 1, received_restart_notification)

     # Splitting a tenant should cause its stripe size to become visible in the compute notification
-    env.attachment_service.tenant_shard_split(env.initial_tenant, shard_count=2)
+    env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=2)
     expect = {
         "tenant_id": str(env.initial_tenant),
         "stripe_size": 32768,
@@ -499,7 +499,7 @@ def test_sharding_service_compute_hook(

     wait_until(10, 1, received_split_notification)

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
@@ -512,55 +512,55 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()

     tenant_id = TenantId.generate()
-    env.attachment_service.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
+    env.storage_controller.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)

     # Check that the consistency check passes on a freshly setup system
-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

-    # These APIs are intentionally not implemented as methods on NeonAttachmentService, as
+    # These APIs are intentionally not implemented as methods on NeonStorageController, as
     # they're just for use in unanticipated circumstances.

     # Initial tenant (1 shard) and the one we just created (2 shards) should be visible
-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "GET",
-        f"{env.attachment_service_api}/debug/v1/tenant",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/tenant",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )
     assert len(response.json()) == 3

     # Scheduler should report the expected nodes and shard counts
-    response = env.attachment_service.request(
-        "GET", f"{env.attachment_service_api}/debug/v1/scheduler"
+    response = env.storage_controller.request(
+        "GET", f"{env.storage_controller_api}/debug/v1/scheduler"
     )
     # Two nodes, in a dict of node_id->node
     assert len(response.json()["nodes"]) == 2
     assert sum(v["shard_count"] for v in response.json()["nodes"].values()) == 3
     assert all(v["may_schedule"] for v in response.json()["nodes"].values())

-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "POST",
-        f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/node/{env.pageservers[1].id}/drop",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )
-    assert len(env.attachment_service.node_list()) == 1
+    assert len(env.storage_controller.node_list()) == 1

-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "POST",
-        f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/tenant/{tenant_id}/drop",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )

     # Tenant drop should be reflected in dump output
-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "GET",
-        f"{env.attachment_service_api}/debug/v1/tenant",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/tenant",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )
     assert len(response.json()) == 1

     # Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're
     # meant to be unclean wrt the pageserver state, but not leave a broken storage controller behind.
-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_service_s3_time_travel_recovery(
@@ -584,10 +584,10 @@ def test_sharding_service_s3_time_travel_recovery(
     neon_env_builder.num_pageservers = 1
     env = neon_env_builder.init_start()

-    virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
+    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)

     tenant_id = TenantId.generate()
-    env.attachment_service.tenant_create(
+    env.storage_controller.tenant_create(
         tenant_id,
         shard_count=2,
         shard_stripe_size=8192,
     )
     # Check that the consistency check passes
-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

     branch_name = "main"
     timeline_id = env.neon_cli.create_timeline(
@@ -670,28 +670,28 @@ def test_sharding_service_s3_time_travel_recovery(
     with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
         endpoint.safe_psql("SELECT * FROM created_foo;")

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.auth_enabled = True
     env = neon_env_builder.init_start()

-    svc = env.attachment_service
-    api = env.attachment_service_api
+    svc = env.storage_controller
+    api = env.storage_controller_api

     tenant_id = TenantId.generate()
     body: Dict[str, Any] = {"new_tenant_id": str(tenant_id)}

     # No token
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Unauthorized: missing authorization header",
     ):
-        svc.request("POST", f"{env.attachment_service_api}/v1/tenant", json=body)
+        svc.request("POST", f"{env.storage_controller_api}/v1/tenant", json=body)

     # Token with incorrect scope
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Forbidden: JWT authentication error",
     ):
         svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN))
@@ -703,14 +703,14 @@
     # No token
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Unauthorized: missing authorization header",
     ):
         svc.request("GET", f"{api}/debug/v1/tenant")

     # Token with incorrect scope
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Forbidden: JWT authentication error",
     ):
         svc.request(
@@ -719,14 +719,14 @@
     # No token
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Unauthorized: missing authorization header",
     ):
         svc.request("POST", f"{api}/upcall/v1/re-attach")

     # Token with incorrect scope
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Forbidden: JWT authentication error",
     ):
         svc.request(
@@ -743,7 +743,7 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()
     tenant_id = env.initial_tenant

-    http = env.attachment_service.pageserver_api()
+    http = env.storage_controller.pageserver_api()

     default_value = "7days"
     new_value = "1h"
@@ -769,4 +769,4 @@
     assert readback_ps.effective_config["pitr_interval"] == default_value
     assert "pitr_interval" not in readback_ps.tenant_specific_overrides

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()
diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py
index cbf7059c92..205ca18050 100644
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -1011,7 +1011,7 @@ def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
     resp = client.tenant_status(eager_tenant)
     assert resp["state"]["slug"] == "Active"

-    gen = env.attachment_service.attach_hook_issue(eager_tenant, env.pageserver.id)
+    gen = env.storage_controller.attach_hook_issue(eager_tenant, env.pageserver.id)
     client.tenant_location_conf(
         eager_tenant,
         {
@@ -1071,7 +1071,7 @@ def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_met
     # attach, it will consume the only permit because logical size calculation
     # is paused.

-    gen = env.attachment_service.attach_hook_issue(lazy_tenant, env.pageserver.id)
+    gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)
     client.tenant_location_conf(
         lazy_tenant,
         {
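
A closing note on the pattern these final hunks exercise: after the rename, tests obtain a generation from the storage controller's attach hook and only then configure the tenant's location on the pageserver. A minimal sketch, assuming the fixtures used above (`env`, `client`, `lazy_tenant`); the `tenant_location_conf` payload is abbreviated and its field names are assumptions for illustration:

    # Ask the storage controller to issue a fresh generation for this
    # (tenant, pageserver) pair via the attach hook.
    gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)

    # Attach on the pageserver using that generation (fields assumed; the
    # real tests pass a fuller LocationConfig-shaped dict).
    client.tenant_location_conf(
        lazy_tenant,
        {"mode": "AttachedSingle", "generation": gen, "tenant_conf": {}},
    )
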