tests/neon_local: rename "attachment service" -> "storage controller" (#7087)

Not a user-facing change, but can break any existing `.neon` directories
created by neon_local, as the name of the database used by the storage
controller changes.
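Concretely, the controller keeps its state in a small Postgres database whose name changes with this PR, so a pre-existing `.neon` directory points at the old name. A minimal illustration of the before/after URLs (the port is an assumption; neon_local picks one per environment):

    # Sketch only: database name before and after this PR, per setup_database() below.
    old_url = "postgresql://localhost:1235/attachment_service"
    new_url = "postgresql://localhost:1235/storage_controller"

Re-initializing the environment (discarding local state) is the straightforward way to recover.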

This PR changes all the locations apart from the path of
`control_plane/attachment_service` (waiting for an opportune moment to
do that one, because it's the most conflict-prone with respect to
ongoing PRs like #6676).
Author: John Spray
Date: 2024-03-12 11:36:27 +00:00
Committed by: GitHub
Parent: 621ea2ec44
Commit: 89cf714890
32 changed files with 267 additions and 275 deletions

View File

@@ -51,7 +51,7 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
 CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
 # Force cargo not to print progress bar
 CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
-# Set PQ_LIB_DIR to make sure `attachment_service` get linked with bundled libpq (through diesel)
+# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
 CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
 #

View File

@@ -30,7 +30,7 @@ use pageserver_api::controller_api::{
 };
 use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
-use control_plane::attachment_service::{AttachHookRequest, InspectRequest};
+use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
 /// State available to HTTP request handlers
 #[derive(Clone)]

View File

@@ -1,9 +1,3 @@
-/// The attachment service mimics the aspects of the control plane API
-/// that are required for a pageserver to operate.
-///
-/// This enables running & testing pageservers without a full-blown
-/// deployment of the Neon cloud platform.
-///
 use anyhow::{anyhow, Context};
 use attachment_service::http::make_router;
 use attachment_service::metrics::preinitialize_metrics;

View File

@@ -20,7 +20,7 @@ use crate::node::Node;
 /// ## What do we store?
 ///
-/// The attachment service does not store most of its state durably.
+/// The storage controller service does not store most of its state durably.
 ///
 /// The essential things to store durably are:
 /// - generation numbers, as these must always advance monotonically to ensure data safety.
@@ -34,7 +34,7 @@ use crate::node::Node;
 ///
 /// ## Performance/efficiency
 ///
-/// The attachment service does not go via the database for most things: there are
+/// The storage controller service does not go via the database for most things: there are
 /// a couple of places where we must, and where efficiency matters:
 /// - Incrementing generation numbers: the Reconciler has to wait for this to complete
 ///   before it can attach a tenant, so this acts as a bound on how fast things like

View File

@@ -8,7 +8,7 @@ use std::{
 };
 use anyhow::Context;
-use control_plane::attachment_service::{
+use control_plane::storage_controller::{
     AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse,
 };
 use diesel::result::DatabaseErrorKind;
@@ -839,7 +839,7 @@ impl Service {
             tenant_state.generation = Some(new_generation);
         } else {
             // This is a detach notification. We must update placement policy to avoid re-attaching
-            // during background scheduling/reconciliation, or during attachment service restart.
+            // during background scheduling/reconciliation, or during storage controller restart.
             assert!(attach_req.node_id.is_none());
             tenant_state.policy = PlacementPolicy::Detached;
         }

View File

@@ -8,11 +8,11 @@
 use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
 use compute_api::spec::ComputeMode;
-use control_plane::attachment_service::AttachmentService;
 use control_plane::endpoint::ComputeControlPlane;
 use control_plane::local_env::{InitForceMode, LocalEnv};
 use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
 use control_plane::safekeeper::SafekeeperNode;
+use control_plane::storage_controller::StorageController;
 use control_plane::{broker, local_env};
 use pageserver_api::controller_api::{
     NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
@@ -138,7 +138,7 @@ fn main() -> Result<()> {
         "start" => rt.block_on(handle_start_all(sub_args, &env)),
         "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
         "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
-        "attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)),
+        "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
         "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
         "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
         "mappings" => handle_mappings(sub_args, &mut env),
@@ -445,14 +445,14 @@ async fn handle_tenant(
             // If tenant ID was not specified, generate one
             let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);
-            // We must register the tenant with the attachment service, so
+            // We must register the tenant with the storage controller, so
             // that when the pageserver restarts, it will be re-attached.
-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .tenant_create(TenantCreateRequest {
                     // Note that ::unsharded here isn't actually because the tenant is unsharded, its because the
-                    // attachment service expecfs a shard-naive tenant_id in this attribute, and the TenantCreateRequest
-                    // type is used both in attachment service (for creating tenants) and in pageserver (for creating shards)
+                    // storage controller expecfs a shard-naive tenant_id in this attribute, and the TenantCreateRequest
+                    // type is used both in storage controller (for creating tenants) and in pageserver (for creating shards)
                     new_tenant_id: TenantShardId::unsharded(tenant_id),
                     generation: None,
                     shard_parameters: ShardParameters {
@@ -476,9 +476,9 @@ async fn handle_tenant(
                 .context("Failed to parse postgres version from the argument string")?;
             // FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have
-            // different shards picking different start lsns. Maybe we have to teach attachment service
+            // different shards picking different start lsns. Maybe we have to teach storage controller
             // to let shard 0 branch first and then propagate the chosen LSN to other shards.
-            attachment_service
+            storage_controller
                 .tenant_timeline_create(
                     tenant_id,
                     TimelineCreateRequest {
@@ -528,8 +528,8 @@ async fn handle_tenant(
             let new_pageserver = get_pageserver(env, matches)?;
             let new_pageserver_id = new_pageserver.conf.id;
-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .tenant_migrate(tenant_shard_id, new_pageserver_id)
                 .await?;
@@ -543,8 +543,8 @@ async fn handle_tenant(
             let mut tenant_synthetic_size = None;
-            let attachment_service = AttachmentService::from_env(env);
-            for shard in attachment_service.tenant_locate(tenant_id).await?.shards {
+            let storage_controller = StorageController::from_env(env);
+            for shard in storage_controller.tenant_locate(tenant_id).await?.shards {
                 let pageserver =
                     PageServerNode::from_env(env, env.get_pageserver_conf(shard.node_id)?);
@@ -586,8 +586,8 @@ async fn handle_tenant(
             let tenant_id = get_tenant_id(matches, env)?;
             let shard_count: u8 = matches.get_one::<u8>("shard-count").cloned().unwrap_or(0);
-            let attachment_service = AttachmentService::from_env(env);
-            let result = attachment_service
+            let storage_controller = StorageController::from_env(env);
+            let result = storage_controller
                 .tenant_split(tenant_id, shard_count)
                 .await?;
             println!(
@@ -613,7 +613,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
     match timeline_match.subcommand() {
         Some(("list", list_match)) => {
-            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
+            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
             // where shard 0 is attached, and query there.
             let tenant_shard_id = get_tenant_shard_id(list_match, env)?;
             let timelines = pageserver.timeline_list(&tenant_shard_id).await?;
@@ -633,7 +633,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
             let new_timeline_id_opt = parse_timeline_id(create_match)?;
             let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());
-            let attachment_service = AttachmentService::from_env(env);
+            let storage_controller = StorageController::from_env(env);
             let create_req = TimelineCreateRequest {
                 new_timeline_id,
                 ancestor_timeline_id: None,
@@ -641,7 +641,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 ancestor_start_lsn: None,
                 pg_version: Some(pg_version),
             };
-            let timeline_info = attachment_service
+            let timeline_info = storage_controller
                 .tenant_timeline_create(tenant_id, create_req)
                 .await?;
@@ -730,7 +730,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 .transpose()
                 .context("Failed to parse ancestor start Lsn from the request")?;
             let new_timeline_id = TimelineId::generate();
-            let attachment_service = AttachmentService::from_env(env);
+            let storage_controller = StorageController::from_env(env);
             let create_req = TimelineCreateRequest {
                 new_timeline_id,
                 ancestor_timeline_id: Some(ancestor_timeline_id),
@@ -738,7 +738,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 ancestor_start_lsn: start_lsn,
                 pg_version: None,
             };
-            let timeline_info = attachment_service
+            let timeline_info = storage_controller
                 .tenant_timeline_create(tenant_id, create_req)
                 .await?;
@@ -767,7 +767,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
     match sub_name {
         "list" => {
-            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
+            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
             // where shard 0 is attached, and query there.
             let tenant_shard_id = get_tenant_shard_id(sub_args, env)?;
             let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
@@ -952,21 +952,21 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
             (
                 vec![(parsed.0, parsed.1.unwrap_or(5432))],
                 // If caller is telling us what pageserver to use, this is not a tenant which is
-                // full managed by attachment service, therefore not sharded.
+                // full managed by storage controller, therefore not sharded.
                 ShardParameters::DEFAULT_STRIPE_SIZE,
             )
         } else {
             // Look up the currently attached location of the tenant, and its striping metadata,
             // to pass these on to postgres.
-            let attachment_service = AttachmentService::from_env(env);
-            let locate_result = attachment_service.tenant_locate(endpoint.tenant_id).await?;
+            let storage_controller = StorageController::from_env(env);
+            let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
             let pageservers = locate_result
                 .shards
                 .into_iter()
                 .map(|shard| {
                     (
                         Host::parse(&shard.listen_pg_addr)
-                            .expect("Attachment service reported bad hostname"),
+                            .expect("Storage controller reported bad hostname"),
                         shard.listen_pg_port,
                     )
                 })
@@ -1015,8 +1015,8 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 pageserver.pg_connection_config.port(),
             )]
         } else {
-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .tenant_locate(endpoint.tenant_id)
                 .await?
                 .shards
@@ -1024,7 +1024,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 .map(|shard| {
                     (
                         Host::parse(&shard.listen_pg_addr)
-                            .expect("Attachment service reported malformed host"),
+                            .expect("Storage controller reported malformed host"),
                         shard.listen_pg_port,
                     )
                 })
@@ -1144,8 +1144,8 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
             let scheduling = subcommand_args.get_one("scheduling");
             let availability = subcommand_args.get_one("availability");
-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .node_configure(NodeConfigureRequest {
                     node_id: pageserver.conf.id,
                     scheduling: scheduling.cloned(),
@@ -1170,11 +1170,11 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
     Ok(())
 }
-async fn handle_attachment_service(
+async fn handle_storage_controller(
     sub_match: &ArgMatches,
     env: &local_env::LocalEnv,
 ) -> Result<()> {
-    let svc = AttachmentService::from_env(env);
+    let svc = StorageController::from_env(env);
     match sub_match.subcommand() {
         Some(("start", _start_match)) => {
             if let Err(e) = svc.start().await {
@@ -1194,8 +1194,8 @@ async fn handle_attachment_service(
                 exit(1);
             }
         }
-        Some((sub_name, _)) => bail!("Unexpected attachment_service subcommand '{}'", sub_name),
-        None => bail!("no attachment_service subcommand provided"),
+        Some((sub_name, _)) => bail!("Unexpected storage_controller subcommand '{}'", sub_name),
+        None => bail!("no storage_controller subcommand provided"),
     }
     Ok(())
 }
@@ -1280,11 +1280,11 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
     broker::start_broker_process(env).await?;
-    // Only start the attachment service if the pageserver is configured to need it
+    // Only start the storage controller if the pageserver is configured to need it
     if env.control_plane_api.is_some() {
-        let attachment_service = AttachmentService::from_env(env);
-        if let Err(e) = attachment_service.start().await {
-            eprintln!("attachment_service start failed: {:#}", e);
+        let storage_controller = StorageController::from_env(env);
+        if let Err(e) = storage_controller.start().await {
+            eprintln!("storage_controller start failed: {:#}", e);
             try_stop_all(env, true).await;
             exit(1);
         }
@@ -1356,9 +1356,9 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
     }
     if env.control_plane_api.is_some() {
-        let attachment_service = AttachmentService::from_env(env);
-        if let Err(e) = attachment_service.stop(immediate).await {
-            eprintln!("attachment service stop failed: {e:#}");
+        let storage_controller = StorageController::from_env(env);
+        if let Err(e) = storage_controller.stop(immediate).await {
+            eprintln!("storage controller stop failed: {e:#}");
         }
     }
 }
@@ -1618,9 +1618,9 @@ fn cli() -> Command {
             )
         )
         .subcommand(
-            Command::new("attachment_service")
+            Command::new("storage_controller")
                 .arg_required_else_help(true)
-                .about("Manage attachment_service")
+                .about("Manage storage_controller")
                 .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
                 .subcommand(Command::new("stop").about("Stop local pageserver")
                     .arg(stop_mode_arg.clone()))
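As a usage note, here is a hedged sketch of exercising the renamed subcommand from a script; the `storage_controller` subcommand and the `-m immediate` stop mode match this diff, while the `neon_local` binary name on PATH and the surrounding setup are assumptions:

    import subprocess

    # Start, then immediately stop, the storage controller via the renamed subcommand.
    subprocess.run(["neon_local", "storage_controller", "start"], check=True)
    subprocess.run(["neon_local", "storage_controller", "stop", "-m", "immediate"], check=True)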

View File

@@ -57,9 +57,9 @@ use serde::{Deserialize, Serialize};
 use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};
-use crate::attachment_service::AttachmentService;
 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
+use crate::storage_controller::StorageController;
 use compute_api::responses::{ComputeState, ComputeStatus};
 use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
@@ -750,17 +750,17 @@ impl Endpoint {
         let postgresql_conf = self.read_postgresql_conf()?;
         spec.cluster.postgresql_conf = Some(postgresql_conf);
-        // If we weren't given explicit pageservers, query the attachment service
+        // If we weren't given explicit pageservers, query the storage controller
         if pageservers.is_empty() {
-            let attachment_service = AttachmentService::from_env(&self.env);
-            let locate_result = attachment_service.tenant_locate(self.tenant_id).await?;
+            let storage_controller = StorageController::from_env(&self.env);
+            let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
             pageservers = locate_result
                 .shards
                 .into_iter()
                 .map(|shard| {
                     (
                         Host::parse(&shard.listen_pg_addr)
                             .expect("Storage controller reported bad hostname"),
                         shard.listen_pg_port,
                     )
                 })

View File

@@ -6,7 +6,6 @@
 //! local installations.
 #![deny(clippy::undocumented_unsafe_blocks)]
-pub mod attachment_service;
 mod background_process;
 pub mod broker;
 pub mod endpoint;
@@ -14,3 +13,4 @@ pub mod local_env;
 pub mod pageserver;
 pub mod postgresql_conf;
 pub mod safekeeper;
+pub mod storage_controller;

View File

@@ -72,13 +72,13 @@ pub struct LocalEnv {
     #[serde(default)]
     pub safekeepers: Vec<SafekeeperConf>,
-    // Control plane upcall API for pageserver: if None, we will not run attachment_service. If set, this will
+    // Control plane upcall API for pageserver: if None, we will not run storage_controller. If set, this will
     // be propagated into each pageserver's configuration.
     #[serde(default)]
     pub control_plane_api: Option<Url>,
-    // Control plane upcall API for attachment service. If set, this will be propagated into the
-    // attachment service's configuration.
+    // Control plane upcall API for storage controller. If set, this will be propagated into the
+    // storage controller's configuration.
     #[serde(default)]
     pub control_plane_compute_hook_api: Option<Url>,
@@ -227,10 +227,10 @@ impl LocalEnv {
         self.neon_distrib_dir.join("pageserver")
     }
-    pub fn attachment_service_bin(&self) -> PathBuf {
-        // Irrespective of configuration, attachment service binary is always
+    pub fn storage_controller_bin(&self) -> PathBuf {
+        // Irrespective of configuration, storage controller binary is always
         // run from the same location as neon_local. This means that for compatibility
-        // tests that run old pageserver/safekeeper, they still run latest attachment service.
+        // tests that run old pageserver/safekeeper, they still run latest storage controller.
         let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned();
         neon_local_bin_dir.join("storage_controller")
     }

View File

@@ -31,8 +31,8 @@ use utils::{
     lsn::Lsn,
 };
-use crate::attachment_service::AttachmentService;
 use crate::local_env::PageServerConf;
+use crate::storage_controller::StorageController;
 use crate::{background_process, local_env::LocalEnv};
 /// Directory within .neon which will be used by default for LocalFs remote storage.
@@ -111,7 +111,7 @@ impl PageServerNode {
             control_plane_api.as_str()
         ));
-        // Attachment service uses the same auth as pageserver: if JWT is enabled
+        // Storage controller uses the same auth as pageserver: if JWT is enabled
         // for us, we will also need it to talk to them.
         if matches!(self.conf.http_auth_type, AuthType::NeonJWT) {
             let jwt_token = self
@@ -214,12 +214,12 @@ impl PageServerNode {
         // Register the node with the storage controller before starting pageserver: pageserver must be registered to
        // successfully call /re-attach and finish starting up.
         if register {
-            let attachment_service = AttachmentService::from_env(&self.env);
+            let storage_controller = StorageController::from_env(&self.env);
             let (pg_host, pg_port) =
                 parse_host_port(&self.conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
             let (http_host, http_port) = parse_host_port(&self.conf.listen_http_addr)
                 .expect("Unable to parse listen_http_addr");
-            attachment_service
+            storage_controller
                 .node_register(NodeRegisterRequest {
                     node_id: self.conf.id,
                     listen_pg_addr: pg_host.to_string(),

View File

@@ -24,7 +24,7 @@ use utils::{
     id::{NodeId, TenantId},
 };
-pub struct AttachmentService {
+pub struct StorageController {
     env: LocalEnv,
     listen: String,
     path: Utf8PathBuf,
@@ -36,7 +36,7 @@ pub struct AttachmentService {
 const COMMAND: &str = "storage_controller";
-const ATTACHMENT_SERVICE_POSTGRES_VERSION: u32 = 16;
+const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
 #[derive(Serialize, Deserialize)]
 pub struct AttachHookRequest {
@@ -59,7 +59,7 @@ pub struct InspectResponse {
     pub attachment: Option<(u32, NodeId)>,
 }
-impl AttachmentService {
+impl StorageController {
     pub fn from_env(env: &LocalEnv) -> Self {
         let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
             .unwrap()
@@ -136,27 +136,27 @@ impl AttachmentService {
     }
     fn pid_file(&self) -> Utf8PathBuf {
-        Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("attachment_service.pid"))
+        Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("storage_controller.pid"))
             .expect("non-Unicode path")
     }
-    /// PIDFile for the postgres instance used to store attachment service state
+    /// PIDFile for the postgres instance used to store storage controller state
     fn postgres_pid_file(&self) -> Utf8PathBuf {
         Utf8PathBuf::from_path_buf(
             self.env
                 .base_data_dir
-                .join("attachment_service_postgres.pid"),
+                .join("storage_controller_postgres.pid"),
         )
         .expect("non-Unicode path")
     }
     /// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
     ///
-    /// This usually uses ATTACHMENT_SERVICE_POSTGRES_VERSION of postgres, but will fall back
+    /// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
     /// to other versions if that one isn't found. Some automated tests create circumstances
     /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
     pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
-        let prefer_versions = [ATTACHMENT_SERVICE_POSTGRES_VERSION, 15, 14];
+        let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
         for v in prefer_versions {
             let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
@@ -189,7 +189,7 @@ impl AttachmentService {
     ///
     /// Returns the database url
     pub async fn setup_database(&self) -> anyhow::Result<String> {
-        const DB_NAME: &str = "attachment_service";
+        const DB_NAME: &str = "storage_controller";
         let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
         let pg_bin_dir = self.get_pg_bin_dir().await?;
@@ -219,10 +219,10 @@ impl AttachmentService {
     }
     pub async fn start(&self) -> anyhow::Result<()> {
-        // Start a vanilla Postgres process used by the attachment service for persistence.
+        // Start a vanilla Postgres process used by the storage controller for persistence.
         let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
             .unwrap()
-            .join("attachment_service_db");
+            .join("storage_controller_db");
         let pg_bin_dir = self.get_pg_bin_dir().await?;
         let pg_log_path = pg_data_path.join("postgres.log");
@@ -245,7 +245,7 @@ impl AttachmentService {
                 .await?;
         };
-        println!("Starting attachment service database...");
+        println!("Starting storage controller database...");
         let db_start_args = [
             "-w",
             "-D",
@@ -256,7 +256,7 @@ impl AttachmentService {
         ];
         background_process::start_process(
-            "attachment_service_db",
+            "storage_controller_db",
             &self.env.base_data_dir,
             pg_bin_dir.join("pg_ctl").as_std_path(),
             db_start_args,
@@ -300,7 +300,7 @@ impl AttachmentService {
         background_process::start_process(
             COMMAND,
             &self.env.base_data_dir,
-            &self.env.attachment_service_bin(),
+            &self.env.storage_controller_bin(),
             args,
             [(
                 "NEON_REPO_DIR".to_string(),
@@ -322,10 +322,10 @@ impl AttachmentService {
     pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> {
         background_process::stop_process(immediate, COMMAND, &self.pid_file())?;
-        let pg_data_path = self.env.base_data_dir.join("attachment_service_db");
+        let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
         let pg_bin_dir = self.get_pg_bin_dir().await?;
-        println!("Stopping attachment service database...");
+        println!("Stopping storage controller database...");
         let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
         let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
             .args(pg_stop_args)
@@ -344,10 +344,10 @@ impl AttachmentService {
         // fine that stop failed. Otherwise it is an error that stop failed.
         const PG_STATUS_NOT_RUNNING: i32 = 3;
         if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() {
-            println!("Attachment service data base is already stopped");
+            println!("Storage controller database is already stopped");
             return Ok(());
         } else {
-            anyhow::bail!("Failed to stop attachment service database: {stop_status}")
+            anyhow::bail!("Failed to stop storage controller database: {stop_status}")
         }
     }
@@ -368,7 +368,7 @@ impl AttachmentService {
         }
     }
-    /// Simple HTTP request wrapper for calling into attachment service
+    /// Simple HTTP request wrapper for calling into storage controller
     async fn dispatch<RQ, RS>(
         &self,
         method: hyper::Method,
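The renamed on-disk artifacts above are exactly what invalidates old `.neon` directories. A sketch of the expected layout after this PR, assuming the default base data dir (file names are taken from the paths in this file):

    from pathlib import Path

    base = Path(".neon")
    renamed_artifacts = [
        base / "storage_controller.pid",           # controller process PID file
        base / "storage_controller_postgres.pid",  # PID file of its postgres
        base / "storage_controller_db",            # vanilla postgres data dir
    ]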

View File

@@ -70,9 +70,9 @@ Should only be used e.g. for status check/tenant creation/list.
 Should only be used e.g. for status check.
 Currently also used for connection from any pageserver to any safekeeper.
-"generations_api": Provides access to the upcall APIs served by the attachment service or the control plane.
-"admin": Provides access to the control plane and admin APIs of the attachment service.
+"generations_api": Provides access to the upcall APIs served by the storage controller or the control plane.
+"admin": Provides access to the control plane and admin APIs of the storage controller.
 ### CLI
 CLI generates a key pair during call to `neon_local init` with the following commands:

View File

@@ -88,8 +88,6 @@ impl FromStr for NodeAvailability {
     }
 }
-/// FIXME: this is a duplicate of the type in the attachment_service crate, because the
-/// type needs to be defined with diesel traits in there.
 #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
 pub enum NodeSchedulingPolicy {
     Active,

View File

@@ -1014,24 +1014,24 @@ class NeonEnv:
         self.initial_tenant = config.initial_tenant
         self.initial_timeline = config.initial_timeline
-        # Find two adjacent ports for attachment service and its postgres DB. This
+        # Find two adjacent ports for storage controller and its postgres DB. This
         # loop would eventually throw from get_port() if we run out of ports (extremely
         # unlikely): usually we find two adjacent free ports on the first iteration.
         while True:
-            self.attachment_service_port = self.port_distributor.get_port()
-            attachment_service_pg_port = self.port_distributor.get_port()
-            if attachment_service_pg_port == self.attachment_service_port + 1:
+            self.storage_controller_port = self.port_distributor.get_port()
+            storage_controller_pg_port = self.port_distributor.get_port()
+            if storage_controller_pg_port == self.storage_controller_port + 1:
                 break
         # The URL for the pageserver to use as its control_plane_api config
-        self.control_plane_api: str = f"http://127.0.0.1:{self.attachment_service_port}/upcall/v1"
-        # The base URL of the attachment service
-        self.attachment_service_api: str = f"http://127.0.0.1:{self.attachment_service_port}"
+        self.control_plane_api: str = f"http://127.0.0.1:{self.storage_controller_port}/upcall/v1"
+        # The base URL of the storage controller
+        self.storage_controller_api: str = f"http://127.0.0.1:{self.storage_controller_port}"
         # For testing this with a fake HTTP server, enable passing through a URL from config
         self.control_plane_compute_hook_api = config.control_plane_compute_hook_api
-        self.attachment_service: NeonAttachmentService = NeonAttachmentService(
+        self.storage_controller: NeonStorageController = NeonStorageController(
             self, config.auth_enabled
         )
@@ -1113,16 +1113,16 @@ class NeonEnv:
         self.neon_cli.init(cfg, force=config.config_init_force)
     def start(self):
-        # Attachment service starts first, so that pageserver /re-attach calls don't
+        # storage controller starts first, so that pageserver /re-attach calls don't
         # bounce through retries on startup
-        self.attachment_service.start()
-        def attachment_service_ready():
-            assert self.attachment_service.ready() is True
-        # Wait for attachment service readiness to prevent unnecessary post start-up
+        self.storage_controller.start()
+        def storage_controller_ready():
+            assert self.storage_controller.ready() is True
+        # Wait for storage controller readiness to prevent unnecessary post start-up
         # reconcile.
-        wait_until(30, 1, attachment_service_ready)
+        wait_until(30, 1, storage_controller_ready)
         # Start up broker, pageserver and all safekeepers
         futs = []
@@ -1153,7 +1153,7 @@
             if ps_assert_metric_no_errors:
                 pageserver.assert_no_metric_errors()
             pageserver.stop(immediate=immediate)
-        self.attachment_service.stop(immediate=immediate)
+        self.storage_controller.stop(immediate=immediate)
         self.broker.stop(immediate=immediate)
     @property
@@ -1188,9 +1188,9 @@
     def get_tenant_pageserver(self, tenant_id: Union[TenantId, TenantShardId]):
         """
         Get the NeonPageserver where this tenant shard is currently attached, according
-        to the attachment service.
+        to the storage controller.
         """
-        meta = self.attachment_service.inspect(tenant_id)
+        meta = self.storage_controller.inspect(tenant_id)
         if meta is None:
             return None
         pageserver_id = meta[1]
@@ -1697,12 +1697,12 @@ class NeonCli(AbstractNeonCli):
         res.check_returncode()
         return res
-    def attachment_service_start(self):
-        cmd = ["attachment_service", "start"]
+    def storage_controller_start(self):
+        cmd = ["storage_controller", "start"]
         return self.raw_cli(cmd)
-    def attachment_service_stop(self, immediate: bool):
-        cmd = ["attachment_service", "stop"]
+    def storage_controller_stop(self, immediate: bool):
+        cmd = ["storage_controller", "stop"]
         if immediate:
             cmd.extend(["-m", "immediate"])
         return self.raw_cli(cmd)
@@ -1942,14 +1942,14 @@ class Pagectl(AbstractNeonCli):
         return IndexPartDump.from_json(parsed)
-class AttachmentServiceApiException(Exception):
+class StorageControllerApiException(Exception):
     def __init__(self, message, status_code: int):
         super().__init__(message)
         self.message = message
         self.status_code = status_code
-class NeonAttachmentService(MetricsGetter):
+class NeonStorageController(MetricsGetter):
     def __init__(self, env: NeonEnv, auth_enabled: bool):
         self.env = env
         self.running = False
@@ -1957,13 +1957,13 @@ class NeonAttachmentService(MetricsGetter):
     def start(self):
         assert not self.running
-        self.env.neon_cli.attachment_service_start()
+        self.env.neon_cli.storage_controller_start()
         self.running = True
         return self
-    def stop(self, immediate: bool = False) -> "NeonAttachmentService":
+    def stop(self, immediate: bool = False) -> "NeonStorageController":
         if self.running:
-            self.env.neon_cli.attachment_service_stop(immediate)
+            self.env.neon_cli.storage_controller_stop(immediate)
             self.running = False
         return self
@@ -1976,22 +1976,22 @@ class NeonAttachmentService(MetricsGetter):
             msg = res.json()["msg"]
         except:  # noqa: E722
             msg = ""
-        raise AttachmentServiceApiException(msg, res.status_code) from e
+        raise StorageControllerApiException(msg, res.status_code) from e
     def pageserver_api(self) -> PageserverHttpClient:
         """
-        The attachment service implements a subset of the pageserver REST API, for mapping
+        The storage controller implements a subset of the pageserver REST API, for mapping
         per-tenant actions into per-shard actions (e.g. timeline creation). Tests should invoke those
         functions via the HttpClient, as an implicit check that these APIs remain compatible.
         """
         auth_token = None
         if self.auth_enabled:
             auth_token = self.env.auth_keys.generate_token(scope=TokenScope.PAGE_SERVER_API)
-        return PageserverHttpClient(self.env.attachment_service_port, lambda: True, auth_token)
+        return PageserverHttpClient(self.env.storage_controller_port, lambda: True, auth_token)
     def request(self, method, *args, **kwargs) -> requests.Response:
         resp = requests.request(method, *args, **kwargs)
-        NeonAttachmentService.raise_api_exception(resp)
+        NeonStorageController.raise_api_exception(resp)
         return resp
@@ -2004,15 +2004,15 @@ class NeonAttachmentService(MetricsGetter):
         return headers
     def get_metrics(self) -> Metrics:
-        res = self.request("GET", f"{self.env.attachment_service_api}/metrics")
+        res = self.request("GET", f"{self.env.storage_controller_api}/metrics")
         return parse_metrics(res.text)
     def ready(self) -> bool:
         status = None
         try:
-            resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
+            resp = self.request("GET", f"{self.env.storage_controller_api}/ready")
             status = resp.status_code
-        except AttachmentServiceApiException as e:
+        except StorageControllerApiException as e:
            status = e.status_code
         if status == 503:
@@ -2027,7 +2027,7 @@ class NeonAttachmentService(MetricsGetter):
     ) -> int:
         response = self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/attach-hook",
+            f"{self.env.storage_controller_api}/debug/v1/attach-hook",
             json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2038,7 +2038,7 @@ class NeonAttachmentService(MetricsGetter):
     def attach_hook_drop(self, tenant_shard_id: Union[TenantId, TenantShardId]):
         self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/attach-hook",
+            f"{self.env.storage_controller_api}/debug/v1/attach-hook",
             json={"tenant_shard_id": str(tenant_shard_id), "node_id": None},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2049,7 +2049,7 @@ class NeonAttachmentService(MetricsGetter):
         """
         response = self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/inspect",
+            f"{self.env.storage_controller_api}/debug/v1/inspect",
             json={"tenant_shard_id": str(tenant_shard_id)},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2070,7 +2070,7 @@ class NeonAttachmentService(MetricsGetter):
         log.info(f"node_register({body})")
         self.request(
             "POST",
-            f"{self.env.attachment_service_api}/control/v1/node",
+            f"{self.env.storage_controller_api}/control/v1/node",
             json=body,
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2078,7 +2078,7 @@ class NeonAttachmentService(MetricsGetter):
     def node_list(self):
         response = self.request(
             "GET",
-            f"{self.env.attachment_service_api}/control/v1/node",
+            f"{self.env.storage_controller_api}/control/v1/node",
             headers=self.headers(TokenScope.ADMIN),
         )
         return response.json()
@@ -2088,7 +2088,7 @@ class NeonAttachmentService(MetricsGetter):
         body["node_id"] = node_id
         self.request(
             "PUT",
-            f"{self.env.attachment_service_api}/control/v1/node/{node_id}/config",
+            f"{self.env.storage_controller_api}/control/v1/node/{node_id}/config",
             json=body,
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2118,7 +2118,7 @@ class NeonAttachmentService(MetricsGetter):
         response = self.request(
             "POST",
-            f"{self.env.attachment_service_api}/v1/tenant",
+            f"{self.env.storage_controller_api}/v1/tenant",
             json=body,
             headers=self.headers(TokenScope.PAGE_SERVER_API),
         )
@@ -2130,7 +2130,7 @@ class NeonAttachmentService(MetricsGetter):
         """
         response = self.request(
             "GET",
-            f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate",
+            f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/locate",
             headers=self.headers(TokenScope.ADMIN),
         )
         body = response.json()
@@ -2140,7 +2140,7 @@ class NeonAttachmentService(MetricsGetter):
     def tenant_shard_split(self, tenant_id: TenantId, shard_count: int) -> list[TenantShardId]:
         response = self.request(
             "PUT",
-            f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/shard_split",
+            f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/shard_split",
             json={"new_shard_count": shard_count},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2152,7 +2152,7 @@ class NeonAttachmentService(MetricsGetter):
     def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int):
         self.request(
             "PUT",
-            f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_shard_id}/migrate",
+            f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_shard_id}/migrate",
             json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2165,12 +2165,12 @@ class NeonAttachmentService(MetricsGetter):
         """
         self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/consistency_check",
+            f"{self.env.storage_controller_api}/debug/v1/consistency_check",
             headers=self.headers(TokenScope.ADMIN),
         )
-        log.info("Attachment service passed consistency check")
+        log.info("storage controller passed consistency check")
-    def __enter__(self) -> "NeonAttachmentService":
+    def __enter__(self) -> "NeonStorageController":
         return self
     def __exit__(
@@ -2401,7 +2401,7 @@ class NeonPageserver(PgProtocol):
         """
         client = self.http_client()
         if generation is None:
-            generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         return client.tenant_attach(
             tenant_id,
             config,
@@ -2410,14 +2410,14 @@ class NeonPageserver(PgProtocol):
         )
     def tenant_detach(self, tenant_id: TenantId):
-        self.env.attachment_service.attach_hook_drop(tenant_id)
+        self.env.storage_controller.attach_hook_drop(tenant_id)
         client = self.http_client()
         return client.tenant_detach(tenant_id)
     def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
         if config["mode"].startswith("Attached") and "generation" not in config:
-            config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            config["generation"] = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         client = self.http_client()
         return client.tenant_location_conf(tenant_id, config, **kwargs)
@@ -2441,14 +2441,14 @@ class NeonPageserver(PgProtocol):
         generation: Optional[int] = None,
     ) -> TenantId:
         if generation is None:
-            generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         client = self.http_client(auth_token=auth_token)
         return client.tenant_create(tenant_id, conf, generation=generation)
     def tenant_load(self, tenant_id: TenantId):
         client = self.http_client()
         return client.tenant_load(
-            tenant_id, generation=self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            tenant_id, generation=self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         )
@@ -3907,7 +3907,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint
     psql_path = os.path.join(pg_bin.pg_bin_path, "psql")
-    pageserver_id = env.attachment_service.locate(endpoint.tenant_id)[0]["node_id"]
+    pageserver_id = env.storage_controller.locate(endpoint.tenant_id)[0]["node_id"]
     cmd = rf"""
         {psql_path} \
             --no-psqlrc \
@@ -3994,7 +3994,7 @@ def tenant_get_shards(
     us to figure out the shards for a tenant.
     If the caller provides `pageserver_id`, it will be used for all shards, even
-    if the shard is indicated by attachment service to be on some other pageserver.
+    if the shard is indicated by storage controller to be on some other pageserver.
     Caller should over the response to apply their per-pageserver action to
     each shard
@@ -4010,7 +4010,7 @@ def tenant_get_shards(
                 TenantShardId.parse(s["shard_id"]),
                 override_pageserver or env.get_pageserver(s["node_id"]),
             )
-            for s in env.attachment_service.locate(tenant_id)
+            for s in env.storage_controller.locate(tenant_id)
         ]
     else:
         # Assume an unsharded tenant
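Taken together, test code migrates mechanically from `env.attachment_service` to `env.storage_controller`. A hedged sketch of post-rename usage (method names and return shapes follow this diff; the test itself is hypothetical):

    def test_uses_renamed_fixture(env: NeonEnv):
        # formerly env.attachment_service.locate(...)
        shards = env.storage_controller.locate(env.initial_tenant)
        pageserver = env.get_pageserver(shards[0]["node_id"])

        # inspect() returns None or a (generation, node_id) tuple
        meta = env.storage_controller.inspect(env.initial_tenant)
        if meta is not None:
            (generation, node_id) = meta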

View File

@@ -43,7 +43,7 @@ def single_timeline(
     log.info("detach template tenant form pageserver")
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )

View File

@@ -56,7 +56,7 @@ def setup_env(
     template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )
     env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -92,7 +92,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
     template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )
     env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -114,7 +114,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
     template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )
     env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -56,12 +56,12 @@ def measure_recovery_time(env: NeonCompare):
 # Delete the Tenant in the pageserver: this will drop local and remote layers, such that
 # when we "create" the Tenant again, we will replay the WAL from the beginning.
 #
-# This is a "weird" thing to do, and can confuse the attachment service as we're re-using
+# This is a "weird" thing to do, and can confuse the storage controller as we're re-using
 # the same tenant ID for a tenant that is logically different from the pageserver's point
 # of view, but the same as far as the safekeeper/WAL is concerned. To work around that,
 # we will explicitly create the tenant in the same generation that it was previously
 # attached in.
-attach_status = env.attachment_service.inspect(tenant_shard_id=env.tenant)
+attach_status = env.storage_controller.inspect(tenant_shard_id=env.tenant)
 assert attach_status is not None
 (attach_gen, _) = attach_status
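A minimal sketch of the same-generation workaround this comment describes, assuming only the fixture calls visible in this diff (`inspect` returning a (generation, ...) tuple, and `tenant_attach` accepting a generation; the helper name is hypothetical):

def reattach_in_same_generation(env, tenant_id):
    # Ask the storage controller what generation the tenant was last attached in
    attach_status = env.storage_controller.inspect(tenant_shard_id=tenant_id)
    assert attach_status is not None
    (attach_gen, _) = attach_status
    # Re-create/attach the tenant under that same generation
    env.pageserver.tenant_attach(tenant_id, generation=attach_gen)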

View File

@@ -137,7 +137,7 @@ def test_no_config(positive_env: NeonEnv, content_type: Optional[str]):
 ps_http.tenant_detach(tenant_id)
 assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
-body = {"generation": env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)}
+body = {"generation": env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)}
 ps_http.post(
 f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",

View File

@@ -85,9 +85,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
 # the endpoint. Whereas the previous reconfiguration was like a healthy migration, this
 # is more like what happens in an unexpected pageserver failure.
 #
-# Since we're dual-attached, need to tip-off attachment service to treat the one we're
+# Since we're dual-attached, need to tip-off storage controller to treat the one we're
 # about to start as the attached pageserver
-env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
+env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
 env.pageservers[0].start()
 env.pageservers[1].stop()
@@ -97,9 +97,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
 assert fetchone() == (100000,)
 env.pageservers[0].stop()
-# Since we're dual-attached, need to tip-off attachment service to treat the one we're
+# Since we're dual-attached, need to tip-off storage controller to treat the one we're
 # about to start as the attached pageserver
-env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
+env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
 env.pageservers[1].start()
 # Test a (former) bug where a child process spins without updating its connection string
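The tip-off pattern above generalizes to a small failover helper; a sketch under the same fixture names (the helper itself is hypothetical, not part of the test suite):

def failover(env, tenant_id, new_ps, old_ps):
    # Tell the storage controller which pageserver should be treated as attached,
    # so that re-attach on startup hands it the latest generation
    env.storage_controller.attach_hook_issue(tenant_id, new_ps.id)
    new_ps.start()
    old_ps.stop()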

View File

@@ -133,7 +133,7 @@ def test_create_snapshot(
 for sk in env.safekeepers:
 sk.stop()
 env.pageserver.stop()
-env.attachment_service.stop()
+env.storage_controller.stop()
 # Directory `compatibility_snapshot_dir` is uploaded to S3 in a workflow, keep the name in sync with it
 compatibility_snapshot_dir = (

View File

@@ -159,7 +159,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
 time.sleep(1.1) # so that we can use change in pre_stat.st_mtime to detect overwrites
 def get_generation_number():
-attachment = env.attachment_service.inspect(tenant_id)
+attachment = env.storage_controller.inspect(tenant_id)
 assert attachment is not None
 return attachment[0]

View File

@@ -133,7 +133,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
 # Stop default ps/sk
 env.neon_cli.pageserver_stop(env.pageserver.id)
 env.neon_cli.safekeeper_stop()
-env.neon_cli.attachment_service_stop(False)
+env.neon_cli.storage_controller_stop(False)
 # Keep NeonEnv state up to date, it usually owns starting/stopping services
 env.pageserver.running = False
@@ -175,7 +175,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
 env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 2)
 # Stop this to get out of the way of the following `start`
-env.neon_cli.attachment_service_stop(False)
+env.neon_cli.storage_controller_stop(False)
 # Default start
 res = env.neon_cli.raw_cli(["start"])

View File

@@ -73,7 +73,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
 # create new tenant and check it is also there
 tenant_id = TenantId.generate()
 client.tenant_create(
-tenant_id, generation=env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
+tenant_id, generation=env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)
 )
 assert tenant_id in {TenantId(t["id"]) for t in client.tenant_list()}

View File

@@ -203,7 +203,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
 env.broker.try_start()
 for sk in env.safekeepers:
 sk.start()
-env.attachment_service.start()
+env.storage_controller.start()
 env.pageserver.start(overrides=('--pageserver-config-override=control_plane_api=""',))
@@ -285,7 +285,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
 neon_env_builder.num_pageservers = 2
 env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
-attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
+attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"]
 main_pageserver = env.get_pageserver(attached_to_id)
 other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]
@@ -310,7 +310,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
 # Now advance the generation in the control plane: subsequent validations
 # from the running pageserver will fail. No more deletions should happen.
-env.attachment_service.attach_hook_issue(env.initial_tenant, other_pageserver.id)
+env.storage_controller.attach_hook_issue(env.initial_tenant, other_pageserver.id)
 generate_uploads_and_deletions(env, init=False, pageserver=main_pageserver)
 assert_deletion_queue(ps_http, lambda n: n > 0)
@@ -366,7 +366,7 @@ def test_deletion_queue_recovery(
 neon_env_builder.num_pageservers = 2
 env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
-attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
+attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"]
 main_pageserver = env.get_pageserver(attached_to_id)
 other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]
@@ -428,7 +428,7 @@ def test_deletion_queue_recovery(
 if keep_attachment == KeepAttachment.LOSE:
 some_other_pageserver = other_pageserver.id
-env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver)
+env.storage_controller.attach_hook_issue(env.initial_tenant, some_other_pageserver)
 main_pageserver.start()
@@ -494,7 +494,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
 )
 # Simulate a major incident: the control plane goes offline
-env.attachment_service.stop()
+env.storage_controller.stop()
 # Remember how many validations had happened before the control plane went offline
 validated = get_deletion_queue_validated(ps_http)
@@ -525,7 +525,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
 assert get_deletion_queue_executed(ps_http) == 0
 # When the control plane comes back up, normal service should resume
-env.attachment_service.start()
+env.storage_controller.start()
 ps_http.deletion_queue_flush(execute=True)
 assert get_deletion_queue_depth(ps_http) == 0
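The emergency-mode sequence above amounts to: with the storage controller down, deletions queue but are never validated or executed; once it returns, a flush should drain the queue. A sketch using the same helpers (wait_until is the fixture retry helper seen elsewhere in this diff; the wrapper function is hypothetical):

def assert_deletion_queue_drains(env, ps_http):
    # Control plane is back: validation and execution can proceed again
    env.storage_controller.start()
    ps_http.deletion_queue_flush(execute=True)

    def queue_empty():
        assert get_deletion_queue_depth(ps_http) == 0

    wait_until(10, 1, queue_empty)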

View File

@@ -157,7 +157,7 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
 workload.churn_rows(rng.randint(128, 256), pageserver.id)
 workload.validate(pageserver.id)
 elif last_state_ps[0].startswith("Attached"):
-# The `attachment_service` will only re-attach on startup when a pageserver was the
+# The `storage_controller` will only re-attach on startup when a pageserver was the
 # holder of the latest generation: otherwise the pageserver will revert to detached
 # state if it was running attached with a stale generation
 last_state[pageserver.id] = ("Detached", None)
@@ -182,12 +182,12 @@
 generation = last_state_ps[1]
 else:
 # Switch generations, while also jumping between attached states
-generation = env.attachment_service.attach_hook_issue(
+generation = env.storage_controller.attach_hook_issue(
 tenant_id, pageserver.id
 )
 latest_attached = pageserver.id
 else:
-generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
+generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver.id)
 latest_attached = pageserver.id
 else:
 generation = None
@@ -273,7 +273,7 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder):
 # Encourage the new location to download while still in secondary mode
 pageserver_b.http_client().tenant_secondary_download(tenant_id)
-migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
+migrated_generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver_b.id)
 log.info(f"Acquired generation {migrated_generation} for destination pageserver")
 assert migrated_generation == initial_generation + 1
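A sketch of that live-migration handoff, using only the calls shown above; the +1 assertion holds because the controller issues exactly one new generation per attachment change (the helper name is hypothetical):

def migrate_to(env, tenant_id, dest_ps, prev_generation):
    # Warm the destination while it is still a secondary
    dest_ps.http_client().tenant_secondary_download(tenant_id)
    # Then acquire a fresh generation for it
    new_generation = env.storage_controller.attach_hook_issue(tenant_id, dest_ps.id)
    assert new_generation == prev_generation + 1
    return new_generation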
@@ -436,7 +436,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
 remote_storage_kind=RemoteStorageKind.MOCK_S3,
 )
 env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
-assert env.attachment_service is not None
+assert env.storage_controller is not None
 assert isinstance(env.pageserver_remote_storage, S3Storage)  # Satisfy linter
 tenant_id = env.initial_tenant

View File

@@ -169,7 +169,7 @@ def test_remote_storage_backup_and_restore(
 # Ensure that even though the tenant is broken, retrying the attachment fails
 with pytest.raises(Exception, match="Tenant state is Broken"):
 # Use same generation as in previous attempt
-gen_state = env.attachment_service.inspect(tenant_id)
+gen_state = env.storage_controller.inspect(tenant_id)
 assert gen_state is not None
 generation = gen_state[0]
 env.pageserver.tenant_attach(tenant_id, generation=generation)
@@ -355,7 +355,7 @@ def test_remote_storage_upload_queue_retries(
 env.pageserver.stop(immediate=True)
 env.endpoints.stop_all()
-# We are about to forcibly drop local dirs. Attachment service will increment generation in re-attach before
+# We are about to forcibly drop local dirs. Storage controller will increment generation in re-attach before
 # we later increment when actually attaching it again, leading to skipping a generation and potentially getting
 # these warnings if there was a durable but un-executed deletion list at time of restart.
 env.pageserver.allowed_errors.extend(
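A worked example of the generation skip that comment describes (numbers illustrative, not from the test):

# gen 5: pageserver runs attached; a deletion list is persisted under gen 5
# gen 6: issued by the storage controller during re-attach when the pageserver restarts
# gen 7: issued when the test explicitly attaches the tenant again
#
# The gen-5 deletion list now fails validation (5 is no longer the latest
# generation), so those deletions are dropped and the pageserver emits the
# "Dropped remote consistent LSN updates" warnings allow-listed here.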

View File

@@ -80,7 +80,7 @@ def test_tenant_s3_restore(
 assert (
 ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0
 ), "tenant removed before deletion was issued"
-env.attachment_service.attach_hook_drop(tenant_id)
+env.storage_controller.attach_hook_drop(tenant_id)
 tenant_path = env.pageserver.tenant_dir(tenant_id)
 assert not tenant_path.exists()
@@ -103,7 +103,7 @@ def test_tenant_s3_restore(
 tenant_id, timestamp=ts_before_deletion, done_if_after=ts_after_deletion
 )
-generation = env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
+generation = env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)
 ps_http.tenant_attach(tenant_id, generation=generation)
 env.pageserver.quiesce_tenants()

View File

@@ -43,7 +43,7 @@ def test_sharding_smoke(
 tenant_id = env.initial_tenant
 pageservers = dict((int(p.id), p) for p in env.pageservers)
-shards = env.attachment_service.locate(tenant_id)
+shards = env.storage_controller.locate(tenant_id)
 def get_sizes():
 sizes = {}
@@ -86,7 +86,7 @@ def test_sharding_smoke(
 )
 assert timelines == {env.initial_timeline, timeline_b}
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_split_unsharded(
@@ -102,7 +102,7 @@ def test_sharding_split_unsharded(
 # Check that we created with an unsharded TenantShardId: this is the default,
 # but check it in case we change the default in future
-assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 0)) is not None
+assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 0)) is not None
 workload = Workload(env, tenant_id, timeline_id, branch_name="main")
 workload.init()
@@ -110,15 +110,15 @@
 workload.validate()
 # Split one shard into two
-env.attachment_service.tenant_shard_split(tenant_id, shard_count=2)
+env.storage_controller.tenant_shard_split(tenant_id, shard_count=2)
 # Check we got the shard IDs we expected
-assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 2)) is not None
-assert env.attachment_service.inspect(TenantShardId(tenant_id, 1, 2)) is not None
+assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 2)) is not None
+assert env.storage_controller.inspect(TenantShardId(tenant_id, 1, 2)) is not None
 workload.validate()
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_split_smoke(
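The asserts above lean on the TenantShardId numbering convention: (tenant_id, shard_number, shard_count), with shard_count=0 denoting the unsharded placeholder. A sketch of the expected ids after a split (the helper is hypothetical):

def expected_shard_ids(tenant_id, shard_count):
    if shard_count == 0:
        # Unsharded tenants are represented as shard 0 of 0
        return [TenantShardId(tenant_id, 0, 0)]
    return [TenantShardId(tenant_id, n, shard_count) for n in range(shard_count)]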
@@ -161,7 +161,7 @@
 workload.write_rows(256)
 # Note which pageservers initially hold a shard after tenant creation
-pre_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
+pre_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]
 # For pageservers holding a shard, validate their ingest statistics
 # reflect a proper splitting of the WAL.
@@ -213,9 +213,9 @@
 # Before split, old shards exist
 assert shards_on_disk(old_shard_ids)
-env.attachment_service.tenant_shard_split(tenant_id, shard_count=split_shard_count)
+env.storage_controller.tenant_shard_split(tenant_id, shard_count=split_shard_count)
-post_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
+post_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]
 # We should have split into 8 shards, on the same 4 pageservers we started on.
 assert len(post_split_pageserver_ids) == split_shard_count
 assert len(set(post_split_pageserver_ids)) == shard_count
@@ -261,7 +261,7 @@
 # Check that we didn't do any spurious reconciliations.
 # Total number of reconciles should have been one per original shard, plus
 # one for each shard that was migrated.
-reconcile_ok = env.attachment_service.get_metric_value(
+reconcile_ok = env.storage_controller.get_metric_value(
 "storage_controller_reconcile_complete_total", filter={"status": "ok"}
 )
 assert reconcile_ok == shard_count + split_shard_count // 2
@@ -269,19 +269,19 @@
 # Check that no cancelled or errored reconciliations occurred: this test does no
 # failure injection and should run clean.
 assert (
-env.attachment_service.get_metric_value(
+env.storage_controller.get_metric_value(
 "storage_controller_reconcile_complete_total", filter={"status": "cancel"}
 )
 is None
 )
 assert (
-env.attachment_service.get_metric_value(
+env.storage_controller.get_metric_value(
 "storage_controller_reconcile_complete_total", filter={"status": "error"}
 )
 is None
 )
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 # Validate pageserver state
 shards_exist: list[TenantShardId] = []
@@ -360,7 +360,7 @@ def test_sharding_ingest(
 huge_layer_count = 0
 # Inspect the resulting layer map, count how many layers are undersized.
-for shard in env.attachment_service.locate(tenant_id):
+for shard in env.storage_controller.locate(tenant_id):
 pageserver = env.get_pageserver(shard["node_id"])
 shard_id = shard["shard_id"]
 layer_map = pageserver.http_client().layer_map_info(shard_id, timeline_id)
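Spelling out the reconcile arithmetic asserted above, with the values this test uses (four original shards split into eight across the same four pageservers, so half of the new shards migrate):

shard_count = 4          # original shards: one "ok" reconcile each for the split
split_shard_count = 8    # shards after the split
expected_ok = shard_count + split_shard_count // 2  # 4 + 4 == 8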

View File

@@ -6,10 +6,10 @@ from typing import Any, Dict, List, Union
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
-AttachmentServiceApiException,
 NeonEnv,
 NeonEnvBuilder,
 PgBin,
+StorageControllerApiException,
 TokenScope,
 )
 from fixtures.pageserver.http import PageserverHttpClient
@@ -36,7 +36,7 @@ from werkzeug.wrappers.response import Response
 def get_node_shard_counts(env: NeonEnv, tenant_ids):
 counts: defaultdict[str, int] = defaultdict(int)
 for tid in tenant_ids:
-for shard in env.attachment_service.locate(tid):
+for shard in env.storage_controller.locate(tid):
 counts[shard["node_id"]] += 1
 return counts
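A sketch of how later tests poll this helper to detect evacuation of a node; wait_until is the fixture retry helper, and the wrapper is a hypothetical condensation of the node_evacuated checks below:

def wait_for_node_empty(env, tenant_ids, node_id):
    def node_empty():
        # All shards should have been scheduled away from this node
        assert get_node_shard_counts(env, tenant_ids)[node_id] == 0

    wait_until(20, 1, node_empty)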
@@ -62,20 +62,20 @@ def test_sharding_service_smoke(
 # Start services by hand so that we can skip a pageserver (this will start + register later)
 env.broker.try_start()
-env.attachment_service.start()
+env.storage_controller.start()
 env.pageservers[0].start()
 env.pageservers[1].start()
 for sk in env.safekeepers:
 sk.start()
 # The pageservers we started should have registered with the sharding service on startup
-nodes = env.attachment_service.node_list()
+nodes = env.storage_controller.node_list()
 assert len(nodes) == 2
 assert set(n["id"] for n in nodes) == {env.pageservers[0].id, env.pageservers[1].id}
 # Starting an additional pageserver should register successfully
 env.pageservers[2].start()
-nodes = env.attachment_service.node_list()
+nodes = env.storage_controller.node_list()
 assert len(nodes) == 3
 assert set(n["id"] for n in nodes) == {ps.id for ps in env.pageservers}
@@ -99,22 +99,22 @@ def test_sharding_service_smoke(
 # Creating and deleting timelines should work, using identical API to pageserver
 timeline_crud_tenant = next(iter(tenant_ids))
 timeline_id = TimelineId.generate()
-env.attachment_service.pageserver_api().timeline_create(
+env.storage_controller.pageserver_api().timeline_create(
 pg_version=PgVersion.NOT_SET, tenant_id=timeline_crud_tenant, new_timeline_id=timeline_id
 )
-timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
+timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
 assert len(timelines) == 2
 assert timeline_id in set(TimelineId(t["timeline_id"]) for t in timelines)
 # virtual_ps_http.timeline_delete(tenant_id=timeline_crud_tenant, timeline_id=timeline_id)
 timeline_delete_wait_completed(
-env.attachment_service.pageserver_api(), timeline_crud_tenant, timeline_id
+env.storage_controller.pageserver_api(), timeline_crud_tenant, timeline_id
 )
-timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
+timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
 assert len(timelines) == 1
 assert timeline_id not in set(TimelineId(t["timeline_id"]) for t in timelines)
 # Marking a pageserver offline should migrate tenants away from it.
-env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
+env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})
 def node_evacuated(node_id: int) -> None:
 counts = get_node_shard_counts(env, tenant_ids)
@@ -124,7 +124,7 @@ def test_sharding_service_smoke(
 # Marking pageserver active should not migrate anything to it
 # immediately
-env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Active"})
+env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Active"})
 time.sleep(1)
 assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 0
@@ -144,13 +144,13 @@ def test_sharding_service_smoke(
 # Delete all the tenants
 for tid in tenant_ids:
-tenant_delete_wait_completed(env.attachment_service.pageserver_api(), tid, 10)
+tenant_delete_wait_completed(env.storage_controller.pageserver_api(), tid, 10)
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 # Set a scheduling policy on one node, create all the tenants, observe
 # that the scheduling policy is respected.
-env.attachment_service.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
+env.storage_controller.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
 # Create some fresh tenants
 tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
@@ -163,7 +163,7 @@ def test_sharding_service_smoke(
 assert counts[env.pageservers[0].id] == tenant_shard_count // 2
 assert counts[env.pageservers[2].id] == tenant_shard_count // 2
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_node_status_after_restart(
@@ -173,28 +173,28 @@ def test_node_status_after_restart(
 env = neon_env_builder.init_start()
 # Initially we have two online pageservers
-nodes = env.attachment_service.node_list()
+nodes = env.storage_controller.node_list()
 assert len(nodes) == 2
 env.pageservers[1].stop()
-env.attachment_service.stop()
-env.attachment_service.start()
+env.storage_controller.stop()
+env.storage_controller.start()
 def is_ready():
-assert env.attachment_service.ready() is True
+assert env.storage_controller.ready() is True
 wait_until(30, 1, is_ready)
 # We loaded nodes from database on restart
-nodes = env.attachment_service.node_list()
+nodes = env.storage_controller.node_list()
 assert len(nodes) == 2
 # We should still be able to create a tenant, because the pageserver which is still online
 # should have had its availability state set to Active.
-env.attachment_service.tenant_create(TenantId.generate())
+env.storage_controller.tenant_create(TenantId.generate())
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_service_passthrough(
@@ -208,9 +208,9 @@ def test_sharding_service_passthrough(
 neon_env_builder.num_pageservers = 2
 env = neon_env_builder.init_start()
-# We will talk to attachment service as if it was a pageserver, using the pageserver
+# We will talk to storage controller as if it was a pageserver, using the pageserver
 # HTTP client
-client = PageserverHttpClient(env.attachment_service_port, lambda: True)
+client = PageserverHttpClient(env.storage_controller_port, lambda: True)
 timelines = client.timeline_list(tenant_id=env.initial_tenant)
 assert len(timelines) == 1
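The passthrough works because the storage controller exposes the pageserver HTTP API on its own port; a minimal sketch of the idea (the second client argument mirrors the usage above, and the wrapper function is hypothetical):

def controller_as_pageserver(env):
    client = PageserverHttpClient(env.storage_controller_port, lambda: True)
    # Any ordinary pageserver call is proxied to the appropriate shard/pageserver
    return client.timeline_list(tenant_id=env.initial_tenant)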
@@ -221,22 +221,22 @@
 }
 assert status["state"]["slug"] == "Active"
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
 env = neon_env_builder.init_start()
 tenant_a = env.initial_tenant
 tenant_b = TenantId.generate()
-env.attachment_service.tenant_create(tenant_b)
+env.storage_controller.tenant_create(tenant_b)
 env.pageserver.tenant_detach(tenant_a)
 # TODO: extend this test to use multiple pageservers, and check that locations don't move around
 # on restart.
-# Attachment service restart
-env.attachment_service.stop()
-env.attachment_service.start()
+# Storage controller restart
+env.storage_controller.stop()
+env.storage_controller.start()
 observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())
@@ -255,7 +255,7 @@ def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
 assert tenant_a not in observed
 assert tenant_b in observed
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 @pytest.mark.parametrize("warm_up", [True, False])
@@ -271,7 +271,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
 # Start services by hand so that we can skip registration on one of the pageservers
 env = neon_env_builder.init_configs()
 env.broker.try_start()
-env.attachment_service.start()
+env.storage_controller.start()
 # This is the pageserver where we'll initially create the tenant. Run it in emergency
 # mode so that it doesn't talk to storage controller, and do not register it.
@@ -286,12 +286,12 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
 # will be attached after onboarding
 env.pageservers[1].start(register=True)
 dest_ps = env.pageservers[1]
-virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
+virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
 for sk in env.safekeepers:
 sk.start()
-# Create a tenant directly via pageserver HTTP API, skipping the attachment service
+# Create a tenant directly via pageserver HTTP API, skipping the storage controller
 tenant_id = TenantId.generate()
 generation = 123
 origin_ps.http_client().tenant_create(tenant_id, generation=generation)
@@ -324,7 +324,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
 virtual_ps_http.tenant_secondary_download(tenant_id)
-# Call into attachment service to onboard the tenant
+# Call into storage controller to onboard the tenant
 generation += 1
 virtual_ps_http.tenant_location_conf(
 tenant_id,
@@ -347,7 +347,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
 },
 )
-# As if doing a live migration, call into the attachment service to
+# As if doing a live migration, call into the storage controller to
 # set it to AttachedSingle: this is a no-op, but we test it because the
 # cloud control plane may call this for symmetry with live migration to
 # an individual pageserver
@@ -375,8 +375,8 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
 assert dest_tenants[0]["generation"] == generation + 1
 # The onboarded tenant should survive a restart of sharding service
-env.attachment_service.stop()
-env.attachment_service.start()
+env.storage_controller.stop()
+env.storage_controller.start()
 # The onboarded tenant should survive a restart of pageserver
 dest_ps.stop()
@@ -407,7 +407,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
 dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
 assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_service_compute_hook(
@@ -419,7 +419,7 @@ def test_sharding_service_compute_hook(
 Test that the sharding service calls out to the configured HTTP endpoint on attachment changes
 """
-# We will run two pageservers to migrate and check that the attachment service sends notifications
+# We will run two pageservers to migrate and check that the storage controller sends notifications
 # when migrating.
 neon_env_builder.num_pageservers = 2
 (host, port) = httpserver_listen_address
@@ -450,7 +450,7 @@ def test_sharding_service_compute_hook(
 }
 assert notifications[0] == expect
-env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
+env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})
 def node_evacuated(node_id: int) -> None:
 counts = get_node_shard_counts(env, [env.initial_tenant])
@@ -473,8 +473,8 @@ def test_sharding_service_compute_hook(
 wait_until(20, 0.25, received_migration_notification)
 # When we restart, we should re-emit notifications for all tenants
-env.attachment_service.stop()
-env.attachment_service.start()
+env.storage_controller.stop()
+env.storage_controller.start()
 def received_restart_notification():
 assert len(notifications) == 3
@@ -483,7 +483,7 @@ def test_sharding_service_compute_hook(
 wait_until(10, 1, received_restart_notification)
 # Splitting a tenant should cause its stripe size to become visible in the compute notification
-env.attachment_service.tenant_shard_split(env.initial_tenant, shard_count=2)
+env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=2)
 expect = {
 "tenant_id": str(env.initial_tenant),
 "stripe_size": 32768,
@@ -499,7 +499,7 @@ def test_sharding_service_compute_hook(
 wait_until(10, 1, received_split_notification)
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
@@ -512,55 +512,55 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
 env = neon_env_builder.init_start()
 tenant_id = TenantId.generate()
-env.attachment_service.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
+env.storage_controller.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
 # Check that the consistency check passes on a freshly setup system
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
-# These APIs are intentionally not implemented as methods on NeonAttachmentService, as
+# These APIs are intentionally not implemented as methods on NeonStorageController, as
 # they're just for use in unanticipated circumstances.
 # Initial tenant (1 shard) and the one we just created (2 shards) should be visible
-response = env.attachment_service.request(
+response = env.storage_controller.request(
 "GET",
-f"{env.attachment_service_api}/debug/v1/tenant",
+f"{env.storage_controller_api}/debug/v1/tenant",
-headers=env.attachment_service.headers(TokenScope.ADMIN),
+headers=env.storage_controller.headers(TokenScope.ADMIN),
 )
 assert len(response.json()) == 3
 # Scheduler should report the expected nodes and shard counts
-response = env.attachment_service.request(
-"GET", f"{env.attachment_service_api}/debug/v1/scheduler"
+response = env.storage_controller.request(
+"GET", f"{env.storage_controller_api}/debug/v1/scheduler"
 )
 # Two nodes, in a dict of node_id->node
 assert len(response.json()["nodes"]) == 2
 assert sum(v["shard_count"] for v in response.json()["nodes"].values()) == 3
 assert all(v["may_schedule"] for v in response.json()["nodes"].values())
-response = env.attachment_service.request(
+response = env.storage_controller.request(
 "POST",
-f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop",
+f"{env.storage_controller_api}/debug/v1/node/{env.pageservers[1].id}/drop",
-headers=env.attachment_service.headers(TokenScope.ADMIN),
+headers=env.storage_controller.headers(TokenScope.ADMIN),
 )
-assert len(env.attachment_service.node_list()) == 1
+assert len(env.storage_controller.node_list()) == 1
-response = env.attachment_service.request(
+response = env.storage_controller.request(
 "POST",
-f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop",
+f"{env.storage_controller_api}/debug/v1/tenant/{tenant_id}/drop",
-headers=env.attachment_service.headers(TokenScope.ADMIN),
+headers=env.storage_controller.headers(TokenScope.ADMIN),
 )
 # Tenant drop should be reflected in dump output
-response = env.attachment_service.request(
+response = env.storage_controller.request(
 "GET",
-f"{env.attachment_service_api}/debug/v1/tenant",
+f"{env.storage_controller_api}/debug/v1/tenant",
-headers=env.attachment_service.headers(TokenScope.ADMIN),
+headers=env.storage_controller.headers(TokenScope.ADMIN),
 )
 assert len(response.json()) == 1
 # Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're
 # meant to be unclean wrt the pageserver state, but not leave a broken storage controller behind.
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_service_s3_time_travel_recovery(
@@ -584,10 +584,10 @@ def test_sharding_service_s3_time_travel_recovery(
 neon_env_builder.num_pageservers = 1
 env = neon_env_builder.init_start()
-virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
+virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
 tenant_id = TenantId.generate()
-env.attachment_service.tenant_create(
+env.storage_controller.tenant_create(
 tenant_id,
 shard_count=2,
 shard_stripe_size=8192,
@@ -595,7 +595,7 @@ def test_sharding_service_s3_time_travel_recovery(
 )
 # Check that the consistency check passes
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 branch_name = "main"
 timeline_id = env.neon_cli.create_timeline(
@@ -670,28 +670,28 @@ def test_sharding_service_s3_time_travel_recovery(
 with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
 endpoint.safe_psql("SELECT * FROM created_foo;")
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()
 def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
 neon_env_builder.auth_enabled = True
 env = neon_env_builder.init_start()
-svc = env.attachment_service
-api = env.attachment_service_api
+svc = env.storage_controller
+api = env.storage_controller_api
 tenant_id = TenantId.generate()
 body: Dict[str, Any] = {"new_tenant_id": str(tenant_id)}
 # No token
 with pytest.raises(
-AttachmentServiceApiException,
+StorageControllerApiException,
 match="Unauthorized: missing authorization header",
 ):
-svc.request("POST", f"{env.attachment_service_api}/v1/tenant", json=body)
+svc.request("POST", f"{env.storage_controller_api}/v1/tenant", json=body)
 # Token with incorrect scope
 with pytest.raises(
-AttachmentServiceApiException,
+StorageControllerApiException,
 match="Forbidden: JWT authentication error",
 ):
 svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN))
@@ -703,14 +703,14 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
 # No token
 with pytest.raises(
-AttachmentServiceApiException,
+StorageControllerApiException,
 match="Unauthorized: missing authorization header",
 ):
 svc.request("GET", f"{api}/debug/v1/tenant")
 # Token with incorrect scope
 with pytest.raises(
-AttachmentServiceApiException,
+StorageControllerApiException,
 match="Forbidden: JWT authentication error",
 ):
 svc.request(
@@ -719,14 +719,14 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
 # No token
 with pytest.raises(
-AttachmentServiceApiException,
+StorageControllerApiException,
 match="Unauthorized: missing authorization header",
 ):
 svc.request("POST", f"{api}/upcall/v1/re-attach")
 # Token with incorrect scope
 with pytest.raises(
-AttachmentServiceApiException,
+StorageControllerApiException,
 match="Forbidden: JWT authentication error",
 ):
 svc.request(
@@ -743,7 +743,7 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
 env = neon_env_builder.init_start()
 tenant_id = env.initial_tenant
-http = env.attachment_service.pageserver_api()
+http = env.storage_controller.pageserver_api()
 default_value = "7days"
 new_value = "1h"
@@ -769,4 +769,4 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
 assert readback_ps.effective_config["pitr_interval"] == default_value
 assert "pitr_interval" not in readback_ps.tenant_specific_overrides
-env.attachment_service.consistency_check()
+env.storage_controller.consistency_check()

View File

@@ -1011,7 +1011,7 @@ def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
 resp = client.tenant_status(eager_tenant)
 assert resp["state"]["slug"] == "Active"
-gen = env.attachment_service.attach_hook_issue(eager_tenant, env.pageserver.id)
+gen = env.storage_controller.attach_hook_issue(eager_tenant, env.pageserver.id)
 client.tenant_location_conf(
 eager_tenant,
 {
@@ -1071,7 +1071,7 @@ def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_met
 # attach, it will consume the only permit because logical size calculation
 # is paused.
-gen = env.attachment_service.attach_hook_issue(lazy_tenant, env.pageserver.id)
+gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)
 client.tenant_location_conf(
 lazy_tenant,
 {