tests/neon_local: rename "attachment service" -> "storage controller" (#7087)

Not a user-facing change, but it can break existing `.neon` directories
created by neon_local, because the name of the database used by the storage
controller changes.

This PR changes all the locations apart from the path of
`control_plane/attachment_service` itself (we are waiting for an opportune moment to
rename that one, because it is the most likely to conflict with ongoing PRs like
#6676).
Author: John Spray
Date: 2024-03-12 11:36:27 +00:00 (committed by GitHub)
Parent: 621ea2ec44
Commit: 89cf714890
32 changed files with 267 additions and 275 deletions

View File

@@ -51,7 +51,7 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
# Force cargo not to print progress bar
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
# Set PQ_LIB_DIR to make sure `attachment_service` gets linked with bundled libpq (through diesel)
# Set PQ_LIB_DIR to make sure `storage_controller` gets linked with bundled libpq (through diesel)
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
#

View File

@@ -30,7 +30,7 @@ use pageserver_api::controller_api::{
};
use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
use control_plane::attachment_service::{AttachHookRequest, InspectRequest};
use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
/// State available to HTTP request handlers
#[derive(Clone)]

View File

@@ -1,9 +1,3 @@
/// The attachment service mimics the aspects of the control plane API
/// that are required for a pageserver to operate.
///
/// This enables running & testing pageservers without a full-blown
/// deployment of the Neon cloud platform.
///
use anyhow::{anyhow, Context};
use attachment_service::http::make_router;
use attachment_service::metrics::preinitialize_metrics;

View File

@@ -20,7 +20,7 @@ use crate::node::Node;
/// ## What do we store?
///
/// The attachment service does not store most of its state durably.
/// The storage controller service does not store most of its state durably.
///
/// The essential things to store durably are:
/// - generation numbers, as these must always advance monotonically to ensure data safety.
@@ -34,7 +34,7 @@ use crate::node::Node;
///
/// ## Performance/efficiency
///
/// The attachment service does not go via the database for most things: there are
/// The storage controller service does not go via the database for most things: there are
/// a couple of places where we must, and where efficiency matters:
/// - Incrementing generation numbers: the Reconciler has to wait for this to complete
/// before it can attach a tenant, so this acts as a bound on how fast things like

View File

@@ -8,7 +8,7 @@ use std::{
};
use anyhow::Context;
use control_plane::attachment_service::{
use control_plane::storage_controller::{
AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse,
};
use diesel::result::DatabaseErrorKind;
@@ -839,7 +839,7 @@ impl Service {
tenant_state.generation = Some(new_generation);
} else {
// This is a detach notification. We must update placement policy to avoid re-attaching
// during background scheduling/reconciliation, or during attachment service restart.
// during background scheduling/reconciliation, or during storage controller restart.
assert!(attach_req.node_id.is_none());
tenant_state.policy = PlacementPolicy::Detached;
}

View File

@@ -8,11 +8,11 @@
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
use compute_api::spec::ComputeMode;
use control_plane::attachment_service::AttachmentService;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::{InitForceMode, LocalEnv};
use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::StorageController;
use control_plane::{broker, local_env};
use pageserver_api::controller_api::{
NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
@@ -138,7 +138,7 @@ fn main() -> Result<()> {
"start" => rt.block_on(handle_start_all(sub_args, &env)),
"stop" => rt.block_on(handle_stop_all(sub_args, &env)),
"pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
"attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)),
"storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
"safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
"endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
"mappings" => handle_mappings(sub_args, &mut env),
@@ -445,14 +445,14 @@ async fn handle_tenant(
// If tenant ID was not specified, generate one
let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);
// We must register the tenant with the attachment service, so
// We must register the tenant with the storage controller, so
// that when the pageserver restarts, it will be re-attached.
let attachment_service = AttachmentService::from_env(env);
attachment_service
let storage_controller = StorageController::from_env(env);
storage_controller
.tenant_create(TenantCreateRequest {
// Note that ::unsharded here isn't actually because the tenant is unsharded, it's because the
// attachment service expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest
// type is used both in attachment service (for creating tenants) and in pageserver (for creating shards)
// storage controller expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest
// type is used both in storage controller (for creating tenants) and in pageserver (for creating shards)
new_tenant_id: TenantShardId::unsharded(tenant_id),
generation: None,
shard_parameters: ShardParameters {
@@ -476,9 +476,9 @@ async fn handle_tenant(
.context("Failed to parse postgres version from the argument string")?;
// FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have
// different shards picking different start lsns. Maybe we have to teach attachment service
// different shards picking different start lsns. Maybe we have to teach storage controller
// to let shard 0 branch first and then propagate the chosen LSN to other shards.
attachment_service
storage_controller
.tenant_timeline_create(
tenant_id,
TimelineCreateRequest {
@@ -528,8 +528,8 @@ async fn handle_tenant(
let new_pageserver = get_pageserver(env, matches)?;
let new_pageserver_id = new_pageserver.conf.id;
let attachment_service = AttachmentService::from_env(env);
attachment_service
let storage_controller = StorageController::from_env(env);
storage_controller
.tenant_migrate(tenant_shard_id, new_pageserver_id)
.await?;
@@ -543,8 +543,8 @@ async fn handle_tenant(
let mut tenant_synthetic_size = None;
let attachment_service = AttachmentService::from_env(env);
for shard in attachment_service.tenant_locate(tenant_id).await?.shards {
let storage_controller = StorageController::from_env(env);
for shard in storage_controller.tenant_locate(tenant_id).await?.shards {
let pageserver =
PageServerNode::from_env(env, env.get_pageserver_conf(shard.node_id)?);
@@ -586,8 +586,8 @@ async fn handle_tenant(
let tenant_id = get_tenant_id(matches, env)?;
let shard_count: u8 = matches.get_one::<u8>("shard-count").cloned().unwrap_or(0);
let attachment_service = AttachmentService::from_env(env);
let result = attachment_service
let storage_controller = StorageController::from_env(env);
let result = storage_controller
.tenant_split(tenant_id, shard_count)
.await?;
println!(
@@ -613,7 +613,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
match timeline_match.subcommand() {
Some(("list", list_match)) => {
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
// where shard 0 is attached, and query there.
let tenant_shard_id = get_tenant_shard_id(list_match, env)?;
let timelines = pageserver.timeline_list(&tenant_shard_id).await?;
@@ -633,7 +633,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
let new_timeline_id_opt = parse_timeline_id(create_match)?;
let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());
let attachment_service = AttachmentService::from_env(env);
let storage_controller = StorageController::from_env(env);
let create_req = TimelineCreateRequest {
new_timeline_id,
ancestor_timeline_id: None,
@@ -641,7 +641,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
ancestor_start_lsn: None,
pg_version: Some(pg_version),
};
let timeline_info = attachment_service
let timeline_info = storage_controller
.tenant_timeline_create(tenant_id, create_req)
.await?;
@@ -730,7 +730,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
.transpose()
.context("Failed to parse ancestor start Lsn from the request")?;
let new_timeline_id = TimelineId::generate();
let attachment_service = AttachmentService::from_env(env);
let storage_controller = StorageController::from_env(env);
let create_req = TimelineCreateRequest {
new_timeline_id,
ancestor_timeline_id: Some(ancestor_timeline_id),
@@ -738,7 +738,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
ancestor_start_lsn: start_lsn,
pg_version: None,
};
let timeline_info = attachment_service
let timeline_info = storage_controller
.tenant_timeline_create(tenant_id, create_req)
.await?;
@@ -767,7 +767,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
match sub_name {
"list" => {
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
// TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
// where shard 0 is attached, and query there.
let tenant_shard_id = get_tenant_shard_id(sub_args, env)?;
let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
@@ -952,21 +952,21 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
(
vec![(parsed.0, parsed.1.unwrap_or(5432))],
// If caller is telling us what pageserver to use, this is not a tenant which is
// fully managed by the attachment service, therefore not sharded.
// fully managed by the storage controller, therefore not sharded.
ShardParameters::DEFAULT_STRIPE_SIZE,
)
} else {
// Look up the currently attached location of the tenant, and its striping metadata,
// to pass these on to postgres.
let attachment_service = AttachmentService::from_env(env);
let locate_result = attachment_service.tenant_locate(endpoint.tenant_id).await?;
let storage_controller = StorageController::from_env(env);
let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
let pageservers = locate_result
.shards
.into_iter()
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Attachment service reported bad hostname"),
.expect("Storage controller reported bad hostname"),
shard.listen_pg_port,
)
})
@@ -1015,8 +1015,8 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
pageserver.pg_connection_config.port(),
)]
} else {
let attachment_service = AttachmentService::from_env(env);
attachment_service
let storage_controller = StorageController::from_env(env);
storage_controller
.tenant_locate(endpoint.tenant_id)
.await?
.shards
@@ -1024,7 +1024,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Attachment service reported malformed host"),
.expect("Storage controller reported malformed host"),
shard.listen_pg_port,
)
})
@@ -1144,8 +1144,8 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
let scheduling = subcommand_args.get_one("scheduling");
let availability = subcommand_args.get_one("availability");
let attachment_service = AttachmentService::from_env(env);
attachment_service
let storage_controller = StorageController::from_env(env);
storage_controller
.node_configure(NodeConfigureRequest {
node_id: pageserver.conf.id,
scheduling: scheduling.cloned(),
@@ -1170,11 +1170,11 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
Ok(())
}
async fn handle_attachment_service(
async fn handle_storage_controller(
sub_match: &ArgMatches,
env: &local_env::LocalEnv,
) -> Result<()> {
let svc = AttachmentService::from_env(env);
let svc = StorageController::from_env(env);
match sub_match.subcommand() {
Some(("start", _start_match)) => {
if let Err(e) = svc.start().await {
@@ -1194,8 +1194,8 @@ async fn handle_attachment_service(
exit(1);
}
}
Some((sub_name, _)) => bail!("Unexpected attachment_service subcommand '{}'", sub_name),
None => bail!("no attachment_service subcommand provided"),
Some((sub_name, _)) => bail!("Unexpected storage_controller subcommand '{}'", sub_name),
None => bail!("no storage_controller subcommand provided"),
}
Ok(())
}
@@ -1280,11 +1280,11 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
broker::start_broker_process(env).await?;
// Only start the attachment service if the pageserver is configured to need it
// Only start the storage controller if the pageserver is configured to need it
if env.control_plane_api.is_some() {
let attachment_service = AttachmentService::from_env(env);
if let Err(e) = attachment_service.start().await {
eprintln!("attachment_service start failed: {:#}", e);
let storage_controller = StorageController::from_env(env);
if let Err(e) = storage_controller.start().await {
eprintln!("storage_controller start failed: {:#}", e);
try_stop_all(env, true).await;
exit(1);
}
@@ -1356,9 +1356,9 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
}
if env.control_plane_api.is_some() {
let attachment_service = AttachmentService::from_env(env);
if let Err(e) = attachment_service.stop(immediate).await {
eprintln!("attachment service stop failed: {e:#}");
let storage_controller = StorageController::from_env(env);
if let Err(e) = storage_controller.stop(immediate).await {
eprintln!("storage controller stop failed: {e:#}");
}
}
}
@@ -1618,9 +1618,9 @@ fn cli() -> Command {
)
)
.subcommand(
Command::new("attachment_service")
Command::new("storage_controller")
.arg_required_else_help(true)
.about("Manage attachment_service")
.about("Manage storage_controller")
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
.subcommand(Command::new("stop").about("Stop local pageserver")
.arg(stop_mode_arg.clone()))

View File

@@ -57,9 +57,9 @@ use serde::{Deserialize, Serialize};
use url::Host;
use utils::id::{NodeId, TenantId, TimelineId};
use crate::attachment_service::AttachmentService;
use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
use crate::storage_controller::StorageController;
use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
@@ -750,17 +750,17 @@ impl Endpoint {
let postgresql_conf = self.read_postgresql_conf()?;
spec.cluster.postgresql_conf = Some(postgresql_conf);
// If we weren't given explicit pageservers, query the attachment service
// If we weren't given explicit pageservers, query the storage controller
if pageservers.is_empty() {
let attachment_service = AttachmentService::from_env(&self.env);
let locate_result = attachment_service.tenant_locate(self.tenant_id).await?;
let storage_controller = StorageController::from_env(&self.env);
let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
pageservers = locate_result
.shards
.into_iter()
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Attachment service reported bad hostname"),
.expect("Storage controller reported bad hostname"),
shard.listen_pg_port,
)
})

View File

@@ -6,7 +6,6 @@
//! local installations.
#![deny(clippy::undocumented_unsafe_blocks)]
pub mod attachment_service;
mod background_process;
pub mod broker;
pub mod endpoint;
@@ -14,3 +13,4 @@ pub mod local_env;
pub mod pageserver;
pub mod postgresql_conf;
pub mod safekeeper;
pub mod storage_controller;

View File

@@ -72,13 +72,13 @@ pub struct LocalEnv {
#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,
// Control plane upcall API for pageserver: if None, we will not run attachment_service. If set, this will
// Control plane upcall API for pageserver: if None, we will not run storage_controller. If set, this will
// be propagated into each pageserver's configuration.
#[serde(default)]
pub control_plane_api: Option<Url>,
// Control plane upcall API for attachment service. If set, this will be propagated into the
// attachment service's configuration.
// Control plane upcall API for storage controller. If set, this will be propagated into the
// storage controller's configuration.
#[serde(default)]
pub control_plane_compute_hook_api: Option<Url>,
@@ -227,10 +227,10 @@ impl LocalEnv {
self.neon_distrib_dir.join("pageserver")
}
pub fn attachment_service_bin(&self) -> PathBuf {
// Irrespective of configuration, attachment service binary is always
pub fn storage_controller_bin(&self) -> PathBuf {
// Irrespective of configuration, storage controller binary is always
// run from the same location as neon_local. This means that for compatibility
// tests that run old pageserver/safekeeper, they still run latest attachment service.
// tests that run old pageserver/safekeeper, they still run latest storage controller.
let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned();
neon_local_bin_dir.join("storage_controller")
}

View File

@@ -31,8 +31,8 @@ use utils::{
lsn::Lsn,
};
use crate::attachment_service::AttachmentService;
use crate::local_env::PageServerConf;
use crate::storage_controller::StorageController;
use crate::{background_process, local_env::LocalEnv};
/// Directory within .neon which will be used by default for LocalFs remote storage.
@@ -111,7 +111,7 @@ impl PageServerNode {
control_plane_api.as_str()
));
// Attachment service uses the same auth as pageserver: if JWT is enabled
// Storage controller uses the same auth as pageserver: if JWT is enabled
// for us, we will also need it to talk to them.
if matches!(self.conf.http_auth_type, AuthType::NeonJWT) {
let jwt_token = self
@@ -214,12 +214,12 @@ impl PageServerNode {
// Register the node with the storage controller before starting pageserver: pageserver must be registered to
// successfully call /re-attach and finish starting up.
if register {
let attachment_service = AttachmentService::from_env(&self.env);
let storage_controller = StorageController::from_env(&self.env);
let (pg_host, pg_port) =
parse_host_port(&self.conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&self.conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
attachment_service
storage_controller
.node_register(NodeRegisterRequest {
node_id: self.conf.id,
listen_pg_addr: pg_host.to_string(),

View File

@@ -24,7 +24,7 @@ use utils::{
id::{NodeId, TenantId},
};
pub struct AttachmentService {
pub struct StorageController {
env: LocalEnv,
listen: String,
path: Utf8PathBuf,
@@ -36,7 +36,7 @@ pub struct AttachmentService {
const COMMAND: &str = "storage_controller";
const ATTACHMENT_SERVICE_POSTGRES_VERSION: u32 = 16;
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
#[derive(Serialize, Deserialize)]
pub struct AttachHookRequest {
@@ -59,7 +59,7 @@ pub struct InspectResponse {
pub attachment: Option<(u32, NodeId)>,
}
impl AttachmentService {
impl StorageController {
pub fn from_env(env: &LocalEnv) -> Self {
let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
.unwrap()
@@ -136,27 +136,27 @@ impl AttachmentService {
}
fn pid_file(&self) -> Utf8PathBuf {
Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("attachment_service.pid"))
Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("storage_controller.pid"))
.expect("non-Unicode path")
}
/// PIDFile for the postgres instance used to store attachment service state
/// PIDFile for the postgres instance used to store storage controller state
fn postgres_pid_file(&self) -> Utf8PathBuf {
Utf8PathBuf::from_path_buf(
self.env
.base_data_dir
.join("attachment_service_postgres.pid"),
.join("storage_controller_postgres.pid"),
)
.expect("non-Unicode path")
}
/// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
///
/// This usually uses ATTACHMENT_SERVICE_POSTGRES_VERSION of postgres, but will fall back
/// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
/// to other versions if that one isn't found. Some automated tests create circumstances
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
let prefer_versions = [ATTACHMENT_SERVICE_POSTGRES_VERSION, 15, 14];
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
for v in prefer_versions {
let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
@@ -189,7 +189,7 @@ impl AttachmentService {
///
/// Returns the database url
pub async fn setup_database(&self) -> anyhow::Result<String> {
const DB_NAME: &str = "attachment_service";
const DB_NAME: &str = "storage_controller";
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
let pg_bin_dir = self.get_pg_bin_dir().await?;
@@ -219,10 +219,10 @@ impl AttachmentService {
}
pub async fn start(&self) -> anyhow::Result<()> {
// Start a vanilla Postgres process used by the attachment service for persistence.
// Start a vanilla Postgres process used by the storage controller for persistence.
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
.unwrap()
.join("attachment_service_db");
.join("storage_controller_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
let pg_log_path = pg_data_path.join("postgres.log");
@@ -245,7 +245,7 @@ impl AttachmentService {
.await?;
};
println!("Starting attachment service database...");
println!("Starting storage controller database...");
let db_start_args = [
"-w",
"-D",
@@ -256,7 +256,7 @@ impl AttachmentService {
];
background_process::start_process(
"attachment_service_db",
"storage_controller_db",
&self.env.base_data_dir,
pg_bin_dir.join("pg_ctl").as_std_path(),
db_start_args,
@@ -300,7 +300,7 @@ impl AttachmentService {
background_process::start_process(
COMMAND,
&self.env.base_data_dir,
&self.env.attachment_service_bin(),
&self.env.storage_controller_bin(),
args,
[(
"NEON_REPO_DIR".to_string(),
@@ -322,10 +322,10 @@ impl AttachmentService {
pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> {
background_process::stop_process(immediate, COMMAND, &self.pid_file())?;
let pg_data_path = self.env.base_data_dir.join("attachment_service_db");
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
println!("Stopping attachment service database...");
println!("Stopping storage controller database...");
let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
.args(pg_stop_args)
@@ -344,10 +344,10 @@ impl AttachmentService {
// fine that stop failed. Otherwise it is an error that stop failed.
const PG_STATUS_NOT_RUNNING: i32 = 3;
if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() {
println!("Attachment service data base is already stopped");
println!("Storage controller database is already stopped");
return Ok(());
} else {
anyhow::bail!("Failed to stop attachment service database: {stop_status}")
anyhow::bail!("Failed to stop storage controller database: {stop_status}")
}
}
@@ -368,7 +368,7 @@ impl AttachmentService {
}
}
/// Simple HTTP request wrapper for calling into attachment service
/// Simple HTTP request wrapper for calling into storage controller
async fn dispatch<RQ, RS>(
&self,
method: hyper::Method,

View File

@@ -70,9 +70,9 @@ Should only be used e.g. for status check/tenant creation/list.
Should only be used e.g. for status check.
Currently also used for connection from any pageserver to any safekeeper.
"generations_api": Provides access to the upcall APIs served by the attachment service or the control plane.
"generations_api": Provides access to the upcall APIs served by the storage controller or the control plane.
"admin": Provides access to the control plane and admin APIs of the attachment service.
"admin": Provides access to the control plane and admin APIs of the storage controller.
### CLI
CLI generates a key pair during call to `neon_local init` with the following commands:

View File

@@ -88,8 +88,6 @@ impl FromStr for NodeAvailability {
}
}
/// FIXME: this is a duplicate of the type in the attachment_service crate, because the
/// type needs to be defined with diesel traits in there.
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
pub enum NodeSchedulingPolicy {
Active,

View File

@@ -1014,24 +1014,24 @@ class NeonEnv:
self.initial_tenant = config.initial_tenant
self.initial_timeline = config.initial_timeline
# Find two adjacent ports for attachment service and its postgres DB. This
# Find two adjacent ports for storage controller and its postgres DB. This
# loop would eventually throw from get_port() if we run out of ports (extremely
# unlikely): usually we find two adjacent free ports on the first iteration.
while True:
self.attachment_service_port = self.port_distributor.get_port()
attachment_service_pg_port = self.port_distributor.get_port()
if attachment_service_pg_port == self.attachment_service_port + 1:
self.storage_controller_port = self.port_distributor.get_port()
storage_controller_pg_port = self.port_distributor.get_port()
if storage_controller_pg_port == self.storage_controller_port + 1:
break
# The URL for the pageserver to use as its control_plane_api config
self.control_plane_api: str = f"http://127.0.0.1:{self.attachment_service_port}/upcall/v1"
# The base URL of the attachment service
self.attachment_service_api: str = f"http://127.0.0.1:{self.attachment_service_port}"
self.control_plane_api: str = f"http://127.0.0.1:{self.storage_controller_port}/upcall/v1"
# The base URL of the storage controller
self.storage_controller_api: str = f"http://127.0.0.1:{self.storage_controller_port}"
# For testing this with a fake HTTP server, enable passing through a URL from config
self.control_plane_compute_hook_api = config.control_plane_compute_hook_api
self.attachment_service: NeonAttachmentService = NeonAttachmentService(
self.storage_controller: NeonStorageController = NeonStorageController(
self, config.auth_enabled
)
@@ -1113,16 +1113,16 @@ class NeonEnv:
self.neon_cli.init(cfg, force=config.config_init_force)
def start(self):
# Attachment service starts first, so that pageserver /re-attach calls don't
# Storage controller starts first, so that pageserver /re-attach calls don't
# bounce through retries on startup
self.attachment_service.start()
self.storage_controller.start()
def attachment_service_ready():
assert self.attachment_service.ready() is True
def storage_controller_ready():
assert self.storage_controller.ready() is True
# Wait for attachment service readiness to prevent unnecessary post start-up
# Wait for storage controller readiness to prevent unnecessary post start-up
# reconcile.
wait_until(30, 1, attachment_service_ready)
wait_until(30, 1, storage_controller_ready)
# Start up broker, pageserver and all safekeepers
futs = []
@@ -1153,7 +1153,7 @@ class NeonEnv:
if ps_assert_metric_no_errors:
pageserver.assert_no_metric_errors()
pageserver.stop(immediate=immediate)
self.attachment_service.stop(immediate=immediate)
self.storage_controller.stop(immediate=immediate)
self.broker.stop(immediate=immediate)
@property
@@ -1188,9 +1188,9 @@ class NeonEnv:
def get_tenant_pageserver(self, tenant_id: Union[TenantId, TenantShardId]):
"""
Get the NeonPageserver where this tenant shard is currently attached, according
to the attachment service.
to the storage controller.
"""
meta = self.attachment_service.inspect(tenant_id)
meta = self.storage_controller.inspect(tenant_id)
if meta is None:
return None
pageserver_id = meta[1]
@@ -1697,12 +1697,12 @@ class NeonCli(AbstractNeonCli):
res.check_returncode()
return res
def attachment_service_start(self):
cmd = ["attachment_service", "start"]
def storage_controller_start(self):
cmd = ["storage_controller", "start"]
return self.raw_cli(cmd)
def attachment_service_stop(self, immediate: bool):
cmd = ["attachment_service", "stop"]
def storage_controller_stop(self, immediate: bool):
cmd = ["storage_controller", "stop"]
if immediate:
cmd.extend(["-m", "immediate"])
return self.raw_cli(cmd)
@@ -1942,14 +1942,14 @@ class Pagectl(AbstractNeonCli):
return IndexPartDump.from_json(parsed)
class AttachmentServiceApiException(Exception):
class StorageControllerApiException(Exception):
def __init__(self, message, status_code: int):
super().__init__(message)
self.message = message
self.status_code = status_code
class NeonAttachmentService(MetricsGetter):
class NeonStorageController(MetricsGetter):
def __init__(self, env: NeonEnv, auth_enabled: bool):
self.env = env
self.running = False
@@ -1957,13 +1957,13 @@ class NeonAttachmentService(MetricsGetter):
def start(self):
assert not self.running
self.env.neon_cli.attachment_service_start()
self.env.neon_cli.storage_controller_start()
self.running = True
return self
def stop(self, immediate: bool = False) -> "NeonAttachmentService":
def stop(self, immediate: bool = False) -> "NeonStorageController":
if self.running:
self.env.neon_cli.attachment_service_stop(immediate)
self.env.neon_cli.storage_controller_stop(immediate)
self.running = False
return self
@@ -1976,22 +1976,22 @@ class NeonAttachmentService(MetricsGetter):
msg = res.json()["msg"]
except: # noqa: E722
msg = ""
raise AttachmentServiceApiException(msg, res.status_code) from e
raise StorageControllerApiException(msg, res.status_code) from e
def pageserver_api(self) -> PageserverHttpClient:
"""
The attachment service implements a subset of the pageserver REST API, for mapping
The storage controller implements a subset of the pageserver REST API, for mapping
per-tenant actions into per-shard actions (e.g. timeline creation). Tests should invoke those
functions via the HttpClient, as an implicit check that these APIs remain compatible.
"""
auth_token = None
if self.auth_enabled:
auth_token = self.env.auth_keys.generate_token(scope=TokenScope.PAGE_SERVER_API)
return PageserverHttpClient(self.env.attachment_service_port, lambda: True, auth_token)
return PageserverHttpClient(self.env.storage_controller_port, lambda: True, auth_token)
def request(self, method, *args, **kwargs) -> requests.Response:
resp = requests.request(method, *args, **kwargs)
NeonAttachmentService.raise_api_exception(resp)
NeonStorageController.raise_api_exception(resp)
return resp
@@ -2004,15 +2004,15 @@ class NeonAttachmentService(MetricsGetter):
return headers
def get_metrics(self) -> Metrics:
res = self.request("GET", f"{self.env.attachment_service_api}/metrics")
res = self.request("GET", f"{self.env.storage_controller_api}/metrics")
return parse_metrics(res.text)
def ready(self) -> bool:
status = None
try:
resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
resp = self.request("GET", f"{self.env.storage_controller_api}/ready")
status = resp.status_code
except AttachmentServiceApiException as e:
except StorageControllerApiException as e:
status = e.status_code
if status == 503:
@@ -2027,7 +2027,7 @@ class NeonAttachmentService(MetricsGetter):
) -> int:
response = self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/attach-hook",
f"{self.env.storage_controller_api}/debug/v1/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id},
headers=self.headers(TokenScope.ADMIN),
)
@@ -2038,7 +2038,7 @@ class NeonAttachmentService(MetricsGetter):
def attach_hook_drop(self, tenant_shard_id: Union[TenantId, TenantShardId]):
self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/attach-hook",
f"{self.env.storage_controller_api}/debug/v1/attach-hook",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": None},
headers=self.headers(TokenScope.ADMIN),
)
@@ -2049,7 +2049,7 @@ class NeonAttachmentService(MetricsGetter):
"""
response = self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/inspect",
f"{self.env.storage_controller_api}/debug/v1/inspect",
json={"tenant_shard_id": str(tenant_shard_id)},
headers=self.headers(TokenScope.ADMIN),
)
@@ -2070,7 +2070,7 @@ class NeonAttachmentService(MetricsGetter):
log.info(f"node_register({body})")
self.request(
"POST",
f"{self.env.attachment_service_api}/control/v1/node",
f"{self.env.storage_controller_api}/control/v1/node",
json=body,
headers=self.headers(TokenScope.ADMIN),
)
@@ -2078,7 +2078,7 @@ class NeonAttachmentService(MetricsGetter):
def node_list(self):
response = self.request(
"GET",
f"{self.env.attachment_service_api}/control/v1/node",
f"{self.env.storage_controller_api}/control/v1/node",
headers=self.headers(TokenScope.ADMIN),
)
return response.json()
@@ -2088,7 +2088,7 @@ class NeonAttachmentService(MetricsGetter):
body["node_id"] = node_id
self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/node/{node_id}/config",
f"{self.env.storage_controller_api}/control/v1/node/{node_id}/config",
json=body,
headers=self.headers(TokenScope.ADMIN),
)
@@ -2118,7 +2118,7 @@ class NeonAttachmentService(MetricsGetter):
response = self.request(
"POST",
f"{self.env.attachment_service_api}/v1/tenant",
f"{self.env.storage_controller_api}/v1/tenant",
json=body,
headers=self.headers(TokenScope.PAGE_SERVER_API),
)
@@ -2130,7 +2130,7 @@ class NeonAttachmentService(MetricsGetter):
"""
response = self.request(
"GET",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate",
f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/locate",
headers=self.headers(TokenScope.ADMIN),
)
body = response.json()
@@ -2140,7 +2140,7 @@ class NeonAttachmentService(MetricsGetter):
def tenant_shard_split(self, tenant_id: TenantId, shard_count: int) -> list[TenantShardId]:
response = self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/shard_split",
f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/shard_split",
json={"new_shard_count": shard_count},
headers=self.headers(TokenScope.ADMIN),
)
@@ -2152,7 +2152,7 @@ class NeonAttachmentService(MetricsGetter):
def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int):
self.request(
"PUT",
f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_shard_id}/migrate",
f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_shard_id}/migrate",
json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id},
headers=self.headers(TokenScope.ADMIN),
)
@@ -2165,12 +2165,12 @@ class NeonAttachmentService(MetricsGetter):
"""
self.request(
"POST",
f"{self.env.attachment_service_api}/debug/v1/consistency_check",
f"{self.env.storage_controller_api}/debug/v1/consistency_check",
headers=self.headers(TokenScope.ADMIN),
)
log.info("Attachment service passed consistency check")
log.info("storage controller passed consistency check")
def __enter__(self) -> "NeonAttachmentService":
def __enter__(self) -> "NeonStorageController":
return self
def __exit__(
@@ -2401,7 +2401,7 @@ class NeonPageserver(PgProtocol):
"""
client = self.http_client()
if generation is None:
generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
return client.tenant_attach(
tenant_id,
config,
@@ -2410,14 +2410,14 @@ class NeonPageserver(PgProtocol):
)
def tenant_detach(self, tenant_id: TenantId):
self.env.attachment_service.attach_hook_drop(tenant_id)
self.env.storage_controller.attach_hook_drop(tenant_id)
client = self.http_client()
return client.tenant_detach(tenant_id)
def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
if config["mode"].startswith("Attached") and "generation" not in config:
config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
config["generation"] = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
client = self.http_client()
return client.tenant_location_conf(tenant_id, config, **kwargs)
@@ -2441,14 +2441,14 @@ class NeonPageserver(PgProtocol):
generation: Optional[int] = None,
) -> TenantId:
if generation is None:
generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
client = self.http_client(auth_token=auth_token)
return client.tenant_create(tenant_id, conf, generation=generation)
def tenant_load(self, tenant_id: TenantId):
client = self.http_client()
return client.tenant_load(
tenant_id, generation=self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
tenant_id, generation=self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
)
@@ -3907,7 +3907,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint
psql_path = os.path.join(pg_bin.pg_bin_path, "psql")
pageserver_id = env.attachment_service.locate(endpoint.tenant_id)[0]["node_id"]
pageserver_id = env.storage_controller.locate(endpoint.tenant_id)[0]["node_id"]
cmd = rf"""
{psql_path} \
--no-psqlrc \
@@ -3994,7 +3994,7 @@ def tenant_get_shards(
us to figure out the shards for a tenant.
If the caller provides `pageserver_id`, it will be used for all shards, even
if the shard is indicated by attachment service to be on some other pageserver.
if the shard is indicated by the storage controller to be on some other pageserver.
Caller should iterate over the response to apply their per-pageserver action to
each shard
@@ -4010,7 +4010,7 @@ def tenant_get_shards(
TenantShardId.parse(s["shard_id"]),
override_pageserver or env.get_pageserver(s["node_id"]),
)
for s in env.attachment_service.locate(tenant_id)
for s in env.storage_controller.locate(tenant_id)
]
else:
# Assume an unsharded tenant

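As an aside for readers of the fixture changes above, here is a minimal sketch of the renamed Python API (the test name and logging are hypothetical; everything else is taken from the fixtures shown in this diff):

from fixtures.log_helper import log
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder


def test_storage_controller_rename_sketch(neon_env_builder: NeonEnvBuilder):
    # Hypothetical test: exercises only the renamed accessors.
    env: NeonEnv = neon_env_builder.init_start()
    tenant_id = env.initial_tenant

    # Ask the storage controller where each shard of the tenant is attached.
    for shard in env.storage_controller.locate(tenant_id):
        pageserver = env.get_pageserver(shard["node_id"])
        log.info(f"shard {shard['shard_id']} is attached to pageserver {pageserver.id}")

    # inspect() returns (generation, node_id) for an attached tenant shard, or None.
    meta = env.storage_controller.inspect(tenant_id)
    if meta is not None:
        generation, node_id = meta
        log.info(f"generation={generation}, attached to node {node_id}")
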
View File

@@ -43,7 +43,7 @@ def single_timeline(
log.info("detach template tenant form pageserver")
env.pageserver.tenant_detach(template_tenant)
env.pageserver.allowed_errors.append(
# tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
# tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
".*Dropped remote consistent LSN updates.*",
)

View File

@@ -56,7 +56,7 @@ def setup_env(
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.allowed_errors.append(
# tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
# tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
".*Dropped remote consistent LSN updates.*",
)
env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -92,7 +92,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.allowed_errors.append(
# tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
# tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
".*Dropped remote consistent LSN updates.*",
)
env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -114,7 +114,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
env.pageserver.tenant_detach(template_tenant)
env.pageserver.allowed_errors.append(
# tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
# tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
".*Dropped remote consistent LSN updates.*",
)
env.pageserver.tenant_attach(template_tenant, config)

View File

@@ -56,12 +56,12 @@ def measure_recovery_time(env: NeonCompare):
# Delete the Tenant in the pageserver: this will drop local and remote layers, such that
# when we "create" the Tenant again, we will replay the WAL from the beginning.
#
# This is a "weird" thing to do, and can confuse the attachment service as we're re-using
# This is a "weird" thing to do, and can confuse the storage controller as we're re-using
# the same tenant ID for a tenant that is logically different from the pageserver's point
# of view, but the same as far as the safekeeper/WAL is concerned. To work around that,
# we will explicitly create the tenant in the same generation that it was previously
# attached in.
attach_status = env.env.attachment_service.inspect(tenant_shard_id=env.tenant)
attach_status = env.env.storage_controller.inspect(tenant_shard_id=env.tenant)
assert attach_status is not None
(attach_gen, _) = attach_status

View File

@@ -137,7 +137,7 @@ def test_no_config(positive_env: NeonEnv, content_type: Optional[str]):
ps_http.tenant_detach(tenant_id)
assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
body = {"generation": env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)}
body = {"generation": env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)}
ps_http.post(
f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",

View File

@@ -85,9 +85,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
# the endpoint. Whereas the previous reconfiguration was like a healthy migration, this
# is more like what happens in an unexpected pageserver failure.
#
# Since we're dual-attached, need to tip-off attachment service to treat the one we're
# Since we're dual-attached, need to tip-off storage controller to treat the one we're
# about to start as the attached pageserver
env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
env.pageservers[0].start()
env.pageservers[1].stop()
@@ -97,9 +97,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
assert fetchone() == (100000,)
env.pageservers[0].stop()
# Since we're dual-attached, need to tip-off attachment service to treat the one we're
# Since we're dual-attached, need to tip-off storage controller to treat the one we're
# about to start as the attached pageserver
env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
env.pageservers[1].start()
# Test a (former) bug where a child process spins without updating its connection string

View File

@@ -133,7 +133,7 @@ def test_create_snapshot(
for sk in env.safekeepers:
sk.stop()
env.pageserver.stop()
env.attachment_service.stop()
env.storage_controller.stop()
# Directory `compatibility_snapshot_dir` is uploaded to S3 in a workflow, keep the name in sync with it
compatibility_snapshot_dir = (

View File

@@ -159,7 +159,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
time.sleep(1.1) # so that we can use change in pre_stat.st_mtime to detect overwrites
def get_generation_number():
attachment = env.attachment_service.inspect(tenant_id)
attachment = env.storage_controller.inspect(tenant_id)
assert attachment is not None
return attachment[0]

View File

@@ -133,7 +133,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
# Stop default ps/sk
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.attachment_service_stop(False)
env.neon_cli.storage_controller_stop(False)
# Keep NeonEnv state up to date, it usually owns starting/stopping services
env.pageserver.running = False
@@ -175,7 +175,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 2)
# Stop this to get out of the way of the following `start`
env.neon_cli.attachment_service_stop(False)
env.neon_cli.storage_controller_stop(False)
# Default start
res = env.neon_cli.raw_cli(["start"])

View File

@@ -73,7 +73,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
# create new tenant and check it is also there
tenant_id = TenantId.generate()
client.tenant_create(
tenant_id, generation=env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
tenant_id, generation=env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)
)
assert tenant_id in {TenantId(t["id"]) for t in client.tenant_list()}

View File

@@ -203,7 +203,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
env.broker.try_start()
for sk in env.safekeepers:
sk.start()
env.attachment_service.start()
env.storage_controller.start()
env.pageserver.start(overrides=('--pageserver-config-override=control_plane_api=""',))
@@ -285,7 +285,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"]
main_pageserver = env.get_pageserver(attached_to_id)
other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]
@@ -310,7 +310,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
# Now advance the generation in the control plane: subsequent validations
# from the running pageserver will fail. No more deletions should happen.
env.attachment_service.attach_hook_issue(env.initial_tenant, other_pageserver.id)
env.storage_controller.attach_hook_issue(env.initial_tenant, other_pageserver.id)
generate_uploads_and_deletions(env, init=False, pageserver=main_pageserver)
assert_deletion_queue(ps_http, lambda n: n > 0)
@@ -366,7 +366,7 @@ def test_deletion_queue_recovery(
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"]
main_pageserver = env.get_pageserver(attached_to_id)
other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]
@@ -428,7 +428,7 @@ def test_deletion_queue_recovery(
if keep_attachment == KeepAttachment.LOSE:
some_other_pageserver = other_pageserver.id
env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver)
env.storage_controller.attach_hook_issue(env.initial_tenant, some_other_pageserver)
main_pageserver.start()
@@ -494,7 +494,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
)
# Simulate a major incident: the control plane goes offline
env.attachment_service.stop()
env.storage_controller.stop()
# Remember how many validations had happened before the control plane went offline
validated = get_deletion_queue_validated(ps_http)
@@ -525,7 +525,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
assert get_deletion_queue_executed(ps_http) == 0
# When the control plane comes back up, normal service should resume
env.attachment_service.start()
env.storage_controller.start()
ps_http.deletion_queue_flush(execute=True)
assert get_deletion_queue_depth(ps_http) == 0

View File

@@ -157,7 +157,7 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
workload.churn_rows(rng.randint(128, 256), pageserver.id)
workload.validate(pageserver.id)
elif last_state_ps[0].startswith("Attached"):
# The `attachment_service` will only re-attach on startup when a pageserver was the
# The `storage_controller` will only re-attach on startup when a pageserver was the
# holder of the latest generation: otherwise the pageserver will revert to detached
# state if it was running attached with a stale generation
last_state[pageserver.id] = ("Detached", None)
@@ -182,12 +182,12 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
generation = last_state_ps[1]
else:
# Switch generations, while also jumping between attached states
generation = env.attachment_service.attach_hook_issue(
generation = env.storage_controller.attach_hook_issue(
tenant_id, pageserver.id
)
latest_attached = pageserver.id
else:
generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver.id)
latest_attached = pageserver.id
else:
generation = None
@@ -273,7 +273,7 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder):
# Encourage the new location to download while still in secondary mode
pageserver_b.http_client().tenant_secondary_download(tenant_id)
migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
migrated_generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver_b.id)
log.info(f"Acquired generation {migrated_generation} for destination pageserver")
assert migrated_generation == initial_generation + 1
@@ -436,7 +436,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
remote_storage_kind=RemoteStorageKind.MOCK_S3,
)
env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
assert env.attachment_service is not None
assert env.storage_controller is not None
assert isinstance(env.pageserver_remote_storage, S3Storage) # Satisfy linter
tenant_id = env.initial_tenant

View File

@@ -169,7 +169,7 @@ def test_remote_storage_backup_and_restore(
# Ensure that even though the tenant is broken, retrying the attachment fails
with pytest.raises(Exception, match="Tenant state is Broken"):
# Use same generation as in previous attempt
gen_state = env.attachment_service.inspect(tenant_id)
gen_state = env.storage_controller.inspect(tenant_id)
assert gen_state is not None
generation = gen_state[0]
env.pageserver.tenant_attach(tenant_id, generation=generation)
@@ -355,7 +355,7 @@ def test_remote_storage_upload_queue_retries(
env.pageserver.stop(immediate=True)
env.endpoints.stop_all()
# We are about to forcibly drop local dirs. Attachment service will increment generation in re-attach before
# We are about to forcibly drop local dirs. Storage controller will increment generation in re-attach before
# we later increment when actually attaching it again, leading to skipping a generation and potentially getting
# these warnings if there was a durable but un-executed deletion list at time of restart.
env.pageserver.allowed_errors.extend(

View File

@@ -80,7 +80,7 @@ def test_tenant_s3_restore(
assert (
ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0
), "tenant removed before we deletion was issued"
env.attachment_service.attach_hook_drop(tenant_id)
env.storage_controller.attach_hook_drop(tenant_id)
tenant_path = env.pageserver.tenant_dir(tenant_id)
assert not tenant_path.exists()
@@ -103,7 +103,7 @@ def test_tenant_s3_restore(
tenant_id, timestamp=ts_before_deletion, done_if_after=ts_after_deletion
)
generation = env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
generation = env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)
ps_http.tenant_attach(tenant_id, generation=generation)
env.pageserver.quiesce_tenants()

View File

@@ -43,7 +43,7 @@ def test_sharding_smoke(
tenant_id = env.initial_tenant
pageservers = dict((int(p.id), p) for p in env.pageservers)
shards = env.attachment_service.locate(tenant_id)
shards = env.storage_controller.locate(tenant_id)
def get_sizes():
sizes = {}
@@ -86,7 +86,7 @@ def test_sharding_smoke(
)
assert timelines == {env.initial_timeline, timeline_b}
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_split_unsharded(
@@ -102,7 +102,7 @@ def test_sharding_split_unsharded(
# Check that we created with an unsharded TenantShardId: this is the default,
# but check it in case we change the default in future
assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 0)) is not None
assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 0)) is not None
workload = Workload(env, tenant_id, timeline_id, branch_name="main")
workload.init()
@@ -110,15 +110,15 @@ def test_sharding_split_unsharded(
workload.validate()
# Split one shard into two
env.attachment_service.tenant_shard_split(tenant_id, shard_count=2)
env.storage_controller.tenant_shard_split(tenant_id, shard_count=2)
# Check we got the shard IDs we expected
assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 2)) is not None
assert env.attachment_service.inspect(TenantShardId(tenant_id, 1, 2)) is not None
assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 2)) is not None
assert env.storage_controller.inspect(TenantShardId(tenant_id, 1, 2)) is not None
workload.validate()
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_split_smoke(
@@ -161,7 +161,7 @@ def test_sharding_split_smoke(
workload.write_rows(256)
# Note which pageservers initially hold a shard after tenant creation
pre_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
pre_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]
# For pageservers holding a shard, validate their ingest statistics
# reflect a proper splitting of the WAL.
@@ -213,9 +213,9 @@ def test_sharding_split_smoke(
# Before split, old shards exist
assert shards_on_disk(old_shard_ids)
env.attachment_service.tenant_shard_split(tenant_id, shard_count=split_shard_count)
env.storage_controller.tenant_shard_split(tenant_id, shard_count=split_shard_count)
post_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
post_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]
# We should have split into 8 shards, on the same 4 pageservers we started on.
assert len(post_split_pageserver_ids) == split_shard_count
assert len(set(post_split_pageserver_ids)) == shard_count
@@ -261,7 +261,7 @@ def test_sharding_split_smoke(
# Check that we didn't do any spurious reconciliations.
# Total number of reconciles should have been one per original shard, plus
# one for each shard that was migrated.
reconcile_ok = env.attachment_service.get_metric_value(
reconcile_ok = env.storage_controller.get_metric_value(
"storage_controller_reconcile_complete_total", filter={"status": "ok"}
)
assert reconcile_ok == shard_count + split_shard_count // 2
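
To make the reconcile arithmetic above concrete, a sketch with assumed values (the initial shard_count of 4 is inferred from the "same 4 pageservers" comment; split_shard_count of 8 comes from the comment above):

shard_count = 4          # assumed: original shards, one per pageserver
split_shard_count = 8    # "We should have split into 8 shards"

# One reconcile per original shard, plus one per migrated shard: the
# assertion expects half of the post-split shards to have been migrated.
expected_reconcile_ok = shard_count + split_shard_count // 2
assert expected_reconcile_ok == 8
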
@@ -269,19 +269,19 @@ def test_sharding_split_smoke(
# Check that no cancelled or errored reconciliations occurred: this test does no
# failure injection and should run clean.
assert (
env.attachment_service.get_metric_value(
env.storage_controller.get_metric_value(
"storage_controller_reconcile_complete_total", filter={"status": "cancel"}
)
is None
)
assert (
env.attachment_service.get_metric_value(
env.storage_controller.get_metric_value(
"storage_controller_reconcile_complete_total", filter={"status": "error"}
)
is None
)
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
# Validate pageserver state
shards_exist: list[TenantShardId] = []
@@ -360,7 +360,7 @@ def test_sharding_ingest(
huge_layer_count = 0
# Inspect the resulting layer map, count how many layers are undersized.
for shard in env.attachment_service.locate(tenant_id):
for shard in env.storage_controller.locate(tenant_id):
pageserver = env.get_pageserver(shard["node_id"])
shard_id = shard["shard_id"]
layer_map = pageserver.http_client().layer_map_info(shard_id, timeline_id)

View File

@@ -6,10 +6,10 @@ from typing import Any, Dict, List, Union
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
AttachmentServiceApiException,
NeonEnv,
NeonEnvBuilder,
PgBin,
StorageControllerApiException,
TokenScope,
)
from fixtures.pageserver.http import PageserverHttpClient
@@ -36,7 +36,7 @@ from werkzeug.wrappers.response import Response
def get_node_shard_counts(env: NeonEnv, tenant_ids):
counts: defaultdict[str, int] = defaultdict(int)
for tid in tenant_ids:
for shard in env.attachment_service.locate(tid):
for shard in env.storage_controller.locate(tid):
counts[shard["node_id"]] += 1
return counts
@@ -62,20 +62,20 @@ def test_sharding_service_smoke(
# Start services by hand so that we can skip a pageserver (this will start + register later)
env.broker.try_start()
env.attachment_service.start()
env.storage_controller.start()
env.pageservers[0].start()
env.pageservers[1].start()
for sk in env.safekeepers:
sk.start()
# The pageservers we started should have registered with the sharding service on startup
nodes = env.attachment_service.node_list()
nodes = env.storage_controller.node_list()
assert len(nodes) == 2
assert set(n["id"] for n in nodes) == {env.pageservers[0].id, env.pageservers[1].id}
# Starting an additional pageserver should register successfully
env.pageservers[2].start()
nodes = env.attachment_service.node_list()
nodes = env.storage_controller.node_list()
assert len(nodes) == 3
assert set(n["id"] for n in nodes) == {ps.id for ps in env.pageservers}
@@ -99,22 +99,22 @@ def test_sharding_service_smoke(
# Creating and deleting timelines should work, using identical API to pageserver
timeline_crud_tenant = next(iter(tenant_ids))
timeline_id = TimelineId.generate()
env.attachment_service.pageserver_api().timeline_create(
env.storage_controller.pageserver_api().timeline_create(
pg_version=PgVersion.NOT_SET, tenant_id=timeline_crud_tenant, new_timeline_id=timeline_id
)
timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
assert len(timelines) == 2
assert timeline_id in set(TimelineId(t["timeline_id"]) for t in timelines)
# virtual_ps_http.timeline_delete(tenant_id=timeline_crud_tenant, timeline_id=timeline_id)
timeline_delete_wait_completed(
env.attachment_service.pageserver_api(), timeline_crud_tenant, timeline_id
env.storage_controller.pageserver_api(), timeline_crud_tenant, timeline_id
)
timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
assert len(timelines) == 1
assert timeline_id not in set(TimelineId(t["timeline_id"]) for t in timelines)
# Marking a pageserver offline should migrate tenants away from it.
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})
def node_evacuated(node_id: int) -> None:
counts = get_node_shard_counts(env, tenant_ids)
@@ -124,7 +124,7 @@ def test_sharding_service_smoke(
# Marking pageserver active should not migrate anything to it
# immediately
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Active"})
env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Active"})
time.sleep(1)
assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 0
@@ -144,13 +144,13 @@ def test_sharding_service_smoke(
# Delete all the tenants
for tid in tenant_ids:
tenant_delete_wait_completed(env.attachment_service.pageserver_api(), tid, 10)
tenant_delete_wait_completed(env.storage_controller.pageserver_api(), tid, 10)
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
# Set a scheduling policy on one node, create all the tenants, observe
# that the scheduling policy is respected.
env.attachment_service.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
env.storage_controller.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
# Create some fresh tenants
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
@@ -163,7 +163,7 @@ def test_sharding_service_smoke(
assert counts[env.pageservers[0].id] == tenant_shard_count // 2
assert counts[env.pageservers[2].id] == tenant_shard_count // 2
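# With pageserver 1 set to Draining, the new shards are scheduled only onto pageservers 0 and 2,
# split evenly between them.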
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_node_status_after_restart(
@@ -173,28 +173,28 @@ def test_node_status_after_restart(
env = neon_env_builder.init_start()
# Initially we have two online pageservers
nodes = env.attachment_service.node_list()
nodes = env.storage_controller.node_list()
assert len(nodes) == 2
env.pageservers[1].stop()
env.attachment_service.stop()
env.attachment_service.start()
env.storage_controller.stop()
env.storage_controller.start()
def is_ready():
assert env.attachment_service.ready() is True
assert env.storage_controller.ready() is True
wait_until(30, 1, is_ready)
# We loaded nodes from database on restart
nodes = env.attachment_service.node_list()
nodes = env.storage_controller.node_list()
assert len(nodes) == 2
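# Both nodes are still listed even though pageserver 1 is stopped: the node records are reloaded from
# the controller's database rather than from live re-registration.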
# We should still be able to create a tenant, because the pageserver which is still online
# should have had its availability state set to Active.
env.attachment_service.tenant_create(TenantId.generate())
env.storage_controller.tenant_create(TenantId.generate())
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_service_passthrough(
@@ -208,9 +208,9 @@ def test_sharding_service_passthrough(
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start()
# We will talk to attachment service as if it was a pageserver, using the pageserver
# We will talk to the storage controller as if it were a pageserver, using the pageserver
# HTTP client
client = PageserverHttpClient(env.attachment_service_port, lambda: True)
client = PageserverHttpClient(env.storage_controller_port, lambda: True)
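# "Passthrough" means the controller serves a pageserver-compatible HTTP API, so the standard pageserver
# client can simply be pointed at the controller's port.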
timelines = client.timeline_list(tenant_id=env.initial_tenant)
assert len(timelines) == 1
@@ -221,22 +221,22 @@ def test_sharding_service_passthrough(
}
assert status["state"]["slug"] == "Active"
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_a = env.initial_tenant
tenant_b = TenantId.generate()
env.attachment_service.tenant_create(tenant_b)
env.storage_controller.tenant_create(tenant_b)
env.pageserver.tenant_detach(tenant_a)
# TODO: extend this test to use multiple pageservers, and check that locations don't move around
# on restart.
# Attachment service restart
env.attachment_service.stop()
env.attachment_service.start()
# Storage controller restart
env.storage_controller.stop()
env.storage_controller.start()
observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())
@@ -255,7 +255,7 @@ def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
assert tenant_a not in observed
assert tenant_b in observed
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
@pytest.mark.parametrize("warm_up", [True, False])
@@ -271,7 +271,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
# Start services by hand so that we can skip registration on one of the pageservers
env = neon_env_builder.init_configs()
env.broker.try_start()
env.attachment_service.start()
env.storage_controller.start()
# This is the pageserver where we'll initially create the tenant. Run it in emergency
# mode so that it doesn't talk to storage controller, and do not register it.
@@ -286,12 +286,12 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
# will be attached after onboarding
env.pageservers[1].start(register=True)
dest_ps = env.pageservers[1]
virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
for sk in env.safekeepers:
sk.start()
# Create a tenant directly via pageserver HTTP API, skipping the attachment service
# Create a tenant directly via pageserver HTTP API, skipping the storage controller
tenant_id = TenantId.generate()
generation = 123
origin_ps.http_client().tenant_create(tenant_id, generation=generation)
@@ -324,7 +324,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
virtual_ps_http.tenant_secondary_download(tenant_id)
# Call into attachment service to onboard the tenant
# Call into storage controller to onboard the tenant
generation += 1
virtual_ps_http.tenant_location_conf(
tenant_id,
@@ -347,7 +347,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
},
)
# As if doing a live migration, call into the attachment service to
# As if doing a live migration, call into the storage controller to
# set it to AttachedSingle: this is a no-op, but we test it because the
# cloud control plane may call this for symmetry with live migration to
# an individual pageserver
@@ -375,8 +375,8 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
assert dest_tenants[0]["generation"] == generation + 1
# The onboarded tenant should survive a restart of the sharding service
env.attachment_service.stop()
env.attachment_service.start()
env.storage_controller.stop()
env.storage_controller.start()
# The onboarded tenant should survive a restart of the pageserver
dest_ps.stop()
@@ -407,7 +407,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_service_compute_hook(
@@ -419,7 +419,7 @@ def test_sharding_service_compute_hook(
Test that the sharding service calls out to the configured HTTP endpoint on attachment changes
"""
# We will run two pageserver to migrate and check that the attachment service sends notifications
# We will run two pageservers to migrate and check that the storage controller sends notifications
# when migrating.
neon_env_builder.num_pageservers = 2
(host, port) = httpserver_listen_address
@@ -450,7 +450,7 @@ def test_sharding_service_compute_hook(
}
assert notifications[0] == expect
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})
def node_evacuated(node_id: int) -> None:
counts = get_node_shard_counts(env, [env.initial_tenant])
@@ -473,8 +473,8 @@ def test_sharding_service_compute_hook(
wait_until(20, 0.25, received_migration_notification)
# When we restart, we should re-emit notifications for all tenants
env.attachment_service.stop()
env.attachment_service.start()
env.storage_controller.stop()
env.storage_controller.start()
def received_restart_notification():
assert len(notifications) == 3
@@ -483,7 +483,7 @@ def test_sharding_service_compute_hook(
wait_until(10, 1, received_restart_notification)
# Splitting a tenant should cause its stripe size to become visible in the compute notification
env.attachment_service.tenant_shard_split(env.initial_tenant, shard_count=2)
env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=2)
expect = {
"tenant_id": str(env.initial_tenant),
"stripe_size": 32768,
@@ -499,7 +499,7 @@ def test_sharding_service_compute_hook(
wait_until(10, 1, received_split_notification)
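# The stripe size only becomes interesting once the tenant has more than one shard, presumably because
# the compute needs it to work out which shard owns which keys.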
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
@@ -512,55 +512,55 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = TenantId.generate()
env.attachment_service.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
env.storage_controller.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
# Check that the consistency check passes on a freshly setup system
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
# These APIs are intentionally not implemented as methods on NeonAttachmentService, as
# These APIs are intentionally not implemented as methods on NeonStorageController, as
# they're just for use in unanticipated circumstances.
# Initial tenant (1 shard) and the one we just created (2 shards) should be visible
response = env.attachment_service.request(
response = env.storage_controller.request(
"GET",
f"{env.attachment_service_api}/debug/v1/tenant",
headers=env.attachment_service.headers(TokenScope.ADMIN),
f"{env.storage_controller_api}/debug/v1/tenant",
headers=env.storage_controller.headers(TokenScope.ADMIN),
)
assert len(response.json()) == 3
# Scheduler should report the expected nodes and shard counts
response = env.attachment_service.request(
"GET", f"{env.attachment_service_api}/debug/v1/scheduler"
response = env.storage_controller.request(
"GET", f"{env.storage_controller_api}/debug/v1/scheduler"
)
# Two nodes, in a dict of node_id->node
assert len(response.json()["nodes"]) == 2
assert sum(v["shard_count"] for v in response.json()["nodes"].values()) == 3
assert all(v["may_schedule"] for v in response.json()["nodes"].values())
response = env.attachment_service.request(
response = env.storage_controller.request(
"POST",
f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop",
headers=env.attachment_service.headers(TokenScope.ADMIN),
f"{env.storage_controller_api}/debug/v1/node/{env.pageservers[1].id}/drop",
headers=env.storage_controller.headers(TokenScope.ADMIN),
)
assert len(env.attachment_service.node_list()) == 1
assert len(env.storage_controller.node_list()) == 1
response = env.attachment_service.request(
response = env.storage_controller.request(
"POST",
f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop",
headers=env.attachment_service.headers(TokenScope.ADMIN),
f"{env.storage_controller_api}/debug/v1/tenant/{tenant_id}/drop",
headers=env.storage_controller.headers(TokenScope.ADMIN),
)
# Tenant drop should be reflected in dump output
response = env.attachment_service.request(
response = env.storage_controller.request(
"GET",
f"{env.attachment_service_api}/debug/v1/tenant",
headers=env.attachment_service.headers(TokenScope.ADMIN),
f"{env.storage_controller_api}/debug/v1/tenant",
headers=env.storage_controller.headers(TokenScope.ADMIN),
)
assert len(response.json()) == 1
# Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're
# meant to be unclean wrt the pageserver state, but not leave a broken storage controller behind.
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_service_s3_time_travel_recovery(
@@ -584,10 +584,10 @@ def test_sharding_service_s3_time_travel_recovery(
neon_env_builder.num_pageservers = 1
env = neon_env_builder.init_start()
virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
tenant_id = TenantId.generate()
env.attachment_service.tenant_create(
env.storage_controller.tenant_create(
tenant_id,
shard_count=2,
shard_stripe_size=8192,
@@ -595,7 +595,7 @@ def test_sharding_service_s3_time_travel_recovery(
)
# Check that the consistency check passes
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
branch_name = "main"
timeline_id = env.neon_cli.create_timeline(
@@ -670,28 +670,28 @@ def test_sharding_service_s3_time_travel_recovery(
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
endpoint.safe_psql("SELECT * FROM created_foo;")
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()
def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
neon_env_builder.auth_enabled = True
env = neon_env_builder.init_start()
svc = env.attachment_service
api = env.attachment_service_api
svc = env.storage_controller
api = env.storage_controller_api
tenant_id = TenantId.generate()
body: Dict[str, Any] = {"new_tenant_id": str(tenant_id)}
# No token
with pytest.raises(
AttachmentServiceApiException,
StorageControllerApiException,
match="Unauthorized: missing authorization header",
):
svc.request("POST", f"{env.attachment_service_api}/v1/tenant", json=body)
svc.request("POST", f"{env.storage_controller_api}/v1/tenant", json=body)
# Token with incorrect scope
with pytest.raises(
AttachmentServiceApiException,
StorageControllerApiException,
match="Forbidden: JWT authentication error",
):
svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN))
@@ -703,14 +703,14 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
# No token
with pytest.raises(
AttachmentServiceApiException,
StorageControllerApiException,
match="Unauthorized: missing authorization header",
):
svc.request("GET", f"{api}/debug/v1/tenant")
# Token with incorrect scope
with pytest.raises(
AttachmentServiceApiException,
StorageControllerApiException,
match="Forbidden: JWT authentication error",
):
svc.request(
@@ -719,14 +719,14 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
# No token
with pytest.raises(
AttachmentServiceApiException,
StorageControllerApiException,
match="Unauthorized: missing authorization header",
):
svc.request("POST", f"{api}/upcall/v1/re-attach")
# Token with incorrect scope
with pytest.raises(
AttachmentServiceApiException,
StorageControllerApiException,
match="Forbidden: JWT authentication error",
):
svc.request(
@@ -743,7 +743,7 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
tenant_id = env.initial_tenant
http = env.attachment_service.pageserver_api()
http = env.storage_controller.pageserver_api()
default_value = "7days"
new_value = "1h"
@@ -769,4 +769,4 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
assert readback_ps.effective_config["pitr_interval"] == default_value
assert "pitr_interval" not in readback_ps.tenant_specific_overrides
env.attachment_service.consistency_check()
env.storage_controller.consistency_check()


@@ -1011,7 +1011,7 @@ def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
resp = client.tenant_status(eager_tenant)
assert resp["state"]["slug"] == "Active"
gen = env.attachment_service.attach_hook_issue(eager_tenant, env.pageserver.id)
gen = env.storage_controller.attach_hook_issue(eager_tenant, env.pageserver.id)
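# attach_hook_issue has the controller issue a fresh attachment generation for this tenant on this
# pageserver; the tenant_location_conf call below is expected to carry that generation.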
client.tenant_location_conf(
eager_tenant,
{
@@ -1071,7 +1071,7 @@ def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_met
# attach, it will consume the only permit because logical size calculation
# is paused.
gen = env.attachment_service.attach_hook_issue(lazy_tenant, env.pageserver.id)
gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)
client.tenant_location_conf(
lazy_tenant,
{