tests/neon_local: rename "attachment service" -> "storage controller" (#7087)
Not a user-facing change, but can break any existing `.neon` directories created by neon_local, as the name of the database used by the storage controller changes. This PR changes all the locations apart from the path of `control_plane/attachment_service` (waiting for an opportune moment to do that one, because it's the most conflict-ish wrt ongoing PRs like #6676).
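A practical note on the breaking change: the controller's on-disk names change in this commit (see the `setup_database`, `start`, and `stop` hunks below), so a `.neon` directory created before the rename will not be reused cleanly. A minimal, hypothetical Rust sketch of detecting such a stale layout — `base_data_dir` stands in for the `.neon` directory, and only the directory name is taken from the diff:

use std::path::Path;

/// Hedged sketch, not part of this PR: true if `base_data_dir` still contains
/// the pre-rename controller database directory, i.e. it was created by an
/// older neon_local and should be re-initialized rather than reused.
fn has_pre_rename_layout(base_data_dir: &Path) -> bool {
    // Before this commit the controller's postgres data lived in
    // `attachment_service_db`; afterwards it lives in `storage_controller_db`.
    base_data_dir.join("attachment_service_db").exists()
}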
@@ -51,7 +51,7 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
 CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
 # Force cargo not to print progress bar
 CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
-# Set PQ_LIB_DIR to make sure `attachment_service` get linked with bundled libpq (through diesel)
+# Set PQ_LIB_DIR to make sure `storage_controller` gets linked with bundled libpq (through diesel)
 CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib

 #
@@ -30,7 +30,7 @@ use pageserver_api::controller_api::{
 };
 use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};

-use control_plane::attachment_service::{AttachHookRequest, InspectRequest};
+use control_plane::storage_controller::{AttachHookRequest, InspectRequest};

 /// State available to HTTP request handlers
 #[derive(Clone)]
@@ -1,9 +1,3 @@
-/// The attachment service mimics the aspects of the control plane API
-/// that are required for a pageserver to operate.
-///
-/// This enables running & testing pageservers without a full-blown
-/// deployment of the Neon cloud platform.
-///
 use anyhow::{anyhow, Context};
 use attachment_service::http::make_router;
 use attachment_service::metrics::preinitialize_metrics;
@@ -20,7 +20,7 @@ use crate::node::Node;

 /// ## What do we store?
 ///
-/// The attachment service does not store most of its state durably.
+/// The storage controller service does not store most of its state durably.
 ///
 /// The essential things to store durably are:
 /// - generation numbers, as these must always advance monotonically to ensure data safety.
@@ -34,7 +34,7 @@ use crate::node::Node;
 ///
 /// ## Performance/efficiency
 ///
-/// The attachment service does not go via the database for most things: there are
+/// The storage controller service does not go via the database for most things: there are
 /// a couple of places where we must, and where efficiency matters:
 /// - Incrementing generation numbers: the Reconciler has to wait for this to complete
 /// before it can attach a tenant, so this acts as a bound on how fast things like
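An aside on the durability comments above: the one invariant the controller must persist is the per-tenant generation number. A hedged sketch (simplified types, not the controller's real schema) of why the Reconciler waits on the increment:

/// Illustration only: the generation may only ever advance, and the new value
/// must be durable before any pageserver is attached with it.
struct TenantShard {
    generation: u32,
}

impl TenantShard {
    /// Returns the generation for the next attachment. Per the doc comment
    /// above, the Reconciler must wait for this value to be persisted before
    /// it can attach the tenant anywhere.
    fn increment_generation(&mut self) -> u32 {
        // Monotonic advance: a stale pageserver holding generation N is fenced
        // off by any later attachment holding N + 1.
        self.generation += 1;
        self.generation
    }
}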
@@ -8,7 +8,7 @@ use std::{
 };

 use anyhow::Context;
-use control_plane::attachment_service::{
+use control_plane::storage_controller::{
     AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse,
 };
 use diesel::result::DatabaseErrorKind;
@@ -839,7 +839,7 @@ impl Service {
             tenant_state.generation = Some(new_generation);
         } else {
             // This is a detach notification. We must update placement policy to avoid re-attaching
-            // during background scheduling/reconciliation, or during attachment service restart.
+            // during background scheduling/reconciliation, or during storage controller restart.
             assert!(attach_req.node_id.is_none());
             tenant_state.policy = PlacementPolicy::Detached;
         }
@@ -8,11 +8,11 @@
 use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
 use compute_api::spec::ComputeMode;
-use control_plane::attachment_service::AttachmentService;
 use control_plane::endpoint::ComputeControlPlane;
 use control_plane::local_env::{InitForceMode, LocalEnv};
 use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
 use control_plane::safekeeper::SafekeeperNode;
+use control_plane::storage_controller::StorageController;
 use control_plane::{broker, local_env};
 use pageserver_api::controller_api::{
     NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
@@ -138,7 +138,7 @@ fn main() -> Result<()> {
         "start" => rt.block_on(handle_start_all(sub_args, &env)),
         "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
         "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
-        "attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)),
+        "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
         "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
         "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
         "mappings" => handle_mappings(sub_args, &mut env),
@@ -445,14 +445,14 @@ async fn handle_tenant(
             // If tenant ID was not specified, generate one
             let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);

-            // We must register the tenant with the attachment service, so
+            // We must register the tenant with the storage controller, so
             // that when the pageserver restarts, it will be re-attached.
-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .tenant_create(TenantCreateRequest {
                     // Note that ::unsharded here isn't actually because the tenant is unsharded, its because the
-                    // attachment service expecfs a shard-naive tenant_id in this attribute, and the TenantCreateRequest
-                    // type is used both in attachment service (for creating tenants) and in pageserver (for creating shards)
+                    // storage controller expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest
+                    // type is used both in storage controller (for creating tenants) and in pageserver (for creating shards)
                     new_tenant_id: TenantShardId::unsharded(tenant_id),
                     generation: None,
                     shard_parameters: ShardParameters {
@@ -476,9 +476,9 @@ async fn handle_tenant(
                 .context("Failed to parse postgres version from the argument string")?;

             // FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have
-            // different shards picking different start lsns. Maybe we have to teach attachment service
+            // different shards picking different start lsns. Maybe we have to teach storage controller
             // to let shard 0 branch first and then propagate the chosen LSN to other shards.
-            attachment_service
+            storage_controller
                 .tenant_timeline_create(
                     tenant_id,
                     TimelineCreateRequest {
@@ -528,8 +528,8 @@ async fn handle_tenant(
             let new_pageserver = get_pageserver(env, matches)?;
             let new_pageserver_id = new_pageserver.conf.id;

-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .tenant_migrate(tenant_shard_id, new_pageserver_id)
                 .await?;

@@ -543,8 +543,8 @@ async fn handle_tenant(

             let mut tenant_synthetic_size = None;

-            let attachment_service = AttachmentService::from_env(env);
-            for shard in attachment_service.tenant_locate(tenant_id).await?.shards {
+            let storage_controller = StorageController::from_env(env);
+            for shard in storage_controller.tenant_locate(tenant_id).await?.shards {
                 let pageserver =
                     PageServerNode::from_env(env, env.get_pageserver_conf(shard.node_id)?);

@@ -586,8 +586,8 @@ async fn handle_tenant(
             let tenant_id = get_tenant_id(matches, env)?;
             let shard_count: u8 = matches.get_one::<u8>("shard-count").cloned().unwrap_or(0);

-            let attachment_service = AttachmentService::from_env(env);
-            let result = attachment_service
+            let storage_controller = StorageController::from_env(env);
+            let result = storage_controller
                 .tenant_split(tenant_id, shard_count)
                 .await?;
             println!(
@@ -613,7 +613,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local

     match timeline_match.subcommand() {
         Some(("list", list_match)) => {
-            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
+            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
             // where shard 0 is attached, and query there.
             let tenant_shard_id = get_tenant_shard_id(list_match, env)?;
             let timelines = pageserver.timeline_list(&tenant_shard_id).await?;
@@ -633,7 +633,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
             let new_timeline_id_opt = parse_timeline_id(create_match)?;
             let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());

-            let attachment_service = AttachmentService::from_env(env);
+            let storage_controller = StorageController::from_env(env);
             let create_req = TimelineCreateRequest {
                 new_timeline_id,
                 ancestor_timeline_id: None,
@@ -641,7 +641,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 ancestor_start_lsn: None,
                 pg_version: Some(pg_version),
             };
-            let timeline_info = attachment_service
+            let timeline_info = storage_controller
                 .tenant_timeline_create(tenant_id, create_req)
                 .await?;

@@ -730,7 +730,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 .transpose()
                 .context("Failed to parse ancestor start Lsn from the request")?;
             let new_timeline_id = TimelineId::generate();
-            let attachment_service = AttachmentService::from_env(env);
+            let storage_controller = StorageController::from_env(env);
             let create_req = TimelineCreateRequest {
                 new_timeline_id,
                 ancestor_timeline_id: Some(ancestor_timeline_id),
@@ -738,7 +738,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 ancestor_start_lsn: start_lsn,
                 pg_version: None,
             };
-            let timeline_info = attachment_service
+            let timeline_info = storage_controller
                 .tenant_timeline_create(tenant_id, create_req)
                 .await?;

@@ -767,7 +767,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re

     match sub_name {
         "list" => {
-            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the attachment service
+            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
             // where shard 0 is attached, and query there.
             let tenant_shard_id = get_tenant_shard_id(sub_args, env)?;
             let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
@@ -952,21 +952,21 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 (
                     vec![(parsed.0, parsed.1.unwrap_or(5432))],
                     // If caller is telling us what pageserver to use, this is not a tenant which is
-                    // full managed by attachment service, therefore not sharded.
+                    // fully managed by storage controller, therefore not sharded.
                     ShardParameters::DEFAULT_STRIPE_SIZE,
                 )
             } else {
                 // Look up the currently attached location of the tenant, and its striping metadata,
                 // to pass these on to postgres.
-                let attachment_service = AttachmentService::from_env(env);
-                let locate_result = attachment_service.tenant_locate(endpoint.tenant_id).await?;
+                let storage_controller = StorageController::from_env(env);
+                let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
                 let pageservers = locate_result
                     .shards
                     .into_iter()
                     .map(|shard| {
                         (
                             Host::parse(&shard.listen_pg_addr)
-                                .expect("Attachment service reported bad hostname"),
+                                .expect("Storage controller reported bad hostname"),
                             shard.listen_pg_port,
                         )
                     })
@@ -1015,8 +1015,8 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 pageserver.pg_connection_config.port(),
             )]
         } else {
-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .tenant_locate(endpoint.tenant_id)
                 .await?
                 .shards
@@ -1024,7 +1024,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 .map(|shard| {
                     (
                         Host::parse(&shard.listen_pg_addr)
-                            .expect("Attachment service reported malformed host"),
+                            .expect("Storage controller reported malformed host"),
                         shard.listen_pg_port,
                     )
                 })
@@ -1144,8 +1144,8 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
             let scheduling = subcommand_args.get_one("scheduling");
             let availability = subcommand_args.get_one("availability");

-            let attachment_service = AttachmentService::from_env(env);
-            attachment_service
+            let storage_controller = StorageController::from_env(env);
+            storage_controller
                 .node_configure(NodeConfigureRequest {
                     node_id: pageserver.conf.id,
                     scheduling: scheduling.cloned(),
@@ -1170,11 +1170,11 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
     Ok(())
 }

-async fn handle_attachment_service(
+async fn handle_storage_controller(
     sub_match: &ArgMatches,
     env: &local_env::LocalEnv,
 ) -> Result<()> {
-    let svc = AttachmentService::from_env(env);
+    let svc = StorageController::from_env(env);
     match sub_match.subcommand() {
         Some(("start", _start_match)) => {
             if let Err(e) = svc.start().await {
@@ -1194,8 +1194,8 @@ async fn handle_attachment_service(
                 exit(1);
             }
         }
-        Some((sub_name, _)) => bail!("Unexpected attachment_service subcommand '{}'", sub_name),
-        None => bail!("no attachment_service subcommand provided"),
+        Some((sub_name, _)) => bail!("Unexpected storage_controller subcommand '{}'", sub_name),
+        None => bail!("no storage_controller subcommand provided"),
     }
     Ok(())
 }
@@ -1280,11 +1280,11 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->

     broker::start_broker_process(env).await?;

-    // Only start the attachment service if the pageserver is configured to need it
+    // Only start the storage controller if the pageserver is configured to need it
     if env.control_plane_api.is_some() {
-        let attachment_service = AttachmentService::from_env(env);
-        if let Err(e) = attachment_service.start().await {
-            eprintln!("attachment_service start failed: {:#}", e);
+        let storage_controller = StorageController::from_env(env);
+        if let Err(e) = storage_controller.start().await {
+            eprintln!("storage_controller start failed: {:#}", e);
             try_stop_all(env, true).await;
             exit(1);
         }
@@ -1356,9 +1356,9 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
     }

     if env.control_plane_api.is_some() {
-        let attachment_service = AttachmentService::from_env(env);
-        if let Err(e) = attachment_service.stop(immediate).await {
-            eprintln!("attachment service stop failed: {e:#}");
+        let storage_controller = StorageController::from_env(env);
+        if let Err(e) = storage_controller.stop(immediate).await {
+            eprintln!("storage controller stop failed: {e:#}");
         }
     }
 }
@@ -1618,9 +1618,9 @@ fn cli() -> Command {
                 )
             )
         .subcommand(
-            Command::new("attachment_service")
+            Command::new("storage_controller")
                 .arg_required_else_help(true)
-                .about("Manage attachment_service")
+                .about("Manage storage_controller")
                 .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
                 .subcommand(Command::new("stop").about("Stop local pageserver")
                     .arg(stop_mode_arg.clone()))
@@ -57,9 +57,9 @@ use serde::{Deserialize, Serialize};
 use url::Host;
 use utils::id::{NodeId, TenantId, TimelineId};

-use crate::attachment_service::AttachmentService;
 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
+use crate::storage_controller::StorageController;

 use compute_api::responses::{ComputeState, ComputeStatus};
 use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
@@ -750,17 +750,17 @@ impl Endpoint {
         let postgresql_conf = self.read_postgresql_conf()?;
         spec.cluster.postgresql_conf = Some(postgresql_conf);

-        // If we weren't given explicit pageservers, query the attachment service
+        // If we weren't given explicit pageservers, query the storage controller
         if pageservers.is_empty() {
-            let attachment_service = AttachmentService::from_env(&self.env);
-            let locate_result = attachment_service.tenant_locate(self.tenant_id).await?;
+            let storage_controller = StorageController::from_env(&self.env);
+            let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
             pageservers = locate_result
                 .shards
                 .into_iter()
                 .map(|shard| {
                     (
                         Host::parse(&shard.listen_pg_addr)
-                            .expect("Attachment service reported bad hostname"),
+                            .expect("Storage controller reported bad hostname"),
                         shard.listen_pg_port,
                     )
                 })
@@ -6,7 +6,6 @@
 //! local installations.
 #![deny(clippy::undocumented_unsafe_blocks)]

-pub mod attachment_service;
 mod background_process;
 pub mod broker;
 pub mod endpoint;
@@ -14,3 +13,4 @@ pub mod local_env;
 pub mod pageserver;
 pub mod postgresql_conf;
 pub mod safekeeper;
+pub mod storage_controller;
@@ -72,13 +72,13 @@ pub struct LocalEnv {
     #[serde(default)]
     pub safekeepers: Vec<SafekeeperConf>,

-    // Control plane upcall API for pageserver: if None, we will not run attachment_service. If set, this will
+    // Control plane upcall API for pageserver: if None, we will not run storage_controller. If set, this will
     // be propagated into each pageserver's configuration.
     #[serde(default)]
     pub control_plane_api: Option<Url>,

-    // Control plane upcall API for attachment service. If set, this will be propagated into the
-    // attachment service's configuration.
+    // Control plane upcall API for storage controller. If set, this will be propagated into the
+    // storage controller's configuration.
     #[serde(default)]
     pub control_plane_compute_hook_api: Option<Url>,

@@ -227,10 +227,10 @@ impl LocalEnv {
         self.neon_distrib_dir.join("pageserver")
     }

-    pub fn attachment_service_bin(&self) -> PathBuf {
-        // Irrespective of configuration, attachment service binary is always
+    pub fn storage_controller_bin(&self) -> PathBuf {
+        // Irrespective of configuration, storage controller binary is always
         // run from the same location as neon_local. This means that for compatibility
-        // tests that run old pageserver/safekeeper, they still run latest attachment service.
+        // tests that run old pageserver/safekeeper, they still run latest storage controller.
         let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned();
         neon_local_bin_dir.join("storage_controller")
     }
@@ -31,8 +31,8 @@ use utils::{
     lsn::Lsn,
 };

-use crate::attachment_service::AttachmentService;
 use crate::local_env::PageServerConf;
+use crate::storage_controller::StorageController;
 use crate::{background_process, local_env::LocalEnv};

 /// Directory within .neon which will be used by default for LocalFs remote storage.
@@ -111,7 +111,7 @@ impl PageServerNode {
             control_plane_api.as_str()
         ));

-        // Attachment service uses the same auth as pageserver: if JWT is enabled
+        // Storage controller uses the same auth as pageserver: if JWT is enabled
         // for us, we will also need it to talk to them.
         if matches!(self.conf.http_auth_type, AuthType::NeonJWT) {
             let jwt_token = self
@@ -214,12 +214,12 @@
         // Register the node with the storage controller before starting pageserver: pageserver must be registered to
         // successfully call /re-attach and finish starting up.
         if register {
-            let attachment_service = AttachmentService::from_env(&self.env);
+            let storage_controller = StorageController::from_env(&self.env);
             let (pg_host, pg_port) =
                 parse_host_port(&self.conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
             let (http_host, http_port) = parse_host_port(&self.conf.listen_http_addr)
                 .expect("Unable to parse listen_http_addr");
-            attachment_service
+            storage_controller
                 .node_register(NodeRegisterRequest {
                     node_id: self.conf.id,
                     listen_pg_addr: pg_host.to_string(),
@@ -24,7 +24,7 @@ use utils::{
     id::{NodeId, TenantId},
 };

-pub struct AttachmentService {
+pub struct StorageController {
     env: LocalEnv,
     listen: String,
     path: Utf8PathBuf,
@@ -36,7 +36,7 @@ pub struct AttachmentService {

 const COMMAND: &str = "storage_controller";

-const ATTACHMENT_SERVICE_POSTGRES_VERSION: u32 = 16;
+const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;

 #[derive(Serialize, Deserialize)]
 pub struct AttachHookRequest {
@@ -59,7 +59,7 @@ pub struct InspectResponse {
     pub attachment: Option<(u32, NodeId)>,
 }

-impl AttachmentService {
+impl StorageController {
     pub fn from_env(env: &LocalEnv) -> Self {
         let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
             .unwrap()
@@ -136,27 +136,27 @@ impl AttachmentService {
     }

     fn pid_file(&self) -> Utf8PathBuf {
-        Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("attachment_service.pid"))
+        Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("storage_controller.pid"))
             .expect("non-Unicode path")
     }

-    /// PIDFile for the postgres instance used to store attachment service state
+    /// PIDFile for the postgres instance used to store storage controller state
     fn postgres_pid_file(&self) -> Utf8PathBuf {
         Utf8PathBuf::from_path_buf(
             self.env
                 .base_data_dir
-                .join("attachment_service_postgres.pid"),
+                .join("storage_controller_postgres.pid"),
         )
         .expect("non-Unicode path")
     }

     /// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
     ///
-    /// This usually uses ATTACHMENT_SERVICE_POSTGRES_VERSION of postgres, but will fall back
+    /// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
     /// to other versions if that one isn't found. Some automated tests create circumstances
     /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
     pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
-        let prefer_versions = [ATTACHMENT_SERVICE_POSTGRES_VERSION, 15, 14];
+        let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];

         for v in prefer_versions {
             let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
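The `get_pg_bin_dir` hunk above encodes a simple version-preference search. A self-contained sketch of the same idea — the `pg_bin_dir` closure stands in for `LocalEnv::pg_bin_dir`, whose real signature is fallible and differs from this:

use std::path::PathBuf;

/// Hedged sketch: try the preferred postgres version first (16 at the time of
/// this commit), then fall back to older ones, returning the first bin
/// directory that exists on disk.
fn find_pg_bin_dir(pg_bin_dir: impl Fn(u32) -> PathBuf) -> Option<PathBuf> {
    const PREFER_VERSIONS: [u32; 3] = [16, 15, 14];
    for v in PREFER_VERSIONS {
        let path = pg_bin_dir(v);
        if path.exists() {
            return Some(path);
        }
    }
    None
}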
@@ -189,7 +189,7 @@
     ///
     /// Returns the database url
     pub async fn setup_database(&self) -> anyhow::Result<String> {
-        const DB_NAME: &str = "attachment_service";
+        const DB_NAME: &str = "storage_controller";
         let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);

         let pg_bin_dir = self.get_pg_bin_dir().await?;
@@ -219,10 +219,10 @@
     }

     pub async fn start(&self) -> anyhow::Result<()> {
-        // Start a vanilla Postgres process used by the attachment service for persistence.
+        // Start a vanilla Postgres process used by the storage controller for persistence.
         let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
             .unwrap()
-            .join("attachment_service_db");
+            .join("storage_controller_db");
         let pg_bin_dir = self.get_pg_bin_dir().await?;
         let pg_log_path = pg_data_path.join("postgres.log");

@@ -245,7 +245,7 @@
             .await?;
         };

-        println!("Starting attachment service database...");
+        println!("Starting storage controller database...");
         let db_start_args = [
             "-w",
             "-D",
@@ -256,7 +256,7 @@
         ];

         background_process::start_process(
-            "attachment_service_db",
+            "storage_controller_db",
             &self.env.base_data_dir,
             pg_bin_dir.join("pg_ctl").as_std_path(),
             db_start_args,
@@ -300,7 +300,7 @@
         background_process::start_process(
             COMMAND,
             &self.env.base_data_dir,
-            &self.env.attachment_service_bin(),
+            &self.env.storage_controller_bin(),
             args,
             [(
                 "NEON_REPO_DIR".to_string(),
@@ -322,10 +322,10 @@
     pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> {
         background_process::stop_process(immediate, COMMAND, &self.pid_file())?;

-        let pg_data_path = self.env.base_data_dir.join("attachment_service_db");
+        let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
         let pg_bin_dir = self.get_pg_bin_dir().await?;

-        println!("Stopping attachment service database...");
+        println!("Stopping storage controller database...");
         let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
         let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
             .args(pg_stop_args)
@@ -344,10 +344,10 @@
             // fine that stop failed. Otherwise it is an error that stop failed.
             const PG_STATUS_NOT_RUNNING: i32 = 3;
             if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() {
-                println!("Attachment service data base is already stopped");
+                println!("Storage controller database is already stopped");
                 return Ok(());
             } else {
-                anyhow::bail!("Failed to stop attachment service database: {stop_status}")
+                anyhow::bail!("Failed to stop storage controller database: {stop_status}")
             }
         }

@@ -368,7 +368,7 @@
         }
     }

-    /// Simple HTTP request wrapper for calling into attachment service
+    /// Simple HTTP request wrapper for calling into storage controller
     async fn dispatch<RQ, RS>(
         &self,
         method: hyper::Method,
@@ -70,9 +70,9 @@ Should only be used e.g. for status check/tenant creation/list.
 Should only be used e.g. for status check.
 Currently also used for connection from any pageserver to any safekeeper.

-"generations_api": Provides access to the upcall APIs served by the attachment service or the control plane.
+"generations_api": Provides access to the upcall APIs served by the storage controller or the control plane.

-"admin": Provides access to the control plane and admin APIs of the attachment service.
+"admin": Provides access to the control plane and admin APIs of the storage controller.

 ### CLI
 CLI generates a key pair during call to `neon_local init` with the following commands:
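For orientation, the scopes named in the doc hunk above map onto API surfaces roughly as follows — a hedged sketch, not the real type or routing (the `/upcall/` prefix is taken from the pageserver's control_plane_api URL used elsewhere in this commit):

/// Illustration only, not the controller's real scope type.
enum TokenScope {
    /// "generations_api": the upcall APIs (pageserver re-attach/validate).
    GenerationsApi,
    /// "admin": the control plane and admin APIs.
    Admin,
}

fn required_scope(path: &str) -> TokenScope {
    // Hypothetical routing for illustration; the real mapping lives in the
    // storage controller's HTTP layer.
    if path.starts_with("/upcall/") {
        TokenScope::GenerationsApi
    } else {
        TokenScope::Admin
    }
}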
@@ -88,8 +88,6 @@ impl FromStr for NodeAvailability {
     }
 }

-/// FIXME: this is a duplicate of the type in the attachment_service crate, because the
-/// type needs to be defined with diesel traits in there.
 #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
 pub enum NodeSchedulingPolicy {
     Active,
@@ -1014,24 +1014,24 @@ class NeonEnv:
         self.initial_tenant = config.initial_tenant
         self.initial_timeline = config.initial_timeline

-        # Find two adjacent ports for attachment service and its postgres DB. This
+        # Find two adjacent ports for storage controller and its postgres DB. This
         # loop would eventually throw from get_port() if we run out of ports (extremely
         # unlikely): usually we find two adjacent free ports on the first iteration.
         while True:
-            self.attachment_service_port = self.port_distributor.get_port()
-            attachment_service_pg_port = self.port_distributor.get_port()
-            if attachment_service_pg_port == self.attachment_service_port + 1:
+            self.storage_controller_port = self.port_distributor.get_port()
+            storage_controller_pg_port = self.port_distributor.get_port()
+            if storage_controller_pg_port == self.storage_controller_port + 1:
                 break

         # The URL for the pageserver to use as its control_plane_api config
-        self.control_plane_api: str = f"http://127.0.0.1:{self.attachment_service_port}/upcall/v1"
-        # The base URL of the attachment service
-        self.attachment_service_api: str = f"http://127.0.0.1:{self.attachment_service_port}"
+        self.control_plane_api: str = f"http://127.0.0.1:{self.storage_controller_port}/upcall/v1"
+        # The base URL of the storage controller
+        self.storage_controller_api: str = f"http://127.0.0.1:{self.storage_controller_port}"

         # For testing this with a fake HTTP server, enable passing through a URL from config
         self.control_plane_compute_hook_api = config.control_plane_compute_hook_api

-        self.attachment_service: NeonAttachmentService = NeonAttachmentService(
+        self.storage_controller: NeonStorageController = NeonStorageController(
             self, config.auth_enabled
         )

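The port-pairing loop above relies on the distributor handing out free ports in no particular order; it simply retries until two consecutive ports come back. The same loop as a hedged Rust sketch, with `get_port` standing in for the fixture's PortDistributor:

/// Illustration only: draw ports until two adjacent ones are obtained, so the
/// storage controller's API port and its postgres port end up side by side.
fn two_adjacent_ports(mut get_port: impl FnMut() -> u16) -> (u16, u16) {
    loop {
        let api_port = get_port();
        let pg_port = get_port();
        if pg_port == api_port + 1 {
            return (api_port, pg_port);
        }
    }
}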
@@ -1113,16 +1113,16 @@ class NeonEnv:
         self.neon_cli.init(cfg, force=config.config_init_force)

     def start(self):
-        # Attachment service starts first, so that pageserver /re-attach calls don't
+        # Storage controller starts first, so that pageserver /re-attach calls don't
         # bounce through retries on startup
-        self.attachment_service.start()
+        self.storage_controller.start()

-        def attachment_service_ready():
-            assert self.attachment_service.ready() is True
+        def storage_controller_ready():
+            assert self.storage_controller.ready() is True

-        # Wait for attachment service readiness to prevent unnecessary post start-up
+        # Wait for storage controller readiness to prevent unnecessary post start-up
         # reconcile.
-        wait_until(30, 1, attachment_service_ready)
+        wait_until(30, 1, storage_controller_ready)

         # Start up broker, pageserver and all safekeepers
         futs = []
@@ -1153,7 +1153,7 @@ class NeonEnv:
             if ps_assert_metric_no_errors:
                 pageserver.assert_no_metric_errors()
             pageserver.stop(immediate=immediate)
-        self.attachment_service.stop(immediate=immediate)
+        self.storage_controller.stop(immediate=immediate)
         self.broker.stop(immediate=immediate)

     @property
@@ -1188,9 +1188,9 @@ class NeonEnv:
     def get_tenant_pageserver(self, tenant_id: Union[TenantId, TenantShardId]):
         """
         Get the NeonPageserver where this tenant shard is currently attached, according
-        to the attachment service.
+        to the storage controller.
         """
-        meta = self.attachment_service.inspect(tenant_id)
+        meta = self.storage_controller.inspect(tenant_id)
         if meta is None:
             return None
         pageserver_id = meta[1]
@@ -1697,12 +1697,12 @@ class NeonCli(AbstractNeonCli):
         res.check_returncode()
         return res

-    def attachment_service_start(self):
-        cmd = ["attachment_service", "start"]
+    def storage_controller_start(self):
+        cmd = ["storage_controller", "start"]
         return self.raw_cli(cmd)

-    def attachment_service_stop(self, immediate: bool):
-        cmd = ["attachment_service", "stop"]
+    def storage_controller_stop(self, immediate: bool):
+        cmd = ["storage_controller", "stop"]
         if immediate:
             cmd.extend(["-m", "immediate"])
         return self.raw_cli(cmd)
@@ -1942,14 +1942,14 @@ class Pagectl(AbstractNeonCli):
         return IndexPartDump.from_json(parsed)


-class AttachmentServiceApiException(Exception):
+class StorageControllerApiException(Exception):
     def __init__(self, message, status_code: int):
         super().__init__(message)
         self.message = message
         self.status_code = status_code


-class NeonAttachmentService(MetricsGetter):
+class NeonStorageController(MetricsGetter):
     def __init__(self, env: NeonEnv, auth_enabled: bool):
         self.env = env
         self.running = False
@@ -1957,13 +1957,13 @@ class NeonAttachmentService(MetricsGetter):

     def start(self):
         assert not self.running
-        self.env.neon_cli.attachment_service_start()
+        self.env.neon_cli.storage_controller_start()
         self.running = True
         return self

-    def stop(self, immediate: bool = False) -> "NeonAttachmentService":
+    def stop(self, immediate: bool = False) -> "NeonStorageController":
         if self.running:
-            self.env.neon_cli.attachment_service_stop(immediate)
+            self.env.neon_cli.storage_controller_stop(immediate)
             self.running = False
         return self

@@ -1976,22 +1976,22 @@
                 msg = res.json()["msg"]
             except: # noqa: E722
                 msg = ""
-            raise AttachmentServiceApiException(msg, res.status_code) from e
+            raise StorageControllerApiException(msg, res.status_code) from e

     def pageserver_api(self) -> PageserverHttpClient:
         """
-        The attachment service implements a subset of the pageserver REST API, for mapping
+        The storage controller implements a subset of the pageserver REST API, for mapping
         per-tenant actions into per-shard actions (e.g. timeline creation). Tests should invoke those
         functions via the HttpClient, as an implicit check that these APIs remain compatible.
         """
         auth_token = None
         if self.auth_enabled:
             auth_token = self.env.auth_keys.generate_token(scope=TokenScope.PAGE_SERVER_API)
-        return PageserverHttpClient(self.env.attachment_service_port, lambda: True, auth_token)
+        return PageserverHttpClient(self.env.storage_controller_port, lambda: True, auth_token)

     def request(self, method, *args, **kwargs) -> requests.Response:
         resp = requests.request(method, *args, **kwargs)
-        NeonAttachmentService.raise_api_exception(resp)
+        NeonStorageController.raise_api_exception(resp)

         return resp

@@ -2004,15 +2004,15 @@
         return headers

     def get_metrics(self) -> Metrics:
-        res = self.request("GET", f"{self.env.attachment_service_api}/metrics")
+        res = self.request("GET", f"{self.env.storage_controller_api}/metrics")
         return parse_metrics(res.text)

     def ready(self) -> bool:
         status = None
         try:
-            resp = self.request("GET", f"{self.env.attachment_service_api}/ready")
+            resp = self.request("GET", f"{self.env.storage_controller_api}/ready")
             status = resp.status_code
-        except AttachmentServiceApiException as e:
+        except StorageControllerApiException as e:
             status = e.status_code

         if status == 503:
@@ -2027,7 +2027,7 @@
     ) -> int:
         response = self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/attach-hook",
+            f"{self.env.storage_controller_api}/debug/v1/attach-hook",
             json={"tenant_shard_id": str(tenant_shard_id), "node_id": pageserver_id},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2038,7 +2038,7 @@
     def attach_hook_drop(self, tenant_shard_id: Union[TenantId, TenantShardId]):
         self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/attach-hook",
+            f"{self.env.storage_controller_api}/debug/v1/attach-hook",
             json={"tenant_shard_id": str(tenant_shard_id), "node_id": None},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2049,7 +2049,7 @@
         """
         response = self.request(
             "POST",
-            f"{self.env.attachment_service_api}/debug/v1/inspect",
+            f"{self.env.storage_controller_api}/debug/v1/inspect",
             json={"tenant_shard_id": str(tenant_shard_id)},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2070,7 +2070,7 @@
         log.info(f"node_register({body})")
         self.request(
             "POST",
-            f"{self.env.attachment_service_api}/control/v1/node",
+            f"{self.env.storage_controller_api}/control/v1/node",
             json=body,
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2078,7 +2078,7 @@
     def node_list(self):
         response = self.request(
             "GET",
-            f"{self.env.attachment_service_api}/control/v1/node",
+            f"{self.env.storage_controller_api}/control/v1/node",
             headers=self.headers(TokenScope.ADMIN),
         )
         return response.json()
@@ -2088,7 +2088,7 @@
         body["node_id"] = node_id
         self.request(
             "PUT",
-            f"{self.env.attachment_service_api}/control/v1/node/{node_id}/config",
+            f"{self.env.storage_controller_api}/control/v1/node/{node_id}/config",
             json=body,
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2118,7 +2118,7 @@

         response = self.request(
             "POST",
-            f"{self.env.attachment_service_api}/v1/tenant",
+            f"{self.env.storage_controller_api}/v1/tenant",
             json=body,
             headers=self.headers(TokenScope.PAGE_SERVER_API),
         )
@@ -2130,7 +2130,7 @@
         """
        response = self.request(
             "GET",
-            f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/locate",
+            f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/locate",
             headers=self.headers(TokenScope.ADMIN),
         )
         body = response.json()
@@ -2140,7 +2140,7 @@
     def tenant_shard_split(self, tenant_id: TenantId, shard_count: int) -> list[TenantShardId]:
         response = self.request(
             "PUT",
-            f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_id}/shard_split",
+            f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_id}/shard_split",
             json={"new_shard_count": shard_count},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2152,7 +2152,7 @@
     def tenant_shard_migrate(self, tenant_shard_id: TenantShardId, dest_ps_id: int):
         self.request(
             "PUT",
-            f"{self.env.attachment_service_api}/control/v1/tenant/{tenant_shard_id}/migrate",
+            f"{self.env.storage_controller_api}/control/v1/tenant/{tenant_shard_id}/migrate",
             json={"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id},
             headers=self.headers(TokenScope.ADMIN),
         )
@@ -2165,12 +2165,12 @@ class NeonAttachmentService(MetricsGetter):
|
|||||||
"""
|
"""
|
||||||
self.request(
|
self.request(
|
||||||
"POST",
|
"POST",
|
||||||
f"{self.env.attachment_service_api}/debug/v1/consistency_check",
|
f"{self.env.storage_controller_api}/debug/v1/consistency_check",
|
||||||
headers=self.headers(TokenScope.ADMIN),
|
headers=self.headers(TokenScope.ADMIN),
|
||||||
)
|
)
|
||||||
log.info("Attachment service passed consistency check")
|
log.info("storage controller passed consistency check")
|
||||||
|
|
||||||
def __enter__(self) -> "NeonAttachmentService":
|
def __enter__(self) -> "NeonStorageController":
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __exit__(
|
def __exit__(
|
||||||
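
Taken together, the renamed methods above form a small HTTP client for the storage controller. A minimal sketch of how a test might drive them (a hypothetical test, not part of this PR; `neon_simple_env` is the standard `NeonEnv` fixture and the return shapes follow the usages visible elsewhere in this diff):

    from fixtures.neon_fixtures import NeonEnv


    def test_controller_client_sketch(neon_simple_env: NeonEnv):
        env = neon_simple_env

        # Where does each shard of the initial tenant live?
        for shard in env.storage_controller.locate(env.initial_tenant):
            print(shard["shard_id"], "->", shard["node_id"])

        # Which generation is the tenant attached in, if any?
        attach_state = env.storage_controller.inspect(env.initial_tenant)
        assert attach_state is not None
        generation, _attached_to = attach_state  # first element is the generation

        # Cross-check the controller's in-memory scheduling state against its database.
        env.storage_controller.consistency_check()
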
@@ -2401,7 +2401,7 @@ class NeonPageserver(PgProtocol):
         """
         client = self.http_client()
         if generation is None:
-            generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         return client.tenant_attach(
             tenant_id,
             config,
@@ -2410,14 +2410,14 @@ class NeonPageserver(PgProtocol):
         )

     def tenant_detach(self, tenant_id: TenantId):
-        self.env.attachment_service.attach_hook_drop(tenant_id)
+        self.env.storage_controller.attach_hook_drop(tenant_id)

         client = self.http_client()
         return client.tenant_detach(tenant_id)

     def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
         if config["mode"].startswith("Attached") and "generation" not in config:
-            config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            config["generation"] = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)

         client = self.http_client()
         return client.tenant_location_conf(tenant_id, config, **kwargs)
@@ -2441,14 +2441,14 @@ class NeonPageserver(PgProtocol):
         generation: Optional[int] = None,
     ) -> TenantId:
         if generation is None:
-            generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            generation = self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         client = self.http_client(auth_token=auth_token)
         return client.tenant_create(tenant_id, conf, generation=generation)

     def tenant_load(self, tenant_id: TenantId):
         client = self.http_client()
         return client.tenant_load(
-            tenant_id, generation=self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+            tenant_id, generation=self.env.storage_controller.attach_hook_issue(tenant_id, self.id)
         )

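
The call sites above all share one pattern: unless the caller pins a generation explicitly, the storage controller's attach hook is asked for a fresh one before the pageserver API is invoked. Condensed into a sketch (a restatement of the diff above, not a new API; `attach_in_fresh_generation` is a hypothetical helper name):

    def attach_in_fresh_generation(env, pageserver, tenant_id, generation=None):
        # Sketch of the pattern above: the storage controller issues a new,
        # monotonically increasing generation for (tenant, pageserver), and the
        # pageserver is then attached under it.
        if generation is None:
            generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver.id)
        return pageserver.http_client().tenant_attach(tenant_id, generation=generation)
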
@@ -3907,7 +3907,7 @@ def check_restored_datadir_content(test_output_dir: Path, env: NeonEnv, endpoint

     psql_path = os.path.join(pg_bin.pg_bin_path, "psql")

-    pageserver_id = env.attachment_service.locate(endpoint.tenant_id)[0]["node_id"]
+    pageserver_id = env.storage_controller.locate(endpoint.tenant_id)[0]["node_id"]
     cmd = rf"""
         {psql_path} \
             --no-psqlrc \
@@ -3994,7 +3994,7 @@ def tenant_get_shards(
     us to figure out the shards for a tenant.

     If the caller provides `pageserver_id`, it will be used for all shards, even
-    if the shard is indicated by attachment service to be on some other pageserver.
+    if the shard is indicated by storage controller to be on some other pageserver.

     Caller should over the response to apply their per-pageserver action to
     each shard
@@ -4010,7 +4010,7 @@ def tenant_get_shards(
                 TenantShardId.parse(s["shard_id"]),
                 override_pageserver or env.get_pageserver(s["node_id"]),
             )
-            for s in env.attachment_service.locate(tenant_id)
+            for s in env.storage_controller.locate(tenant_id)
         ]
     else:
         # Assume an unsharded tenant
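
`tenant_get_shards` is the sharding-aware counterpart of `env.storage_controller.locate`: it resolves each shard to a live pageserver object so a caller can fan an action out across all of them. A hypothetical usage sketch (the checkpoint call stands in for whatever per-shard action a test needs):

    # Sketch: apply a per-pageserver action to every shard of a tenant.
    for tenant_shard_id, pageserver in tenant_get_shards(env, tenant_id):
        pageserver.http_client().timeline_checkpoint(tenant_shard_id, timeline_id)
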
@@ -43,7 +43,7 @@ def single_timeline(
     log.info("detach template tenant form pageserver")
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )

@@ -56,7 +56,7 @@ def setup_env(
     template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )
     env.pageserver.tenant_attach(template_tenant, config)

@@ -92,7 +92,7 @@ def setup_tenant_template(env: NeonEnv, n_txns: int):
     template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )
     env.pageserver.tenant_attach(template_tenant, config)

@@ -114,7 +114,7 @@ def setup_tenant_template(env: NeonEnv, pg_bin: PgBin, scale: int):
     template_tenant, template_timeline = env.neon_cli.create_tenant(set_default=True)
     env.pageserver.tenant_detach(template_tenant)
     env.pageserver.allowed_errors.append(
-        # tenant detach causes this because the underlying attach-hook removes the tenant from attachment_service entirely
+        # tenant detach causes this because the underlying attach-hook removes the tenant from storage controller entirely
         ".*Dropped remote consistent LSN updates.*",
     )
     env.pageserver.tenant_attach(template_tenant, config)

@@ -56,12 +56,12 @@ def measure_recovery_time(env: NeonCompare):
     # Delete the Tenant in the pageserver: this will drop local and remote layers, such that
     # when we "create" the Tenant again, we will replay the WAL from the beginning.
     #
-    # This is a "weird" thing to do, and can confuse the attachment service as we're re-using
+    # This is a "weird" thing to do, and can confuse the storage controller as we're re-using
     # the same tenant ID for a tenant that is logically different from the pageserver's point
     # of view, but the same as far as the safekeeper/WAL is concerned. To work around that,
     # we will explicitly create the tenant in the same generation that it was previously
     # attached in.
-    attach_status = env.env.attachment_service.inspect(tenant_shard_id=env.tenant)
+    attach_status = env.env.storage_controller.inspect(tenant_shard_id=env.tenant)
     assert attach_status is not None
     (attach_gen, _) = attach_status

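
The comment in that last hunk is the heart of this rename's subject matter: generation numbers only ever move forward, so re-creating a logically different tenant under the same ID must reuse the generation the controller last issued. A condensed sketch of the workaround, using only names that appear in the hunk above (`tenant_create` with a `generation` kwarg appears later in this diff):

    # Sketch of the generation-reuse workaround described above.
    attach_status = env.env.storage_controller.inspect(tenant_shard_id=env.tenant)
    assert attach_status is not None
    (attach_gen, _) = attach_status
    # Re-create the tenant in the generation it was last attached in, keeping the
    # controller's view consistent with the safekeepers' WAL.
    env.env.pageserver.http_client().tenant_create(env.tenant, generation=attach_gen)
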
@@ -137,7 +137,7 @@ def test_no_config(positive_env: NeonEnv, content_type: Optional[str]):
     ps_http.tenant_detach(tenant_id)
     assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]

-    body = {"generation": env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)}
+    body = {"generation": env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)}

     ps_http.post(
         f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",

@@ -85,9 +85,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
     # the endpoint. Whereas the previous reconfiguration was like a healthy migration, this
     # is more like what happens in an unexpected pageserver failure.
     #
-    # Since we're dual-attached, need to tip-off attachment service to treat the one we're
+    # Since we're dual-attached, need to tip-off storage controller to treat the one we're
     # about to start as the attached pageserver
-    env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
+    env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
     env.pageservers[0].start()
     env.pageservers[1].stop()

@@ -97,9 +97,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
     assert fetchone() == (100000,)

     env.pageservers[0].stop()
-    # Since we're dual-attached, need to tip-off attachment service to treat the one we're
+    # Since we're dual-attached, need to tip-off storage controller to treat the one we're
     # about to start as the attached pageserver
-    env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
+    env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
     env.pageservers[1].start()

     # Test a (former) bug where a child process spins without updating its connection string
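
The attach hook is also how these tests arbitrate a dual-attachment: whichever pageserver was issued the latest generation is the one the controller treats as attached. The failover step above, condensed (all lines as in the diff):

    # Declare pageservers[0] the attached location by issuing it the newest
    # generation, then bring it up and take the other node down.
    env.storage_controller.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
    env.pageservers[0].start()
    env.pageservers[1].stop()
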
@@ -133,7 +133,7 @@ def test_create_snapshot(
     for sk in env.safekeepers:
         sk.stop()
     env.pageserver.stop()
-    env.attachment_service.stop()
+    env.storage_controller.stop()

     # Directory `compatibility_snapshot_dir` is uploaded to S3 in a workflow, keep the name in sync with it
     compatibility_snapshot_dir = (

@@ -159,7 +159,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
     time.sleep(1.1)  # so that we can use change in pre_stat.st_mtime to detect overwrites

     def get_generation_number():
-        attachment = env.attachment_service.inspect(tenant_id)
+        attachment = env.storage_controller.inspect(tenant_id)
         assert attachment is not None
         return attachment[0]

@@ -133,7 +133,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
     # Stop default ps/sk
     env.neon_cli.pageserver_stop(env.pageserver.id)
     env.neon_cli.safekeeper_stop()
-    env.neon_cli.attachment_service_stop(False)
+    env.neon_cli.storage_controller_stop(False)

     # Keep NeonEnv state up to date, it usually owns starting/stopping services
     env.pageserver.running = False
@@ -175,7 +175,7 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
     env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 2)

     # Stop this to get out of the way of the following `start`
-    env.neon_cli.attachment_service_stop(False)
+    env.neon_cli.storage_controller_stop(False)

     # Default start
     res = env.neon_cli.raw_cli(["start"])

@@ -73,7 +73,7 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
     # create new tenant and check it is also there
     tenant_id = TenantId.generate()
     client.tenant_create(
-        tenant_id, generation=env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
+        tenant_id, generation=env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)
     )
     assert tenant_id in {TenantId(t["id"]) for t in client.tenant_list()}

@@ -203,7 +203,7 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
     env.broker.try_start()
     for sk in env.safekeepers:
         sk.start()
-    env.attachment_service.start()
+    env.storage_controller.start()

     env.pageserver.start(overrides=('--pageserver-config-override=control_plane_api=""',))

@@ -285,7 +285,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.num_pageservers = 2
     env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)

-    attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
+    attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"]
     main_pageserver = env.get_pageserver(attached_to_id)
     other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]

@@ -310,7 +310,7 @@ def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):

     # Now advance the generation in the control plane: subsequent validations
     # from the running pageserver will fail. No more deletions should happen.
-    env.attachment_service.attach_hook_issue(env.initial_tenant, other_pageserver.id)
+    env.storage_controller.attach_hook_issue(env.initial_tenant, other_pageserver.id)
     generate_uploads_and_deletions(env, init=False, pageserver=main_pageserver)

     assert_deletion_queue(ps_http, lambda n: n > 0)
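
Generation validation is what makes deferred deletion safe: once the controller has issued a newer generation to another node, the old attachment's deletion lists can no longer validate, so they are parked rather than executed. Sketched with the names from the test above:

    # Make main_pageserver's generation stale by issuing a newer one to
    # other_pageserver; main_pageserver's validation calls now fail, so its
    # queued deletions stay queued instead of being executed.
    env.storage_controller.attach_hook_issue(env.initial_tenant, other_pageserver.id)
    generate_uploads_and_deletions(env, init=False, pageserver=main_pageserver)
    assert_deletion_queue(ps_http, lambda n: n > 0)  # deletions remain parked
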
@@ -366,7 +366,7 @@ def test_deletion_queue_recovery(
     neon_env_builder.num_pageservers = 2
     env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)

-    attached_to_id = env.attachment_service.locate(env.initial_tenant)[0]["node_id"]
+    attached_to_id = env.storage_controller.locate(env.initial_tenant)[0]["node_id"]
     main_pageserver = env.get_pageserver(attached_to_id)
     other_pageserver = [p for p in env.pageservers if p.id != attached_to_id][0]

@@ -428,7 +428,7 @@ def test_deletion_queue_recovery(

     if keep_attachment == KeepAttachment.LOSE:
         some_other_pageserver = other_pageserver.id
-        env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver)
+        env.storage_controller.attach_hook_issue(env.initial_tenant, some_other_pageserver)

     main_pageserver.start()

@@ -494,7 +494,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
     )

     # Simulate a major incident: the control plane goes offline
-    env.attachment_service.stop()
+    env.storage_controller.stop()

     # Remember how many validations had happened before the control plane went offline
     validated = get_deletion_queue_validated(ps_http)
@@ -525,7 +525,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
     assert get_deletion_queue_executed(ps_http) == 0

     # When the control plane comes back up, normal service should resume
-    env.attachment_service.start()
+    env.storage_controller.start()

     ps_http.deletion_queue_flush(execute=True)
     assert get_deletion_queue_depth(ps_http) == 0
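
The same machinery degrades gracefully when the controller (standing in here for the control plane) is simply unreachable: deletions accumulate unvalidated, and the queue drains once it returns. Condensed from the emergency-mode test above, using only calls that appear in it:

    env.storage_controller.stop()            # control plane offline
    # ... pageserver keeps serving; deletions queue up but execute nothing ...
    env.storage_controller.start()           # back online: validation resumes
    ps_http.deletion_queue_flush(execute=True)
    assert get_deletion_queue_depth(ps_http) == 0
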
@@ -157,7 +157,7 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
             workload.churn_rows(rng.randint(128, 256), pageserver.id)
             workload.validate(pageserver.id)
         elif last_state_ps[0].startswith("Attached"):
-            # The `attachment_service` will only re-attach on startup when a pageserver was the
+            # The `storage_controller` will only re-attach on startup when a pageserver was the
             # holder of the latest generation: otherwise the pageserver will revert to detached
             # state if it was running attached with a stale generation
             last_state[pageserver.id] = ("Detached", None)
@@ -182,12 +182,12 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
                     generation = last_state_ps[1]
                 else:
                     # Switch generations, while also jumping between attached states
-                    generation = env.attachment_service.attach_hook_issue(
+                    generation = env.storage_controller.attach_hook_issue(
                         tenant_id, pageserver.id
                     )
                     latest_attached = pageserver.id
             else:
-                generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
+                generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver.id)
                 latest_attached = pageserver.id
         else:
             generation = None
@@ -273,7 +273,7 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder):
     # Encourage the new location to download while still in secondary mode
     pageserver_b.http_client().tenant_secondary_download(tenant_id)

-    migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
+    migrated_generation = env.storage_controller.attach_hook_issue(tenant_id, pageserver_b.id)
     log.info(f"Acquired generation {migrated_generation} for destination pageserver")
     assert migrated_generation == initial_generation + 1

@@ -436,7 +436,7 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder):
         remote_storage_kind=RemoteStorageKind.MOCK_S3,
     )
     env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
-    assert env.attachment_service is not None
+    assert env.storage_controller is not None
     assert isinstance(env.pageserver_remote_storage, S3Storage)  # Satisfy linter

     tenant_id = env.initial_tenant

@@ -169,7 +169,7 @@ def test_remote_storage_backup_and_restore(
     # Ensure that even though the tenant is broken, retrying the attachment fails
     with pytest.raises(Exception, match="Tenant state is Broken"):
         # Use same generation as in previous attempt
-        gen_state = env.attachment_service.inspect(tenant_id)
+        gen_state = env.storage_controller.inspect(tenant_id)
         assert gen_state is not None
         generation = gen_state[0]
         env.pageserver.tenant_attach(tenant_id, generation=generation)
@@ -355,7 +355,7 @@ def test_remote_storage_upload_queue_retries(
     env.pageserver.stop(immediate=True)
     env.endpoints.stop_all()

-    # We are about to forcibly drop local dirs. Attachment service will increment generation in re-attach before
+    # We are about to forcibly drop local dirs. Storage controller will increment generation in re-attach before
     # we later increment when actually attaching it again, leading to skipping a generation and potentially getting
     # these warnings if there was a durable but un-executed deletion list at time of restart.
     env.pageserver.allowed_errors.extend(

@@ -80,7 +80,7 @@ def test_tenant_s3_restore(
     assert (
         ps_http.get_metric_value("pageserver_tenant_manager_slots") == 0
     ), "tenant removed before we deletion was issued"
-    env.attachment_service.attach_hook_drop(tenant_id)
+    env.storage_controller.attach_hook_drop(tenant_id)

     tenant_path = env.pageserver.tenant_dir(tenant_id)
     assert not tenant_path.exists()
@@ -103,7 +103,7 @@ def test_tenant_s3_restore(
         tenant_id, timestamp=ts_before_deletion, done_if_after=ts_after_deletion
     )

-    generation = env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
+    generation = env.storage_controller.attach_hook_issue(tenant_id, env.pageserver.id)

     ps_http.tenant_attach(tenant_id, generation=generation)
     env.pageserver.quiesce_tenants()

@@ -43,7 +43,7 @@ def test_sharding_smoke(
     tenant_id = env.initial_tenant

     pageservers = dict((int(p.id), p) for p in env.pageservers)
-    shards = env.attachment_service.locate(tenant_id)
+    shards = env.storage_controller.locate(tenant_id)

     def get_sizes():
         sizes = {}
@@ -86,7 +86,7 @@ def test_sharding_smoke(
     )
     assert timelines == {env.initial_timeline, timeline_b}

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_split_unsharded(
@@ -102,7 +102,7 @@ def test_sharding_split_unsharded(

     # Check that we created with an unsharded TenantShardId: this is the default,
     # but check it in case we change the default in future
-    assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 0)) is not None
+    assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 0)) is not None

     workload = Workload(env, tenant_id, timeline_id, branch_name="main")
     workload.init()
@@ -110,15 +110,15 @@ def test_sharding_split_unsharded(
     workload.validate()

     # Split one shard into two
-    env.attachment_service.tenant_shard_split(tenant_id, shard_count=2)
+    env.storage_controller.tenant_shard_split(tenant_id, shard_count=2)

     # Check we got the shard IDs we expected
-    assert env.attachment_service.inspect(TenantShardId(tenant_id, 0, 2)) is not None
-    assert env.attachment_service.inspect(TenantShardId(tenant_id, 1, 2)) is not None
+    assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 2)) is not None
+    assert env.storage_controller.inspect(TenantShardId(tenant_id, 1, 2)) is not None

     workload.validate()

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()


 def test_sharding_split_smoke(
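
Shard identity before and after a split is worth spelling out: an unsharded tenant is shard 0 of count 0, and splitting to two shards yields shards 0 and 1 of count 2, which is exactly what the asserts above check. As a sketch:

    # TenantShardId(tenant, shard_number, shard_count):
    assert env.storage_controller.inspect(TenantShardId(tenant_id, 0, 0)) is not None
    env.storage_controller.tenant_shard_split(tenant_id, shard_count=2)
    for shard_number in range(2):
        assert env.storage_controller.inspect(TenantShardId(tenant_id, shard_number, 2)) is not None
    env.storage_controller.consistency_check()
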
@@ -161,7 +161,7 @@ def test_sharding_split_smoke(
     workload.write_rows(256)

     # Note which pageservers initially hold a shard after tenant creation
-    pre_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
+    pre_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]

     # For pageservers holding a shard, validate their ingest statistics
     # reflect a proper splitting of the WAL.
@@ -213,9 +213,9 @@ def test_sharding_split_smoke(
     # Before split, old shards exist
     assert shards_on_disk(old_shard_ids)

-    env.attachment_service.tenant_shard_split(tenant_id, shard_count=split_shard_count)
+    env.storage_controller.tenant_shard_split(tenant_id, shard_count=split_shard_count)

-    post_split_pageserver_ids = [loc["node_id"] for loc in env.attachment_service.locate(tenant_id)]
+    post_split_pageserver_ids = [loc["node_id"] for loc in env.storage_controller.locate(tenant_id)]
     # We should have split into 8 shards, on the same 4 pageservers we started on.
     assert len(post_split_pageserver_ids) == split_shard_count
     assert len(set(post_split_pageserver_ids)) == shard_count
@@ -261,7 +261,7 @@ def test_sharding_split_smoke(
     # Check that we didn't do any spurious reconciliations.
     # Total number of reconciles should have been one per original shard, plus
     # one for each shard that was migrated.
-    reconcile_ok = env.attachment_service.get_metric_value(
+    reconcile_ok = env.storage_controller.get_metric_value(
         "storage_controller_reconcile_complete_total", filter={"status": "ok"}
     )
     assert reconcile_ok == shard_count + split_shard_count // 2
@@ -269,19 +269,19 @@ def test_sharding_split_smoke(
     # Check that no cancelled or errored reconciliations occurred: this test does no
     # failure injection and should run clean.
     assert (
-        env.attachment_service.get_metric_value(
+        env.storage_controller.get_metric_value(
             "storage_controller_reconcile_complete_total", filter={"status": "cancel"}
         )
         is None
     )
     assert (
-        env.attachment_service.get_metric_value(
+        env.storage_controller.get_metric_value(
             "storage_controller_reconcile_complete_total", filter={"status": "error"}
         )
         is None
     )

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

     # Validate pageserver state
     shards_exist: list[TenantShardId] = []
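
The reconcile accounting above deserves a worked example: this test splits shard_count = 4 shards into split_shard_count = 8, and rebalancing then migrates half of the 8 children, so the expected count is one reconcile per original shard plus one per migrated child:

    # Worked example of the expected reconcile count (values from this test):
    shard_count = 4        # shards created initially, one reconcile each
    split_shard_count = 8  # children after the split; half of them migrate
    assert shard_count + split_shard_count // 2 == 8
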
@@ -360,7 +360,7 @@ def test_sharding_ingest(
     huge_layer_count = 0

     # Inspect the resulting layer map, count how many layers are undersized.
-    for shard in env.attachment_service.locate(tenant_id):
+    for shard in env.storage_controller.locate(tenant_id):
         pageserver = env.get_pageserver(shard["node_id"])
         shard_id = shard["shard_id"]
         layer_map = pageserver.http_client().layer_map_info(shard_id, timeline_id)

@@ -6,10 +6,10 @@ from typing import Any, Dict, List, Union
 import pytest
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
-    AttachmentServiceApiException,
     NeonEnv,
     NeonEnvBuilder,
     PgBin,
+    StorageControllerApiException,
     TokenScope,
 )
 from fixtures.pageserver.http import PageserverHttpClient
@@ -36,7 +36,7 @@ from werkzeug.wrappers.response import Response
 def get_node_shard_counts(env: NeonEnv, tenant_ids):
     counts: defaultdict[str, int] = defaultdict(int)
     for tid in tenant_ids:
-        for shard in env.attachment_service.locate(tid):
+        for shard in env.storage_controller.locate(tid):
             counts[shard["node_id"]] += 1
     return counts

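
A typical use of this helper, as in the evacuation checks in the tests that follow (a sketch; it assumes unsharded tenants, so one shard per tenant):

    # After marking pageservers[0] Offline, its shard count should drain to
    # zero while every tenant remains placed somewhere.
    counts = get_node_shard_counts(env, tenant_ids)
    assert counts[env.pageservers[0].id] == 0
    assert sum(counts.values()) == len(tenant_ids)
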
@@ -62,20 +62,20 @@ def test_sharding_service_smoke(

     # Start services by hand so that we can skip a pageserver (this will start + register later)
     env.broker.try_start()
-    env.attachment_service.start()
+    env.storage_controller.start()
     env.pageservers[0].start()
     env.pageservers[1].start()
     for sk in env.safekeepers:
         sk.start()

     # The pageservers we started should have registered with the sharding service on startup
-    nodes = env.attachment_service.node_list()
+    nodes = env.storage_controller.node_list()
     assert len(nodes) == 2
     assert set(n["id"] for n in nodes) == {env.pageservers[0].id, env.pageservers[1].id}

     # Starting an additional pageserver should register successfully
     env.pageservers[2].start()
-    nodes = env.attachment_service.node_list()
+    nodes = env.storage_controller.node_list()
     assert len(nodes) == 3
     assert set(n["id"] for n in nodes) == {ps.id for ps in env.pageservers}

@@ -99,22 +99,22 @@ def test_sharding_service_smoke(
     # Creating and deleting timelines should work, using identical API to pageserver
     timeline_crud_tenant = next(iter(tenant_ids))
     timeline_id = TimelineId.generate()
-    env.attachment_service.pageserver_api().timeline_create(
+    env.storage_controller.pageserver_api().timeline_create(
         pg_version=PgVersion.NOT_SET, tenant_id=timeline_crud_tenant, new_timeline_id=timeline_id
     )
-    timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
+    timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
     assert len(timelines) == 2
     assert timeline_id in set(TimelineId(t["timeline_id"]) for t in timelines)
     # virtual_ps_http.timeline_delete(tenant_id=timeline_crud_tenant, timeline_id=timeline_id)
     timeline_delete_wait_completed(
-        env.attachment_service.pageserver_api(), timeline_crud_tenant, timeline_id
+        env.storage_controller.pageserver_api(), timeline_crud_tenant, timeline_id
     )
-    timelines = env.attachment_service.pageserver_api().timeline_list(timeline_crud_tenant)
+    timelines = env.storage_controller.pageserver_api().timeline_list(timeline_crud_tenant)
     assert len(timelines) == 1
     assert timeline_id not in set(TimelineId(t["timeline_id"]) for t in timelines)

     # Marking a pageserver offline should migrate tenants away from it.
-    env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
+    env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})

     def node_evacuated(node_id: int) -> None:
         counts = get_node_shard_counts(env, tenant_ids)
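
The timeline CRUD calls above work because the storage controller implements enough of the pageserver HTTP API to act as a virtual pageserver; the ordinary client can simply be pointed at the controller's port. Sketch (the lambda argument mirrors the construction used in these tests):

    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
    timelines = virtual_ps_http.timeline_list(tenant_id=env.initial_tenant)
    assert len(timelines) >= 1
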
@@ -124,7 +124,7 @@ def test_sharding_service_smoke(
|
|||||||
|
|
||||||
# Marking pageserver active should not migrate anything to it
|
# Marking pageserver active should not migrate anything to it
|
||||||
# immediately
|
# immediately
|
||||||
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Active"})
|
env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Active"})
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 0
|
assert get_node_shard_counts(env, tenant_ids)[env.pageservers[0].id] == 0
|
||||||
|
|
||||||
@@ -144,13 +144,13 @@ def test_sharding_service_smoke(
|
|||||||
|
|
||||||
# Delete all the tenants
|
# Delete all the tenants
|
||||||
for tid in tenant_ids:
|
for tid in tenant_ids:
|
||||||
tenant_delete_wait_completed(env.attachment_service.pageserver_api(), tid, 10)
|
tenant_delete_wait_completed(env.storage_controller.pageserver_api(), tid, 10)
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
# Set a scheduling policy on one node, create all the tenants, observe
|
# Set a scheduling policy on one node, create all the tenants, observe
|
||||||
# that the scheduling policy is respected.
|
# that the scheduling policy is respected.
|
||||||
env.attachment_service.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
|
env.storage_controller.node_configure(env.pageservers[1].id, {"scheduling": "Draining"})
|
||||||
|
|
||||||
# Create some fresh tenants
|
# Create some fresh tenants
|
||||||
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
|
tenant_ids = set(TenantId.generate() for i in range(0, tenant_count))
|
||||||
@@ -163,7 +163,7 @@ def test_sharding_service_smoke(
|
|||||||
assert counts[env.pageservers[0].id] == tenant_shard_count // 2
|
assert counts[env.pageservers[0].id] == tenant_shard_count // 2
|
||||||
assert counts[env.pageservers[2].id] == tenant_shard_count // 2
|
assert counts[env.pageservers[2].id] == tenant_shard_count // 2
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
|
|
||||||
def test_node_status_after_restart(
|
def test_node_status_after_restart(
|
||||||
@@ -173,28 +173,28 @@ def test_node_status_after_restart(
|
|||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
|
|
||||||
# Initially we have two online pageservers
|
# Initially we have two online pageservers
|
||||||
nodes = env.attachment_service.node_list()
|
nodes = env.storage_controller.node_list()
|
||||||
assert len(nodes) == 2
|
assert len(nodes) == 2
|
||||||
|
|
||||||
env.pageservers[1].stop()
|
env.pageservers[1].stop()
|
||||||
|
|
||||||
env.attachment_service.stop()
|
env.storage_controller.stop()
|
||||||
env.attachment_service.start()
|
env.storage_controller.start()
|
||||||
|
|
||||||
def is_ready():
|
def is_ready():
|
||||||
assert env.attachment_service.ready() is True
|
assert env.storage_controller.ready() is True
|
||||||
|
|
||||||
wait_until(30, 1, is_ready)
|
wait_until(30, 1, is_ready)
|
||||||
|
|
||||||
# We loaded nodes from database on restart
|
# We loaded nodes from database on restart
|
||||||
nodes = env.attachment_service.node_list()
|
nodes = env.storage_controller.node_list()
|
||||||
assert len(nodes) == 2
|
assert len(nodes) == 2
|
||||||
|
|
||||||
# We should still be able to create a tenant, because the pageserver which is still online
|
# We should still be able to create a tenant, because the pageserver which is still online
|
||||||
# should have had its availabilty state set to Active.
|
# should have had its availabilty state set to Active.
|
||||||
env.attachment_service.tenant_create(TenantId.generate())
|
env.storage_controller.tenant_create(TenantId.generate())
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
|
|
||||||
def test_sharding_service_passthrough(
|
def test_sharding_service_passthrough(
|
||||||
@@ -208,9 +208,9 @@ def test_sharding_service_passthrough(
|
|||||||
neon_env_builder.num_pageservers = 2
|
neon_env_builder.num_pageservers = 2
|
||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
|
|
||||||
# We will talk to attachment service as if it was a pageserver, using the pageserver
|
# We will talk to storage controller as if it was a pageserver, using the pageserver
|
||||||
# HTTP client
|
# HTTP client
|
||||||
client = PageserverHttpClient(env.attachment_service_port, lambda: True)
|
client = PageserverHttpClient(env.storage_controller_port, lambda: True)
|
||||||
timelines = client.timeline_list(tenant_id=env.initial_tenant)
|
timelines = client.timeline_list(tenant_id=env.initial_tenant)
|
||||||
assert len(timelines) == 1
|
assert len(timelines) == 1
|
||||||
|
|
||||||
@@ -221,22 +221,22 @@ def test_sharding_service_passthrough(
|
|||||||
}
|
}
|
||||||
assert status["state"]["slug"] == "Active"
|
assert status["state"]["slug"] == "Active"
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
|
|
||||||
def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
|
def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
|
||||||
env = neon_env_builder.init_start()
|
env = neon_env_builder.init_start()
|
||||||
tenant_a = env.initial_tenant
|
tenant_a = env.initial_tenant
|
||||||
tenant_b = TenantId.generate()
|
tenant_b = TenantId.generate()
|
||||||
env.attachment_service.tenant_create(tenant_b)
|
env.storage_controller.tenant_create(tenant_b)
|
||||||
env.pageserver.tenant_detach(tenant_a)
|
env.pageserver.tenant_detach(tenant_a)
|
||||||
|
|
||||||
# TODO: extend this test to use multiple pageservers, and check that locations don't move around
|
# TODO: extend this test to use multiple pageservers, and check that locations don't move around
|
||||||
# on restart.
|
# on restart.
|
||||||
|
|
||||||
# Attachment service restart
|
# Storage controller restart
|
||||||
env.attachment_service.stop()
|
env.storage_controller.stop()
|
||||||
env.attachment_service.start()
|
env.storage_controller.start()
|
||||||
|
|
||||||
observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())
|
observed = set(TenantId(tenant["id"]) for tenant in env.pageserver.http_client().tenant_list())
|
||||||
|
|
||||||
@@ -255,7 +255,7 @@ def test_sharding_service_restart(neon_env_builder: NeonEnvBuilder):
|
|||||||
assert tenant_a not in observed
|
assert tenant_a not in observed
|
||||||
assert tenant_b in observed
|
assert tenant_b in observed
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("warm_up", [True, False])
|
@pytest.mark.parametrize("warm_up", [True, False])
|
||||||
@@ -271,7 +271,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
|
|||||||
# Start services by hand so that we can skip registration on one of the pageservers
|
# Start services by hand so that we can skip registration on one of the pageservers
|
||||||
env = neon_env_builder.init_configs()
|
env = neon_env_builder.init_configs()
|
||||||
env.broker.try_start()
|
env.broker.try_start()
|
||||||
env.attachment_service.start()
|
env.storage_controller.start()
|
||||||
|
|
||||||
# This is the pageserver where we'll initially create the tenant. Run it in emergency
|
# This is the pageserver where we'll initially create the tenant. Run it in emergency
|
||||||
# mode so that it doesn't talk to storage controller, and do not register it.
|
# mode so that it doesn't talk to storage controller, and do not register it.
|
||||||
@@ -286,12 +286,12 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
|
|||||||
# will be attached after onboarding
|
# will be attached after onboarding
|
||||||
env.pageservers[1].start(register=True)
|
env.pageservers[1].start(register=True)
|
||||||
dest_ps = env.pageservers[1]
|
dest_ps = env.pageservers[1]
|
||||||
virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
|
virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
|
||||||
|
|
||||||
for sk in env.safekeepers:
|
for sk in env.safekeepers:
|
||||||
sk.start()
|
sk.start()
|
||||||
|
|
||||||
# Create a tenant directly via pageserver HTTP API, skipping the attachment service
|
# Create a tenant directly via pageserver HTTP API, skipping the storage controller
|
||||||
tenant_id = TenantId.generate()
|
tenant_id = TenantId.generate()
|
||||||
generation = 123
|
generation = 123
|
||||||
origin_ps.http_client().tenant_create(tenant_id, generation=generation)
|
origin_ps.http_client().tenant_create(tenant_id, generation=generation)
|
||||||
@@ -324,7 +324,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
|
|||||||
|
|
||||||
virtual_ps_http.tenant_secondary_download(tenant_id)
|
virtual_ps_http.tenant_secondary_download(tenant_id)
|
||||||
|
|
||||||
# Call into attachment service to onboard the tenant
|
# Call into storage controller to onboard the tenant
|
||||||
generation += 1
|
generation += 1
|
||||||
virtual_ps_http.tenant_location_conf(
|
virtual_ps_http.tenant_location_conf(
|
||||||
tenant_id,
|
tenant_id,
|
||||||
@@ -347,7 +347,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
# As if doing a live migration, call into the attachment service to
|
# As if doing a live migration, call into the storage controller to
|
||||||
# set it to AttachedSingle: this is a no-op, but we test it because the
|
# set it to AttachedSingle: this is a no-op, but we test it because the
|
||||||
# cloud control plane may call this for symmetry with live migration to
|
# cloud control plane may call this for symmetry with live migration to
|
||||||
# an individual pageserver
|
# an individual pageserver
|
||||||
@@ -375,8 +375,8 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
|
|||||||
assert dest_tenants[0]["generation"] == generation + 1
|
assert dest_tenants[0]["generation"] == generation + 1
|
||||||
|
|
||||||
# The onboarded tenant should survive a restart of sharding service
|
# The onboarded tenant should survive a restart of sharding service
|
||||||
env.attachment_service.stop()
|
env.storage_controller.stop()
|
||||||
env.attachment_service.start()
|
env.storage_controller.start()
|
||||||
|
|
||||||
# The onboarded tenant should surviev a restart of pageserver
|
# The onboarded tenant should surviev a restart of pageserver
|
||||||
dest_ps.stop()
|
dest_ps.stop()
|
||||||
@@ -407,7 +407,7 @@ def test_sharding_service_onboarding(neon_env_builder: NeonEnvBuilder, warm_up:
|
|||||||
dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
|
dest_tenant_conf_after = dest_ps.http_client().tenant_config(tenant_id)
|
||||||
assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf
|
assert dest_tenant_conf_after.tenant_specific_overrides == modified_tenant_conf
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
|
|
||||||
def test_sharding_service_compute_hook(
|
def test_sharding_service_compute_hook(
|
||||||
@@ -419,7 +419,7 @@ def test_sharding_service_compute_hook(
|
|||||||
Test that the sharding service calls out to the configured HTTP endpoint on attachment changes
|
Test that the sharding service calls out to the configured HTTP endpoint on attachment changes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# We will run two pageserver to migrate and check that the attachment service sends notifications
|
# We will run two pageserver to migrate and check that the storage controller sends notifications
|
||||||
# when migrating.
|
# when migrating.
|
||||||
neon_env_builder.num_pageservers = 2
|
neon_env_builder.num_pageservers = 2
|
||||||
(host, port) = httpserver_listen_address
|
(host, port) = httpserver_listen_address
|
||||||
@@ -450,7 +450,7 @@ def test_sharding_service_compute_hook(
|
|||||||
}
|
}
|
||||||
assert notifications[0] == expect
|
assert notifications[0] == expect
|
||||||
|
|
||||||
env.attachment_service.node_configure(env.pageservers[0].id, {"availability": "Offline"})
|
env.storage_controller.node_configure(env.pageservers[0].id, {"availability": "Offline"})
|
||||||
|
|
||||||
def node_evacuated(node_id: int) -> None:
|
def node_evacuated(node_id: int) -> None:
|
||||||
counts = get_node_shard_counts(env, [env.initial_tenant])
|
counts = get_node_shard_counts(env, [env.initial_tenant])
|
||||||
@@ -473,8 +473,8 @@ def test_sharding_service_compute_hook(
|
|||||||
wait_until(20, 0.25, received_migration_notification)
|
wait_until(20, 0.25, received_migration_notification)
|
||||||
|
|
||||||
# When we restart, we should re-emit notifications for all tenants
|
# When we restart, we should re-emit notifications for all tenants
|
||||||
env.attachment_service.stop()
|
env.storage_controller.stop()
|
||||||
env.attachment_service.start()
|
env.storage_controller.start()
|
||||||
|
|
||||||
def received_restart_notification():
|
def received_restart_notification():
|
||||||
assert len(notifications) == 3
|
assert len(notifications) == 3
|
||||||
@@ -483,7 +483,7 @@ def test_sharding_service_compute_hook(
|
|||||||
wait_until(10, 1, received_restart_notification)
|
wait_until(10, 1, received_restart_notification)
|
||||||
|
|
||||||
# Splitting a tenant should cause its stripe size to become visible in the compute notification
|
# Splitting a tenant should cause its stripe size to become visible in the compute notification
|
||||||
env.attachment_service.tenant_shard_split(env.initial_tenant, shard_count=2)
|
env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=2)
|
||||||
expect = {
|
expect = {
|
||||||
"tenant_id": str(env.initial_tenant),
|
"tenant_id": str(env.initial_tenant),
|
||||||
"stripe_size": 32768,
|
"stripe_size": 32768,
|
||||||
@@ -499,7 +499,7 @@ def test_sharding_service_compute_hook(
|
|||||||
|
|
||||||
wait_until(10, 1, received_split_notification)
|
wait_until(10, 1, received_split_notification)
|
||||||
|
|
||||||
env.attachment_service.consistency_check()
|
env.storage_controller.consistency_check()
|
||||||
|
|
||||||
|
|
||||||
 def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
@@ -512,55 +512,55 @@ def test_sharding_service_debug_apis(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()

     tenant_id = TenantId.generate()
-    env.attachment_service.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)
+    env.storage_controller.tenant_create(tenant_id, shard_count=2, shard_stripe_size=8192)

     # Check that the consistency check passes on a freshly setup system
-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

-    # These APIs are intentionally not implemented as methods on NeonAttachmentService, as
+    # These APIs are intentionally not implemented as methods on NeonStorageController, as
     # they're just for use in unanticipated circumstances.

     # Initial tenant (1 shard) and the one we just created (2 shards) should be visible
-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "GET",
-        f"{env.attachment_service_api}/debug/v1/tenant",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/tenant",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )
     assert len(response.json()) == 3

     # Scheduler should report the expected nodes and shard counts
-    response = env.attachment_service.request(
-        "GET", f"{env.attachment_service_api}/debug/v1/scheduler"
+    response = env.storage_controller.request(
+        "GET", f"{env.storage_controller_api}/debug/v1/scheduler"
     )
     # Two nodes, in a dict of node_id->node
     assert len(response.json()["nodes"]) == 2
     assert sum(v["shard_count"] for v in response.json()["nodes"].values()) == 3
     assert all(v["may_schedule"] for v in response.json()["nodes"].values())

-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "POST",
-        f"{env.attachment_service_api}/debug/v1/node/{env.pageservers[1].id}/drop",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/node/{env.pageservers[1].id}/drop",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )
-    assert len(env.attachment_service.node_list()) == 1
+    assert len(env.storage_controller.node_list()) == 1

-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "POST",
-        f"{env.attachment_service_api}/debug/v1/tenant/{tenant_id}/drop",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/tenant/{tenant_id}/drop",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )

     # Tenant drop should be reflected in dump output
-    response = env.attachment_service.request(
+    response = env.storage_controller.request(
         "GET",
-        f"{env.attachment_service_api}/debug/v1/tenant",
-        headers=env.attachment_service.headers(TokenScope.ADMIN),
+        f"{env.storage_controller_api}/debug/v1/tenant",
+        headers=env.storage_controller.headers(TokenScope.ADMIN),
     )
     assert len(response.json()) == 1

     # Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're
     # meant to be unclean wrt the pageserver state, but not leave a broken storage controller behind.
-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

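Aside: a sketch of exercising the same debug endpoints directly with requests; the address is a placeholder (auth is not enabled in this test), while the paths and response shapes mirror the assertions above:

    import requests

    base_url = "http://127.0.0.1:1234"  # placeholder controller address

    # The tenant dump returns one entry per tenant shard.
    tenants = requests.get(f"{base_url}/debug/v1/tenant").json()

    # Scheduler state: nodes keyed by node_id, each reporting
    # shard_count and may_schedule.
    scheduler = requests.get(f"{base_url}/debug/v1/scheduler").json()
    assert all(node["may_schedule"] for node in scheduler["nodes"].values())
    print(len(tenants), "shards across", len(scheduler["nodes"]), "nodes")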
 def test_sharding_service_s3_time_travel_recovery(
@@ -584,10 +584,10 @@ def test_sharding_service_s3_time_travel_recovery(
     neon_env_builder.num_pageservers = 1

     env = neon_env_builder.init_start()
-    virtual_ps_http = PageserverHttpClient(env.attachment_service_port, lambda: True)
+    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)

     tenant_id = TenantId.generate()
-    env.attachment_service.tenant_create(
+    env.storage_controller.tenant_create(
         tenant_id,
         shard_count=2,
         shard_stripe_size=8192,
@@ -595,7 +595,7 @@ def test_sharding_service_s3_time_travel_recovery(
     )

     # Check that the consistency check passes
-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

     branch_name = "main"
     timeline_id = env.neon_cli.create_timeline(
@@ -670,28 +670,28 @@ def test_sharding_service_s3_time_travel_recovery(
     with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
         endpoint.safe_psql("SELECT * FROM created_foo;")

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()

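Aside: virtual_ps_http above points an ordinary PageserverHttpClient at the controller's port, which answers the pageserver HTTP API on the pageservers' behalf; a sketch (tenant_status is a client method used elsewhere in this file; treating the controller as a single "virtual pageserver" is the premise of the fixture, not something this diff states):

    # Sketch: address the storage controller as a "virtual pageserver".
    virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True)
    status = virtual_ps_http.tenant_status(tenant_id)  # served by the controller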
 def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.auth_enabled = True
     env = neon_env_builder.init_start()
-    svc = env.attachment_service
-    api = env.attachment_service_api
+    svc = env.storage_controller
+    api = env.storage_controller_api

     tenant_id = TenantId.generate()
     body: Dict[str, Any] = {"new_tenant_id": str(tenant_id)}

     # No token
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Unauthorized: missing authorization header",
     ):
-        svc.request("POST", f"{env.attachment_service_api}/v1/tenant", json=body)
+        svc.request("POST", f"{env.storage_controller_api}/v1/tenant", json=body)

     # Token with incorrect scope
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Forbidden: JWT authentication error",
     ):
         svc.request("POST", f"{api}/v1/tenant", json=body, headers=svc.headers(TokenScope.ADMIN))
@@ -703,14 +703,14 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):

     # No token
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Unauthorized: missing authorization header",
     ):
         svc.request("GET", f"{api}/debug/v1/tenant")

     # Token with incorrect scope
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Forbidden: JWT authentication error",
     ):
         svc.request(
@@ -719,14 +719,14 @@ def test_sharding_service_auth(neon_env_builder: NeonEnvBuilder):

     # No token
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Unauthorized: missing authorization header",
     ):
         svc.request("POST", f"{api}/upcall/v1/re-attach")

     # Token with incorrect scope
     with pytest.raises(
-        AttachmentServiceApiException,
+        StorageControllerApiException,
         match="Forbidden: JWT authentication error",
     ):
         svc.request(
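Aside: the exception matches above imply concrete HTTP statuses; a self-contained sketch with requests, where the address, token, and tenant id are placeholders and 401/403 are inferred from the Unauthorized/Forbidden messages the test matches on:

    import requests

    base = "http://127.0.0.1:1234"          # placeholder controller address
    admin_token = "<jwt with admin scope>"  # placeholder credential
    body = {"new_tenant_id": "0" * 32}      # placeholder tenant id

    r = requests.post(f"{base}/v1/tenant", json=body)
    assert r.status_code == 401  # no Authorization header at all

    r = requests.post(
        f"{base}/v1/tenant",
        json=body,
        headers={"Authorization": f"Bearer {admin_token}"},
    )
    assert r.status_code == 403  # token present, but wrong scope for this endpoint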
@@ -743,7 +743,7 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()
     tenant_id = env.initial_tenant

-    http = env.attachment_service.pageserver_api()
+    http = env.storage_controller.pageserver_api()

     default_value = "7days"
     new_value = "1h"
@@ -769,4 +769,4 @@ def test_sharding_service_tenant_conf(neon_env_builder: NeonEnvBuilder):
     assert readback_ps.effective_config["pitr_interval"] == default_value
     assert "pitr_interval" not in readback_ps.tenant_specific_overrides

-    env.attachment_service.consistency_check()
+    env.storage_controller.consistency_check()
@@ -1011,7 +1011,7 @@ def test_eager_attach_does_not_queue_up(neon_env_builder: NeonEnvBuilder):
     resp = client.tenant_status(eager_tenant)
     assert resp["state"]["slug"] == "Active"

-    gen = env.attachment_service.attach_hook_issue(eager_tenant, env.pageserver.id)
+    gen = env.storage_controller.attach_hook_issue(eager_tenant, env.pageserver.id)
     client.tenant_location_conf(
         eager_tenant,
         {
@@ -1071,7 +1071,7 @@ def test_lazy_attach_activation(neon_env_builder: NeonEnvBuilder, activation_met
     # attach, it will consume the only permit because logical size calculation
     # is paused.

-    gen = env.attachment_service.attach_hook_issue(lazy_tenant, env.pageserver.id)
+    gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)
     client.tenant_location_conf(
         lazy_tenant,
         {
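Aside: the two truncated hunks above share the same attach-hook flow; a sketch of the complete call, where env and client are harness fixtures from the test and the location_conf keys are assumptions not confirmed by this diff:

    gen = env.storage_controller.attach_hook_issue(lazy_tenant, env.pageserver.id)
    client.tenant_location_conf(
        lazy_tenant,
        {
            "mode": "AttachedSingle",  # assumed attachment mode
            "generation": gen,         # generation number issued by the controller
            "tenant_conf": {},         # assumed: empty per-tenant overrides
        },
    )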