//! Request/response types for the storage controller
//! API (`/control/v1` prefix). Implemented by the server
//! in [`storage_controller::http`].

use std::str::FromStr;

use serde::{Deserialize, Serialize};
use utils::id::{NodeId, TenantId};

use crate::{
    models::{ShardParameters, TenantConfig},
    shard::{ShardStripeSize, TenantShardId},
};

#[derive(Serialize, Deserialize)]
pub struct TenantCreateResponseShard {
    pub shard_id: TenantShardId,
    pub node_id: NodeId,
    pub generation: u32,
}

#[derive(Serialize, Deserialize)]
pub struct TenantCreateResponse {
    pub shards: Vec<TenantCreateResponseShard>,
}

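/// An illustrative registration body (a sketch, not part of the original file;
/// the host names and ports are invented, and it assumes `NodeId` serializes
/// as a plain integer per the serde derives):
///
/// ```ignore
/// {
///     "node_id": 1,
///     "listen_pg_addr": "pageserver-1.example.com",
///     "listen_pg_port": 64000,
///     "listen_http_addr": "pageserver-1.example.com",
///     "listen_http_port": 9898
/// }
/// ```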
#[derive(Serialize, Deserialize)]
pub struct NodeRegisterRequest {
    pub node_id: NodeId,

    pub listen_pg_addr: String,
    pub listen_pg_port: u16,

    pub listen_http_addr: String,
    pub listen_http_port: u16,
}

#[derive(Serialize, Deserialize)]
pub struct NodeConfigureRequest {
    pub node_id: NodeId,

    pub availability: Option<NodeAvailabilityWrapper>,
    pub scheduling: Option<NodeSchedulingPolicy>,
}

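/// An illustrative request body (a sketch, not part of the original file; both
/// fields are optional, and the encodings follow the serde derives below):
///
/// ```ignore
/// { "placement": {"Attached": 1}, "scheduling": "Essential" }
/// ```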
#[derive(Serialize, Deserialize)]
pub struct TenantPolicyRequest {
    pub placement: Option<PlacementPolicy>,
    pub scheduling: Option<ShardSchedulingPolicy>,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct TenantLocateResponseShard {
    pub shard_id: TenantShardId,
    pub node_id: NodeId,

    pub listen_pg_addr: String,
    pub listen_pg_port: u16,

    pub listen_http_addr: String,
    pub listen_http_port: u16,
}

#[derive(Serialize, Deserialize)]
pub struct TenantLocateResponse {
    pub shards: Vec<TenantLocateResponseShard>,
    pub shard_params: ShardParameters,
}

#[derive(Serialize, Deserialize)]
pub struct TenantDescribeResponse {
    pub tenant_id: TenantId,
    pub shards: Vec<TenantDescribeResponseShard>,
    pub stripe_size: ShardStripeSize,
    pub policy: PlacementPolicy,
    pub config: TenantConfig,
}

#[derive(Serialize, Deserialize)]
pub struct NodeDescribeResponse {
    pub id: NodeId,

    pub availability: NodeAvailabilityWrapper,
    pub scheduling: NodeSchedulingPolicy,

    pub listen_http_addr: String,
    pub listen_http_port: u16,

    pub listen_pg_addr: String,
    pub listen_pg_port: u16,
}

#[derive(Serialize, Deserialize)]
pub struct TenantDescribeResponseShard {
    pub tenant_shard_id: TenantShardId,

    pub node_attached: Option<NodeId>,
    pub node_secondary: Vec<NodeId>,

    pub last_error: String,

    /// A task is currently running to reconcile this shard's intent state with
    /// its state on the pageservers.
    pub is_reconciling: bool,
    /// This shard failed to send a compute notification to the cloud control plane,
    /// and a retry is pending.
    pub is_pending_compute_notification: bool,
    /// A shard split is currently underway.
    pub is_splitting: bool,

    pub scheduling_policy: ShardSchedulingPolicy,
}

/// Explicitly migrating a particular shard is a low-level operation.
/// TODO: add a higher-level "reschedule tenant" operation, where the request
/// specifies some constraints, e.g. asking the shard to move off particular node(s).
#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateRequest {
    pub tenant_shard_id: TenantShardId,
    pub node_id: NodeId,
}

/// Utilisation score indicating how good a candidate a pageserver
/// is for scheduling the next tenant. See [`crate::models::PageserverUtilization`].
/// Lower values are better.
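///
/// An illustrative comparison (a sketch, not part of the original file):
///
/// ```ignore
/// assert!(UtilizationScore(0) < UtilizationScore::worst());
/// ```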
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Debug)]
pub struct UtilizationScore(pub u64);

impl UtilizationScore {
    pub fn worst() -> Self {
        UtilizationScore(u64::MAX)
    }
}

#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
#[serde(into = "NodeAvailabilityWrapper")]
pub enum NodeAvailability {
    // Normal, happy state
    Active(UtilizationScore),
    // Offline: tenants shouldn't try to attach here, but they may assume that their
    // secondary locations on this node still exist. Newly added nodes are in this
    // state until we successfully contact them.
    Offline,
}

impl PartialEq for NodeAvailability {
    fn eq(&self, other: &Self) -> bool {
        use NodeAvailability::*;
        matches!((self, other), (Active(_), Active(_)) | (Offline, Offline))
    }
}

impl Eq for NodeAvailability {}

// This wrapper provides serde functionality, and it should only be used to
// communicate with external callers which don't know or care about the
// utilisation score of the pageserver it is targeting.
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
pub enum NodeAvailabilityWrapper {
    Active,
    Offline,
}

impl From<NodeAvailabilityWrapper> for NodeAvailability {
    fn from(val: NodeAvailabilityWrapper) -> Self {
        match val {
            // Assume the worst utilisation score to begin with. It will later be updated by
            // the heartbeats.
            NodeAvailabilityWrapper::Active => NodeAvailability::Active(UtilizationScore::worst()),
            NodeAvailabilityWrapper::Offline => NodeAvailability::Offline,
        }
    }
}

impl From<NodeAvailability> for NodeAvailabilityWrapper {
    fn from(val: NodeAvailability) -> Self {
        match val {
            NodeAvailability::Active(_) => NodeAvailabilityWrapper::Active,
            NodeAvailability::Offline => NodeAvailabilityWrapper::Offline,
        }
    }
}

#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub enum ShardSchedulingPolicy {
    // Normal mode: the tenant's scheduled locations may be updated at will, including
    // for non-essential optimization.
    Active,

    // Disable optimizations, but permit scheduling when necessary to fulfil the PlacementPolicy.
    // For example, this still permits a node's attachment location to change to a secondary in
    // response to a node failure, or to assign a new secondary if a node was removed.
    Essential,

    // No scheduling: leave the shard running wherever it currently is. Even if the shard is
    // unavailable, it will not be rescheduled to another node.
    Pause,

    // No reconciling: we will make no location_conf API calls to pageservers at all. If the
    // shard is unavailable, it stays that way. If a node fails, this shard doesn't get failed over.
    Stop,
}

impl Default for ShardSchedulingPolicy {
    fn default() -> Self {
        Self::Active
    }
}

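/// An illustrative round-trip through the `FromStr`/`Into<String>` impls below
/// (a sketch, not part of the original file; note that serde encodes the
/// capitalized variant names, while these impls use the lowercase forms):
///
/// ```ignore
/// use std::str::FromStr;
///
/// let policy = NodeSchedulingPolicy::from_str("draining").unwrap();
/// assert_eq!(String::from(policy), "draining");
/// ```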
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
pub enum NodeSchedulingPolicy {
    Active,
    Filling,
    Pause,
    Draining,
}

impl FromStr for NodeSchedulingPolicy {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "active" => Ok(Self::Active),
            "filling" => Ok(Self::Filling),
            "pause" => Ok(Self::Pause),
            "draining" => Ok(Self::Draining),
            _ => Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
        }
    }
}

impl From<NodeSchedulingPolicy> for String {
    fn from(value: NodeSchedulingPolicy) -> String {
        use NodeSchedulingPolicy::*;
        match value {
            Active => "active",
            Filling => "filling",
            Pause => "pause",
            Draining => "draining",
        }
        .to_string()
    }
}

/// Controls how tenant shards are mapped to locations on pageservers, e.g. whether
/// to create secondary locations.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
pub enum PlacementPolicy {
    /// Normal live state: one attached pageserver and zero or more secondaries
    /// (the `usize` is the number of secondary locations).
    Attached(usize),
    /// Create one secondary-mode location. This is useful when onboarding
    /// a tenant, or for an idle tenant that we might want to bring online quickly.
    Secondary,

    /// Do not attach to any pageservers. This is appropriate for tenants that
    /// have been idle for a long time, where we do not mind some delay in making
    /// them available in future.
    Detached,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct TenantShardMigrateResponse {}

#[cfg(test)]
mod test {
    use super::*;
    use serde_json;

    /// Check stability of PlacementPolicy's serialization
    #[test]
    fn placement_policy_encoding() -> anyhow::Result<()> {
        let v = PlacementPolicy::Attached(1);
        let encoded = serde_json::to_string(&v)?;
        assert_eq!(encoded, "{\"Attached\":1}");
        assert_eq!(serde_json::from_str::<PlacementPolicy>(&encoded)?, v);

        let v = PlacementPolicy::Detached;
        let encoded = serde_json::to_string(&v)?;
        assert_eq!(encoded, "\"Detached\"");
        assert_eq!(serde_json::from_str::<PlacementPolicy>(&encoded)?, v);
        Ok(())
    }
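
    /// A minimal added sketch (not part of the original file): NodeAvailability
    /// serializes through NodeAvailabilityWrapper, so the utilisation score is
    /// not on the wire, and converting back from the wrapper assumes the worst
    /// score until heartbeats refresh it.
    #[test]
    fn node_availability_encoding() -> anyhow::Result<()> {
        // `#[serde(into = "NodeAvailabilityWrapper")]` drops the score on encode.
        let encoded = serde_json::to_string(&NodeAvailability::Active(UtilizationScore(0)))?;
        assert_eq!(encoded, "\"Active\"");

        // PartialEq for NodeAvailability ignores the score, so this compares variants only.
        let decoded = NodeAvailability::from(NodeAvailabilityWrapper::Active);
        assert_eq!(decoded, NodeAvailability::Active(UtilizationScore::worst()));
        Ok(())
    }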
}