feat(storage-controller): add node shards api (#8896)
For control-plane-managed tenants, the admin console has a page that lists all tenants on a specific pageserver, but no equivalent view exists yet for storage-controller-managed tenants.

## Summary of changes

Adds an API that lists all shards on a given node (intended + observed).

Signed-off-by: Alex Chi Z <chi@neon.tech>
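As a quick illustration of the new surface, the endpoint can be called directly over HTTP. A minimal sketch using `requests`, with a placeholder controller address and admin-scoped JWT (both hypothetical); the route and the `Scope::Admin` requirement come from the patch below:

```python
import requests

# Hypothetical values for illustration; substitute your deployment's.
STORCON_URL = "http://127.0.0.1:1234"
ADMIN_JWT = "<admin-scoped token>"
NODE_ID = 1

resp = requests.get(
    f"{STORCON_URL}/control/v1/node/{NODE_ID}/shards",
    headers={"Authorization": f"Bearer {ADMIN_JWT}"},
)
resp.raise_for_status()
body = resp.json()  # NodeShardResponse: {"node_id": ..., "shards": [...]}
for shard in body["shards"]:
    print(
        shard["tenant_shard_id"],
        shard["is_intended_secondary"],
        shard["is_observed_secondary"],
    )
```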
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
 use clap::{Parser, Subcommand};
 use pageserver_api::{
     controller_api::{
-        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
-        TenantDescribeResponse, TenantPolicyRequest,
+        NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
+        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
     },
     models::{
         EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -80,7 +80,10 @@ enum Command {
     /// List nodes known to the storage controller
     Nodes {},
     /// List tenants known to the storage controller
-    Tenants {},
+    Tenants {
+        /// If this field is set, it will list the tenants on a specific node
+        node_id: Option<NodeId>,
+    },
     /// Create a new tenant in the storage controller, and by extension on pageservers.
     TenantCreate {
         #[arg(long)]
@@ -403,7 +406,41 @@ async fn main() -> anyhow::Result<()> {
                )
                .await?;
        }
-        Command::Tenants {} => {
+        Command::Tenants {
+            node_id: Some(node_id),
+        } => {
+            let describe_response = storcon_client
+                .dispatch::<(), NodeShardResponse>(
+                    Method::GET,
+                    format!("control/v1/node/{node_id}/shards"),
+                    None,
+                )
+                .await?;
+            let shards = describe_response.shards;
+            let mut table = comfy_table::Table::new();
+            table.set_header([
+                "Shard",
+                "Intended Primary/Secondary",
+                "Observed Primary/Secondary",
+            ]);
+            for shard in shards {
+                table.add_row([
+                    format!("{}", shard.tenant_shard_id),
+                    match shard.is_intended_secondary {
+                        None => "".to_string(),
+                        Some(true) => "Secondary".to_string(),
+                        Some(false) => "Primary".to_string(),
+                    },
+                    match shard.is_observed_secondary {
+                        None => "".to_string(),
+                        Some(true) => "Secondary".to_string(),
+                        Some(false) => "Primary".to_string(),
+                    },
+                ]);
+            }
+            println!("{table}");
+        }
+        Command::Tenants { node_id: None } => {
            let mut resp = storcon_client
                .dispatch::<(), Vec<TenantDescribeResponse>>(
                    Method::GET,
@@ -112,6 +112,21 @@ pub struct TenantDescribeResponse {
     pub config: TenantConfig,
 }
 
+#[derive(Serialize, Deserialize, Debug)]
+pub struct NodeShardResponse {
+    pub node_id: NodeId,
+    pub shards: Vec<NodeShard>,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct NodeShard {
+    pub tenant_shard_id: TenantShardId,
+    /// Whether the shard is observed secondary on a specific node. True = yes, False = no, None = not on this node.
+    pub is_observed_secondary: Option<bool>,
+    /// Whether the shard is intended to be a secondary on a specific node. True = yes, False = no, None = not on this node.
+    pub is_intended_secondary: Option<bool>,
+}
+
 #[derive(Serialize, Deserialize)]
 pub struct NodeDescribeResponse {
     pub id: NodeId,
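For orientation, a `NodeShardResponse` serialized by serde would look roughly like the following, sketched here as a Python literal with invented values. The field names are the ones declared above; `None`/`null` means the shard has no intent or observed state on the queried node in that dimension:

```python
# Illustrative NodeShardResponse payload (all values are made up).
node_shard_response = {
    "node_id": 1,
    "shards": [
        {
            "tenant_shard_id": "3fa85f6485f94f24bc542c4e0e9f36cd-0004",
            "is_intended_secondary": False,  # intended attached (primary) here
            "is_observed_secondary": False,  # observed attached here too
        },
        {
            "tenant_shard_id": "9b3f7c1e2d4a5b6c7d8e9f0a1b2c3d4e-0104",
            "is_intended_secondary": True,   # intended secondary here
            "is_observed_secondary": None,   # not yet observed on this node
        },
    ],
}
```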
@@ -539,6 +539,17 @@ async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiErr
     json_response(StatusCode::OK, node_status)
 }
 
+async fn handle_node_shards(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    check_permissions(&req, Scope::Admin)?;
+
+    let state = get_state(&req);
+    let node_id: NodeId = parse_request_param(&req, "node_id")?;
+
+    let node_status = state.service.get_node_shards(node_id).await?;
+
+    json_response(StatusCode::OK, node_status)
+}
+
 async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     check_permissions(&req, Scope::Admin)?;
 
@@ -1109,6 +1120,13 @@ pub fn make_router(
         .get("/control/v1/node/:node_id", |r| {
             named_request_span(r, handle_node_status, RequestName("control_v1_node_status"))
         })
+        .get("/control/v1/node/:node_id/shards", |r| {
+            named_request_span(
+                r,
+                handle_node_shards,
+                RequestName("control_v1_node_describe"),
+            )
+        })
         .get("/control/v1/leader", |r| {
             named_request_span(r, handle_get_leader, RequestName("control_v1_get_leader"))
         })
@@ -41,11 +41,11 @@ use itertools::Itertools;
 use pageserver_api::{
     controller_api::{
         MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, NodeRegisterRequest,
-        NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy, ShardsPreferredAzsRequest,
-        ShardsPreferredAzsResponse, TenantCreateRequest, TenantCreateResponse,
-        TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard,
-        TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest,
-        TenantShardMigrateResponse,
+        NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, ShardSchedulingPolicy,
+        ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, TenantCreateRequest,
+        TenantCreateResponse, TenantCreateResponseShard, TenantDescribeResponse,
+        TenantDescribeResponseShard, TenantLocateResponse, TenantPolicyRequest,
+        TenantShardMigrateRequest, TenantShardMigrateResponse,
     },
     models::{
         SecondaryProgress, TenantConfigRequest, TimelineArchivalConfigRequest,
@@ -4924,6 +4924,45 @@ impl Service {
         ))
     }
 
+    pub(crate) async fn get_node_shards(
+        &self,
+        node_id: NodeId,
+    ) -> Result<NodeShardResponse, ApiError> {
+        let locked = self.inner.read().unwrap();
+        let mut shards = Vec::new();
+        for (tid, tenant) in locked.tenants.iter() {
+            let is_intended_secondary = match (
+                tenant.intent.get_attached() == &Some(node_id),
+                tenant.intent.get_secondary().contains(&node_id),
+            ) {
+                (true, true) => {
+                    return Err(ApiError::InternalServerError(anyhow::anyhow!(
+                        "{} attached as primary+secondary on the same node",
+                        tid
+                    )))
+                }
+                (true, false) => Some(false),
+                (false, true) => Some(true),
+                (false, false) => None,
+            };
+            let is_observed_secondary = if let Some(ObservedStateLocation { conf: Some(conf) }) =
+                tenant.observed.locations.get(&node_id)
+            {
+                Some(conf.secondary_conf.is_some())
+            } else {
+                None
+            };
+            if is_intended_secondary.is_some() || is_observed_secondary.is_some() {
+                shards.push(NodeShard {
+                    tenant_shard_id: *tid,
+                    is_intended_secondary,
+                    is_observed_secondary,
+                });
+            }
+        }
+        Ok(NodeShardResponse { node_id, shards })
+    }
+
     pub(crate) async fn get_leader(&self) -> DatabaseResult<Option<ControllerPersistence>> {
         self.persistence.get_leader().await
     }
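In `get_node_shards`, the intent side folds the pair (attached here, secondary here) into a three-valued outcome. The same truth table, restated as a small illustrative Python helper (the name is ours, not from the patch), makes the `None`/`Some` semantics explicit:

```python
def classify_intent(attached_here: bool, secondary_here: bool):
    """Mirrors the match in get_node_shards:
    False -> intended primary on this node,
    True  -> intended secondary on this node,
    None  -> shard not scheduled on this node."""
    if attached_here and secondary_here:
        # The service treats this case as an internal invariant violation.
        raise RuntimeError("attached as primary+secondary on the same node")
    if attached_here:
        return False
    if secondary_here:
        return True
    return None
```

The observed side is analogous: if the node reports a location config for the shard, `is_observed_secondary` records whether that config is a secondary one; otherwise it stays `None`.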
@@ -140,6 +140,14 @@ class TenantId(Id):
         return self.id.hex()
 
 
+class NodeId(Id):
+    def __repr__(self) -> str:
+        return f'NodeId("{self.id.hex()}")'
+
+    def __str__(self) -> str:
+        return self.id.hex()
+
+
 class TimelineId(Id):
     def __repr__(self) -> str:
         return f'TimelineId("{self.id.hex()}")'
@@ -62,7 +62,7 @@ from urllib3.util.retry import Retry
 
 from fixtures import overlayfs
 from fixtures.broker import NeonBroker
-from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
+from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, TimelineId
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.log_helper import log
 from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
@@ -2570,6 +2570,30 @@ class NeonStorageController(MetricsGetter, LogUtils):
         response.raise_for_status()
         return response.json()
 
+    def nodes(self):
+        """
+        :return: list of node dicts, each containing at least {"id": ...}
+        """
+        response = self.request(
+            "GET",
+            f"{self.api}/control/v1/node",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def node_shards(self, node_id: NodeId):
+        """
+        :return: {"node_id": ..., "shards": [{"tenant_shard_id": ..., "is_intended_secondary": bool|None, "is_observed_secondary": bool|None}, ...]}
+        """
+        response = self.request(
+            "GET",
+            f"{self.api}/control/v1/node/{node_id}/shards",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+        return response.json()
+
     def tenant_shard_split(
         self, tenant_id: TenantId, shard_count: int, shard_stripe_size: Optional[int] = None
     ) -> list[TenantShardId]:
@@ -1552,6 +1552,12 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
     literal_shard_count = 1 if shard_count is None else shard_count
     assert len(describe["shards"]) == literal_shard_count
 
+    nodes = env.storage_controller.nodes()
+    assert len(nodes) == 2
+    describe1 = env.storage_controller.node_shards(nodes[0]["id"])
+    describe2 = env.storage_controller.node_shards(nodes[1]["id"])
+    assert len(describe1["shards"]) + len(describe2["shards"]) == literal_shard_count
+
     # Check the data is still there: this implicitly proves that we recovered generation numbers
     # properly, for the timeline which was written to after a generation bump.
     for timeline, branch, expect_rows in [