feat(storage-controller): add node shards api (#8896)
For control-plane-managed tenants, the admin console already has a page that lists all tenants on a specific pageserver; for storage-controller-managed tenants there is no equivalent yet.

## Summary of changes

Adds an API that lists all shards on a given node (intended + observed).

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
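Not part of the commit itself, but for orientation: a minimal sketch of how a client might call the new endpoint, assuming a storage controller reachable at a placeholder `base_url` and an admin-scoped JWT supplied via a hypothetical `ADMIN_JWT` environment variable; the route and the response fields are the ones added in the diff below.

```rust
// Hedged sketch only: query the new node-shards endpoint over HTTP.
// `base_url`, `ADMIN_JWT`, and `node_id` are placeholder values; the route and
// the response fields (node_id, shards[].tenant_shard_id,
// shards[].is_intended_secondary, shards[].is_observed_secondary) come from
// this commit's diff.
use anyhow::Result;

#[tokio::main]
async fn main() -> Result<()> {
    let base_url = "http://127.0.0.1:1234"; // hypothetical storage controller address
    let admin_jwt = std::env::var("ADMIN_JWT")?; // the handler checks Scope::Admin
    let node_id = 1;

    let resp: serde_json::Value = reqwest::Client::new()
        .get(format!("{base_url}/control/v1/node/{node_id}/shards"))
        .bearer_auth(admin_jwt)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;

    println!("{resp:#}");
    Ok(())
}
```

The same request is what the new `storcon_cli tenants <node-id>` arm and the Python `node_shards()` test fixture in this commit issue.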
```diff
@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
 use clap::{Parser, Subcommand};
 use pageserver_api::{
     controller_api::{
-        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
-        TenantDescribeResponse, TenantPolicyRequest,
+        NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
+        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
     },
     models::{
         EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
```
```diff
@@ -80,7 +80,10 @@ enum Command {
     /// List nodes known to the storage controller
     Nodes {},
     /// List tenants known to the storage controller
-    Tenants {},
+    Tenants {
+        /// If this field is set, it will list the tenants on a specific node
+        node_id: Option<NodeId>,
+    },
     /// Create a new tenant in the storage controller, and by extension on pageservers.
     TenantCreate {
         #[arg(long)]
```
```diff
@@ -403,7 +406,41 @@ async fn main() -> anyhow::Result<()> {
             )
             .await?;
         }
-        Command::Tenants {} => {
+        Command::Tenants {
+            node_id: Some(node_id),
+        } => {
+            let describe_response = storcon_client
+                .dispatch::<(), NodeShardResponse>(
+                    Method::GET,
+                    format!("control/v1/node/{node_id}/shards"),
+                    None,
+                )
+                .await?;
+            let shards = describe_response.shards;
+            let mut table = comfy_table::Table::new();
+            table.set_header([
+                "Shard",
+                "Intended Primary/Secondary",
+                "Observed Primary/Secondary",
+            ]);
+            for shard in shards {
+                table.add_row([
+                    format!("{}", shard.tenant_shard_id),
+                    match shard.is_intended_secondary {
+                        None => "".to_string(),
+                        Some(true) => "Secondary".to_string(),
+                        Some(false) => "Primary".to_string(),
+                    },
+                    match shard.is_observed_secondary {
+                        None => "".to_string(),
+                        Some(true) => "Secondary".to_string(),
+                        Some(false) => "Primary".to_string(),
+                    },
+                ]);
+            }
+            println!("{table}");
+        }
+        Command::Tenants { node_id: None } => {
             let mut resp = storcon_client
                 .dispatch::<(), Vec<TenantDescribeResponse>>(
                     Method::GET,
```
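The CLI arm above renders the response with comfy_table. As a standalone illustration (not part of the commit, shard IDs and placements made up), the same table construction looks like this; each column carries one of three values: empty (the shard has no intent/observation on this node), `Primary`, or `Secondary`.

```rust
// Standalone sketch of the table the new `tenants <node_id>` CLI arm prints.
// The shard IDs and placements below are made up; the header and the
// empty/"Primary"/"Secondary" cell convention mirror the CLI code above.
use comfy_table::Table;

fn main() {
    let mut table = Table::new();
    table.set_header([
        "Shard",
        "Intended Primary/Secondary",
        "Observed Primary/Secondary",
    ]);
    // Attached (primary) on this node, both by intent and by observation.
    table.add_row(["1f359dd625e519a1a4e8d7509690f6fc-0002", "Primary", "Primary"]);
    // Intended secondary here, but no observed location on this node yet (empty cell).
    table.add_row(["1f359dd625e519a1a4e8d7509690f6fc-0102", "Secondary", ""]);
    println!("{table}");
}
```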
```diff
@@ -112,6 +112,21 @@ pub struct TenantDescribeResponse {
     pub config: TenantConfig,
 }
 
+#[derive(Serialize, Deserialize, Debug)]
+pub struct NodeShardResponse {
+    pub node_id: NodeId,
+    pub shards: Vec<NodeShard>,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct NodeShard {
+    pub tenant_shard_id: TenantShardId,
+    /// Whether the shard is observed secondary on a specific node. True = yes, False = no, None = not on this node.
+    pub is_observed_secondary: Option<bool>,
+    /// Whether the shard is intended to be a secondary on a specific node. True = yes, False = no, None = not on this node.
+    pub is_intended_secondary: Option<bool>,
+}
+
 #[derive(Serialize, Deserialize)]
 pub struct NodeDescribeResponse {
     pub id: NodeId,
```
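To make the wire format concrete, here is a hedged, self-contained sketch using simplified stand-ins for the types above (NodeId shown as a plain integer, TenantShardId as a string); it is not the real `pageserver_api` code, only an illustration of the JSON a caller should expect.

```rust
// Simplified stand-ins for the response types above, only to illustrate the
// JSON shape; the real definitions live in pageserver_api::controller_api and
// use the crate's NodeId / TenantShardId types.
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug)]
struct NodeShard {
    tenant_shard_id: String,
    /// None = the shard has no observed location on this node.
    is_observed_secondary: Option<bool>,
    /// None = the shard is not intended to be placed on this node.
    is_intended_secondary: Option<bool>,
}

#[derive(Serialize, Deserialize, Debug)]
struct NodeShardResponse {
    node_id: u64,
    shards: Vec<NodeShard>,
}

fn main() {
    let resp = NodeShardResponse {
        node_id: 1,
        shards: vec![NodeShard {
            tenant_shard_id: "1f359dd625e519a1a4e8d7509690f6fc-0002".to_string(), // made-up ID
            is_observed_secondary: Some(false), // observed attached (primary) here
            is_intended_secondary: Some(false), // intended attached (primary) here
        }],
    };
    // Prints roughly:
    // {"node_id":1,"shards":[{"tenant_shard_id":"...","is_observed_secondary":false,"is_intended_secondary":false}]}
    println!("{}", serde_json::to_string(&resp).unwrap());
}
```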
```diff
@@ -539,6 +539,17 @@ async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiErr
     json_response(StatusCode::OK, node_status)
 }
 
+async fn handle_node_shards(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    check_permissions(&req, Scope::Admin)?;
+
+    let state = get_state(&req);
+    let node_id: NodeId = parse_request_param(&req, "node_id")?;
+
+    let node_status = state.service.get_node_shards(node_id).await?;
+
+    json_response(StatusCode::OK, node_status)
+}
+
 async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     check_permissions(&req, Scope::Admin)?;
 
```
```diff
@@ -1109,6 +1120,13 @@ pub fn make_router(
         .get("/control/v1/node/:node_id", |r| {
             named_request_span(r, handle_node_status, RequestName("control_v1_node_status"))
         })
+        .get("/control/v1/node/:node_id/shards", |r| {
+            named_request_span(
+                r,
+                handle_node_shards,
+                RequestName("control_v1_node_describe"),
+            )
+        })
         .get("/control/v1/leader", |r| {
             named_request_span(r, handle_get_leader, RequestName("control_v1_get_leader"))
         })
```
```diff
@@ -41,11 +41,11 @@ use itertools::Itertools;
 use pageserver_api::{
     controller_api::{
         MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, NodeRegisterRequest,
-        NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy, ShardsPreferredAzsRequest,
-        ShardsPreferredAzsResponse, TenantCreateRequest, TenantCreateResponse,
-        TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard,
-        TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest,
-        TenantShardMigrateResponse,
+        NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, ShardSchedulingPolicy,
+        ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, TenantCreateRequest,
+        TenantCreateResponse, TenantCreateResponseShard, TenantDescribeResponse,
+        TenantDescribeResponseShard, TenantLocateResponse, TenantPolicyRequest,
+        TenantShardMigrateRequest, TenantShardMigrateResponse,
     },
     models::{
         SecondaryProgress, TenantConfigRequest, TimelineArchivalConfigRequest,
```
```diff
@@ -4924,6 +4924,45 @@ impl Service {
         ))
     }
 
+    pub(crate) async fn get_node_shards(
+        &self,
+        node_id: NodeId,
+    ) -> Result<NodeShardResponse, ApiError> {
+        let locked = self.inner.read().unwrap();
+        let mut shards = Vec::new();
+        for (tid, tenant) in locked.tenants.iter() {
+            let is_intended_secondary = match (
+                tenant.intent.get_attached() == &Some(node_id),
+                tenant.intent.get_secondary().contains(&node_id),
+            ) {
+                (true, true) => {
+                    return Err(ApiError::InternalServerError(anyhow::anyhow!(
+                        "{} attached as primary+secondary on the same node",
+                        tid
+                    )))
+                }
+                (true, false) => Some(false),
+                (false, true) => Some(true),
+                (false, false) => None,
+            };
+            let is_observed_secondary = if let Some(ObservedStateLocation { conf: Some(conf) }) =
+                tenant.observed.locations.get(&node_id)
+            {
+                Some(conf.secondary_conf.is_some())
+            } else {
+                None
+            };
+            if is_intended_secondary.is_some() || is_observed_secondary.is_some() {
+                shards.push(NodeShard {
+                    tenant_shard_id: *tid,
+                    is_intended_secondary,
+                    is_observed_secondary,
+                });
+            }
+        }
+        Ok(NodeShardResponse { node_id, shards })
+    }
+
     pub(crate) async fn get_leader(&self) -> DatabaseResult<Option<ControllerPersistence>> {
         self.persistence.get_leader().await
     }
```
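The core of `get_node_shards` is the mapping from (intended-attached-here, intended-secondary-here) to the three-state `is_intended_secondary` field; a dependency-free sketch of that mapping, with the error case for the impossible combination, is below. Names like `classify_intent` are mine, not the commit's.

```rust
// Dependency-free sketch of the intent classification performed per shard in
// get_node_shards above. `attached_here` / `secondary_here` stand for the two
// intent checks against the queried node; ApiError is replaced by a plain
// String to keep the sketch self-contained.
fn classify_intent(attached_here: bool, secondary_here: bool) -> Result<Option<bool>, String> {
    match (attached_here, secondary_here) {
        // A shard must never be intended as both primary and secondary on one node.
        (true, true) => Err("attached as primary+secondary on the same node".to_string()),
        (true, false) => Ok(Some(false)), // intended primary (attached) on this node
        (false, true) => Ok(Some(true)),  // intended secondary on this node
        (false, false) => Ok(None),       // shard is not meant to live on this node
    }
}

fn main() {
    assert_eq!(classify_intent(true, false), Ok(Some(false)));
    assert_eq!(classify_intent(false, true), Ok(Some(true)));
    assert_eq!(classify_intent(false, false), Ok(None));
    assert!(classify_intent(true, true).is_err());
}
```

The observed side is computed independently from the node's last known location config (`secondary_conf` present or not), which is why both fields are `Option<bool>` and a shard is reported whenever either one is `Some`.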
```diff
@@ -140,6 +140,14 @@ class TenantId(Id):
         return self.id.hex()
 
 
+class NodeId(Id):
+    def __repr__(self) -> str:
+        return f'`NodeId("{self.id.hex()}")'
+
+    def __str__(self) -> str:
+        return self.id.hex()
+
+
 class TimelineId(Id):
     def __repr__(self) -> str:
         return f'TimelineId("{self.id.hex()}")'
```
```diff
@@ -62,7 +62,7 @@ from urllib3.util.retry import Retry
 
 from fixtures import overlayfs
 from fixtures.broker import NeonBroker
-from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
+from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, TimelineId
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.log_helper import log
 from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
```
```diff
@@ -2570,6 +2570,30 @@ class NeonStorageController(MetricsGetter, LogUtils):
         response.raise_for_status()
         return response.json()
 
+    def nodes(self):
+        """
+        :return: list of {"id": ""}
+        """
+        response = self.request(
+            "GET",
+            f"{self.api}/control/v1/node",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def node_shards(self, node_id: NodeId):
+        """
+        :return: list of {"shard_id": "", "is_secondary": bool}
+        """
+        response = self.request(
+            "GET",
+            f"{self.api}/control/v1/node/{node_id}/shards",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+        return response.json()
+
     def tenant_shard_split(
         self, tenant_id: TenantId, shard_count: int, shard_stripe_size: Optional[int] = None
     ) -> list[TenantShardId]:
```
```diff
@@ -1552,6 +1552,12 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
     literal_shard_count = 1 if shard_count is None else shard_count
     assert len(describe["shards"]) == literal_shard_count
 
+    nodes = env.storage_controller.nodes()
+    assert len(nodes) == 2
+    describe1 = env.storage_controller.node_shards(nodes[0]["id"])
+    describe2 = env.storage_controller.node_shards(nodes[1]["id"])
+    assert len(describe1["shards"]) + len(describe2["shards"]) == literal_shard_count
+
     # Check the data is still there: this implicitly proves that we recovered generation numbers
     # properly, for the timeline which was written to after a generation bump.
     for timeline, branch, expect_rows in [
```