storcon_cli: add 'drop' and eviction interval utilities (#7938)

The storage controller has 'drop' APIs for tenants and nodes, for use in
situations where something weird has happened:
- node-drop is useful until we implement proper node decommissioning, or
if we have a partially provisioned node that somehow gets registered with
the storage controller but is then dead.
- tenant-drop is useful if we accidentally add a tenant that shouldn't
be there at all, or if we want to make the controller forget about a
tenant without deleting its data. For example, if one uses the
tenant-warmup command with a bad tenant ID and needs to clean that up.

The drop commands require an `--unclean` parameter, to reduce the chance
that someone incorrectly assumes these are the normal/clean ways to
delete things.

This PR also adds a convenience command for setting the time-based
eviction parameters on a tenant. This is useful when onboarding an
existing tenant that has high resident size due to storage amplification
in compaction: setting a lower time-based eviction threshold brings down
the resident size ahead of doing a shard split.
This commit is contained in:
John Spray
2024-06-03 19:13:01 +01:00
committed by GitHub
parent 7006caf3a1
commit 69026a9a36
3 changed files with 67 additions and 2 deletions

1
Cargo.lock generated
View File

@@ -5820,6 +5820,7 @@ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"comfy-table", "comfy-table",
"humantime",
"hyper 0.14.26", "hyper 0.14.26",
"pageserver_api", "pageserver_api",
"pageserver_client", "pageserver_client",

View File

@@ -9,6 +9,7 @@ license.workspace = true
anyhow.workspace = true anyhow.workspace = true
clap.workspace = true clap.workspace = true
comfy-table.workspace = true comfy-table.workspace = true
humantime.workspace = true
hyper.workspace = true hyper.workspace = true
pageserver_api.workspace = true pageserver_api.workspace = true
pageserver_client.workspace = true pageserver_client.workspace = true

View File

@@ -7,8 +7,9 @@ use pageserver_api::{
TenantDescribeResponse, TenantPolicyRequest, TenantDescribeResponse, TenantPolicyRequest,
}, },
models::{ models::{
LocationConfigSecondary, ShardParameters, TenantConfig, TenantConfigRequest, EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
TenantCreateRequest, TenantShardSplitRequest, TenantShardSplitResponse, ShardParameters, TenantConfig, TenantConfigRequest, TenantCreateRequest,
TenantShardSplitRequest, TenantShardSplitResponse,
}, },
shard::{ShardStripeSize, TenantShardId}, shard::{ShardStripeSize, TenantShardId},
}; };
@@ -125,6 +126,28 @@ enum Command {
#[arg(long)] #[arg(long)]
tenant_id: TenantId, tenant_id: TenantId,
}, },
/// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
/// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
TenantDrop {
#[arg(long)]
tenant_id: TenantId,
#[arg(long)]
unclean: bool,
},
NodeDrop {
#[arg(long)]
node_id: NodeId,
#[arg(long)]
unclean: bool,
},
TenantSetTimeBasedEviction {
#[arg(long)]
tenant_id: TenantId,
#[arg(long)]
period: humantime::Duration,
#[arg(long)]
threshold: humantime::Duration,
},
} }
#[derive(Parser)] #[derive(Parser)]
@@ -674,6 +697,46 @@ async fn main() -> anyhow::Result<()> {
} }
} }
} }
Command::TenantDrop { tenant_id, unclean } => {
if !unclean {
anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed.")
}
storcon_client
.dispatch::<(), ()>(
Method::POST,
format!("debug/v1/tenant/{tenant_id}/drop"),
None,
)
.await?;
}
Command::NodeDrop { node_id, unclean } => {
if !unclean {
anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed.")
}
storcon_client
.dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
.await?;
}
Command::TenantSetTimeBasedEviction {
tenant_id,
period,
threshold,
} => {
vps_client
.tenant_config(&TenantConfigRequest {
tenant_id,
config: TenantConfig {
eviction_policy: Some(EvictionPolicy::LayerAccessThreshold(
EvictionPolicyLayerAccessThreshold {
period: period.into(),
threshold: threshold.into(),
},
)),
..Default::default()
},
})
.await?;
}
} }
Ok(()) Ok(())