storcon: add per-tenant rate limiting for API requests (#10924)

## Problem

Incoming requests often take the service lock, and sometimes even do
database transactions. That creates a risk that a rogue client can
starve the controller of the ability to do its primary job of
reconciling tenants to an available state.

## Summary of changes

* Use the `governor` crate to rate limit tenant requests at 10 requests
per second. This is ~10-100x lower than the worst "attack" we've seen
from a client bug. Admin APIs are not rate limited.
* Add a `storage_controller_http_request_rate_limited` histogram for
rate limited requests.
* Log a warning every 10 seconds for rate limited tenants.

The rate limiter is parametrized on TenantId, because the kinds of
client bug we're protecting against generally happen within tenant
scope, and the rates should be somewhat stable: we expect the global
rate of requests to increase as we do more work, but we do not expect
the rate of requests to one tenant to increase.

---------

Co-authored-by: John Spray <john@neon.tech>
This commit is contained in:
Erik Grinaker
2025-03-03 23:04:59 +01:00
committed by GitHub
parent 6d0976dad5
commit 65addfc524
8 changed files with 156 additions and 9 deletions

77
Cargo.lock generated
View File

@@ -2410,9 +2410,9 @@ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
[[package]]
name = "futures-timer"
version = "3.0.2"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
[[package]]
name = "futures-util"
@@ -2515,6 +2515,27 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "governor"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "842dc78579ce01e6a1576ad896edc92fca002dd60c9c3746b7fc2bec6fb429d0"
dependencies = [
"cfg-if",
"dashmap 6.1.0",
"futures-sink",
"futures-timer",
"futures-util",
"no-std-compat",
"nonzero_ext",
"parking_lot 0.12.1",
"portable-atomic",
"quanta",
"rand 0.8.5",
"smallvec",
"spinning_top",
]
[[package]]
name = "group"
version = "0.12.1"
@@ -3725,6 +3746,12 @@ dependencies = [
"memoffset 0.9.0",
]
[[package]]
name = "no-std-compat"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
[[package]]
name = "nom"
version = "7.1.3"
@@ -3735,6 +3762,12 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "nonzero_ext"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
[[package]]
name = "notify"
version = "8.0.0"
@@ -4591,6 +4624,12 @@ dependencies = [
"never-say-never",
]
[[package]]
name = "portable-atomic"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "postgres"
version = "0.19.7"
@@ -5052,6 +5091,21 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "quanta"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
dependencies = [
"crossbeam-utils",
"libc",
"once_cell",
"raw-cpuid",
"wasi 0.11.0+wasi-snapshot-preview1",
"web-sys",
"winapi",
]
[[package]]
name = "quick-xml"
version = "0.26.0"
@@ -5182,6 +5236,15 @@ dependencies = [
"num-traits",
]
[[package]]
name = "raw-cpuid"
version = "11.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e"
dependencies = [
"bitflags 2.8.0",
]
[[package]]
name = "rayon"
version = "1.7.0"
@@ -6395,6 +6458,15 @@ version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
[[package]]
name = "spinning_top"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300"
dependencies = [
"lock_api",
]
[[package]]
name = "spki"
version = "0.6.0"
@@ -6471,6 +6543,7 @@ dependencies = [
"diesel_migrations",
"fail",
"futures",
"governor",
"hex",
"http-utils",
"humantime",

View File

@@ -95,6 +95,7 @@ futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
git-version = "0.3"
governor = "0.8"
hashbrown = "0.14"
hashlink = "0.9.1"
hdrhistogram = "7.5.2"

View File

@@ -21,6 +21,7 @@ clap.workspace = true
cron.workspace = true
fail.workspace = true
futures.workspace = true
governor.workspace = true
hex.workspace = true
hyper0.workspace = true
humantime.workspace = true

View File

@@ -1,5 +1,5 @@
use std::str::FromStr;
use std::sync::Arc;
use std::sync::{Arc, LazyLock};
use std::time::{Duration, Instant};
use anyhow::Context;
@@ -33,6 +33,7 @@ use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
use pageserver_client::{BlockUnblock, mgmt_api};
use routerify::Middleware;
use tokio_util::sync::CancellationToken;
use tracing::warn;
use utils::auth::{Scope, SwappableJwtAuth};
use utils::id::{NodeId, TenantId, TimelineId};
@@ -49,6 +50,7 @@ use crate::service::{LeadershipStatus, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIME
pub struct HttpState {
service: Arc<crate::service::Service>,
auth: Option<Arc<SwappableJwtAuth>>,
rate_limiter: governor::DefaultKeyedRateLimiter<TenantId>,
neon_metrics: NeonMetrics,
allowlist_routes: &'static [&'static str],
}
@@ -59,9 +61,11 @@ impl HttpState {
auth: Option<Arc<SwappableJwtAuth>>,
build_info: BuildInfo,
) -> Self {
let quota = governor::Quota::per_second(service.get_config().tenant_rate_limit);
Self {
service,
auth,
rate_limiter: governor::RateLimiter::keyed(quota),
neon_metrics: NeonMetrics::new(build_info),
allowlist_routes: &[
"/status",
@@ -82,6 +86,40 @@ fn get_state(request: &Request<Body>) -> &HttpState {
.as_ref()
}
/// Rate limits tenant requests.
///
/// TODO: this should be a request middleware, but requires us to extract the tenant ID from
/// different URLs in a systematic way.
///
/// TODO: consider returning a 429 response if these start piling up.
async fn maybe_rate_limit(request: &Request<Body>, tenant_id: TenantId) {
// Check if the tenant should be rate-limited.
let rate_limiter = &get_state(request).rate_limiter;
if rate_limiter.check_key(&tenant_id).is_ok() {
return;
}
// Measure the rate limiting delay.
let _timer = METRICS_REGISTRY
.metrics_group
.storage_controller_http_request_rate_limited
.start_timer();
// Log rate limited tenants once every 10 seconds.
static LOG_RATE_LIMITER: LazyLock<governor::DefaultKeyedRateLimiter<TenantId>> =
LazyLock::new(|| {
let quota = governor::Quota::with_period(Duration::from_secs(10)).unwrap();
governor::RateLimiter::keyed(quota)
});
if LOG_RATE_LIMITER.check_key(&tenant_id).is_ok() {
warn!("tenant {tenant_id} is rate limited")
}
// Wait for quota.
rate_limiter.until_key_ready(&tenant_id).await;
}
/// Pageserver calls into this on startup, to learn which tenants it should attach
async fn handle_re_attach(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::GenerationsApi)?;
@@ -247,6 +285,7 @@ async fn handle_tenant_config_get(
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -264,6 +303,7 @@ async fn handle_tenant_time_travel_remote_storage(
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -311,6 +351,7 @@ async fn handle_tenant_secondary_download(
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);
maybe_rate_limit(&req, tenant_id).await;
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -329,6 +370,7 @@ async fn handle_tenant_delete(
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -356,6 +398,7 @@ async fn handle_tenant_timeline_create(
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -381,6 +424,7 @@ async fn handle_tenant_timeline_delete(
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -457,6 +501,7 @@ async fn handle_tenant_timeline_archival_config(
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -482,6 +527,7 @@ async fn handle_tenant_timeline_detach_ancestor(
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -504,6 +550,7 @@ async fn handle_tenant_timeline_block_unblock_gc(
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
@@ -521,6 +568,7 @@ async fn handle_tenant_timeline_download_heatmap_layers(
let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_shard_id.tenant_id).await;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
let concurrency: Option<usize> = parse_query_param(&req, "concurrency")?;
@@ -550,6 +598,7 @@ async fn handle_tenant_timeline_passthrough(
) -> Result<Response<Body>, ApiError> {
let tenant_or_shard_id: TenantShardId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_or_shard_id.tenant_id).await;
let req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -654,6 +703,7 @@ async fn handle_tenant_locate(
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::Admin)?;
// NB: don't rate limit: admin operation.
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -669,9 +719,9 @@ async fn handle_tenant_describe(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Scrubber)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::Scrubber)?;
// NB: don't rate limit: scrubber operation.
match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1006,6 +1056,7 @@ async fn handle_tenant_shard_split(
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
// NB: don't rate limit: admin operation.
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1028,6 +1079,7 @@ async fn handle_tenant_shard_migrate(
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
// NB: don't rate limit: admin operation.
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1051,6 +1103,7 @@ async fn handle_tenant_shard_migrate_secondary(
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
// NB: don't rate limit: admin operation.
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1074,6 +1127,7 @@ async fn handle_tenant_shard_cancel_reconcile(
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
// NB: don't rate limit: admin operation.
let req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1093,6 +1147,7 @@ async fn handle_tenant_shard_cancel_reconcile(
async fn handle_tenant_update_policy(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
// NB: don't rate limit: admin operation.
let mut req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1148,9 +1203,9 @@ async fn handle_step_down(req: Request<Body>) -> Result<Response<Body>, ApiError
}
async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::PageServerApi)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {
@@ -1165,9 +1220,9 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
}
async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::PageServerApi)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
maybe_rate_limit(&req, tenant_id).await;
let req = match maybe_forward(req).await {
ForwardOutcome::Forwarded(res) => {

View File

@@ -1,3 +1,4 @@
use std::num::NonZeroU32;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
@@ -98,6 +99,10 @@ struct Cli {
#[arg(long)]
priority_reconciler_concurrency: Option<usize>,
/// Tenant API rate limit, as requests per second per tenant.
#[arg(long, default_value = "10")]
tenant_rate_limit: NonZeroU32,
/// How long to wait for the initial database connection to be available.
#[arg(long, default_value = "5s")]
db_connect_timeout: humantime::Duration,
@@ -339,6 +344,7 @@ async fn async_main() -> anyhow::Result<()> {
priority_reconciler_concurrency: args
.priority_reconciler_concurrency
.unwrap_or(PRIORITY_RECONCILER_CONCURRENCY_DEFAULT),
tenant_rate_limit: args.tenant_rate_limit,
split_threshold: args.split_threshold,
neon_local_repo_dir: args.neon_local_repo_dir,
max_secondary_lag_bytes: args.max_secondary_lag_bytes,

View File

@@ -76,6 +76,10 @@ pub(crate) struct StorageControllerMetricGroup {
pub(crate) storage_controller_http_request_latency:
measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
/// HTTP rate limiting latency across all tenants and endpoints
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 10.0))]
pub(crate) storage_controller_http_request_rate_limited: measured::Histogram<10>,
/// Count of HTTP requests to the pageserver that resulted in an error,
/// broken down by the pageserver node id, request name and method
pub(crate) storage_controller_pageserver_request_error:

View File

@@ -5,6 +5,7 @@ use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::error::Error;
use std::num::NonZeroU32;
use std::ops::Deref;
use std::path::PathBuf;
use std::str::FromStr;
@@ -365,6 +366,10 @@ pub struct Config {
/// How many high-priority Reconcilers may be spawned concurrently
pub priority_reconciler_concurrency: usize,
/// How many API requests per second to allow per tenant, across all
/// tenant-scoped API endpoints. Further API requests queue until ready.
pub tenant_rate_limit: NonZeroU32,
/// How large must a shard grow in bytes before we split it?
/// None disables auto-splitting.
pub split_threshold: Option<u64>,

View File

@@ -124,6 +124,8 @@ DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS = [
# controller's attempts to notify the endpoint).
".*reconciler.*neon_local notification hook failed.*",
".*reconciler.*neon_local error.*",
# Tenant rate limits may fire in tests that submit lots of API requests.
".*tenant \\S+ is rate limited.*",
]