mirror of https://github.com/neondatabase/neon.git (synced 2026-01-07 21:42:56 +00:00)
storage controller: test for large shard counts (#7475)
## Problem

Storage controller was observed to have unexpectedly large memory consumption when loaded with many thousands of shards. This was recently fixed:
- https://github.com/neondatabase/neon/pull/7493

...but we need a general test that the controller is well behaved with thousands of shards.

Closes: https://github.com/neondatabase/neon/issues/7460
Closes: https://github.com/neondatabase/neon/issues/7463

## Summary of changes

- Add test `test_storage_controller_many_tenants` to exercise the system's behaviour with a more substantial workload. This test measures memory consumption and reproduces #7460 before the other changes in this PR.
- Tweak `reconcile_all`'s return value to make it nonzero if it spawns no reconcilers, but _would_ have spawned some reconcilers if they weren't blocked by the reconcile concurrency limit. This makes the test's `reconcile_until_idle` behave as expected (i.e. not complete until the system is nice and calm); a sketch of that polling pattern follows below.
- Fix an issue where tenant migrations would leave a spurious secondary location when migrated to some location that was not already their secondary (this was an existing low-impact bug that tripped up the test's consistency checks).

On the test with 8000 shards, the resident memory per shard is about 20KiB. This is not really per-shard memory: the primary source of memory growth is the number of concurrent network/db clients we create.

With 8000 shards, the test takes 125s to run on my workstation.
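To make the `reconcile_all` change concrete, here is a minimal Rust sketch of the polling pattern the new return value supports. Only `reconcile_all` and its zero-means-idle contract come from this PR; the real `reconcile_until_idle` helper lives in the test suite, and the trait, signature, timeout handling and sleep interval below are illustrative assumptions.

```rust
use std::time::{Duration, Instant};

/// Illustrative stand-in for the storage controller service: only the part of
/// the interface exercised here is modelled.
trait ReconcileAll {
    /// Contract from this PR: 0 means everything is fully reconciled; nonzero
    /// means reconcilers were spawned, or would have been spawned if
    /// `reconciler_concurrency` permits had been available.
    fn reconcile_all(&self) -> usize;
}

/// Poll until the controller reports a quiescent state or the timeout expires.
fn reconcile_until_idle<S: ReconcileAll>(service: &S, timeout: Duration) -> Result<(), String> {
    let deadline = Instant::now() + timeout;
    while service.reconcile_all() != 0 {
        if Instant::now() > deadline {
            return Err("controller did not become idle within the timeout".into());
        }
        std::thread::sleep(Duration::from_millis(100));
    }
    Ok(())
}
```

Before this PR, a shard whose reconcile was merely deferred by the concurrency limit would let a loop like this exit early; that is the gap the return-value tweak closes.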
```diff
@@ -90,7 +90,11 @@ const INITIAL_GENERATION: Generation = Generation::new(0);
 /// up on unresponsive pageservers and proceed.
 pub(crate) const STARTUP_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30);
 
-pub const MAX_UNAVAILABLE_INTERVAL_DEFAULT: Duration = Duration::from_secs(30);
+/// How long a node may be unresponsive to heartbeats before we declare it offline.
+/// This must be long enough to cover node restarts as well as normal operations: in future
+/// it should be separated into distinct timeouts for startup vs. normal operation
+/// (`<https://github.com/neondatabase/neon/issues/7552>`)
+pub const MAX_UNAVAILABLE_INTERVAL_DEFAULT: Duration = Duration::from_secs(300);
 
 pub const RECONCILER_CONCURRENCY_DEFAULT: usize = 128;
 
@@ -4251,7 +4255,9 @@ impl Service {
     /// Check all tenants for pending reconciliation work, and reconcile those in need.
     /// Additionally, reschedule tenants that require it.
     ///
-    /// Returns how many reconciliation tasks were started
+    /// Returns how many reconciliation tasks were started, or `1` if no reconciles were
+    /// spawned but some _would_ have been spawned if `reconciler_concurrency` units where
+    /// available. A return value of 0 indicates that everything is fully reconciled already.
     fn reconcile_all(&self) -> usize {
         let mut locked = self.inner.write().unwrap();
         let (nodes, tenants, _scheduler) = locked.parts_mut();
@@ -4266,7 +4272,11 @@ impl Service {
            }
 
            // Skip checking if this shard is already enqueued for reconciliation
-           if shard.delayed_reconcile {
+           if shard.delayed_reconcile && self.reconciler_concurrency.available_permits() == 0 {
+               // If there is something delayed, then return a nonzero count so that
+               // callers like reconcile_all_now do not incorrectly get the impression
+               // that the system is in a quiescent state.
+               reconciles_spawned = std::cmp::max(1, reconciles_spawned);
                continue;
            }
 
@@ -4451,7 +4461,7 @@ impl Service {
            waiter_count
        );
 
-        Ok(waiter_count)
+        Ok(std::cmp::max(waiter_count, reconciles_spawned))
     }
 
     pub async fn shutdown(&self) {
@@ -952,8 +952,8 @@ impl TenantShard {
 
     /// Create a waiter that will wait for some future Reconciler that hasn't been spawned yet.
     ///
-    /// This is appropriate when you can't spawn a recociler (e.g. due to resource limits), but
-    /// you would like to wait until one gets spawned in the background.
+    /// This is appropriate when you can't spawn a reconciler (e.g. due to resource limits), but
+    /// you would like to wait on the next reconciler that gets spawned in the background.
     pub(crate) fn future_reconcile_waiter(&mut self) -> ReconcilerWaiter {
         self.ensure_sequence_ahead();
 
```
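The `available_permits() == 0` check in the diff matches the API of `tokio::sync::Semaphore`. Assuming that (or something equivalent) is the type behind `reconciler_concurrency`, the sketch below shows the pattern in isolation: work that is skipped purely because no permits are free still produces a nonzero return value. The `Controller` type, its constructor and the `shards_needing_reconcile` parameter are hypothetical.

```rust
use tokio::sync::Semaphore;

/// Hypothetical stand-in for the service; only the concurrency gate is modelled.
struct Controller {
    reconciler_concurrency: Semaphore,
}

impl Controller {
    fn new(limit: usize) -> Self {
        Self {
            reconciler_concurrency: Semaphore::new(limit),
        }
    }

    /// Returns how many reconcilers were spawned, or at least 1 if some work
    /// was skipped purely because no concurrency permits were available.
    fn reconcile_all(&self, shards_needing_reconcile: usize) -> usize {
        let mut reconciles_spawned = 0;
        for _ in 0..shards_needing_reconcile {
            if self.reconciler_concurrency.available_permits() == 0 {
                // Pending work is blocked on the concurrency limit: report a
                // nonzero count so callers do not conclude the system is idle.
                reconciles_spawned = std::cmp::max(1, reconciles_spawned);
                continue;
            }
            // In the real service a reconciler task would be spawned here,
            // holding a permit for its lifetime; this sketch omits that part.
            reconciles_spawned += 1;
        }
        reconciles_spawned
    }
}
```

Together with `Ok(std::cmp::max(waiter_count, reconciles_spawned))` on the caller side, this is what keeps `reconcile_until_idle` looping until the backlog has genuinely drained.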