Files
neon/pageserver/src/utilization.rs
John Spray 60feb168e2 pageserver: decrease MAX_SHARDS in utilization (#12668)
## Problem

When tenants have a lot of timelines, the number of tenants that a
pageserver can comfortably handle goes down. Branching is much more
widely used in practice now than it was when this code was written, and
we generally run pageservers with a few thousand tenants (where each
tenant has many timelines), rather than the 10k-20k we might have done
historically.

This should really be something configurable, or a more direct proxy for
resource utilization (such as non-archived timeline count), but this
change should be a low effort improvement.

## Summary of changes

* Change the target shard count (MAX_SHARDS) to 2500 from 5000 when
calculating pageserver utilization (i.e. a 200% overcommit now
corresponds to 5000 shards, not 10000 shards)

Co-authored-by: John Spray <john.spray@databricks.com>
2025-07-28 13:50:18 +00:00

74 lines
2.3 KiB
Rust

//! An utilization metric which is used to decide on which pageserver to put next tenant.
//!
//! The metric is exposed via `GET /v1/utilization`. Refer and maintain its openapi spec as the
//! truth.
use std::path::Path;
use anyhow::Context;
use pageserver_api::models::PageserverUtilization;
use utils::serde_percent::Percent;
use crate::config::PageServerConf;
use crate::metrics::NODE_UTILIZATION_SCORE;
use crate::tenant::mgr::TenantManager;
pub(crate) fn regenerate(
conf: &PageServerConf,
tenants_path: &Path,
tenant_manager: &TenantManager,
) -> anyhow::Result<PageserverUtilization> {
let statvfs = nix::sys::statvfs::statvfs(tenants_path)
.map_err(std::io::Error::from)
.context("statvfs tenants directory")?;
// https://unix.stackexchange.com/a/703650
let blocksz = if statvfs.fragment_size() > 0 {
statvfs.fragment_size()
} else {
statvfs.block_size()
};
#[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))]
let free = statvfs.blocks_available() as u64 * blocksz;
#[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))]
let used = statvfs
.blocks()
// use blocks_free instead of available here to match df in case someone compares
.saturating_sub(statvfs.blocks_free()) as u64
* blocksz;
let captured_at = std::time::SystemTime::now();
// Calculate aggregate utilization from tenants on this pageserver
let (disk_wanted_bytes, shard_count) = tenant_manager.calculate_utilization()?;
// Fetch the fraction of disk space which may be used
let disk_usable_pct = if conf.disk_usage_based_eviction.enabled {
conf.disk_usage_based_eviction.max_usage_pct
} else {
Percent::new(100).unwrap()
};
// Express a static value for how many shards we may schedule on one node
const MAX_SHARDS: u32 = 2500;
let mut doc = PageserverUtilization {
disk_usage_bytes: used,
free_space_bytes: free,
disk_wanted_bytes,
disk_usable_pct,
shard_count,
max_shard_count: MAX_SHARDS,
utilization_score: None,
captured_at: utils::serde_system_time::SystemTime(captured_at),
};
// Initialize `PageserverUtilization::utilization_score`
let score = doc.cached_score();
NODE_UTILIZATION_SCORE.set(score);
Ok(doc)
}