mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-21 07:00:38 +00:00
## Problem When tenants have a lot of timelines, the number of tenants that a pageserver can comfortably handle goes down. Branching is much more widely used in practice now than it was when this code was written, and we generally run pageservers with a few thousand tenants (where each tenant has many timelines), rather than the 10k-20k we might have done historically. This should really be something configurable, or a more direct proxy for resource utilization (such as non-archived timeline count), but this change should be a low effort improvement. ## Summary of changes * Change the target shard count (MAX_SHARDS) to 2500 from 5000 when calculating pageserver utilization (i.e. a 200% overcommit now corresponds to 5000 shards, not 10000 shards) Co-authored-by: John Spray <john.spray@databricks.com>
74 lines
2.3 KiB
Rust
74 lines
2.3 KiB
Rust
//! A utilization metric which is used to decide which pageserver to put the next tenant on.
|
|
//!
|
|
//! The metric is exposed via `GET /v1/utilization`. Refer to and maintain its openapi spec as the
|
|
//! truth.
|
|
|
|
use std::path::Path;
|
|
|
|
use anyhow::Context;
|
|
use pageserver_api::models::PageserverUtilization;
|
|
use utils::serde_percent::Percent;
|
|
|
|
use crate::config::PageServerConf;
|
|
use crate::metrics::NODE_UTILIZATION_SCORE;
|
|
use crate::tenant::mgr::TenantManager;
|
|
|
|
pub(crate) fn regenerate(
|
|
conf: &PageServerConf,
|
|
tenants_path: &Path,
|
|
tenant_manager: &TenantManager,
|
|
) -> anyhow::Result<PageserverUtilization> {
|
|
let statvfs = nix::sys::statvfs::statvfs(tenants_path)
|
|
.map_err(std::io::Error::from)
|
|
.context("statvfs tenants directory")?;
|
|
|
|
// https://unix.stackexchange.com/a/703650
|
|
let blocksz = if statvfs.fragment_size() > 0 {
|
|
statvfs.fragment_size()
|
|
} else {
|
|
statvfs.block_size()
|
|
};
|
|
|
|
#[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))]
|
|
let free = statvfs.blocks_available() as u64 * blocksz;
|
|
|
|
#[cfg_attr(not(target_os = "macos"), allow(clippy::unnecessary_cast))]
|
|
let used = statvfs
|
|
.blocks()
|
|
// use blocks_free instead of available here to match df in case someone compares
|
|
.saturating_sub(statvfs.blocks_free()) as u64
|
|
* blocksz;
|
|
|
|
let captured_at = std::time::SystemTime::now();
|
|
|
|
// Calculate aggregate utilization from tenants on this pageserver
|
|
let (disk_wanted_bytes, shard_count) = tenant_manager.calculate_utilization()?;
|
|
|
|
// Fetch the fraction of disk space which may be used
|
|
let disk_usable_pct = if conf.disk_usage_based_eviction.enabled {
|
|
conf.disk_usage_based_eviction.max_usage_pct
|
|
} else {
|
|
Percent::new(100).unwrap()
|
|
};
|
|
|
|
// Express a static value for how many shards we may schedule on one node
|
|
const MAX_SHARDS: u32 = 2500;
|
|
|
|
let mut doc = PageserverUtilization {
|
|
disk_usage_bytes: used,
|
|
free_space_bytes: free,
|
|
disk_wanted_bytes,
|
|
disk_usable_pct,
|
|
shard_count,
|
|
max_shard_count: MAX_SHARDS,
|
|
utilization_score: None,
|
|
captured_at: utils::serde_system_time::SystemTime(captured_at),
|
|
};
|
|
|
|
// Initialize `PageserverUtilization::utilization_score`
|
|
let score = doc.cached_score();
|
|
NODE_UTILIZATION_SCORE.set(score);
|
|
|
|
Ok(doc)
|
|
}
|