pageserver: Introduce config to enable/disable eviction task (#12496)

## Problem
We lost capability to explicitly disable the global eviction task (for
testing).

## Summary of changes
Add an `enabled` flag to `DiskUsageEvictionTaskConfig` to indicate
whether we should run the eviction job or not.
This commit is contained in:
Trung Dinh
2025-07-08 14:14:04 -07:00
committed by GitHub
parent 09ff22a4d4
commit 4dee2bfd82
5 changed files with 103 additions and 32 deletions

View File

@@ -5,6 +5,7 @@ mod tests;
use const_format::formatcp;
use posthog_client_lite::PostHogClientConfig;
use utils::serde_percent::Percent;
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
@@ -223,7 +224,7 @@ pub struct ConfigToml {
pub metric_collection_bucket: Option<RemoteStorageConfig>,
#[serde(with = "humantime_serde")]
pub synthetic_size_calculation_interval: Duration,
pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
pub disk_usage_based_eviction: DiskUsageEvictionTaskConfig,
pub test_remote_failures: u64,
pub ondemand_download_behavior_treat_error_as_warn: bool,
#[serde(with = "humantime_serde")]
@@ -273,6 +274,7 @@ pub struct ConfigToml {
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(default)]
pub struct DiskUsageEvictionTaskConfig {
pub max_usage_pct: utils::serde_percent::Percent,
pub min_avail_bytes: u64,
@@ -283,6 +285,21 @@ pub struct DiskUsageEvictionTaskConfig {
/// Select sorting for evicted layers
#[serde(default)]
pub eviction_order: EvictionOrder,
pub enabled: bool,
}
impl Default for DiskUsageEvictionTaskConfig {
fn default() -> Self {
Self {
max_usage_pct: Percent::new(80).unwrap(),
min_avail_bytes: 2_000_000_000,
period: Duration::from_secs(60),
#[cfg(feature = "testing")]
mock_statvfs: None,
eviction_order: EvictionOrder::default(),
enabled: true,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -738,7 +755,7 @@ impl Default for ConfigToml {
metric_collection_bucket: (None),
disk_usage_based_eviction: (None),
disk_usage_based_eviction: DiskUsageEvictionTaskConfig::default(),
test_remote_failures: (0),

View File

@@ -28,7 +28,6 @@ use reqwest::Url;
use storage_broker::Uri;
use utils::id::{NodeId, TimelineId};
use utils::logging::{LogFormat, SecretString};
use utils::serde_percent::Percent;
use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
@@ -146,7 +145,7 @@ pub struct PageServerConf {
pub metric_collection_bucket: Option<RemoteStorageConfig>,
pub synthetic_size_calculation_interval: Duration,
pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
pub disk_usage_based_eviction: DiskUsageEvictionTaskConfig,
pub test_remote_failures: u64,
@@ -460,16 +459,7 @@ impl PageServerConf {
metric_collection_endpoint,
metric_collection_bucket,
synthetic_size_calculation_interval,
disk_usage_based_eviction: Some(disk_usage_based_eviction.unwrap_or(
DiskUsageEvictionTaskConfig {
max_usage_pct: Percent::new(80).unwrap(),
min_avail_bytes: 2_000_000_000,
period: Duration::from_secs(60),
#[cfg(feature = "testing")]
mock_statvfs: None,
eviction_order: Default::default(),
},
)),
disk_usage_based_eviction,
test_remote_failures,
ondemand_download_behavior_treat_error_as_warn,
background_task_maximum_delay,
@@ -719,8 +709,9 @@ mod tests {
use std::time::Duration;
use camino::Utf8PathBuf;
use pageserver_api::config::{DiskUsageEvictionTaskConfig, EvictionOrder};
use rstest::rstest;
use utils::id::NodeId;
use utils::{id::NodeId, serde_percent::Percent};
use super::PageServerConf;
@@ -820,19 +811,69 @@ mod tests {
.expect("parse_and_validate");
}
#[test]
fn test_config_disk_usage_based_eviction_is_valid() {
let input = r#"
#[rstest]
#[
case::omit_the_whole_config(
DiskUsageEvictionTaskConfig {
max_usage_pct: Percent::new(80).unwrap(),
min_avail_bytes: 2_000_000_000,
period: Duration::from_secs(60),
eviction_order: Default::default(),
#[cfg(feature = "testing")]
mock_statvfs: None,
enabled: true,
},
r#"
control_plane_api = "http://localhost:6666"
"#;
"#,
)]
#[
case::omit_enabled_field(
DiskUsageEvictionTaskConfig {
max_usage_pct: Percent::new(80).unwrap(),
min_avail_bytes: 1_000_000_000,
period: Duration::from_secs(60),
eviction_order: EvictionOrder::RelativeAccessed {
highest_layer_count_loses_first: true,
},
#[cfg(feature = "testing")]
mock_statvfs: None,
enabled: true,
},
r#"
control_plane_api = "http://localhost:6666"
disk_usage_based_eviction = { max_usage_pct = 80, min_avail_bytes = 1000000000, period = "60s" }
"#,
)]
#[case::disabled(
DiskUsageEvictionTaskConfig {
max_usage_pct: Percent::new(80).unwrap(),
min_avail_bytes: 2_000_000_000,
period: Duration::from_secs(60),
eviction_order: EvictionOrder::RelativeAccessed {
highest_layer_count_loses_first: true,
},
#[cfg(feature = "testing")]
mock_statvfs: None,
enabled: false,
},
r#"
control_plane_api = "http://localhost:6666"
disk_usage_based_eviction = { enabled = false }
"#
)]
fn test_config_disk_usage_based_eviction_is_valid(
#[case] expected_disk_usage_based_eviction: DiskUsageEvictionTaskConfig,
#[case] input: &str,
) {
let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
.expect("disk_usage_based_eviction is valid");
let workdir = Utf8PathBuf::from("/nonexistent");
let config = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir).unwrap();
let disk_usage_based_eviction = config.disk_usage_based_eviction.unwrap();
assert_eq!(disk_usage_based_eviction.max_usage_pct.get(), 80);
assert_eq!(disk_usage_based_eviction.min_avail_bytes, 2_000_000_000);
assert_eq!(disk_usage_based_eviction.period, Duration::from_secs(60));
assert_eq!(disk_usage_based_eviction.eviction_order, Default::default());
let disk_usage_based_eviction = config.disk_usage_based_eviction;
assert_eq!(
expected_disk_usage_based_eviction,
disk_usage_based_eviction
);
}
}

View File

@@ -171,7 +171,8 @@ pub fn launch_disk_usage_global_eviction_task(
tenant_manager: Arc<TenantManager>,
background_jobs_barrier: completion::Barrier,
) -> Option<DiskUsageEvictionTask> {
let Some(task_config) = &conf.disk_usage_based_eviction else {
let task_config = &conf.disk_usage_based_eviction;
if !task_config.enabled {
info!("disk usage based eviction task not configured");
return None;
};
@@ -1268,6 +1269,7 @@ mod filesystem_level_usage {
#[cfg(feature = "testing")]
mock_statvfs: None,
eviction_order: pageserver_api::config::EvictionOrder::default(),
enabled: true,
},
total_bytes: 100_000,
avail_bytes: 0,

View File

@@ -45,9 +45,10 @@ pub(crate) fn regenerate(
let (disk_wanted_bytes, shard_count) = tenant_manager.calculate_utilization()?;
// Fetch the fraction of disk space which may be used
let disk_usable_pct = match conf.disk_usage_based_eviction.clone() {
Some(e) => e.max_usage_pct,
None => Percent::new(100).unwrap(),
let disk_usable_pct = if conf.disk_usage_based_eviction.enabled {
conf.disk_usage_based_eviction.max_usage_pct
} else {
Percent::new(100).unwrap()
};
// Express a static value for how many shards we may schedule on one node

View File

@@ -71,7 +71,13 @@ def test_pageserver_characterize_latencies_with_1_client_and_throughput_with_man
n_clients: int,
):
setup_and_run_pagebench_benchmark(
neon_env_builder, zenbenchmark, pg_bin, n_tenants, pgbench_scale, duration, n_clients
neon_env_builder,
zenbenchmark,
pg_bin,
n_tenants,
pgbench_scale,
duration,
n_clients,
)
@@ -86,7 +92,8 @@ def setup_and_run_pagebench_benchmark(
):
def record(metric, **kwargs):
zenbenchmark.record(
metric_name=f"pageserver_max_throughput_getpage_at_latest_lsn.{metric}", **kwargs
metric_name=f"pageserver_max_throughput_getpage_at_latest_lsn.{metric}",
**kwargs,
)
params: dict[str, tuple[Any, dict[str, Any]]] = {}
@@ -104,7 +111,7 @@ def setup_and_run_pagebench_benchmark(
# configure cache sizes like in prod
page_cache_size = 16384
max_file_descriptors = 500000
neon_env_builder.pageserver_config_override = f"page_cache_size={page_cache_size}; max_file_descriptors={max_file_descriptors}; disk_usage_based_eviction={{max_usage_pct=99, min_avail_bytes=0, period = '999y'}}"
neon_env_builder.pageserver_config_override = f"page_cache_size={page_cache_size}; max_file_descriptors={max_file_descriptors}; disk_usage_based_eviction={{enabled = false}}"
tracing_config = PageserverTracingConfig(
sampling_ratio=(0, 1000),
@@ -120,7 +127,10 @@ def setup_and_run_pagebench_benchmark(
page_cache_size * 8192,
{"unit": "byte"},
),
"pageserver_config_override.max_file_descriptors": (max_file_descriptors, {"unit": ""}),
"pageserver_config_override.max_file_descriptors": (
max_file_descriptors,
{"unit": ""},
),
"pageserver_config_override.sampling_ratio": (ratio, {"unit": ""}),
}
)