From 547acde6cd3f1c062e1bbee775c6778d9e1a26d2 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 10 Jul 2024 19:38:14 +0100 Subject: [PATCH] safekeeper: add eviction_min_resident to stop evictions thrashing (#8335) ## Problem - The condition for eviction is not time-based: it is possible for a timeline to be restored in response to a client, that client times out, and then as soon as the timeline is restored it is immediately evicted again. - There is no delay on eviction at startup of the safekeeper, so when it starts up and sees many idle timelines, it does many evictions which will likely be immediately restored when someone uses the timeline. ## Summary of changes - Add `eviction_min_resident` parameter, and use it in `ready_for_eviction` to avoid evictions if the timeline has been resident for less than this period. - This also implicitly delays evictions at startup for `eviction_min_resident` - Set this to a very low number for the existing eviction test, which expects immediate eviction. The default period is 15 minutes. The general reasoning for that is that in the worst case where we thrash ~10k timelines on one safekeeper, downloading 16MB for each one, we should set a period that would not overwhelm the node's bandwidth. --- safekeeper/src/bin/safekeeper.rs | 11 ++++++++-- safekeeper/src/lib.rs | 7 +++++++ safekeeper/src/timeline_eviction.rs | 4 ++++ safekeeper/src/timeline_manager.rs | 5 +++++ .../tests/walproposer_sim/safekeeper.rs | 1 + test_runner/fixtures/neon_fixtures.py | 21 +++++++++++++++++-- test_runner/regress/test_wal_acceptor.py | 21 ++++++++++--------- 7 files changed, 56 insertions(+), 14 deletions(-) diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 4d580e57ed..9eb6546d6b 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -27,8 +27,8 @@ use utils::pid_file; use metrics::set_build_info_metric; use safekeeper::defaults::{ - DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, - DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, + DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT, + DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, }; use safekeeper::http; @@ -194,6 +194,12 @@ struct Args { /// Number of allowed concurrent uploads of partial segments to remote storage. #[arg(long, default_value = DEFAULT_PARTIAL_BACKUP_CONCURRENCY)] partial_backup_concurrency: usize, + /// How long a timeline must be resident before it is eligible for eviction. + /// Usually, timeline eviction has to wait for `partial_backup_timeout` before being eligible for eviction, + /// but if a timeline is un-evicted and then _not_ written to, it would immediately flap to evicting again, + /// if it weren't for `eviction_min_resident` preventing that. + #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_EVICTION_MIN_RESIDENT)] + eviction_min_resident: Duration, } // Like PathBufValueParser, but allows empty string. @@ -348,6 +354,7 @@ async fn main() -> anyhow::Result<()> { delete_offloaded_wal: args.delete_offloaded_wal, control_file_save_interval: args.control_file_save_interval, partial_backup_concurrency: args.partial_backup_concurrency, + eviction_min_resident: args.eviction_min_resident, }; // initialize sentry if SENTRY_DSN is provided diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index 5cd676d857..af83feb77f 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -53,6 +53,11 @@ pub mod defaults { pub const DEFAULT_PARTIAL_BACKUP_TIMEOUT: &str = "15m"; pub const DEFAULT_CONTROL_FILE_SAVE_INTERVAL: &str = "300s"; pub const DEFAULT_PARTIAL_BACKUP_CONCURRENCY: &str = "5"; + + // By default, our required residency before eviction is the same as the period that passes + // before uploading a partial segment, so that in normal operation the eviction can happen + // as soon as we have done the partial segment upload. + pub const DEFAULT_EVICTION_MIN_RESIDENT: &str = DEFAULT_PARTIAL_BACKUP_TIMEOUT; } #[derive(Debug, Clone)] @@ -93,6 +98,7 @@ pub struct SafeKeeperConf { pub delete_offloaded_wal: bool, pub control_file_save_interval: Duration, pub partial_backup_concurrency: usize, + pub eviction_min_resident: Duration, } impl SafeKeeperConf { @@ -136,6 +142,7 @@ impl SafeKeeperConf { delete_offloaded_wal: false, control_file_save_interval: Duration::from_secs(1), partial_backup_concurrency: 1, + eviction_min_resident: Duration::ZERO, } } } diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index b303d41b7b..e4ab65290d 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -5,6 +5,7 @@ use anyhow::Context; use camino::Utf8PathBuf; use remote_storage::RemotePath; +use std::time::Instant; use tokio::{ fs::File, io::{AsyncRead, AsyncWriteExt}, @@ -48,6 +49,7 @@ impl Manager { .flush_lsn .segment_number(self.wal_seg_size) == self.last_removed_segno + 1 + && self.resident_since.elapsed() >= self.conf.eviction_min_resident } /// Evict the timeline to remote storage. @@ -91,6 +93,8 @@ impl Manager { return; } + self.resident_since = Instant::now(); + info!("successfully restored evicted timeline"); } } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index 62142162de..debf8c824f 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -186,6 +186,10 @@ pub(crate) struct Manager { // misc pub(crate) access_service: AccessService, pub(crate) partial_backup_rate_limiter: RateLimiter, + + // Anti-flapping state: we evict timelines eagerly if they are inactive, but should not + // evict them if they go inactive very soon after being restored. + pub(crate) resident_since: std::time::Instant, } /// This task gets spawned alongside each timeline and is responsible for managing the timeline's @@ -350,6 +354,7 @@ impl Manager { access_service: AccessService::new(manager_tx), tli, partial_backup_rate_limiter, + resident_since: std::time::Instant::now(), } } diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 6bbf96d71d..0c6d97ddfa 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -188,6 +188,7 @@ pub fn run_server(os: NodeOs, disk: Arc) -> Result<()> { delete_offloaded_wal: false, control_file_save_interval: Duration::from_secs(1), partial_backup_concurrency: 1, + eviction_min_resident: Duration::ZERO, }; let mut global = GlobalMap::new(disk, conf.clone())?; diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index cae2e422c1..5ca31644a9 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -492,6 +492,7 @@ class NeonEnvBuilder: pageserver_virtual_file_io_engine: Optional[str] = None, pageserver_aux_file_policy: Optional[AuxFileStore] = None, pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]] = None, + safekeeper_extra_opts: Optional[list[str]] = None, ): self.repo_dir = repo_dir self.rust_log_override = rust_log_override @@ -557,6 +558,8 @@ class NeonEnvBuilder: self.pageserver_aux_file_policy = pageserver_aux_file_policy + self.safekeeper_extra_opts = safekeeper_extra_opts + assert test_name.startswith( "test_" ), "Unexpectedly instantiated from outside a test function" @@ -1193,7 +1196,9 @@ class NeonEnv: sk_cfg[ "remote_storage" ] = self.safekeepers_remote_storage.to_toml_inline_table().strip() - self.safekeepers.append(Safekeeper(env=self, id=id, port=port)) + self.safekeepers.append( + Safekeeper(env=self, id=id, port=port, extra_opts=config.safekeeper_extra_opts) + ) cfg["safekeepers"].append(sk_cfg) log.info(f"Config: {cfg}") @@ -4016,16 +4021,28 @@ class Safekeeper(LogUtils): id: int running: bool = False - def __init__(self, env: NeonEnv, port: SafekeeperPort, id: int, running: bool = False): + def __init__( + self, + env: NeonEnv, + port: SafekeeperPort, + id: int, + running: bool = False, + extra_opts: Optional[List[str]] = None, + ): self.env = env self.port = port self.id = id self.running = running self.logfile = Path(self.data_dir) / f"safekeeper-{id}.log" + self.extra_opts = extra_opts def start( self, extra_opts: Optional[List[str]] = None, timeout_in_seconds: Optional[int] = None ) -> "Safekeeper": + if extra_opts is None: + # Apply either the extra_opts passed in, or the ones from our constructor: we do not merge the two. + extra_opts = self.extra_opts + assert self.running is False self.env.neon_cli.safekeeper_start( self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index febfc10293..7efd86e349 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -2191,24 +2191,25 @@ def test_s3_eviction( ): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_safekeeper_remote_storage(RemoteStorageKind.LOCAL_FS) - env = neon_env_builder.init_start( - initial_tenant_conf={ - "checkpoint_timeout": "100ms", - } - ) - extra_opts = [ + neon_env_builder.safekeeper_extra_opts = [ "--enable-offload", "--partial-backup-timeout", "50ms", "--control-file-save-interval", "1s", + # Safekeepers usually wait a while before evicting something: for this test we want them to + # evict things as soon as they are inactive. + "--eviction-min-resident=100ms", ] if delete_offloaded_wal: - extra_opts.append("--delete-offloaded-wal") + neon_env_builder.safekeeper_extra_opts.append("--delete-offloaded-wal") - for sk in env.safekeepers: - sk.stop().start(extra_opts=extra_opts) + env = neon_env_builder.init_start( + initial_tenant_conf={ + "checkpoint_timeout": "100ms", + } + ) n_timelines = 5 @@ -2263,7 +2264,7 @@ def test_s3_eviction( # restarting random safekeepers for sk in env.safekeepers: if random.random() < restart_chance: - sk.stop().start(extra_opts=extra_opts) + sk.stop().start() time.sleep(0.5) # require at least one successful eviction in at least one safekeeper