diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 4d580e57ed..9eb6546d6b 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -27,8 +27,8 @@ use utils::pid_file; use metrics::set_build_info_metric; use safekeeper::defaults::{ - DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, - DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, + DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT, + DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, }; use safekeeper::http; @@ -194,6 +194,12 @@ struct Args { /// Number of allowed concurrent uploads of partial segments to remote storage. #[arg(long, default_value = DEFAULT_PARTIAL_BACKUP_CONCURRENCY)] partial_backup_concurrency: usize, + /// How long a timeline must be resident before it is eligible for eviction. + /// Usually, timeline eviction has to wait for `partial_backup_timeout` before being eligible for eviction, + /// but if a timeline is un-evicted and then _not_ written to, it would immediately flap to evicting again, + /// if it weren't for `eviction_min_resident` preventing that. + #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_EVICTION_MIN_RESIDENT)] + eviction_min_resident: Duration, } // Like PathBufValueParser, but allows empty string. @@ -348,6 +354,7 @@ async fn main() -> anyhow::Result<()> { delete_offloaded_wal: args.delete_offloaded_wal, control_file_save_interval: args.control_file_save_interval, partial_backup_concurrency: args.partial_backup_concurrency, + eviction_min_resident: args.eviction_min_resident, }; // initialize sentry if SENTRY_DSN is provided diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index 5cd676d857..af83feb77f 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -53,6 +53,11 @@ pub mod defaults { pub const DEFAULT_PARTIAL_BACKUP_TIMEOUT: &str = "15m"; pub const DEFAULT_CONTROL_FILE_SAVE_INTERVAL: &str = "300s"; pub const DEFAULT_PARTIAL_BACKUP_CONCURRENCY: &str = "5"; + + // By default, our required residency before eviction is the same as the period that passes + // before uploading a partial segment, so that in normal operation the eviction can happen + // as soon as we have done the partial segment upload. + pub const DEFAULT_EVICTION_MIN_RESIDENT: &str = DEFAULT_PARTIAL_BACKUP_TIMEOUT; } #[derive(Debug, Clone)] @@ -93,6 +98,7 @@ pub struct SafeKeeperConf { pub delete_offloaded_wal: bool, pub control_file_save_interval: Duration, pub partial_backup_concurrency: usize, + pub eviction_min_resident: Duration, } impl SafeKeeperConf { @@ -136,6 +142,7 @@ impl SafeKeeperConf { delete_offloaded_wal: false, control_file_save_interval: Duration::from_secs(1), partial_backup_concurrency: 1, + eviction_min_resident: Duration::ZERO, } } } diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index b303d41b7b..e4ab65290d 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -5,6 +5,7 @@ use anyhow::Context; use camino::Utf8PathBuf; use remote_storage::RemotePath; +use std::time::Instant; use tokio::{ fs::File, io::{AsyncRead, AsyncWriteExt}, @@ -48,6 +49,7 @@ impl Manager { .flush_lsn .segment_number(self.wal_seg_size) == self.last_removed_segno + 1 + && self.resident_since.elapsed() >= self.conf.eviction_min_resident } /// Evict the timeline to remote storage. @@ -91,6 +93,8 @@ impl Manager { return; } + self.resident_since = Instant::now(); + info!("successfully restored evicted timeline"); } } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index 62142162de..debf8c824f 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -186,6 +186,10 @@ pub(crate) struct Manager { // misc pub(crate) access_service: AccessService, pub(crate) partial_backup_rate_limiter: RateLimiter, + + // Anti-flapping state: we evict timelines eagerly if they are inactive, but should not + // evict them if they go inactive very soon after being restored. + pub(crate) resident_since: std::time::Instant, } /// This task gets spawned alongside each timeline and is responsible for managing the timeline's @@ -350,6 +354,7 @@ impl Manager { access_service: AccessService::new(manager_tx), tli, partial_backup_rate_limiter, + resident_since: std::time::Instant::now(), } } diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 6bbf96d71d..0c6d97ddfa 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -188,6 +188,7 @@ pub fn run_server(os: NodeOs, disk: Arc) -> Result<()> { delete_offloaded_wal: false, control_file_save_interval: Duration::from_secs(1), partial_backup_concurrency: 1, + eviction_min_resident: Duration::ZERO, }; let mut global = GlobalMap::new(disk, conf.clone())?; diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index cae2e422c1..5ca31644a9 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -492,6 +492,7 @@ class NeonEnvBuilder: pageserver_virtual_file_io_engine: Optional[str] = None, pageserver_aux_file_policy: Optional[AuxFileStore] = None, pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]] = None, + safekeeper_extra_opts: Optional[list[str]] = None, ): self.repo_dir = repo_dir self.rust_log_override = rust_log_override @@ -557,6 +558,8 @@ class NeonEnvBuilder: self.pageserver_aux_file_policy = pageserver_aux_file_policy + self.safekeeper_extra_opts = safekeeper_extra_opts + assert test_name.startswith( "test_" ), "Unexpectedly instantiated from outside a test function" @@ -1193,7 +1196,9 @@ class NeonEnv: sk_cfg[ "remote_storage" ] = self.safekeepers_remote_storage.to_toml_inline_table().strip() - self.safekeepers.append(Safekeeper(env=self, id=id, port=port)) + self.safekeepers.append( + Safekeeper(env=self, id=id, port=port, extra_opts=config.safekeeper_extra_opts) + ) cfg["safekeepers"].append(sk_cfg) log.info(f"Config: {cfg}") @@ -4016,16 +4021,28 @@ class Safekeeper(LogUtils): id: int running: bool = False - def __init__(self, env: NeonEnv, port: SafekeeperPort, id: int, running: bool = False): + def __init__( + self, + env: NeonEnv, + port: SafekeeperPort, + id: int, + running: bool = False, + extra_opts: Optional[List[str]] = None, + ): self.env = env self.port = port self.id = id self.running = running self.logfile = Path(self.data_dir) / f"safekeeper-{id}.log" + self.extra_opts = extra_opts def start( self, extra_opts: Optional[List[str]] = None, timeout_in_seconds: Optional[int] = None ) -> "Safekeeper": + if extra_opts is None: + # Apply either the extra_opts passed in, or the ones from our constructor: we do not merge the two. + extra_opts = self.extra_opts + assert self.running is False self.env.neon_cli.safekeeper_start( self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index febfc10293..7efd86e349 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -2191,24 +2191,25 @@ def test_s3_eviction( ): neon_env_builder.num_safekeepers = 3 neon_env_builder.enable_safekeeper_remote_storage(RemoteStorageKind.LOCAL_FS) - env = neon_env_builder.init_start( - initial_tenant_conf={ - "checkpoint_timeout": "100ms", - } - ) - extra_opts = [ + neon_env_builder.safekeeper_extra_opts = [ "--enable-offload", "--partial-backup-timeout", "50ms", "--control-file-save-interval", "1s", + # Safekeepers usually wait a while before evicting something: for this test we want them to + # evict things as soon as they are inactive. + "--eviction-min-resident=100ms", ] if delete_offloaded_wal: - extra_opts.append("--delete-offloaded-wal") + neon_env_builder.safekeeper_extra_opts.append("--delete-offloaded-wal") - for sk in env.safekeepers: - sk.stop().start(extra_opts=extra_opts) + env = neon_env_builder.init_start( + initial_tenant_conf={ + "checkpoint_timeout": "100ms", + } + ) n_timelines = 5 @@ -2263,7 +2264,7 @@ def test_s3_eviction( # restarting random safekeepers for sk in env.safekeepers: if random.random() < restart_chance: - sk.stop().start(extra_opts=extra_opts) + sk.stop().start() time.sleep(0.5) # require at least one successful eviction in at least one safekeeper