mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-03 13:30:38 +00:00
safekeeper: add eviction_min_resident to stop evictions thrashing (#8335)
## Problem - The condition for eviction is not time-based: it is possible for a timeline to be restored in response to a client, that client times out, and then as soon as the timeline is restored it is immediately evicted again. - There is no delay on eviction at startup of the safekeeper, so when it starts up and sees many idle timelines, it does many evictions which will likely be immediately restored when someone uses the timeline. ## Summary of changes - Add `eviction_min_resident` parameter, and use it in `ready_for_eviction` to avoid evictions if the timeline has been resident for less than this period. - This also implicitly delays evictions at startup for `eviction_min_resident` - Set this to a very low number for the existing eviction test, which expects immediate eviction. The default period is 15 minutes. The general reasoning for that is that in the worst case where we thrash ~10k timelines on one safekeeper, downloading 16MB for each one, we should set a period that would not overwhelm the node's bandwidth.
This commit is contained in:
@@ -27,8 +27,8 @@ use utils::pid_file;
|
||||
|
||||
use metrics::set_build_info_metric;
|
||||
use safekeeper::defaults::{
|
||||
DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR,
|
||||
DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY,
|
||||
DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT,
|
||||
DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY,
|
||||
DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR,
|
||||
};
|
||||
use safekeeper::http;
|
||||
@@ -194,6 +194,12 @@ struct Args {
|
||||
/// Number of allowed concurrent uploads of partial segments to remote storage.
|
||||
#[arg(long, default_value = DEFAULT_PARTIAL_BACKUP_CONCURRENCY)]
|
||||
partial_backup_concurrency: usize,
|
||||
/// How long a timeline must be resident before it is eligible for eviction.
|
||||
/// Usually, timeline eviction has to wait for `partial_backup_timeout` before being eligible for eviction,
|
||||
/// but if a timeline is un-evicted and then _not_ written to, it would immediately flap to evicting again,
|
||||
/// if it weren't for `eviction_min_resident` preventing that.
|
||||
#[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_EVICTION_MIN_RESIDENT)]
|
||||
eviction_min_resident: Duration,
|
||||
}
|
||||
|
||||
// Like PathBufValueParser, but allows empty string.
|
||||
@@ -348,6 +354,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
delete_offloaded_wal: args.delete_offloaded_wal,
|
||||
control_file_save_interval: args.control_file_save_interval,
|
||||
partial_backup_concurrency: args.partial_backup_concurrency,
|
||||
eviction_min_resident: args.eviction_min_resident,
|
||||
};
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
|
||||
@@ -53,6 +53,11 @@ pub mod defaults {
|
||||
pub const DEFAULT_PARTIAL_BACKUP_TIMEOUT: &str = "15m";
|
||||
pub const DEFAULT_CONTROL_FILE_SAVE_INTERVAL: &str = "300s";
|
||||
pub const DEFAULT_PARTIAL_BACKUP_CONCURRENCY: &str = "5";
|
||||
|
||||
// By default, our required residency before eviction is the same as the period that passes
|
||||
// before uploading a partial segment, so that in normal operation the eviction can happen
|
||||
// as soon as we have done the partial segment upload.
|
||||
pub const DEFAULT_EVICTION_MIN_RESIDENT: &str = DEFAULT_PARTIAL_BACKUP_TIMEOUT;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -93,6 +98,7 @@ pub struct SafeKeeperConf {
|
||||
pub delete_offloaded_wal: bool,
|
||||
pub control_file_save_interval: Duration,
|
||||
pub partial_backup_concurrency: usize,
|
||||
pub eviction_min_resident: Duration,
|
||||
}
|
||||
|
||||
impl SafeKeeperConf {
|
||||
@@ -136,6 +142,7 @@ impl SafeKeeperConf {
|
||||
delete_offloaded_wal: false,
|
||||
control_file_save_interval: Duration::from_secs(1),
|
||||
partial_backup_concurrency: 1,
|
||||
eviction_min_resident: Duration::ZERO,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use remote_storage::RemotePath;
|
||||
use std::time::Instant;
|
||||
use tokio::{
|
||||
fs::File,
|
||||
io::{AsyncRead, AsyncWriteExt},
|
||||
@@ -48,6 +49,7 @@ impl Manager {
|
||||
.flush_lsn
|
||||
.segment_number(self.wal_seg_size)
|
||||
== self.last_removed_segno + 1
|
||||
&& self.resident_since.elapsed() >= self.conf.eviction_min_resident
|
||||
}
|
||||
|
||||
/// Evict the timeline to remote storage.
|
||||
@@ -91,6 +93,8 @@ impl Manager {
|
||||
return;
|
||||
}
|
||||
|
||||
self.resident_since = Instant::now();
|
||||
|
||||
info!("successfully restored evicted timeline");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -186,6 +186,10 @@ pub(crate) struct Manager {
|
||||
// misc
|
||||
pub(crate) access_service: AccessService,
|
||||
pub(crate) partial_backup_rate_limiter: RateLimiter,
|
||||
|
||||
// Anti-flapping state: we evict timelines eagerly if they are inactive, but should not
|
||||
// evict them if they go inactive very soon after being restored.
|
||||
pub(crate) resident_since: std::time::Instant,
|
||||
}
|
||||
|
||||
/// This task gets spawned alongside each timeline and is responsible for managing the timeline's
|
||||
@@ -350,6 +354,7 @@ impl Manager {
|
||||
access_service: AccessService::new(manager_tx),
|
||||
tli,
|
||||
partial_backup_rate_limiter,
|
||||
resident_since: std::time::Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -188,6 +188,7 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
|
||||
delete_offloaded_wal: false,
|
||||
control_file_save_interval: Duration::from_secs(1),
|
||||
partial_backup_concurrency: 1,
|
||||
eviction_min_resident: Duration::ZERO,
|
||||
};
|
||||
|
||||
let mut global = GlobalMap::new(disk, conf.clone())?;
|
||||
|
||||
@@ -492,6 +492,7 @@ class NeonEnvBuilder:
|
||||
pageserver_virtual_file_io_engine: Optional[str] = None,
|
||||
pageserver_aux_file_policy: Optional[AuxFileStore] = None,
|
||||
pageserver_default_tenant_config_compaction_algorithm: Optional[Dict[str, Any]] = None,
|
||||
safekeeper_extra_opts: Optional[list[str]] = None,
|
||||
):
|
||||
self.repo_dir = repo_dir
|
||||
self.rust_log_override = rust_log_override
|
||||
@@ -557,6 +558,8 @@ class NeonEnvBuilder:
|
||||
|
||||
self.pageserver_aux_file_policy = pageserver_aux_file_policy
|
||||
|
||||
self.safekeeper_extra_opts = safekeeper_extra_opts
|
||||
|
||||
assert test_name.startswith(
|
||||
"test_"
|
||||
), "Unexpectedly instantiated from outside a test function"
|
||||
@@ -1193,7 +1196,9 @@ class NeonEnv:
|
||||
sk_cfg[
|
||||
"remote_storage"
|
||||
] = self.safekeepers_remote_storage.to_toml_inline_table().strip()
|
||||
self.safekeepers.append(Safekeeper(env=self, id=id, port=port))
|
||||
self.safekeepers.append(
|
||||
Safekeeper(env=self, id=id, port=port, extra_opts=config.safekeeper_extra_opts)
|
||||
)
|
||||
cfg["safekeepers"].append(sk_cfg)
|
||||
|
||||
log.info(f"Config: {cfg}")
|
||||
@@ -4016,16 +4021,28 @@ class Safekeeper(LogUtils):
|
||||
id: int
|
||||
running: bool = False
|
||||
|
||||
def __init__(self, env: NeonEnv, port: SafekeeperPort, id: int, running: bool = False):
|
||||
def __init__(
|
||||
self,
|
||||
env: NeonEnv,
|
||||
port: SafekeeperPort,
|
||||
id: int,
|
||||
running: bool = False,
|
||||
extra_opts: Optional[List[str]] = None,
|
||||
):
|
||||
self.env = env
|
||||
self.port = port
|
||||
self.id = id
|
||||
self.running = running
|
||||
self.logfile = Path(self.data_dir) / f"safekeeper-{id}.log"
|
||||
self.extra_opts = extra_opts
|
||||
|
||||
def start(
|
||||
self, extra_opts: Optional[List[str]] = None, timeout_in_seconds: Optional[int] = None
|
||||
) -> "Safekeeper":
|
||||
if extra_opts is None:
|
||||
# Apply either the extra_opts passed in, or the ones from our constructor: we do not merge the two.
|
||||
extra_opts = self.extra_opts
|
||||
|
||||
assert self.running is False
|
||||
self.env.neon_cli.safekeeper_start(
|
||||
self.id, extra_opts=extra_opts, timeout_in_seconds=timeout_in_seconds
|
||||
|
||||
@@ -2191,24 +2191,25 @@ def test_s3_eviction(
|
||||
):
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
neon_env_builder.enable_safekeeper_remote_storage(RemoteStorageKind.LOCAL_FS)
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"checkpoint_timeout": "100ms",
|
||||
}
|
||||
)
|
||||
|
||||
extra_opts = [
|
||||
neon_env_builder.safekeeper_extra_opts = [
|
||||
"--enable-offload",
|
||||
"--partial-backup-timeout",
|
||||
"50ms",
|
||||
"--control-file-save-interval",
|
||||
"1s",
|
||||
# Safekeepers usually wait a while before evicting something: for this test we want them to
|
||||
# evict things as soon as they are inactive.
|
||||
"--eviction-min-resident=100ms",
|
||||
]
|
||||
if delete_offloaded_wal:
|
||||
extra_opts.append("--delete-offloaded-wal")
|
||||
neon_env_builder.safekeeper_extra_opts.append("--delete-offloaded-wal")
|
||||
|
||||
for sk in env.safekeepers:
|
||||
sk.stop().start(extra_opts=extra_opts)
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf={
|
||||
"checkpoint_timeout": "100ms",
|
||||
}
|
||||
)
|
||||
|
||||
n_timelines = 5
|
||||
|
||||
@@ -2263,7 +2264,7 @@ def test_s3_eviction(
|
||||
# restarting random safekeepers
|
||||
for sk in env.safekeepers:
|
||||
if random.random() < restart_chance:
|
||||
sk.stop().start(extra_opts=extra_opts)
|
||||
sk.stop().start()
|
||||
time.sleep(0.5)
|
||||
|
||||
# require at least one successful eviction in at least one safekeeper
|
||||
|
||||
Reference in New Issue
Block a user