eviction_task: only refresh layer accesses once per p.threshold (#3877)

Without this, we refresh layer accesses every p.period, which can be
quite short. For example, the running experiment with 3000 tenants in
prod uses a period of 1 minute.

Refreshing once per p.threshold is enough to prevent eviction.
This commit is contained in:
Christian Schwarz
2023-03-27 13:33:40 +02:00
committed by GitHub
parent ff51e96fbd
commit fe15624570
2 changed files with 22 additions and 1 deletions

View File

@@ -71,6 +71,8 @@ use crate::ZERO_PAGE;
use crate::{is_temporary, task_mgr};
use walreceiver::spawn_connection_manager_task;
use self::eviction_task::EvictionTaskTimelineState;
use super::layer_map::BatchedUpdates;
use super::remote_timeline_client::index::IndexPart;
use super::remote_timeline_client::RemoteTimelineClient;
@@ -216,6 +218,8 @@ pub struct Timeline {
download_all_remote_layers_task_info: RwLock<Option<DownloadRemoteLayersTaskInfo>>,
state: watch::Sender<TimelineState>,
eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,
}
/// Internal structure to hold all data needed for logical size calculation.
@@ -1252,6 +1256,10 @@ impl Timeline {
download_all_remote_layers_task_info: RwLock::new(None),
state,
eviction_task_timeline_state: tokio::sync::Mutex::new(
EvictionTaskTimelineState::default(),
),
};
result.repartition_threshold = result.get_checkpoint_distance() / 10;
result

View File

@@ -35,6 +35,11 @@ use crate::{
use super::Timeline;
/// Per-timeline state kept by the eviction task between iterations.
///
/// `Timeline` stores one instance behind a `tokio::sync::Mutex`, initialised
/// via `Default`, which leaves the timestamp below as `None`.
#[derive(Default)]
pub struct EvictionTaskTimelineState {
/// When the eviction task last finished calling
/// `refresh_layers_required_in_restart` for this timeline. `None` until the
/// first refresh has run; the eviction task compares the elapsed time against
/// the policy threshold to decide whether another refresh is needed.
last_refresh_required_in_restart: Option<tokio::time::Instant>,
}
impl Timeline {
pub(super) fn launch_eviction_task(self: &Arc<Self>) {
let self_clone = Arc::clone(self);
@@ -139,7 +144,15 @@ impl Timeline {
// for active tenants this will likely hit the materialized page cache or in-memory layers.
// for inactive tenants it will refresh the last_access timestamps so that we will not evict
// these layers and then re-download them on restart.
self.refresh_layers_required_in_restart(cancel, ctx).await;
let mut state = self.eviction_task_timeline_state.lock().await;
match state.last_refresh_required_in_restart {
Some(ts) if ts.elapsed() < p.threshold => { /* no need to run */ }
_ => {
self.refresh_layers_required_in_restart(cancel, ctx).await;
state.last_refresh_required_in_restart = Some(tokio::time::Instant::now())
}
}
drop(state);
if cancel.is_cancelled() {
return ControlFlow::Break(());