From 8b4dd5dc277164dbb175319c39ee7b64ed9f9f91 Mon Sep 17 00:00:00 2001
From: John Spray
Date: Fri, 3 May 2024 13:31:25 +0100
Subject: [PATCH] pageserver: jitter secondary periods (#7544)

## Problem

After some time, the load from heatmap uploads gets rather spiky: the tenants' upload periods unintentionally synchronise.

Chart (does this make a _boing_ sound in anyone else's head?):

![image](https://github.com/neondatabase/neon/assets/944640/18829fc8-c5b7-4739-9a9b-491b5d6fcade)

## Summary of changes

- Add a helper `period_jitter` and apply a 5% jitter in the downloader and heatmap_uploader when updating the next run time at the end of an iteration.
- Refactor the existing places where we pick a startup interval into `period_warmup`, so that the intent is obvious.

---
 pageserver/src/tenant/secondary/downloader.rs | 16 +++++++-------
 .../src/tenant/secondary/heatmap_uploader.rs  | 22 +++++++++----------
 pageserver/src/tenant/secondary/scheduler.rs  | 21 ++++++++++++++++++
 3 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs
index 8a987b5ade..fb8907b5a8 100644
--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -31,7 +31,10 @@ use crate::{
 
 use super::{
     heatmap::HeatMapLayer,
-    scheduler::{self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs},
+    scheduler::{
+        self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult,
+        TenantBackgroundJobs,
+    },
     SecondaryTenant,
 };
 
@@ -45,7 +48,6 @@ use chrono::format::{DelayedFormat, StrftimeItems};
 use futures::Future;
 use pageserver_api::models::SecondaryProgress;
 use pageserver_api::shard::TenantShardId;
-use rand::Rng;
 use remote_storage::{DownloadError, Etag, GenericRemoteStorage};
 use tokio_util::sync::CancellationToken;
 
@@ -274,7 +276,7 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
         tracing::debug!("Secondary tenant download completed");
 
         let mut detail = secondary_state.detail.lock().unwrap();
-        detail.next_download = Some(Instant::now() + DOWNLOAD_FRESHEN_INTERVAL);
+        detail.next_download = Some(Instant::now() + period_jitter(DOWNLOAD_FRESHEN_INTERVAL, 5));
     }
 
     async fn schedule(&mut self) -> SchedulingResult<PendingDownload> {
diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs
--- a/pageserver/src/tenant/secondary/heatmap_uploader.rs
+++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs
@@ -305,11 +307,9 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
             let state = self
                 .tenants
                 .entry(*tenant.get_tenant_shard_id())
-                .or_insert_with(|| {
-                    let jittered_period = rand::thread_rng().gen_range(Duration::ZERO..period);
-
-                    UploaderTenantState {
-                        tenant: Arc::downgrade(&tenant),
-                        last_upload: None,
-                        next_upload: Some(now.checked_add(jittered_period).unwrap_or(now)),
-                        last_digest: None,
-                    }
+                .or_insert_with(|| UploaderTenantState {
+                    tenant: Arc::downgrade(&tenant),
+                    last_upload: None,
+                    next_upload: Some(now.checked_add(period_warmup(period)).unwrap_or(now)),
+                    last_digest: None,
                 });
 
             // Decline to do the upload if insufficient time has passed
@@ -274,7 +272,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
             let next_upload = tenant
                 .get_heatmap_period()
-                .and_then(|period| now.checked_add(period));
+                .and_then(|period| now.checked_add(period_jitter(period, 5)));
 
             WriteComplete {
                 tenant_shard_id: *tenant.get_tenant_shard_id(),
diff --git a/pageserver/src/tenant/secondary/scheduler.rs b/pageserver/src/tenant/secondary/scheduler.rs
index 3bd7be782e..3d042f4513 100644
--- a/pageserver/src/tenant/secondary/scheduler.rs
+++ b/pageserver/src/tenant/secondary/scheduler.rs
@@ -1,4 +1,5 @@
 use futures::Future;
+use rand::Rng;
 use std::{
     collections::HashMap,
     marker::PhantomData,
@@ -19,6 +20,26 @@ use super::{CommandRequest, CommandResponse};
 const MAX_SCHEDULING_INTERVAL: Duration = Duration::from_secs(10);
 const MIN_SCHEDULING_INTERVAL: Duration = Duration::from_secs(1);
 
+/// Jitter a Duration by an integer percentage. Returned values are uniform
+/// in the range (100-pct)%..(100+pct)% of the input (i.e. a 5% jitter is 5% either way: a ~10% range)
+pub(super) fn period_jitter(d: Duration, pct: u32) -> Duration {
+    if d == Duration::ZERO {
+        d
+    } else {
+        rand::thread_rng().gen_range((d * (100 - pct)) / 100..(d * (100 + pct)) / 100)
+    }
+}
+
+/// When a periodic task first starts, it should wait for some time in the range 0..period, so
+/// that starting many such tasks at the same time spreads them across the time range.
+pub(super) fn period_warmup(period: Duration) -> Duration {
+    if period == Duration::ZERO {
+        period
+    } else {
+        rand::thread_rng().gen_range(Duration::ZERO..period)
+    }
+}
+
 /// Scheduling helper for background work across many tenants.
 ///
 /// Systems that need to run background work across many tenants may use this type
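
For intuition, here is a small standalone sketch of how the two helpers are meant to be used (not part of the patch; it copies `period_jitter` and `period_warmup` from the `scheduler.rs` hunk above, and the 60s period is an arbitrary example):

```rust
use rand::Rng;
use std::time::Duration;

// Copied from the scheduler.rs hunk above: jitter a period by +/- pct%.
fn period_jitter(d: Duration, pct: u32) -> Duration {
    if d == Duration::ZERO {
        d
    } else {
        rand::thread_rng().gen_range((d * (100 - pct)) / 100..(d * (100 + pct)) / 100)
    }
}

// Copied from the scheduler.rs hunk above: pick a random initial delay in 0..period.
fn period_warmup(period: Duration) -> Duration {
    if period == Duration::ZERO {
        period
    } else {
        rand::thread_rng().gen_range(Duration::ZERO..period)
    }
}

fn main() {
    let period = Duration::from_secs(60);

    // First run: spread tasks uniformly across one full period, so a pageserver
    // restart doesn't schedule every tenant's upload at the same instant.
    let first_delay = period_warmup(period);
    assert!(first_delay < period);

    // Subsequent runs: 5% jitter either way, i.e. uniform in [57s, 63s) for a
    // 60s period.
    let next_delay = period_jitter(period, 5);
    assert!(next_delay >= Duration::from_secs(57) && next_delay < Duration::from_secs(63));

    println!("first run in {first_delay:?}, then roughly every {next_delay:?}");
}
```

The design point is that `period_jitter` is re-rolled on every iteration: each task's phase does a small random walk, so schedules that have drifted into lockstep decorrelate over time instead of staying synchronised.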