From 71f38d135467ef8691f062c62fa5d8f3bf49ea6d Mon Sep 17 00:00:00 2001
From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com>
Date: Thu, 5 Dec 2024 11:37:17 -0800
Subject: [PATCH] feat(pageserver): support schedule gc-compaction (#9809)

## Problem

part of https://github.com/neondatabase/neon/issues/9114

gc-compaction can take a long time. This patch adds support for scheduling a
gc-compaction job. The compaction loop will first handle L0->L1 compaction and
then gc-compaction. The scheduled jobs are stored in a non-persistent queue
within the tenant structure.

This will be the building block for the partial compaction trigger: if the
system determines that we need to do a gc-compaction, it will partition the
keyspace and schedule several jobs. Each of these jobs will run for a short
amount of time (e.g., 1 min). L0 compaction will be prioritized over
gc-compaction.

## Summary of changes

* Add a compaction scheduler in the tenant.
* Run scheduled compaction in integration tests.
* Change the manual compaction API to allow scheduling a compaction instead of
  running it immediately.
* Add an LSN upper bound as a gc-compaction parameter. If we schedule partial
  compactions, gc_cutoff might move across different runs. Therefore, we need
  to pass a pre-determined gc_cutoff beforehand. (TODO: support an LSN lower
  bound so that we can compact an arbitrary "rectangle" in the layer map.)
* Refactor the gc-compaction internal interface.

---------

Signed-off-by: Alex Chi Z
Co-authored-by: Christian Schwarz
---
 pageserver/src/http/routes.rs                |  66 +++++--
 pageserver/src/tenant.rs                     | 171 ++++++++++++++++---
 pageserver/src/tenant/timeline.rs            |  29 +++-
 pageserver/src/tenant/timeline/compaction.rs |  58 ++++---
 test_runner/regress/test_compaction.py       |  41 +++--
 5 files changed, 291 insertions(+), 74 deletions(-)

diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index e04f1460a8..b3981b4a8e 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -87,7 +87,7 @@ use crate::tenant::timeline::offload::offload_timeline;
 use crate::tenant::timeline::offload::OffloadError;
 use crate::tenant::timeline::CompactFlags;
 use crate::tenant::timeline::CompactOptions;
-use crate::tenant::timeline::CompactRange;
+use crate::tenant::timeline::CompactRequest;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::timeline::Timeline;
 use crate::tenant::GetTimelineError;
@@ -1978,6 +1978,26 @@ async fn timeline_gc_handler(
     json_response(StatusCode::OK, gc_result)
 }
 
+// Cancel scheduled compaction tasks
+async fn timeline_cancel_compact_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+    let state = get_state(&request);
+    async {
+        let tenant = state
+            .tenant_manager
+            .get_attached_tenant_shard(tenant_shard_id)?;
+        tenant.cancel_scheduled_compaction(timeline_id);
+        json_response(StatusCode::OK, ())
+    }
+    .instrument(info_span!("timeline_cancel_compact", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
+    .await
+}
+
 // Run compaction immediately on given timeline.
async fn timeline_compact_handler( mut request: Request, @@ -1987,7 +2007,7 @@ async fn timeline_compact_handler( let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; - let compact_range = json_request_maybe::>(&mut request).await?; + let compact_request = json_request_maybe::>(&mut request).await?; let state = get_state(&request); @@ -2012,22 +2032,42 @@ async fn timeline_compact_handler( let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); + let wait_until_scheduled_compaction_done = + parse_query_param::<_, bool>(&request, "wait_until_scheduled_compaction_done")? + .unwrap_or(false); + let options = CompactOptions { - compact_range, + compact_range: compact_request + .as_ref() + .and_then(|r| r.compact_range.clone()), + compact_below_lsn: compact_request.as_ref().and_then(|r| r.compact_below_lsn), flags, }; + let scheduled = compact_request.map(|r| r.scheduled).unwrap_or(false); + async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; - timeline - .compact_with_options(&cancel, options, &ctx) - .await - .map_err(|e| ApiError::InternalServerError(e.into()))?; - if wait_until_uploaded { - timeline.remote_client.wait_completion().await - // XXX map to correct ApiError for the cases where it's due to shutdown - .context("wait completion").map_err(ApiError::InternalServerError)?; + if scheduled { + let tenant = state + .tenant_manager + .get_attached_tenant_shard(tenant_shard_id)?; + let rx = tenant.schedule_compaction(timeline_id, options).await; + if wait_until_scheduled_compaction_done { + // It is possible that this will take a long time, dropping the HTTP request will not cancel the compaction. 
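The handler changes above expose scheduling through the existing compact endpoint: the JSON body selects the key range and LSN bound and marks the job as `scheduled`, the `wait_until_scheduled_compaction_done` query parameter optionally blocks until the compaction loop has run the job, and the new DELETE route clears whatever is still queued for the timeline. A minimal client-side sketch with Python `requests` follows; the host, tenant/timeline IDs, and the exact query-parameter spelling of the gc-compaction flag are assumptions, not part of this patch.

```python
import requests

# Assumed local pageserver endpoint and IDs -- adjust for your environment.
PAGESERVER = "http://localhost:9898"
TENANT_SHARD_ID = "af0480929707ee75372337efaa5ecf96"   # hypothetical
TIMELINE_ID = "be4f0b53d2b3a01c27dd07cb287fbb17"       # hypothetical

compact_url = f"{PAGESERVER}/v1/tenant/{TENANT_SHARD_ID}/timeline/{TIMELINE_ID}/compact"

# Schedule a gc-compaction job over a key range and block until the
# compaction loop has picked it up and finished it.
resp = requests.put(
    compact_url,
    params={
        # Flag name mirrors the test fixture kwarg; the query spelling is an assumption.
        "enhanced_gc_bottom_most_compaction": "true",
        "wait_until_scheduled_compaction_done": "true",
    },
    json={
        "scheduled": True,
        "compact_range": {
            "start": "000000000000000000000000000000000000",
            "end": "010000000000000000000000000000000000",
        },
        # "compact_below_lsn": "0/169AD58",  # optional LSN upper bound (illustrative value)
    },
)
resp.raise_for_status()

# Drop any jobs for this timeline that are still waiting in the queue.
requests.delete(compact_url).raise_for_status()
```

Note that the scheduling path only accepts gc-compaction: if a job is queued without the gc-compaction flag, the compaction loop logs a warning and drops it, so the flag has to be set when the job is scheduled.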
+ rx.await.ok(); + } + } else { + timeline + .compact_with_options(&cancel, options, &ctx) + .await + .map_err(|e| ApiError::InternalServerError(e.into()))?; + if wait_until_uploaded { + timeline.remote_client.wait_completion().await + // XXX map to correct ApiError for the cases where it's due to shutdown + .context("wait completion").map_err(ApiError::InternalServerError)?; + } } json_response(StatusCode::OK, ()) } @@ -3301,6 +3341,10 @@ pub fn make_router( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact", |r| api_handler(r, timeline_compact_handler), ) + .delete( + "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact", + |r| api_handler(r, timeline_cancel_compact_handler), + ) .put( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/offload", |r| testing_api_handler("attempt timeline offload", r, timeline_offload_handler), diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 5a9e398586..306ec9f548 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -37,14 +37,18 @@ use remote_timeline_client::manifest::{ }; use remote_timeline_client::UploadQueueNotReadyError; use std::collections::BTreeMap; +use std::collections::VecDeque; use std::fmt; use std::future::Future; use std::sync::atomic::AtomicBool; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; +use timeline::compaction::ScheduledCompactionTask; use timeline::import_pgdata; use timeline::offload::offload_timeline; +use timeline::CompactFlags; +use timeline::CompactOptions; use timeline::ShutdownMode; use tokio::io::BufReader; use tokio::sync::watch; @@ -339,6 +343,11 @@ pub struct Tenant { /// Overhead of mutex is acceptable because compaction is done with a multi-second period. compaction_circuit_breaker: std::sync::Mutex, + /// Scheduled compaction tasks. Currently, this can only be populated by triggering + /// a manual gc-compaction from the manual compaction API. + scheduled_compaction_tasks: + std::sync::Mutex>>, + /// If the tenant is in Activating state, notify this to encourage it /// to proceed to Active as soon as possible, rather than waiting for lazy /// background warmup. @@ -2953,27 +2962,68 @@ impl Tenant { for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload { + // pending_task_left == None: cannot compact, maybe still pending tasks + // pending_task_left == Some(true): compaction task left + // pending_task_left == Some(false): no compaction task left let pending_task_left = if *can_compact { - Some( - timeline - .compact(cancel, EnumSet::empty(), ctx) - .instrument(info_span!("compact_timeline", %timeline_id)) - .await - .inspect_err(|e| match e { - timeline::CompactionError::ShuttingDown => (), - timeline::CompactionError::Offload(_) => { - // Failures to offload timelines do not trip the circuit breaker, because - // they do not do lots of writes the way compaction itself does: it is cheap - // to retry, and it would be bad to stop all compaction because of an issue with offloading. 
+ let has_pending_l0_compaction_task = timeline + .compact(cancel, EnumSet::empty(), ctx) + .instrument(info_span!("compact_timeline", %timeline_id)) + .await + .inspect_err(|e| match e { + timeline::CompactionError::ShuttingDown => (), + timeline::CompactionError::Offload(_) => { + // Failures to offload timelines do not trip the circuit breaker, because + // they do not do lots of writes the way compaction itself does: it is cheap + // to retry, and it would be bad to stop all compaction because of an issue with offloading. + } + timeline::CompactionError::Other(e) => { + self.compaction_circuit_breaker + .lock() + .unwrap() + .fail(&CIRCUIT_BREAKERS_BROKEN, e); + } + })?; + if has_pending_l0_compaction_task { + Some(true) + } else { + let has_pending_scheduled_compaction_task; + let next_scheduled_compaction_task = { + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + if let Some(tline_pending_tasks) = guard.get_mut(timeline_id) { + let next_task = tline_pending_tasks.pop_front(); + has_pending_scheduled_compaction_task = !tline_pending_tasks.is_empty(); + next_task + } else { + has_pending_scheduled_compaction_task = false; + None + } + }; + if let Some(mut next_scheduled_compaction_task) = next_scheduled_compaction_task + { + if !next_scheduled_compaction_task + .options + .flags + .contains(CompactFlags::EnhancedGcBottomMostCompaction) + { + warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", next_scheduled_compaction_task.options); + } else { + let _ = timeline + .compact_with_options( + cancel, + next_scheduled_compaction_task.options, + ctx, + ) + .instrument(info_span!("scheduled_compact_timeline", %timeline_id)) + .await?; + if let Some(tx) = next_scheduled_compaction_task.result_tx.take() { + // TODO: we can send compaction statistics in the future + tx.send(()).ok(); } - timeline::CompactionError::Other(e) => { - self.compaction_circuit_breaker - .lock() - .unwrap() - .fail(&CIRCUIT_BREAKERS_BROKEN, e); - } - })?, - ) + } + } + Some(has_pending_scheduled_compaction_task) + } } else { None }; @@ -2993,6 +3043,36 @@ impl Tenant { Ok(has_pending_task) } + /// Cancel scheduled compaction tasks + pub(crate) fn cancel_scheduled_compaction( + &self, + timeline_id: TimelineId, + ) -> Vec { + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + if let Some(tline_pending_tasks) = guard.get_mut(&timeline_id) { + let current_tline_pending_tasks = std::mem::take(tline_pending_tasks); + current_tline_pending_tasks.into_iter().collect() + } else { + Vec::new() + } + } + + /// Schedule a compaction task for a timeline. + pub(crate) async fn schedule_compaction( + &self, + timeline_id: TimelineId, + options: CompactOptions, + ) -> tokio::sync::oneshot::Receiver<()> { + let (tx, rx) = tokio::sync::oneshot::channel(); + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + let tline_pending_tasks = guard.entry(timeline_id).or_default(); + tline_pending_tasks.push_back(ScheduledCompactionTask { + options, + result_tx: Some(tx), + }); + rx + } + // Call through to all timelines to freeze ephemeral layers if needed. Usually // this happens during ingest: this background housekeeping is for freezing layers // that are open but haven't been written to for some time. @@ -4005,6 +4085,7 @@ impl Tenant { // use an extremely long backoff. 
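The tenant-side pieces above amount to a small in-memory scheduler: a mutex-guarded map from timeline to a FIFO of `ScheduledCompactionTask` entries, each carrying an optional oneshot sender, with the compaction loop draining at most one entry per pass after L0 compaction. The following is a toy Python analogue of those semantics for illustration only, not the pageserver code; names and types are invented.

```python
from collections import deque
from concurrent.futures import Future
from threading import Lock

class CompactionScheduler:
    """Per-timeline FIFO of pending gc-compaction jobs, guarded by a lock."""

    def __init__(self):
        self._lock = Lock()
        self._queues: dict[str, deque] = {}  # timeline_id -> pending (options, handle) pairs

    def schedule(self, timeline_id: str, options: dict) -> Future:
        # Each job carries a completion handle so the HTTP caller can wait on it.
        done = Future()
        with self._lock:
            self._queues.setdefault(timeline_id, deque()).append((options, done))
        return done

    def cancel_all(self, timeline_id: str) -> list:
        # Mirrors the DELETE route: take and return everything still queued.
        with self._lock:
            return list(self._queues.pop(timeline_id, deque()))

    def pop_next(self, timeline_id: str):
        # The compaction loop runs L0->L1 compaction first and then takes at most
        # one scheduled job per pass, so L0 compaction stays prioritized.
        with self._lock:
            q = self._queues.get(timeline_id)
            return q.popleft() if q else None


scheduler = CompactionScheduler()
fut = scheduler.schedule("tl-1", {"compact_range": ("00...", "01...")})
job = scheduler.pop_next("tl-1")      # what one compaction-loop pass would take
if job is not None:
    _options, done = job
    done.set_result(None)             # the loop reports completion via the handle
fut.result()                          # caller side: wait for the scheduled job to finish
```

In the patch, the completion handle is the `result_tx` oneshot channel, and `wait_until_scheduled_compaction_done` on the HTTP side simply awaits the receiver; since the queue is non-persistent, pending jobs do not survive a pageserver restart.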
Some(Duration::from_secs(3600 * 24)), )), + scheduled_compaction_tasks: Mutex::new(Default::default()), activate_now_sem: tokio::sync::Semaphore::new(0), attach_wal_lag_cooldown: Arc::new(std::sync::OnceLock::new()), cancel: CancellationToken::default(), @@ -9163,6 +9244,7 @@ mod tests { CompactOptions { flags: dryrun_flags, compact_range: None, + compact_below_lsn: None, }, &ctx, ) @@ -9399,6 +9481,7 @@ mod tests { CompactOptions { flags: dryrun_flags, compact_range: None, + compact_below_lsn: None, }, &ctx, ) @@ -9885,7 +9968,15 @@ mod tests { // Do a partial compaction on key range 0..2 tline - .partial_compact_with_gc(get_key(0)..get_key(2), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(0)..get_key(2)).into()), + compact_below_lsn: None, + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -9924,7 +10015,15 @@ mod tests { // Do a partial compaction on key range 2..4 tline - .partial_compact_with_gc(get_key(2)..get_key(4), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(2)..get_key(4)).into()), + compact_below_lsn: None, + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -9968,7 +10067,15 @@ mod tests { // Do a partial compaction on key range 4..9 tline - .partial_compact_with_gc(get_key(4)..get_key(9), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(4)..get_key(9)).into()), + compact_below_lsn: None, + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -10011,7 +10118,15 @@ mod tests { // Do a partial compaction on key range 9..10 tline - .partial_compact_with_gc(get_key(9)..get_key(10), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(9)..get_key(10)).into()), + compact_below_lsn: None, + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; @@ -10059,7 +10174,15 @@ mod tests { // Do a partial compaction on key range 0..10, all image layers below LSN 20 can be replaced with new ones. tline - .partial_compact_with_gc(get_key(0)..get_key(10), &cancel, EnumSet::new(), &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: EnumSet::new(), + compact_range: Some((get_key(0)..get_key(10)).into()), + compact_below_lsn: None, + }, + &ctx, + ) .await .unwrap(); let all_layers = inspect_and_sort(&tline, Some(get_key(0)..get_key(10))).await; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index fc741826ab..fc69525bf4 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -768,7 +768,7 @@ pub enum GetLogicalSizePriority { Background, } -#[derive(enumset::EnumSetType)] +#[derive(Debug, enumset::EnumSetType)] pub(crate) enum CompactFlags { ForceRepartition, ForceImageLayerCreation, @@ -777,6 +777,16 @@ pub(crate) enum CompactFlags { DryRun, } +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize)] +pub(crate) struct CompactRequest { + pub compact_range: Option, + pub compact_below_lsn: Option, + /// Whether the compaction job should be scheduled. 
+ #[serde(default)] + pub scheduled: bool, +} + #[serde_with::serde_as] #[derive(Debug, Clone, serde::Deserialize)] pub(crate) struct CompactRange { @@ -786,10 +796,24 @@ pub(crate) struct CompactRange { pub end: Key, } -#[derive(Clone, Default)] +impl From> for CompactRange { + fn from(range: Range) -> Self { + CompactRange { + start: range.start, + end: range.end, + } + } +} + +#[derive(Debug, Clone, Default)] pub(crate) struct CompactOptions { pub flags: EnumSet, + /// If set, the compaction will only compact the key range specified by this option. + /// This option is only used by GC compaction. pub compact_range: Option, + /// If set, the compaction will only compact the LSN below this value. + /// This option is only used by GC compaction. + pub compact_below_lsn: Option, } impl std::fmt::Debug for Timeline { @@ -1604,6 +1628,7 @@ impl Timeline { CompactOptions { flags, compact_range: None, + compact_below_lsn: None, }, ctx, ) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index ecd68ba55e..8ececa2bfb 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -16,7 +16,6 @@ use super::{ use anyhow::{anyhow, bail, Context}; use bytes::Bytes; -use enumset::EnumSet; use fail::fail_point; use itertools::Itertools; use pageserver_api::key::KEY_SIZE; @@ -64,6 +63,12 @@ use super::CompactionError; /// Maximum number of deltas before generating an image layer in bottom-most compaction. const COMPACTION_DELTA_THRESHOLD: usize = 5; +/// A scheduled compaction task. +pub struct ScheduledCompactionTask { + pub options: CompactOptions, + pub result_tx: Option>, +} + pub struct GcCompactionJobDescription { /// All layers to read in the compaction job selected_layers: Vec, @@ -1746,24 +1751,6 @@ impl Timeline { Ok(()) } - pub(crate) async fn compact_with_gc( - self: &Arc, - cancel: &CancellationToken, - options: CompactOptions, - ctx: &RequestContext, - ) -> anyhow::Result<()> { - self.partial_compact_with_gc( - options - .compact_range - .map(|range| range.start..range.end) - .unwrap_or_else(|| Key::MIN..Key::MAX), - cancel, - options.flags, - ctx, - ) - .await - } - /// An experimental compaction building block that combines compaction with garbage collection. /// /// The current implementation picks all delta + image layers that are below or intersecting with @@ -1771,17 +1758,19 @@ impl Timeline { /// layers and image layers, which generates image layers on the gc horizon, drop deltas below gc horizon, /// and create delta layers with all deltas >= gc horizon. /// - /// If `key_range` is provided, it will only compact the keys within the range, aka partial compaction. + /// If `options.compact_range` is provided, it will only compact the keys within the range, aka partial compaction. /// Partial compaction will read and process all layers overlapping with the key range, even if it might /// contain extra keys. After the gc-compaction phase completes, delta layers that are not fully contained /// within the key range will be rewritten to ensure they do not overlap with the delta layers. Providing /// Key::MIN..Key..MAX to the function indicates a full compaction, though technically, `Key::MAX` is not /// part of the range. - pub(crate) async fn partial_compact_with_gc( + /// + /// If `options.compact_below_lsn` is provided, the compaction will only compact layers below or intersect with + /// the LSN. Otherwise, it will use the gc cutoff by default. 
+ pub(crate) async fn compact_with_gc( self: &Arc, - compaction_key_range: Range, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> anyhow::Result<()> { // Block other compaction/GC tasks from running for now. GC-compaction could run along @@ -1803,6 +1792,12 @@ impl Timeline { ) .await?; + let flags = options.flags; + let compaction_key_range = options + .compact_range + .map(|range| range.start..range.end) + .unwrap_or_else(|| Key::MIN..Key::MAX); + let dry_run = flags.contains(CompactFlags::DryRun); if compaction_key_range == (Key::MIN..Key::MAX) { @@ -1826,7 +1821,18 @@ impl Timeline { let layers = guard.layer_map()?; let gc_info = self.gc_info.read().unwrap(); let mut retain_lsns_below_horizon = Vec::new(); - let gc_cutoff = gc_info.cutoffs.select_min(); + let gc_cutoff = { + let real_gc_cutoff = gc_info.cutoffs.select_min(); + // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for + // each of the retain_lsn. Therefore, if the user-provided `compact_below_lsn` is larger than the real gc cutoff, we will use + // the real cutoff. + let mut gc_cutoff = options.compact_below_lsn.unwrap_or(real_gc_cutoff); + if gc_cutoff > real_gc_cutoff { + warn!("provided compact_below_lsn={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff); + gc_cutoff = real_gc_cutoff; + } + gc_cutoff + }; for (lsn, _timeline_id, _is_offloaded) in &gc_info.retain_lsns { if lsn < &gc_cutoff { retain_lsns_below_horizon.push(*lsn); @@ -1846,7 +1852,7 @@ impl Timeline { .map(|desc| desc.get_lsn_range().end) .max() else { - info!("no layers to compact with gc"); + info!("no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff); return Ok(()); }; // Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key @@ -1869,7 +1875,7 @@ impl Timeline { } } if selected_layers.is_empty() { - info!("no layers to compact with gc"); + info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compaction_key_range.start, compaction_key_range.end); return Ok(()); } retain_lsns_below_horizon.sort(); diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index b6741aed68..de6653eb3f 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -15,7 +15,7 @@ from fixtures.pageserver.http import PageserverApiException from fixtures.utils import skip_in_debug_build, wait_until from fixtures.workload import Workload -AGGRESIVE_COMPACTION_TENANT_CONF = { +AGGRESSIVE_COMPACTION_TENANT_CONF = { # Disable gc and compaction. The test runs compaction manually. 
"gc_period": "0s", "compaction_period": "0s", @@ -24,6 +24,7 @@ AGGRESIVE_COMPACTION_TENANT_CONF = { # Compact small layers "compaction_target_size": 1024**2, "image_creation_threshold": 2, + # "lsn_lease_length": "0s", -- TODO: would cause branch creation errors, should fix later } @@ -51,7 +52,7 @@ def test_pageserver_compaction_smoke( page_cache_size=10 """ - env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + env = neon_env_builder.init_start(initial_tenant_conf=AGGRESSIVE_COMPACTION_TENANT_CONF) tenant_id = env.initial_tenant timeline_id = env.initial_timeline @@ -120,14 +121,25 @@ page_cache_size=10 assert vectored_average < 8 +@skip_in_debug_build("only run with release build") def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): - env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + SMOKE_CONF = { + # Run both gc and gc-compaction. + "gc_period": "5s", + "compaction_period": "5s", + # No PiTR interval and small GC horizon + "pitr_interval": "0s", + "gc_horizon": f"{1024 ** 2}", + "lsn_lease_length": "0s", + } + + env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF) tenant_id = env.initial_tenant timeline_id = env.initial_timeline - row_count = 1000 - churn_rounds = 10 + row_count = 10000 + churn_rounds = 50 ps_http = env.pageserver.http_client() @@ -141,20 +153,27 @@ def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): if i % 10 == 0: log.info(f"Running churn round {i}/{churn_rounds} ...") - workload.churn_rows(row_count, env.pageserver.id) - # Force L0 compaction to ensure the number of layers is within bounds, so that gc-compaction can run. - ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True) - assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1 ps_http.timeline_compact( tenant_id, timeline_id, enhanced_gc_bottom_most_compaction=True, body={ - "start": "000000000000000000000000000000000000", - "end": "030000000000000000000000000000000000", + "scheduled": True, + "compact_range": { + "start": "000000000000000000000000000000000000", + # skip the SLRU range for now -- it races with get-lsn-by-timestamp, TODO: fix this + "end": "010000000000000000000000000000000000", + }, }, ) + workload.churn_rows(row_count, env.pageserver.id) + + # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) + env.pageserver.assert_log_contains( + "scheduled_compact_timeline.*picked .* layers for compaction" + ) + log.info("Validating at workload end ...") workload.validate(env.pageserver.id)