pageserver: enable l0_flush_delay_threshold by default (#11214)

## Problem

`l0_flush_delay_threshold` has already been set to 30 in production for
a couple of weeks. Let's harmonize the default.

## Summary of changes

Update `DEFAULT_L0_FLUSH_DELAY_FACTOR` to 3 such that the default
`l0_flush_delay_threshold` is `3 * compaction_threshold`.

This differs from the production setting, which is hardcoded to 30 (with
`compaction_threshold` at 10), and is more appropriate for any tenants
that have custom `compaction_threshold` overrides.
This commit is contained in:
Erik Grinaker
2025-03-13 20:15:22 +01:00
committed by GitHub
parent 066b0a1be9
commit 8afae9d03c
2 changed files with 10 additions and 8 deletions

View File

@@ -278,10 +278,10 @@ pub struct TenantConfigToml {
/// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
/// has an effect if `compaction_l0_first` is true. Defaults to true.
pub compaction_l0_semaphore: bool,
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
/// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
/// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
/// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
/// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
pub l0_flush_delay_threshold: Option<usize>,
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
/// to avoid deadlock. 0 to disable. Disabled by default.

View File

@@ -2476,8 +2476,9 @@ impl Timeline {
}
fn get_l0_flush_delay_threshold(&self) -> Option<usize> {
// Disable L0 flushes by default. This and compaction needs further tuning.
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3
// By default, delay L0 flushes at 3x the compaction threshold. The compaction threshold
// defaults to 10, and L0 compaction is generally able to keep L0 counts below 30.
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;
// If compaction is disabled, don't delay.
if self.get_compaction_period() == Duration::ZERO {
@@ -2505,8 +2506,9 @@ impl Timeline {
}
fn get_l0_flush_stall_threshold(&self) -> Option<usize> {
// Disable L0 stalls by default. In ingest benchmarks, we see image compaction take >10
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
// Disable L0 stalls by default. Stalling can cause unavailability if L0 compaction isn't
// responsive, and it can e.g. block on other compaction via the compaction semaphore or
// sibling timelines. We need more confidence before enabling this.
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5
// If compaction is disabled, don't stall.