pageserver: disable L0 backpressure by default (#10535)

## Problem

We'll need further improvements to compaction before enabling L0 flush
backpressure by default. See:
<https://neondb.slack.com/archives/C033RQ5SPDH/p1738066068960519?thread_ts=1737818888.474179&cid=C033RQ5SPDH>.

Touches #5415.

## Summary of changes

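Disable `l0_flush_delay_threshold` by default by setting its default factor to 0. `l0_flush_stall_threshold` stays disabled by default, now also expressed as a default factor of 0.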
This commit is contained in:
Erik Grinaker
2025-01-28 15:51:30 +01:00
committed by GitHub
parent 83b6bfa229
commit 47677ba578
2 changed files with 10 additions and 17 deletions

View File

@@ -260,11 +260,10 @@ pub struct TenantConfigToml {
/// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
/// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
/// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
/// blowing up. Should be >compaction_threshold. If None, defaults to 2 * compaction_threshold.
/// 0 to disable.
/// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
pub l0_flush_delay_threshold: Option<usize>,
/// Level0 delta layer threshold at which to stall layer flushes. 0 to disable. If None,
/// defaults to 4 * compaction_threshold. Must be >compaction_threshold to avoid deadlock.
/// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
/// to avoid deadlock. 0 to disable. Disabled by default.
pub l0_flush_stall_threshold: Option<usize>,
// Determines how much history is retained, to allow
// branching and read replicas at an older point in time.

View File

@@ -2172,8 +2172,8 @@ impl Timeline {
}
fn get_l0_flush_delay_threshold(&self) -> Option<usize> {
// Default to delay L0 flushes at 3x compaction threshold.
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3;
// Disable L0 flush delays by default. This and compaction need further tuning.
const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3
// If compaction is disabled, don't delay.
if self.get_compaction_period() == Duration::ZERO {
@@ -2201,10 +2201,9 @@ impl Timeline {
}
fn get_l0_flush_stall_threshold(&self) -> Option<usize> {
// Default to stall L0 flushes at 5x compaction threshold.
// TODO: stalls are temporarily disabled by default, see below.
#[allow(unused)]
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 5;
// Disable L0 stalls by default. In ingest benchmarks, we see image compaction take >10
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5
// If compaction is disabled, don't stall.
if self.get_compaction_period() == Duration::ZERO {
@@ -2236,13 +2235,8 @@ impl Timeline {
return None;
}
// Disable stalls by default. In ingest benchmarks, we see image compaction take >10
// minutes, blocking L0 compaction, and we can't stall L0 flushes for that long.
//
// TODO: fix this.
// let l0_flush_stall_threshold = l0_flush_stall_threshold
// .unwrap_or(DEFAULT_L0_FLUSH_STALL_FACTOR * compaction_threshold);
let l0_flush_stall_threshold = l0_flush_stall_threshold?;
let l0_flush_stall_threshold = l0_flush_stall_threshold
.unwrap_or(DEFAULT_L0_FLUSH_STALL_FACTOR * compaction_threshold);
// 0 disables backpressure.
if l0_flush_stall_threshold == 0 {