fix(pageserver): lower L0 compaction threshold (#11617)

## Problem

We saw OOMs due to L0 compaction happening simultaneously for all shards
of the same tenant right after the shard split.

## Summary of changes

Lower the threshold so that we compact fewer files.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-04-17 15:51:28 -04:00
committed by GitHub
parent ad0c5fdae7
commit 748539b222

View File

@@ -682,10 +682,10 @@ pub mod tenant_conf_defaults {
pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;
// This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
// 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
// be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
// with this config, we can get a maximum peak compaction usage of 9 GB.
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
// 3/4*8=6 on most of our pageservers. Compacting 10 layers requires a maximum of
// DEFAULT_CHECKPOINT_DISTANCE*10 memory, that's 2560MB. So with this config, we can get a maximum peak
// compaction usage of 15360MB.
pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 10;
// Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
// read amp.
pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
@@ -702,8 +702,11 @@ pub mod tenant_conf_defaults {
// Relevant: https://github.com/neondatabase/neon/issues/3394
pub const DEFAULT_GC_PERIOD: &str = "1 hr";
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
// If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
// layer creation will end immediately. Set to 0 to disable.
// Currently, any value other than 0 will trigger image layer creation preemption immediately with L0 backpressure
// without looking at the exact number of L0 layers.
// It was expected to have the following behavior:
// > If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
// > layer creation will end immediately. Set to 0 to disable.
pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";