fix(pageserver): use better estimation for compaction memory usage (#11904)

## Problem

Hopefully resolves `test_gc_feedback` flakiness.

## Summary of changes

`accumulated_values` should not exceed 512MB to avoid OOM. Previously we
only used the number of items, which is not a good estimate of memory usage.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-05-14 16:32:55 +08:00
committed by GitHub
parent d47e88e353
commit 81fd652151
2 changed files with 27 additions and 3 deletions

View File

@@ -36,6 +36,24 @@ impl Value {
Value::WalRecord(rec) => rec.will_init(),
}
}
/// Returns a rough size estimate for this value.
///
/// Images and records that carry a payload report that payload's `len()`.
/// Records that carry a list of entries report the entry count scaled by a
/// fixed factor. Everything else falls back to a fixed 8192 estimate.
#[inline(always)]
pub fn estimated_size(&self) -> usize {
    // Fallback when there is nothing cheap to measure: use image size as the estimation.
    const FALLBACK_ESTIMATE: usize = 8192;

    // An image is measured directly by its length.
    if let Value::Image(image) = self {
        return image.len();
    }

    // Anything that is neither an image nor a WAL record gets the fallback.
    let Value::WalRecord(record) = self else {
        return FALLBACK_ESTIMATE;
    };

    match record {
        NeonWalRecord::Postgres { rec, .. } => rec.len(),
        // Only an aux-file record with content present is measured; one
        // without content falls through to the fallback arm below.
        NeonWalRecord::AuxFile {
            content: Some(content),
            ..
        } => content.len(),
        // The multipliers are presumably the per-entry byte width of each
        // list's element type; confirm against the record definition.
        NeonWalRecord::ClogSetAborted { xids } => xids.len() * 4,
        NeonWalRecord::ClogSetCommitted { xids, .. } => xids.len() * 4,
        NeonWalRecord::MultixactMembersCreate { members, .. } => members.len() * 8,
        _ => FALLBACK_ESTIMATE,
    }
}
}
#[derive(Debug, PartialEq)]

View File

@@ -3435,6 +3435,7 @@ impl Timeline {
// Step 2: Produce images+deltas.
let mut accumulated_values = Vec::new();
let mut accumulated_values_estimated_size = 0;
let mut last_key: Option<Key> = None;
// Only create image layers when there is no ancestor branches. TODO: create covering image layer
@@ -3611,12 +3612,16 @@ impl Timeline {
if last_key.is_none() {
last_key = Some(key);
}
accumulated_values_estimated_size += val.estimated_size();
accumulated_values.push((key, lsn, val));
if accumulated_values.len() >= 65536 {
// Assume all of them are images, that would be 512MB of data in memory for a single key.
// Accumulated values should never exceed 512MB.
if accumulated_values_estimated_size >= 1024 * 1024 * 512 {
return Err(CompactionError::Other(anyhow!(
"too many values for a single key, giving up gc-compaction"
"too many values for a single key: {} for key {}, {} items",
accumulated_values_estimated_size,
key,
accumulated_values.len()
)));
}
} else {
@@ -3651,6 +3656,7 @@ impl Timeline {
.map_err(CompactionError::Other)?;
accumulated_values.clear();
*last_key = key;
accumulated_values_estimated_size = val.estimated_size();
accumulated_values.push((key, lsn, val));
}
}