feat(pageserver): do space check before gc-compaction (#9250)

part of https://github.com/neondatabase/neon/issues/9114

## Summary of changes

gc-compaction may take a lot of disk space, and if it does, the caller
should do a partial gc-compaction. This patch adds space check for the
compaction job.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2024-10-17 10:29:53 -04:00
committed by GitHub
parent a7c05686cc
commit f3a3eefd26
4 changed files with 63 additions and 10 deletions

View File

@@ -1218,16 +1218,7 @@ mod filesystem_level_usage {
let stat = Statvfs::get(tenants_dir, mock_config)
.context("statvfs failed, presumably directory got unlinked")?;
// https://unix.stackexchange.com/a/703650
let blocksize = if stat.fragment_size() > 0 {
stat.fragment_size()
} else {
stat.block_size()
};
// use blocks_available (b_avail) since, pageserver runs as unprivileged user
let avail_bytes = stat.blocks_available() * blocksize;
let total_bytes = stat.blocks() * blocksize;
let (avail_bytes, total_bytes) = stat.get_avail_total_bytes();
Ok(Usage {
config,

View File

@@ -53,6 +53,22 @@ impl Statvfs {
Statvfs::Mock(stat) => stat.block_size,
}
}
/// Get the available and total bytes on the filesystem.
pub fn get_avail_total_bytes(&self) -> (u64, u64) {
// https://unix.stackexchange.com/a/703650
let blocksize = if self.fragment_size() > 0 {
self.fragment_size()
} else {
self.block_size()
};
// use blocks_available (b_avail) since, pageserver runs as unprivileged user
let avail_bytes = self.blocks_available() * blocksize;
let total_bytes = self.blocks() * blocksize;
(avail_bytes, total_bytes)
}
}
pub mod mock {

View File

@@ -341,6 +341,10 @@ impl Layer {
Ok(())
}
pub(crate) async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> {
self.0.needs_download().await
}
/// Assuming the layer is already downloaded, returns a guard which will prohibit eviction
/// while the guard exists.
///

View File

@@ -29,6 +29,7 @@ use utils::id::TimelineId;
use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
use crate::page_cache;
use crate::statvfs::Statvfs;
use crate::tenant::checks::check_valid_layermap;
use crate::tenant::remote_timeline_client::WaitCompletionError;
use crate::tenant::storage_layer::filter_iterator::FilterIterator;
@@ -1691,6 +1692,45 @@ impl Timeline {
unreachable!("key retention is empty")
}
/// Check how much space is left on the disk
async fn check_available_space(self: &Arc<Self>) -> anyhow::Result<u64> {
let tenants_dir = self.conf.tenants_path();
let stat = Statvfs::get(&tenants_dir, None)
.context("statvfs failed, presumably directory got unlinked")?;
let (avail_bytes, _) = stat.get_avail_total_bytes();
Ok(avail_bytes)
}
/// Check if the compaction can proceed safely without running out of space. We assume the size
/// upper bound of the produced files of a compaction job is the same as all layers involved in
/// the compaction. Therefore, we need `2 * layers_to_be_compacted_size` at least to do a
/// compaction.
async fn check_compaction_space(
self: &Arc<Self>,
layer_selection: &[Layer],
) -> anyhow::Result<()> {
let available_space = self.check_available_space().await?;
let mut remote_layer_size = 0;
let mut all_layer_size = 0;
for layer in layer_selection {
let needs_download = layer.needs_download().await?;
if needs_download.is_some() {
remote_layer_size += layer.layer_desc().file_size;
}
all_layer_size += layer.layer_desc().file_size;
}
let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */
if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space
{
return Err(anyhow!("not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}",
available_space, allocated_space, all_layer_size, remote_layer_size, all_layer_size + remote_layer_size));
}
Ok(())
}
/// An experimental compaction building block that combines compaction with garbage collection.
///
/// The current implementation picks all delta + image layers that are below or intersecting with
@@ -1806,6 +1846,8 @@ impl Timeline {
lowest_retain_lsn
);
self.check_compaction_space(&layer_selection).await?;
// Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs.
// Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)