mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-05 09:20:38 +00:00
Compare commits
2 Commits
remove_ini
...
jcsp/disk-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed3e3b6f61 | ||
|
|
098ef0956b |
@@ -1479,6 +1479,8 @@ threshold = "20m"
|
|||||||
Some(DiskUsageEvictionTaskConfig {
|
Some(DiskUsageEvictionTaskConfig {
|
||||||
max_usage_pct: Percent::new(80).unwrap(),
|
max_usage_pct: Percent::new(80).unwrap(),
|
||||||
min_avail_bytes: 0,
|
min_avail_bytes: 0,
|
||||||
|
target_avail_bytes: None,
|
||||||
|
target_usage_pct: None,
|
||||||
period: Duration::from_secs(10),
|
period: Duration::from_secs(10),
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
mock_statvfs: None,
|
mock_statvfs: None,
|
||||||
|
|||||||
@@ -67,16 +67,40 @@ use crate::{
|
|||||||
pub struct DiskUsageEvictionTaskConfig {
|
pub struct DiskUsageEvictionTaskConfig {
|
||||||
pub max_usage_pct: Percent,
|
pub max_usage_pct: Percent,
|
||||||
pub min_avail_bytes: u64,
|
pub min_avail_bytes: u64,
|
||||||
|
|
||||||
|
// Control how far we will go when evicting: once usage exceeds max_usage_pct or available space drops below min_avail_bytes,
|
||||||
|
// we will keep evicting layers until we reach the target. The resulting disk usage should look
|
||||||
|
// like a sawtooth bouncing between the upper max/min line and the lower target line.
|
||||||
|
#[serde(default)]
|
||||||
|
pub target_usage_pct: Option<Percent>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub target_avail_bytes: Option<u64>,
|
||||||
|
|
||||||
#[serde(with = "humantime_serde")]
|
#[serde(with = "humantime_serde")]
|
||||||
pub period: Duration,
|
pub period: Duration,
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
pub mock_statvfs: Option<crate::statvfs::mock::Behavior>,
|
pub mock_statvfs: Option<crate::statvfs::mock::Behavior>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Coarse state of the disk usage eviction task.
///
/// The derives make the value cheap to copy out from behind a lock guard,
/// comparable with `==`, and printable in logs.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum Status {
    /// We are within disk limits, and not currently doing any eviction
    #[default]
    Idle,
    /// Disk limits have been exceeded: we will evict soon
    UnderPressure,
    /// We are currently doing an eviction pass.
    Evicting,
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct State {
|
pub struct State {
|
||||||
/// Exclude http requests and background task from running at the same time.
|
/// Exclude http requests and background task from running at the same time.
|
||||||
mutex: tokio::sync::Mutex<()>,
|
mutex: tokio::sync::Mutex<()>,
|
||||||
|
|
||||||
|
/// Publish the current status of eviction work, for visibility to other subsystems
|
||||||
|
/// that modify their behavior if disk pressure is high or if eviction is going on.
|
||||||
|
status: std::sync::RwLock<Status>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn launch_disk_usage_global_eviction_task(
|
pub fn launch_disk_usage_global_eviction_task(
|
||||||
@@ -176,7 +200,9 @@ async fn disk_usage_eviction_task(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub trait Usage: Clone + Copy + std::fmt::Debug {
|
pub trait Usage: Clone + Copy + std::fmt::Debug {
|
||||||
fn has_pressure(&self) -> bool;
|
fn pressure(&self) -> f64;
|
||||||
|
fn over_pressure(&self) -> bool;
|
||||||
|
fn no_pressure(&self) -> bool;
|
||||||
fn add_available_bytes(&mut self, bytes: u64);
|
fn add_available_bytes(&mut self, bytes: u64);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -189,13 +215,19 @@ async fn disk_usage_eviction_task_iteration(
|
|||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
|
let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
|
||||||
.context("get filesystem-level disk usage before evictions")?;
|
.context("get filesystem-level disk usage before evictions")?;
|
||||||
|
|
||||||
|
if usage_pre.over_pressure() {
|
||||||
|
*state.status.write().unwrap() = Status::Evicting;
|
||||||
|
}
|
||||||
|
|
||||||
let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
|
let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
|
||||||
match res {
|
match res {
|
||||||
Ok(outcome) => {
|
Ok(outcome) => {
|
||||||
debug!(?outcome, "disk_usage_eviction_iteration finished");
|
debug!(?outcome, "disk_usage_eviction_iteration finished");
|
||||||
match outcome {
|
let new_status = match outcome {
|
||||||
IterationOutcome::NoPressure | IterationOutcome::Cancelled => {
|
IterationOutcome::NoPressure | IterationOutcome::Cancelled => {
|
||||||
// nothing to do, select statement below will handle things
|
// nothing to do, select statement below will handle things
|
||||||
|
Status::Idle
|
||||||
}
|
}
|
||||||
IterationOutcome::Finished(outcome) => {
|
IterationOutcome::Finished(outcome) => {
|
||||||
// Verify with statvfs whether we made any real progress
|
// Verify with statvfs whether we made any real progress
|
||||||
@@ -205,21 +237,30 @@ async fn disk_usage_eviction_task_iteration(
|
|||||||
|
|
||||||
debug!(?after, "disk usage");
|
debug!(?after, "disk usage");
|
||||||
|
|
||||||
if after.has_pressure() {
|
if after.over_pressure() {
|
||||||
// Don't bother doing an out-of-order iteration here now.
|
// Don't bother doing an out-of-order iteration here now.
|
||||||
// In practice, the task period is set to a value in the tens-of-seconds range,
|
// In practice, the task period is set to a value in the tens-of-seconds range,
|
||||||
// which will cause another iteration to happen soon enough.
|
// which will cause another iteration to happen soon enough.
|
||||||
// TODO: deltas between the three different usages would be helpful,
|
// TODO: deltas between the three different usages would be helpful,
|
||||||
// consider MiB, GiB, TiB
|
// consider MiB, GiB, TiB
|
||||||
warn!(?outcome, ?after, "disk usage still high");
|
warn!(?outcome, ?after, "disk usage still high");
|
||||||
|
Status::UnderPressure
|
||||||
} else {
|
} else {
|
||||||
info!(?outcome, ?after, "disk usage pressure relieved");
|
info!(?outcome, ?after, "disk usage pressure relieved");
|
||||||
|
Status::Idle
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
|
*state.status.write().unwrap() = new_status;
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("disk_usage_eviction_iteration failed: {:#}", e);
|
error!("disk_usage_eviction_iteration failed: {:#}", e);
|
||||||
|
*state.status.write().unwrap() = if usage_pre.over_pressure() {
|
||||||
|
Status::UnderPressure
|
||||||
|
} else {
|
||||||
|
Status::Idle
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -285,8 +326,10 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
|||||||
|
|
||||||
debug!(?usage_pre, "disk usage");
|
debug!(?usage_pre, "disk usage");
|
||||||
|
|
||||||
if !usage_pre.has_pressure() {
|
if !usage_pre.over_pressure() {
|
||||||
return Ok(IterationOutcome::NoPressure);
|
return Ok(IterationOutcome::NoPressure);
|
||||||
|
} else {
|
||||||
|
*state.status.write().unwrap() = Status::Evicting;
|
||||||
}
|
}
|
||||||
|
|
||||||
warn!(
|
warn!(
|
||||||
@@ -334,7 +377,7 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
|
|||||||
let mut warned = None;
|
let mut warned = None;
|
||||||
let mut usage_planned = usage_pre;
|
let mut usage_planned = usage_pre;
|
||||||
for (i, (partition, candidate)) in candidates.into_iter().enumerate() {
|
for (i, (partition, candidate)) in candidates.into_iter().enumerate() {
|
||||||
if !usage_planned.has_pressure() {
|
if usage_planned.no_pressure() {
|
||||||
debug!(
|
debug!(
|
||||||
no_candidates_evicted = i,
|
no_candidates_evicted = i,
|
||||||
"took enough candidates for pressure to be relieved"
|
"took enough candidates for pressure to be relieved"
|
||||||
@@ -644,22 +687,57 @@ mod filesystem_level_usage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl super::Usage for Usage<'_> {
|
impl super::Usage for Usage<'_> {
|
||||||
fn has_pressure(&self) -> bool {
|
/// Has the pressure reached 1.0, i.e. has disk usage reached or exceeded the upper bound?
|
||||||
let usage_pct =
|
///
|
||||||
(100.0 * (1.0 - ((self.avail_bytes as f64) / (self.total_bytes as f64)))) as u64;
|
/// This is the condition for starting eviction.
|
||||||
|
fn over_pressure(&self) -> bool {
|
||||||
|
self.pressure() >= 1.0
|
||||||
|
}
|
||||||
|
|
||||||
let pressures = [
|
/// Is the pressure <= 0, i.e. has disk usage dropped to or below the target bound?
|
||||||
(
|
///
|
||||||
"min_avail_bytes",
|
/// This is the condition for dropping out of eviction.
|
||||||
self.avail_bytes < self.config.min_avail_bytes,
|
fn no_pressure(&self) -> bool {
|
||||||
),
|
self.pressure() <= 0.0
|
||||||
(
|
}
|
||||||
"max_usage_pct",
|
|
||||||
usage_pct >= self.config.max_usage_pct.get() as u64,
|
|
||||||
),
|
|
||||||
];
|
|
||||||
|
|
||||||
pressures.into_iter().any(|(_, has_pressure)| has_pressure)
|
fn pressure(&self) -> f64 {
|
||||||
|
let max_usage = std::cmp::min(
|
||||||
|
self.total_bytes - self.config.min_avail_bytes,
|
||||||
|
(self.total_bytes as f64 * (self.config.max_usage_pct.get() as f64 / 100.0)) as u64,
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut target_usage = max_usage;
|
||||||
|
if let Some(target_avail_bytes) = self.config.target_avail_bytes {
|
||||||
|
target_usage = std::cmp::min(target_usage, self.total_bytes - target_avail_bytes);
|
||||||
|
}
|
||||||
|
if let Some(target_usage_pct) = self.config.target_usage_pct {
|
||||||
|
target_usage = std::cmp::min(
|
||||||
|
target_usage,
|
||||||
|
(self.total_bytes as f64 * (target_usage_pct.get() as f64 / 100.0)) as u64,
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
let usage = self.total_bytes - self.avail_bytes;
|
||||||
|
eprintln!(
|
||||||
|
"pressure: {} {}, current {}",
|
||||||
|
target_usage, max_usage, usage
|
||||||
|
);
|
||||||
|
if target_usage == max_usage {
|
||||||
|
// We are configured with a zero sized range: treat anything at+beyond limit as pressure 1.0, else 0.0
|
||||||
|
if usage >= max_usage {
|
||||||
|
1.0
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
}
|
||||||
|
} else if usage <= target_usage {
|
||||||
|
// No pressure.
|
||||||
|
0.0
|
||||||
|
} else {
|
||||||
|
// We are above target: pressure is the ratio of how much we exceed target to the size of the gap
|
||||||
|
let range_size = (max_usage - target_usage) as f64;
|
||||||
|
(usage - target_usage) as f64 / range_size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_available_bytes(&mut self, bytes: u64) {
|
fn add_available_bytes(&mut self, bytes: u64) {
|
||||||
@@ -713,6 +791,8 @@ mod filesystem_level_usage {
|
|||||||
config: &DiskUsageEvictionTaskConfig {
|
config: &DiskUsageEvictionTaskConfig {
|
||||||
max_usage_pct: Percent::new(85).unwrap(),
|
max_usage_pct: Percent::new(85).unwrap(),
|
||||||
min_avail_bytes: 0,
|
min_avail_bytes: 0,
|
||||||
|
target_avail_bytes: None,
|
||||||
|
target_usage_pct: None,
|
||||||
period: Duration::MAX,
|
period: Duration::MAX,
|
||||||
#[cfg(feature = "testing")]
|
#[cfg(feature = "testing")]
|
||||||
mock_statvfs: None,
|
mock_statvfs: None,
|
||||||
@@ -721,24 +801,24 @@ mod filesystem_level_usage {
|
|||||||
avail_bytes: 0,
|
avail_bytes: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(usage.has_pressure(), "expected pressure at 100%");
|
assert!(usage.over_pressure(), "expected pressure at 100%");
|
||||||
|
|
||||||
usage.add_available_bytes(14_000);
|
usage.add_available_bytes(14_000);
|
||||||
assert!(usage.has_pressure(), "expected pressure at 86%");
|
assert!(usage.over_pressure(), "expected pressure at 86%");
|
||||||
|
|
||||||
usage.add_available_bytes(999);
|
usage.add_available_bytes(999);
|
||||||
assert!(usage.has_pressure(), "expected pressure at 85.001%");
|
assert!(usage.over_pressure(), "expected pressure at 85.001%");
|
||||||
|
|
||||||
usage.add_available_bytes(1);
|
usage.add_available_bytes(1);
|
||||||
assert!(usage.has_pressure(), "expected pressure at precisely 85%");
|
assert!(usage.over_pressure(), "expected pressure at precisely 85%");
|
||||||
|
|
||||||
usage.add_available_bytes(1);
|
usage.add_available_bytes(1);
|
||||||
assert!(!usage.has_pressure(), "no pressure at 84.999%");
|
assert!(!usage.over_pressure(), "no pressure at 84.999%");
|
||||||
|
|
||||||
usage.add_available_bytes(999);
|
usage.add_available_bytes(999);
|
||||||
assert!(!usage.has_pressure(), "no pressure at 84%");
|
assert!(!usage.over_pressure(), "no pressure at 84%");
|
||||||
|
|
||||||
usage.add_available_bytes(16_000);
|
usage.add_available_bytes(16_000);
|
||||||
assert!(!usage.has_pressure());
|
assert!(!usage.over_pressure());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1452,10 +1452,22 @@ async fn disk_usage_eviction_run(
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl crate::disk_usage_eviction_task::Usage for Usage {
|
impl crate::disk_usage_eviction_task::Usage for Usage {
|
||||||
fn has_pressure(&self) -> bool {
|
fn over_pressure(&self) -> bool {
|
||||||
self.config.evict_bytes > self.freed_bytes
|
self.config.evict_bytes > self.freed_bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn no_pressure(&self) -> bool {
|
||||||
|
!self.over_pressure()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pressure(&self) -> f64 {
|
||||||
|
if self.over_pressure() {
|
||||||
|
1.0
|
||||||
|
} else {
|
||||||
|
0.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn add_available_bytes(&mut self, bytes: u64) {
|
fn add_available_bytes(&mut self, bytes: u64) {
|
||||||
self.freed_bytes += bytes;
|
self.freed_bytes += bytes;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user