mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-19 06:00:38 +00:00
## Problem

We lack a Rust bench for the in-memory layer and delta layer write paths: it is useful to benchmark these components independent of Postgres & WAL decoding.

Related: https://github.com/neondatabase/neon/issues/8452

## Summary of changes

- Refactor DeltaLayerWriter to avoid carrying a Timeline, so that it can be cleanly tested + benched without a Tenant/Timeline test harness. It only needed the Timeline for building `Layer`, so this can be done in a separate step.
- Add `bench_ingest`, which exercises a variety of workload "shapes" (big values, small values, sequential keys, random keys)
- Include a small uncontroversial optimization: in `freeze`, only exhaustively walk values to assert ordering relative to end_lsn in debug mode.

These benches are limited by drive performance on a lot of machines, but still useful as a local tool for iterating on CPU/memory improvements around this code path.

Anecdotal measurements on Hetzner AX102 (Ryzen 7950xd):

```
ingest-small-values/ingest 128MB/100b seq
    time: [1.1160 s 1.1230 s 1.1289 s]
    thrpt: [113.38 MiB/s 113.98 MiB/s 114.70 MiB/s]
Found 1 outliers among 10 measurements (10.00%)
    1 (10.00%) low mild
Benchmarking ingest-small-values/ingest 128MB/100b rand: Warming up for 3.0000 s
Warning: Unable to complete 10 samples in 10.0s. You may wish to increase target time to 18.9s.
ingest-small-values/ingest 128MB/100b rand
    time: [1.9001 s 1.9056 s 1.9110 s]
    thrpt: [66.982 MiB/s 67.171 MiB/s 67.365 MiB/s]
Benchmarking ingest-small-values/ingest 128MB/100b rand-1024keys: Warming up for 3.0000 s
Warning: Unable to complete 10 samples in 10.0s. You may wish to increase target time to 11.0s.
ingest-small-values/ingest 128MB/100b rand-1024keys
    time: [1.0715 s 1.0828 s 1.0937 s]
    thrpt: [117.04 MiB/s 118.21 MiB/s 119.46 MiB/s]
ingest-small-values/ingest 128MB/100b seq, no delta
    time: [425.49 ms 429.07 ms 432.04 ms]
    thrpt: [296.27 MiB/s 298.32 MiB/s 300.83 MiB/s]
Found 1 outliers among 10 measurements (10.00%)
    1 (10.00%) low mild
ingest-big-values/ingest 128MB/8k seq
    time: [373.03 ms 375.84 ms 379.17 ms]
    thrpt: [337.58 MiB/s 340.57 MiB/s 343.13 MiB/s]
Found 1 outliers among 10 measurements (10.00%)
    1 (10.00%) high mild
ingest-big-values/ingest 128MB/8k seq, no delta
    time: [81.534 ms 82.811 ms 83.364 ms]
    thrpt: [1.4994 GiB/s 1.5095 GiB/s 1.5331 GiB/s]
Found 1 outliers among 10 measurements (10.00%)
```
57 lines
1.6 KiB
Rust
57 lines
1.6 KiB
Rust
use std::{num::NonZeroUsize, sync::Arc};
|
|
|
|
use crate::tenant::ephemeral_file;
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize)]
|
|
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
|
|
pub enum L0FlushConfig {
|
|
PageCached,
|
|
#[serde(rename_all = "snake_case")]
|
|
Direct {
|
|
max_concurrency: NonZeroUsize,
|
|
},
|
|
}
|
|
|
|
impl Default for L0FlushConfig {
|
|
fn default() -> Self {
|
|
Self::Direct {
|
|
// TODO: using num_cpus results in different peak memory usage on different instance types.
|
|
max_concurrency: NonZeroUsize::new(usize::max(1, num_cpus::get())).unwrap(),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct L0FlushGlobalState(Arc<Inner>);
|
|
|
|
pub enum Inner {
|
|
PageCached,
|
|
Direct { semaphore: tokio::sync::Semaphore },
|
|
}
|
|
|
|
impl L0FlushGlobalState {
|
|
pub fn new(config: L0FlushConfig) -> Self {
|
|
match config {
|
|
L0FlushConfig::PageCached => Self(Arc::new(Inner::PageCached)),
|
|
L0FlushConfig::Direct { max_concurrency } => {
|
|
let semaphore = tokio::sync::Semaphore::new(max_concurrency.get());
|
|
Self(Arc::new(Inner::Direct { semaphore }))
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn inner(&self) -> &Arc<Inner> {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl L0FlushConfig {
|
|
pub(crate) fn prewarm_on_write(&self) -> ephemeral_file::PrewarmPageCacheOnWrite {
|
|
use L0FlushConfig::*;
|
|
match self {
|
|
PageCached => ephemeral_file::PrewarmPageCacheOnWrite::Yes,
|
|
Direct { .. } => ephemeral_file::PrewarmPageCacheOnWrite::No,
|
|
}
|
|
}
|
|
}
|