pageserver: batching & concurrent IO: update binary-built-in defaults; reduce CI matrix (#11923)

Use the current production config for batching & concurrent IO.

Remove the permutation testing for unit tests from CI.
(The pageserver unit test matrix takes ~10min for debug builds).

Drive-by-fix use of `if cfg!(test)` inside crate `pageserver_api`.
It is ineffective for early-enabling new defaults for pageserver unit
tests only.
The reason is that the `test` cfg is only set for the crate under test
but not its dependencies.
So, `cargo test -p pageserver` will build `pageserver_api` with
`cfg!(test) == false`.
Resort to checking for feature flag `testing` instead, since all our
unit tests are run with `--feature testing`.

refs
- `scattered-lsn` batching has been implemented and rolled out in all
envs, cf https://github.com/neondatabase/neon/issues/10765
- preliminary for https://github.com/neondatabase/neon/pull/10466
- epic https://github.com/neondatabase/neon/issues/9377
- epic https://github.com/neondatabase/neon/issues/9378
- drive-by fix
https://neondb.slack.com/archives/C0277TKAJCA/p1746821515504219
This commit is contained in:
Christian Schwarz
2025-05-14 18:30:21 +02:00
committed by GitHub
parent 68120cfa31
commit 32a12783fd
5 changed files with 17 additions and 42 deletions

View File

@@ -279,18 +279,14 @@ jobs:
# run all non-pageserver tests
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
# run pageserver tests with different settings
for get_vectored_concurrent_io in sequential sidecar-task; do
for io_engine in std-fs tokio-epoll-uring ; do
for io_mode in buffered direct direct-rw ; do
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=$io_mode \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
done
done
# run pageserver tests
# (When developing new pageserver features gated by config fields, we commonly make the rust
# unit tests sensitive to an environment variable NEON_PAGESERVER_UNIT_TEST_FEATURENAME.
# Then run the nextest invocation below for all relevant combinations. Singling out the
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
@@ -405,8 +401,6 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
# Temporary disable this step until we figure out why it's so flaky

View File

@@ -323,8 +323,6 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
SYNC_BETWEEN_TESTS: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

View File

@@ -639,23 +639,15 @@ impl Default for ConfigToml {
tenant_config: TenantConfigToml::default(),
no_sync: None,
wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
page_service_pipelining: if !cfg!(test) {
PageServicePipeliningConfig::Serial
} else {
// Do not turn this into the default until scattered reads have been
// validated and rolled-out fully.
PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
page_service_pipelining: PageServicePipeliningConfig::Pipelined(
PageServicePipeliningConfigPipelined {
max_batch_size: NonZeroUsize::new(32).unwrap(),
execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
batching: PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn,
})
},
get_vectored_concurrent_io: if !cfg!(test) {
GetVectoredConcurrentIo::Sequential
} else {
GetVectoredConcurrentIo::SidecarTask
},
enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") {
},
),
get_vectored_concurrent_io: GetVectoredConcurrentIo::SidecarTask,
enable_read_path_debugging: if cfg!(feature = "testing") {
Some(true)
} else {
None

View File

@@ -1803,7 +1803,6 @@ pub struct TopTenantShardsResponse {
}
pub mod virtual_file {
use std::sync::LazyLock;
#[derive(
Copy,
@@ -1851,15 +1850,7 @@ pub mod virtual_file {
impl IoMode {
pub fn preferred() -> Self {
// The default behavior when running Rust unit tests without any further
// flags is to use the newest behavior (DirectRw).
// The CI uses the environment variable to unit tests for all different modes.
// NB: the Python regression & perf tests have their own defaults management
// that writes pageserver.toml; they do not use this variable.
static ENV_OVERRIDE: LazyLock<Option<IoMode>> = LazyLock::new(|| {
utils::env::var_serde_json_string("NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE")
});
ENV_OVERRIDE.unwrap_or(IoMode::DirectRw)
IoMode::DirectRw
}
}

View File

@@ -127,12 +127,12 @@ macro_rules! __check_fields_present {
match check_fields_present0($extractors) {
Ok(FoundEverything) => Ok(()),
Ok(Unconfigured) if cfg!(test) => {
Ok(Unconfigured) if cfg!(feature = "testing") => {
// allow unconfigured in tests
Ok(())
},
Ok(Unconfigured) => {
panic!("utils::tracing_span_assert: outside of #[cfg(test)] expected tracing to be configured with tracing_error::ErrorLayer")
panic!(r#"utils::tracing_span_assert: outside of #[cfg(feature = "testing")] expected tracing to be configured with tracing_error::ErrorLayer"#)
},
Err(missing) => Err(missing)
}