pageserver: enable virtual_file_io_mode=direct in CI by default

Signed-off-by: Yuchen Liang <yuchen@neon.tech>
This commit is contained in:
Yuchen Liang
2024-11-11 16:24:14 +00:00
parent 54a1676680
commit 17bf707bdf
14 changed files with 103 additions and 46 deletions

View File

@@ -222,7 +222,7 @@ jobs:
# run pageserver tests with different settings
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE=direct ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
# Run separate tests for real S3
@@ -300,6 +300,7 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540

View File

@@ -334,6 +334,7 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct
SYNC_BETWEEN_TESTS: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

View File

@@ -106,7 +106,7 @@ pub struct ConfigToml {
pub timeline_offloading: bool,
pub ephemeral_bytes_per_memory_kb: usize,
pub l0_flush: Option<crate::models::L0FlushConfig>,
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoModeKind>,
#[serde(skip_serializing_if = "Option::is_none")]
pub no_sync: Option<bool>,
}

View File

@@ -974,35 +974,16 @@ pub mod virtual_file {
serde_with::DeserializeFromStr,
serde_with::SerializeDisplay,
Debug,
Default,
)]
#[strum(serialize_all = "kebab-case")]
#[repr(u8)]
pub enum IoMode {
pub enum IoModeKind {
/// Uses buffered IO.
#[default]
Buffered,
/// Uses direct IO, error out if the operation fails.
#[cfg(target_os = "linux")]
Direct,
}
impl IoMode {
pub const fn preferred() -> Self {
Self::Buffered
}
}
impl TryFrom<u8> for IoMode {
type Error = u8;
fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
#[cfg(target_os = "linux")]
v if v == (IoMode::Direct as u8) => IoMode::Direct,
x => return Err(x),
})
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -543,7 +543,7 @@ impl Client {
/// Configs io mode at runtime.
pub async fn put_io_mode(
&self,
mode: &pageserver_api::models::virtual_file::IoMode,
mode: &pageserver_api::models::virtual_file::IoModeKind,
) -> Result<()> {
let uri = format!("{}/v1/io_mode", self.mgmt_api_endpoint);
self.request(Method::PUT, uri, mode)

View File

@@ -7,7 +7,7 @@ use camino::{Utf8Path, Utf8PathBuf};
use pageserver::context::{DownloadBehavior, RequestContext};
use pageserver::task_mgr::TaskKind;
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use pageserver::virtual_file::api::IoModeKind;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::ops::Range;
@@ -137,7 +137,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::default(),
);
pageserver::page_cache::init(100);

View File

@@ -8,7 +8,7 @@ use pageserver::task_mgr::TaskKind;
use pageserver::tenant::storage_layer::{delta_layer, image_layer};
use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use pageserver::virtual_file::api::IoModeKind;
use pageserver::{page_cache, virtual_file};
use std::fs::{self, File};
use utils::id::{TenantId, TimelineId};
@@ -50,7 +50,7 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::default(),
);
page_cache::init(100);
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
@@ -64,7 +64,7 @@ async fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::default(),
);
page_cache::init(100);
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
@@ -170,7 +170,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::default(),
);
pageserver::page_cache::init(100);

View File

@@ -24,7 +24,7 @@ use pageserver::{
page_cache,
task_mgr::TaskKind,
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
virtual_file::{self, api::IoMode},
virtual_file::{self, api::IoModeKind},
};
use pageserver_api::shard::TenantShardId;
use postgres_ffi::ControlFileData;
@@ -208,7 +208,7 @@ async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
IoModeKind::default(),
);
page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

View File

@@ -61,7 +61,7 @@ pub(crate) struct Args {
/// Before starting the benchmark, live-reconfigure the pageserver to use specified io mode (buffered vs. direct).
#[clap(long)]
set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,
set_io_mode: Option<pageserver_api::models::virtual_file::IoModeKind>,
targets: Option<Vec<TenantTimelineId>>,
}

View File

@@ -178,7 +178,7 @@ pub struct PageServerConf {
pub l0_flush: crate::l0_flush::L0FlushConfig,
/// Direct IO settings
pub virtual_file_io_mode: virtual_file::IoMode,
pub virtual_file_io_mode: virtual_file::IoModeKind,
/// Optionally disable disk syncs (unsafe!)
pub no_sync: bool,
@@ -415,7 +415,8 @@ impl PageServerConf {
l0_flush: l0_flush
.map(crate::l0_flush::L0FlushConfig::from)
.unwrap_or_default(),
virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
virtual_file_io_mode: virtual_file_io_mode
.unwrap_or(virtual_file::IoModeKind::default()),
no_sync: no_sync.unwrap_or(false),
};

View File

@@ -17,7 +17,7 @@ use hyper::header;
use hyper::StatusCode;
use hyper::{Body, Request, Response, Uri};
use metrics::launch_timestamp::LaunchTimestamp;
use pageserver_api::models::virtual_file::IoMode;
use pageserver_api::models::virtual_file::IoModeKind;
use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
use pageserver_api::models::IngestAuxFilesRequest;
use pageserver_api::models::ListAuxFilesRequest;
@@ -2617,7 +2617,7 @@ async fn put_io_mode_handler(
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
check_permission(&r, None)?;
let mode: IoMode = json_request(&mut r).await?;
let mode: IoModeKind = json_request(&mut r).await?;
crate::virtual_file::set_io_mode(mode);
json_response(StatusCode::OK, ())
}

View File

@@ -43,7 +43,7 @@ pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult;
mod metadata;
mod open_options;
use self::owned_buffers_io::write::OwnedAsyncWriter;
pub(crate) use api::IoMode;
pub(crate) use api::IoModeKind;
pub(crate) use io_engine::IoEngineKind;
pub(crate) use metadata::Metadata;
pub(crate) use open_options::*;
@@ -138,6 +138,7 @@ impl VirtualFile {
ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
) -> Result<Self, std::io::Error> {
let file = match get_io_mode() {
IoMode::NotSet => panic!("not initialized"),
IoMode::Buffered => {
let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
VirtualFile {
@@ -1332,7 +1333,7 @@ impl OpenFiles {
/// server startup.
///
#[cfg(not(test))]
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode) {
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoModeKind) {
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
panic!("virtual_file::init called twice");
}
@@ -1370,14 +1371,69 @@ pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment()
pub(crate) type IoPageSlice<'a> =
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
pub(crate) enum IoMode {
NotSet,
Buffered,
Direct,
}
pub(crate) fn set_io_mode(mode: IoMode) {
impl TryFrom<u8> for IoMode {
type Error = u8;
fn try_from(value: u8) -> Result<Self, Self::Error> {
Ok(match value {
v if v == (IoMode::NotSet as u8) => IoMode::NotSet,
v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
v if v == (IoMode::Direct as u8) => IoMode::Direct,
x => return Err(x),
})
}
}
impl From<IoModeKind> for IoMode {
fn from(value: IoModeKind) -> Self {
match value {
IoModeKind::Buffered => IoMode::Buffered,
IoModeKind::Direct => IoMode::Direct,
}
}
}
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::NotSet as u8);
pub(crate) fn set_io_mode(mode_kind: IoModeKind) {
let mode: IoMode = mode_kind.into();
IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
}
pub(crate) fn get_io_mode() -> IoMode {
IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()
fn get_io_mode() -> IoMode {
let mode = IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap();
if cfg!(test) {
match mode {
IoMode::NotSet => {
let env_var_name = "NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOMODE";
let kind = match std::env::var(env_var_name) {
Ok(v) => match v.parse::<IoModeKind>() {
Ok(mode_kind) => mode_kind,
Err(e) => {
panic!("invalid VirtualFile io mode for env var {env_var_name}: {e:#}: {v:?}")
}
},
Err(std::env::VarError::NotPresent) => IoModeKind::Direct,
Err(std::env::VarError::NotUnicode(_)) => {
panic!("env var {env_var_name} is not unicode");
}
};
set_io_mode(kind);
get_io_mode()
}
x => x,
}
} else {
mode
}
}
#[cfg(test)]
mod tests {

View File

@@ -64,6 +64,18 @@ def main(args: argparse.Namespace):
else:
pageserver_virtual_file_io_engine_parameter = ""
# If a test run has non-default PAGESERVER_VIRTUAL_FILE_IO_MODE (i.e. not empty, not direct),
# use it to parametrize test name along with build_type and pg_version
#
# See test_runner/fixtures/parametrize.py for details
if (io_mode := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_MODE", "")) not in (
"",
"direct",
):
pageserver_virtual_file_io_mode_parameter = f"-{io_mode}"
else:
pageserver_virtual_file_io_mode_parameter = ""
# re-use existing records of flaky tests from before parametrization by compaction_algorithm
def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict[str, Any]]:
"""Duplicated from parametrize.py"""
@@ -90,10 +102,10 @@ def main(args: argparse.Namespace):
if row["name"].endswith("]"):
parametrized_test = row["name"].replace(
"[",
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
f"[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_virtual_file_io_mode_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}-",
)
else:
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"
parametrized_test = f"{row['name']}[{build_type}-pg{pg_version}{pageserver_virtual_file_io_engine_parameter}{pageserver_virtual_file_io_mode_parameter}{pageserver_default_tenant_config_compaction_algorithm_parameter}]"
res[row["parent_suite"]][row["suite"]][parametrized_test] = True

View File

@@ -81,6 +81,11 @@ def pytest_generate_tests(metafunc: Metafunc):
):
metafunc.parametrize("pageserver_virtual_file_io_engine", [io_engine])
# A hacky way to parametrize tests only for `pageserver_virtual_file_io_mode=buffered`
# And do not change test name for default `pageserver_virtual_file_io_mode=direct` to keep tests statistics
if (io_mode := os.getenv("PAGESERVER_VIRTUAL_FILE_IO_MODE", "")) not in ("", "direct"):
metafunc.parametrize("pageserver_virtual_file_io_mode", [io_mode])
# Same hack for pageserver_default_tenant_config_compaction_algorithm
if (
explicit_default := get_pageserver_default_tenant_config_compaction_algorithm()