mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
feat: Add configurable Direct IO alignment support (#12821)
## Problem Neon's storage system currently has hard-coded 512-byte block size for Direct IO operations, which causes I/O errors on systems with disks that have 4096-byte block sizes. This results in errors like "vec read failed" and "Invalid argument (os error 22)" on certain hardware configurations. See issue #12623 for details. ## Summary of changes Make Direct IO alignment configurable at build time to support both 512-byte and 4096-byte block sizes: - Add `io-align-512` and `io-align-4k` cargo features (default: 512-byte for backward compatibility) - Make `DEFAULT_IO_BUFFER_ALIGNMENT` configurable via cargo features in `pageserver_api` - Update `DIO_CHUNK_SIZE` in vectored_dio_read to use the configured alignment value dynamically - Add `IO_ALIGNMENT` build argument to Dockerfile to allow building images with different alignment settings - Add startup logging to display the configured IO buffer alignment for operational visibility - Fix validation logic in `virtual_file.rs` to use the configured alignment instead of hard-coded 512 This change allows Neon to run on systems with different disk block sizes by building with the appropriate feature flag, addressing the compatibility issues described in the RFC on Direct IO implementation ## Performance Note Benchmarks show 512-byte alignment performs significantly better than 4k: - Write: 512-byte is 21-71% faster across percentiles (p99: 71% faster) - Read: 512-byte is slightly faster (5-21% improvement) This is why 512-byte remains the default. However, some storage systems require 4k alignment and will fail with EINVAL otherwise. This change adds build-time configuration to support both environments.
This commit is contained in:
@@ -353,6 +353,10 @@ fn start_pageserver(
|
||||
launch_ts.to_string(),
|
||||
BUILD_TAG,
|
||||
);
|
||||
info!(
|
||||
"IO buffer alignment: {} bytes",
|
||||
pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT
|
||||
);
|
||||
set_build_info_metric(GIT_VERSION, BUILD_TAG);
|
||||
set_launch_timestamp_metric(launch_ts);
|
||||
#[cfg(target_os = "linux")]
|
||||
|
||||
@@ -73,7 +73,7 @@ pub trait Buffer: std::ops::Deref<Target = [u8]> {
|
||||
}
|
||||
|
||||
/// The minimum alignment and size requirement for disk offsets and memory buffer size for direct IO.
|
||||
const DIO_CHUNK_SIZE: usize = 512;
|
||||
const DIO_CHUNK_SIZE: usize = crate::virtual_file::get_io_buffer_alignment();
|
||||
|
||||
/// If multiple chunks need to be read, merge adjacent chunk reads into batches of max size `MAX_CHUNK_BATCH_SIZE`.
|
||||
/// (The unit is the number of chunks.)
|
||||
|
||||
@@ -852,7 +852,7 @@ impl VirtualFileInner {
|
||||
// Because the alloctor might return _more_ aligned addresses than requested,
|
||||
// there is a chance that testing would not catch violations of a runtime requirement stricter than 512.
|
||||
{
|
||||
let requirement = 512;
|
||||
let requirement = get_io_buffer_alignment();
|
||||
let remainder = addr % requirement;
|
||||
assert!(
|
||||
remainder == 0,
|
||||
@@ -866,7 +866,7 @@ impl VirtualFileInner {
|
||||
// So enforce just that and not anything more restrictive.
|
||||
// Even the shallowest testing will expose more restrictive requirements if those ever arise.
|
||||
{
|
||||
let requirement = 512;
|
||||
let requirement = get_io_buffer_alignment() as u64;
|
||||
let remainder = offset % requirement;
|
||||
assert!(
|
||||
remainder == 0,
|
||||
@@ -879,7 +879,7 @@ impl VirtualFileInner {
|
||||
// The requirement in Linux 6.1 is bdev_logical_block_size().
|
||||
// On our production systems, that is 512.
|
||||
{
|
||||
let requirement = 512;
|
||||
let requirement = get_io_buffer_alignment();
|
||||
let remainder = size % requirement;
|
||||
assert!(
|
||||
remainder == 0,
|
||||
|
||||
Reference in New Issue
Block a user