diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 994075bef6..9e4530ba3c 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -96,7 +96,7 @@ pub mod defaults { pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0; - pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 0; + pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512; /// /// Default built-in configuration file. diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 00ef5b0afd..885eb13b29 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -2283,7 +2283,7 @@ pub(crate) mod test { .await .unwrap(); let delta_layer = resident_layer.get_as_delta(&ctx).await.unwrap(); - for max_read_size in [1, 2048] { + for max_read_size in [1, 1024] { for batch_size in [1, 2, 4, 8, 3, 7, 13] { println!("running with batch_size={batch_size} max_read_size={max_read_size}"); // Test if the batch size is correctly determined @@ -2297,7 +2297,7 @@ pub(crate) mod test { // every key should be a batch b/c the value is larger than max_read_size assert_eq!(iter.key_values_batch.len(), 1); } else { - assert_eq!(iter.key_values_batch.len(), batch_size); + assert!(iter.key_values_batch.len() <= batch_size); } if num_items >= N { break; diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 38411e9d9e..4c22541e02 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -1367,7 +1367,7 @@ mod test { .await .unwrap(); let img_layer = resident_layer.get_as_image(&ctx).await.unwrap(); - for max_read_size in [1, 2048] { + for max_read_size in [1, 1024] { for batch_size in [1, 2, 4, 8, 3, 7, 13] { println!("running with batch_size={batch_size} max_read_size={max_read_size}"); // Test if the batch size is correctly determined @@ -1381,7 +1381,7 @@ mod test { // every key should be a batch b/c the value is larger than max_read_size assert_eq!(iter.key_values_batch.len(), 1); } else { - assert_eq!(iter.key_values_batch.len(), batch_size); + assert!(iter.key_values_batch.len() <= batch_size); } if num_items >= N { break; diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs index 80bc56092d..146bcf0e35 100644 --- a/pageserver/src/tenant/vectored_blob_io.rs +++ b/pageserver/src/tenant/vectored_blob_io.rs @@ -25,7 +25,6 @@ use tokio_epoll_uring::BoundedBuf; use utils::lsn::Lsn; use utils::vec_map::VecMap; -use crate::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT; use crate::context::RequestContext; use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK}; use crate::virtual_file::{self, VirtualFile}; @@ -91,7 +90,7 @@ impl VectoredReadCoalesceMode { /// whereas [`ChunkedVectoredReadBuilder`] is used for alignment requirement 1 and higher. pub(crate) fn get() -> Self { let align = virtual_file::get_io_buffer_alignment_raw(); - if align == DEFAULT_IO_BUFFER_ALIGNMENT { + if align == 0 { VectoredReadCoalesceMode::AdjacentOnly } else { VectoredReadCoalesceMode::Chunked(align) @@ -735,27 +734,32 @@ mod tests { fn planner_chunked_coalesce_all_test() { use crate::virtual_file; - const CHUNK_SIZE: u64 = 512; - virtual_file::set_io_buffer_alignment(CHUNK_SIZE as usize).unwrap(); - let max_read_size = CHUNK_SIZE as usize * 8; + let chunk_size = virtual_file::get_io_buffer_alignment() as u64; + + // The test explicitly does not check chunk size < 512 + if chunk_size < 512 { + return; + } + + let max_read_size = chunk_size as usize * 8; let key = Key::MIN; let lsn = Lsn(0); let blob_descriptions = [ - (key, lsn, CHUNK_SIZE / 8, BlobFlag::None), // Read 1 BEGIN - (key, lsn, CHUNK_SIZE / 4, BlobFlag::Ignore), // Gap - (key, lsn, CHUNK_SIZE / 2, BlobFlag::None), - (key, lsn, CHUNK_SIZE - 2, BlobFlag::Ignore), // Gap - (key, lsn, CHUNK_SIZE, BlobFlag::None), - (key, lsn, CHUNK_SIZE * 2 - 1, BlobFlag::None), - (key, lsn, CHUNK_SIZE * 2 + 1, BlobFlag::Ignore), // Gap - (key, lsn, CHUNK_SIZE * 3 + 1, BlobFlag::None), - (key, lsn, CHUNK_SIZE * 5 + 1, BlobFlag::None), - (key, lsn, CHUNK_SIZE * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce. - (key, lsn, CHUNK_SIZE * 7 + 1, BlobFlag::None), - (key, lsn, CHUNK_SIZE * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size) - (key, lsn, CHUNK_SIZE * 9, BlobFlag::Ignore), // ==== skipped a chunk - (key, lsn, CHUNK_SIZE * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce) + (key, lsn, chunk_size / 8, BlobFlag::None), // Read 1 BEGIN + (key, lsn, chunk_size / 4, BlobFlag::Ignore), // Gap + (key, lsn, chunk_size / 2, BlobFlag::None), + (key, lsn, chunk_size - 2, BlobFlag::Ignore), // Gap + (key, lsn, chunk_size, BlobFlag::None), + (key, lsn, chunk_size * 2 - 1, BlobFlag::None), + (key, lsn, chunk_size * 2 + 1, BlobFlag::Ignore), // Gap + (key, lsn, chunk_size * 3 + 1, BlobFlag::None), + (key, lsn, chunk_size * 5 + 1, BlobFlag::None), + (key, lsn, chunk_size * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce. + (key, lsn, chunk_size * 7 + 1, BlobFlag::None), + (key, lsn, chunk_size * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size) + (key, lsn, chunk_size * 9, BlobFlag::Ignore), // ==== skipped a chunk + (key, lsn, chunk_size * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce) ]; let ranges = [ @@ -834,18 +838,19 @@ mod tests { #[test] fn planner_replacement_test() { - let max_read_size = 128 * 1024; + let chunk_size = virtual_file::get_io_buffer_alignment() as u64; + let max_read_size = 128 * chunk_size as usize; let first_key = Key::MIN; let second_key = first_key.next(); let lsn = Lsn(0); let blob_descriptions = vec![ - (first_key, lsn, 0, BlobFlag::None), // First in read 1 - (first_key, lsn, 1024, BlobFlag::None), // Last in read 1 - (second_key, lsn, 2 * 1024, BlobFlag::ReplaceAll), - (second_key, lsn, 3 * 1024, BlobFlag::None), - (second_key, lsn, 4 * 1024, BlobFlag::ReplaceAll), // First in read 2 - (second_key, lsn, 5 * 1024, BlobFlag::None), // Last in read 2 + (first_key, lsn, 0, BlobFlag::None), // First in read 1 + (first_key, lsn, chunk_size, BlobFlag::None), // Last in read 1 + (second_key, lsn, 2 * chunk_size, BlobFlag::ReplaceAll), + (second_key, lsn, 3 * chunk_size, BlobFlag::None), + (second_key, lsn, 4 * chunk_size, BlobFlag::ReplaceAll), // First in read 2 + (second_key, lsn, 5 * chunk_size, BlobFlag::None), // Last in read 2 ]; let ranges = [&blob_descriptions[0..2], &blob_descriptions[4..]]; @@ -855,7 +860,7 @@ mod tests { planner.handle(key, lsn, offset, flag); } - planner.handle_range_end(6 * 1024); + planner.handle_range_end(6 * chunk_size); let reads = planner.finish(); assert_eq!(reads.len(), 2); diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 4b11dc1a94..97d966e2da 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -1196,15 +1196,11 @@ pub(crate) fn get_io_buffer_alignment_raw() -> usize { if cfg!(test) { let env_var_name = "NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT"; - if align == DEFAULT_IO_BUFFER_ALIGNMENT { - if let Some(test_align) = utils::env::var(env_var_name) { - if is_zero_or_power_of_two(test_align) { - test_align - } else { - panic!("IO buffer alignment ({test_align}) is not a power of two"); - } + if let Some(test_align) = utils::env::var(env_var_name) { + if is_zero_or_power_of_two(test_align) { + test_align } else { - crate::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT + panic!("IO buffer alignment ({test_align}) is not a power of two"); } } else { align @@ -1219,11 +1215,7 @@ pub(crate) fn get_io_buffer_alignment_raw() -> usize { /// This function should be used for getting the actual alignment value to use. pub(crate) fn get_io_buffer_alignment() -> usize { let align = get_io_buffer_alignment_raw(); - if align == DEFAULT_IO_BUFFER_ALIGNMENT { - 1 - } else { - align - } + align.max(1) } #[cfg(test)]