From b3541d10e1ab86b27ca7bcd8cffce776fa48b20a Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Thu, 23 Oct 2025 12:57:52 -0700 Subject: [PATCH] chore: Use smaller merge buffers. (#74) ## What Reduce the per-segment buffer sizes from 4MB to 512KB. ## Why #71 moved from buffers that covered the entire file to buffers capped at 4MB. But for merges with very large segment counts, we need more conservative buffer sizes. 512KB will still eliminate most posting list reads, since only posting lists larger than 512KB will skip the buffer. --- common/src/buffered_file_slice.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/src/buffered_file_slice.rs b/common/src/buffered_file_slice.rs index fe96fe22d..ce863a26c 100644 --- a/common/src/buffered_file_slice.rs +++ b/common/src/buffered_file_slice.rs @@ -6,7 +6,7 @@ use std::ops::Range; use super::file_slice::FileSlice; use super::{HasLen, OwnedBytes}; -const DEFAULT_BUFFER_MAX_SIZE: usize = 4 * 1024 * 1024; // 4 MB +const DEFAULT_BUFFER_MAX_SIZE: usize = 512 * 1024; // 512K /// A buffered reader for a FileSlice. /// @@ -14,8 +14,8 @@ const DEFAULT_BUFFER_MAX_SIZE: usize = 4 * 1024 * 1024; // 4 MB /// the cost of `read_bytes` calls, while keeping peak memory usage under control. /// /// TODO: Rather than wrapping a `FileSlice` in buffering, it will usually be better to adjust a -/// `FileHandle` to directly handle buffering itself (as that allows separate `FileSlice`s read -/// from the same `FileHandle` to share buffers.) +/// `FileHandle` to directly handle buffering itself. +/// TODO: See: https://github.com/paradedb/paradedb/issues/3374 pub struct BufferedFileSlice { file_slice: FileSlice, buffer: RefCell,