From dbe5b52494279e2f5e4885bef27474dbef884d44 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 10 Nov 2022 14:05:13 +0200 Subject: [PATCH] Avoid some vector-growing overhead. I saw this in a 'perf' profile of a sequential scan: > - 31.93% 0.21% compute request pageserver [.] ::request_redo > - 31.72% ::request_redo > - 31.26% pageserver::walredo::PostgresRedoManager::apply_batch_postgres > + 7.64% ::write > + 6.17% nix::poll::poll > + 3.58% ::read > + 2.96% std::sync::condvar::Condvar::notify_one > + 2.48% std::sys::unix::locks::futex::Condvar::wait > + 2.19% alloc::raw_vec::RawVec::reserve::do_reserve_and_handle > + 1.14% std::sys::unix::locks::futex::Mutex::lock_contended > 0.67% __rust_alloc_zeroed > 0.62% __stpcpy_ssse3 > 0.56% std::sys::unix::locks::futex::Mutex::wake Note the 'do_reserve_and_handle' overhead. That's caused by having to grow the buffer used to construct the WAL redo request. This commit eliminates that overhead. It's only about 2% of the overall CPU usage, but every little helps. Also reuse the temp buffer when reading records from a DeltaLayer, and call Vec::reserve to avoid growing a buffer when reading a blob across pages. I saw a reduction from 2% to 1% of CPU spent in do_reserve_and_handle in that codepath, but that's such a small change that it could be just noise. Seems like it shouldn't hurt though. 
--- pageserver/src/tenant/blob_io.rs | 1 + pageserver/src/tenant/delta_layer.rs | 3 ++- pageserver/src/walredo.rs | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index 78ecbcb9c1..52eafc72ee 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -74,6 +74,7 @@ where }; dstbuf.clear(); + dstbuf.reserve(len); // Read the payload let mut remain = len; diff --git a/pageserver/src/tenant/delta_layer.rs b/pageserver/src/tenant/delta_layer.rs index a908d66200..dcd6956640 100644 --- a/pageserver/src/tenant/delta_layer.rs +++ b/pageserver/src/tenant/delta_layer.rs @@ -260,8 +260,9 @@ impl Layer for DeltaLayer { // Ok, 'offsets' now contains the offsets of all the entries we need to read let mut cursor = file.block_cursor(); + let mut buf = Vec::new(); for (entry_lsn, pos) in offsets { - let buf = cursor.read_blob(pos).with_context(|| { + cursor.read_blob_into_buf(pos, &mut buf).with_context(|| { format!( "Failed to read blob from virtual file {}", file.file.path.display() diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 59dadbb1d3..f05bf46d96 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -740,7 +740,11 @@ impl PostgresRedoProcess { // This could be problematic if there are millions of records to replay, // but in practice the number of records is usually so small that it doesn't // matter, and it's better to keep this code simple. - let mut writebuf: Vec<u8> = Vec::new(); + // + // Most requests start with a before-image with BLCKSZ bytes, followed by + // some other WAL records. Start with a buffer that can hold that + // comfortably. + let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3); build_begin_redo_for_block_msg(tag, &mut writebuf); if let Some(img) = base_img { build_push_page_msg(tag, &img, &mut writebuf);