Avoid some vector-growing overhead.

I saw this in 'perf' profile of a sequential scan:

> -   31.93%     0.21%  compute request  pageserver         [.] <pageserver::walredo::PostgresRedoManager as pageserver::walredo::WalRedoManager>::request_redo
>    - 31.72% <pageserver::walredo::PostgresRedoManager as pageserver::walredo::WalRedoManager>::request_redo
>       - 31.26% pageserver::walredo::PostgresRedoManager::apply_batch_postgres
>          + 7.64% <std::process::ChildStdin as std::io::Write>::write
>          + 6.17% nix::poll::poll
>          + 3.58% <std::process::ChildStderr as std::io::Read>::read
>          + 2.96% std::sync::condvar::Condvar::notify_one
>          + 2.48% std::sys::unix::locks::futex::Condvar::wait
>          + 2.19% alloc::raw_vec::RawVec<T,A>::reserve::do_reserve_and_handle
>          + 1.14% std::sys::unix::locks::futex::Mutex::lock_contended
>            0.67% __rust_alloc_zeroed
>            0.62% __stpcpy_ssse3
>            0.56% std::sys::unix::locks::futex::Mutex::wake

Note the 'do_reserve_handle' overhead. That's caused by having to grow
the buffer used to construct the WAL redo request. This commit
eliminates that overhead. It's only about 2% of the overall CPU usage,
but every little helps.

Also reuse the temp buffer when reading records from a DeltaLayer, and
call Vec::reserve to avoid growing a buffer when reading a blob across
pages. I saw a reduction from 2% to 1% of CPU spent in
do_reserve_and_handle in that codepath, but that's such a small change
that it could be just noise. Seems like it shouldn't hurt though.
This commit is contained in:
Heikki Linnakangas
2022-11-10 14:05:13 +02:00
committed by Heikki Linnakangas
parent 4131a6efae
commit dbe5b52494
3 changed files with 8 additions and 2 deletions

View File

@@ -74,6 +74,7 @@ where
};
dstbuf.clear();
dstbuf.reserve(len);
// Read the payload
let mut remain = len;

View File

@@ -260,8 +260,9 @@ impl Layer for DeltaLayer {
// Ok, 'offsets' now contains the offsets of all the entries we need to read
let mut cursor = file.block_cursor();
let mut buf = Vec::new();
for (entry_lsn, pos) in offsets {
let buf = cursor.read_blob(pos).with_context(|| {
cursor.read_blob_into_buf(pos, &mut buf).with_context(|| {
format!(
"Failed to read blob from virtual file {}",
file.file.path.display()

View File

@@ -740,7 +740,11 @@ impl PostgresRedoProcess {
// This could be problematic if there are millions of records to replay,
// but in practice the number of records is usually so small that it doesn't
// matter, and it's better to keep this code simple.
let mut writebuf: Vec<u8> = Vec::new();
//
// Most requests start with a before-image with BLCKSZ bytes, followed by
// by some other WAL records. Start with a buffer that can hold that
// comfortably.
let mut writebuf: Vec<u8> = Vec::with_capacity((BLCKSZ as usize) * 3);
build_begin_redo_for_block_msg(tag, &mut writebuf);
if let Some(img) = base_img {
build_push_page_msg(tag, &img, &mut writebuf);