use smallvec & pooling to avoid allocations on reconstruction path

This commit is contained in:
Christian Schwarz
2024-01-30 09:37:53 +00:00
parent a28cdf1c28
commit de8076d97d
7 changed files with 107 additions and 16 deletions

4
Cargo.lock generated
View File

@@ -5129,9 +5129,9 @@ dependencies = [
[[package]]
name = "smallvec"
version = "1.11.0"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
[[package]]
name = "smol_str"

View File

@@ -138,7 +138,7 @@ serde_with = "2.0"
serde_assert = "0.5.0"
sha2 = "0.10.2"
signal-hook = "0.3"
smallvec = "1.11"
smallvec = { version = "1.13.1", features = [ "const_generics" ] }
smol_str = { version = "0.2.0", features = ["serde"] }
socket2 = "0.5"
strum = "0.24"

View File

@@ -18,6 +18,40 @@ use crate::virtual_file::VirtualFile;
use std::cmp::min;
use std::io::{Error, ErrorKind};
/// The small subset of the `Vec<u8>` API that a growable byte buffer must offer so that
/// code such as `BlockCursor::read_blob_into_buf` can be generic over the buffer type.
///
/// This file implements it for `Vec<u8>` and for `smallvec::SmallVec<[u8; N]>`; both
/// implementations simply forward to the container's inherent method of the same name.
pub trait VecIsh {
    /// Empties the buffer.
    fn clear(&mut self);
    /// Makes room for further bytes. The provided impls pass `len` straight to the
    /// container's `reserve`, where it is the number of *additional* elements to make
    /// room for (not a total capacity).
    fn reserve(&mut self, len: usize);
    /// Appends a copy of the bytes in `o` to the end of the buffer.
    fn extend_from_slice(&mut self, o: &[u8]);
}
/// [`VecIsh`] for a plain byte vector.
///
/// Every method hands off to the inherent `Vec` method of the same name. The calls are
/// spelled as type-qualified paths so that it is obvious they target the inherent
/// methods rather than recursing into this trait impl.
impl VecIsh for Vec<u8> {
    fn clear(&mut self) {
        Vec::<u8>::clear(self);
    }

    fn reserve(&mut self, additional: usize) {
        Vec::<u8>::reserve(self, additional);
    }

    fn extend_from_slice(&mut self, bytes: &[u8]) {
        Vec::<u8>::extend_from_slice(self, bytes);
    }
}
/// [`VecIsh`] for a `SmallVec` of bytes with an inline array of `N` bytes.
///
/// Every method hands off to the inherent `SmallVec` method of the same name. The calls
/// are spelled as type-qualified paths so that it is obvious they target the inherent
/// methods rather than recursing into this trait impl.
impl<const N: usize> VecIsh for smallvec::SmallVec<[u8; N]> {
    fn clear(&mut self) {
        <smallvec::SmallVec<[u8; N]>>::clear(self);
    }

    fn reserve(&mut self, additional: usize) {
        <smallvec::SmallVec<[u8; N]>>::reserve(self, additional);
    }

    fn extend_from_slice(&mut self, bytes: &[u8]) {
        <smallvec::SmallVec<[u8; N]>>::extend_from_slice(self, bytes);
    }
}
impl<'a> BlockCursor<'a> {
/// Read a blob into a new buffer.
pub async fn read_blob(
@@ -32,10 +66,10 @@ impl<'a> BlockCursor<'a> {
}
/// Read blob into the given buffer. Any previous contents in the buffer
/// are overwritten.
pub async fn read_blob_into_buf(
pub async fn read_blob_into_buf<B: VecIsh>(
&self,
offset: u64,
dstbuf: &mut Vec<u8>,
dstbuf: &mut B,
ctx: &RequestContext,
) -> Result<(), std::io::Error> {
let mut blknum = (offset / PAGE_SZ as u64) as u32;

View File

@@ -34,6 +34,8 @@ pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
use super::disk_btree::PAGE_SZ;
pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
where
T: PartialOrd<T>,
@@ -63,9 +65,9 @@ where
///
#[derive(Debug)]
pub struct ValueReconstructState {
pub records: Vec<(Lsn, NeonWalRecord)>,
pub records: smallvec::SmallVec<[(Lsn, NeonWalRecord); 300]>,
pub img: Option<(Lsn, Bytes)>,
pub(crate) scratch: Vec<u8>,
pub(crate) scratch: smallvec::SmallVec<[u8; 2 * PAGE_SZ]>,
}
impl ValueReconstructState {

View File

@@ -747,7 +747,7 @@ impl DeltaLayerInner {
);
let search_key = DeltaKey::from_key_lsn(&key, Lsn(lsn_range.end.0 - 1));
let mut offsets: Vec<(Lsn, u64)> = Vec::new();
let mut offsets = smallvec::SmallVec::<[(Lsn, u64); 4]>::new();
tree_reader
.visit(
@@ -805,7 +805,7 @@ impl DeltaLayerInner {
reconstruct_state.records.push((entry_lsn, rec));
if will_init {
// This WAL record initializes the page, so no need to go further back
need_image = false;
need_image = false; // we bail out of this function anyway, might as well move ownership of the img
break;
}
}

View File

@@ -3,6 +3,7 @@ mod eviction_task;
mod init;
pub mod layer_manager;
pub(crate) mod logical_size;
mod reconstruct_state_pool;
pub mod span;
pub mod uninit;
mod walreceiver;
@@ -598,11 +599,7 @@ impl Timeline {
ctx.task_kind()
);
let mut reconstruct_state = ValueReconstructState {
records: Vec::new(),
img: None,
scratch: Vec::with_capacity(2 * 8192), // for good measure
};
let mut reconstruct_state = reconstruct_state_pool::get();
let timer = crate::metrics::GET_RECONSTRUCT_DATA_TIME.start_timer();
let path = self
@@ -4329,10 +4326,10 @@ impl Timeline {
&self,
key: Key,
request_lsn: Lsn,
mut data: ValueReconstructState,
mut data: reconstruct_state_pool::Pooled,
) -> Result<Bytes, PageReconstructError> {
// Perform WAL redo if needed
data.records.reverse();
data.records.reverse(); // TODO: avoid this, walredo code should simply walk backwards
// If we have a page image, and no WAL, we're all set
if data.records.is_empty() {

View File

@@ -0,0 +1,58 @@
use std::cell::RefCell;
use crate::tenant::storage_layer::ValueReconstructState;
/// Newtype around [`ValueReconstructState`]; boxed instances of it are what the
/// thread-local `POOL` stores and what a `Pooled` handle owns.
struct Content(ValueReconstructState);
impl Content {
fn empty() -> Self {
Content(ValueReconstructState {
records: smallvec::SmallVec::new(),
img: None,
scratch: smallvec::SmallVec::new(),
})
}
fn reset(&mut self) {
let inner = &mut self.0;
inner.records.clear();
inner.img = None;
inner.scratch.clear();
}
}
/// Owning handle to a pooled [`ValueReconstructState`], obtained from [`get`].
///
/// The handle dereferences (mutably and immutably) to the wrapped state. Dropping it
/// resets the state and offers the allocation back to the pool of the thread that
/// performs the drop. The inner `Option` is `Some` from construction in `get` until the
/// `Drop` impl takes the content out.
pub struct Pooled(Option<Box<Content>>);
// Thread-local list of re-usable buffers.
//
// `get` pops an entry from this list (allocating a fresh one when the list is empty) and
// the `Drop` impl of `Pooled` puts entries back. Because the list is per thread, a handle
// that is dropped on a different thread than the one that called `get` lands in the
// dropping thread's list. The `RefCell` is only borrowed for the duration of a single
// push or pop.
thread_local! {
    static POOL: RefCell<Vec<Box<Content>>> = RefCell::new(Vec::new());
}
/// Hands out a [`Pooled`] handle.
///
/// The most recently returned entry of the calling thread's `POOL` is reused when there
/// is one; otherwise a new boxed `Content::empty()` is allocated.
pub(crate) fn get() -> Pooled {
    let recycled = POOL.with(|pool| pool.borrow_mut().pop());
    let content = recycled.unwrap_or_else(|| Box::new(Content::empty()));
    Pooled(Some(content))
}
/// Resets the content and offers it back to the pool of the thread performing the drop.
///
/// This destructor never panics on its own account:
/// - a handle whose content has already been taken is ignored instead of unwrapped;
/// - the thread-local is accessed with `try_with`, because `LocalKey::with` panics when
///   the key is being (or has been) destroyed, e.g. when a handle is dropped during
///   thread teardown. In that case the content is simply freed.
///
/// The number of idle entries kept per thread is bounded. The pool is per thread while a
/// handle may be dropped on a different thread than the one that allocated it
/// (presumably possible when the handle is held across an `.await` on a multi-threaded
/// runtime -- confirm against the callers), so without a bound one thread's list could
/// grow indefinitely.
impl Drop for Pooled {
    fn drop(&mut self) {
        // Upper bound on idle entries retained per thread.
        // NOTE(review): the value is a conservative guess; tune it to the expected
        // number of concurrent reconstructions per executor thread.
        const MAX_IDLE_PER_THREAD: usize = 16;

        if let Some(mut content) = self.0.take() {
            // Reset before pooling so that the image and records are released now
            // rather than whenever the entry happens to be reused.
            content.reset();
            // `content` is moved into the closure: if it is not pushed (pool full, or
            // thread-local unavailable) it is dropped and its memory freed.
            let _ = POOL.try_with(|pool| {
                let mut idle = pool.borrow_mut();
                if idle.len() < MAX_IDLE_PER_THREAD {
                    idle.push(content);
                }
            });
        }
    }
}
/// Shared access to the pooled state through the handle.
impl std::ops::Deref for Pooled {
    type Target = super::ValueReconstructState;

    fn deref(&self) -> &Self::Target {
        // In this file the option is only emptied by the `Drop` impl, so it is expected
        // to be `Some` for as long as the handle can be dereferenced; `unwrap` panics
        // otherwise.
        let content = self.0.as_deref().unwrap();
        &content.0
    }
}
/// Exclusive access to the pooled state through the handle.
impl std::ops::DerefMut for Pooled {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // In this file the option is only emptied by the `Drop` impl, so it is expected
        // to be `Some` for as long as the handle can be dereferenced; `unwrap` panics
        // otherwise.
        let content = self.0.as_deref_mut().unwrap();
        &mut content.0
    }
}