Make EphemeralFile::write_blob function async (#5056)

## Problem

The `EphemeralFile::write_blob` function accesses the page cache
internally. We want to require `async` for these accesses in #5023.

## Summary of changes

This removes the implementation of the `BlobWriter` trait for
`EphemeralFile` and turns the `write_blob` function into an inherent
function, which lets us make it async, together with the `push_bytes`
helper it calls. The `SER_BUFFER` thread-local is removed;
`InMemoryLayer::put_value` now serializes into a per-call `SmallVec`
buffer instead, since the async future may (potentially) be moved
between threads and can no longer rely on thread-local state.

Part of #4743, preparation for #5023.
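
As a rough illustration, here is a minimal sketch of the new write path (simplified, hypothetical types; not the actual pageserver code). Only the inherent `async fn write_blob` and the 256-byte `SmallVec` buffer are taken from this change; `put_value` below is a stand-in with a simplified signature:

```rust
use std::io;

use smallvec::SmallVec;

struct EphemeralFile {
    len: u64,
}

impl EphemeralFile {
    /// Now an inherent async method instead of a `BlobWriter` trait impl.
    /// Body elided: the real function writes a length header plus the
    /// payload through the page cache and returns the blob's offset.
    async fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, io::Error> {
        let off = self.len;
        self.len += srcbuf.len() as u64;
        Ok(off)
    }
}

/// Stand-in for `InMemoryLayer::put_value`. A `thread_local!` buffer can no
/// longer be used for serialization here, because the future may resume on a
/// different worker thread after the `.await`.
async fn put_value(file: &mut EphemeralFile, serialized: &[u8]) -> Result<u64, io::Error> {
    // Values up to 256 bytes stay on the stack; larger ones spill to the heap.
    let mut buf: SmallVec<[u8; 256]> = SmallVec::new();
    // In the real code this is `val.ser_into(&mut buf)?`.
    buf.extend_from_slice(serialized);
    file.write_blob(&buf).await
}
```

The 256-byte inline capacity mirrors the threshold noted in the diff below, which avoided heap allocations in roughly 95% of cases in the regression test suite.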
Author: Arpad Müller
Date: 2023-08-24 19:18:30 +02:00
Committed by: GitHub
Parent: e8f9aaf78c
Commit: 227c87e333

6 changed files with 34 additions and 41 deletions

Cargo.lock (generated)

@@ -2596,6 +2596,7 @@ dependencies = [
"serde_json",
"serde_with",
"signal-hook",
"smallvec",
"storage_broker",
"strum",
"strum_macros",
@@ -3972,9 +3973,9 @@ dependencies = [
[[package]]
name = "smallvec"
version = "1.10.0"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
[[package]]
name = "socket2"
@@ -5373,6 +5374,7 @@ dependencies = [
"scopeguard",
"serde",
"serde_json",
"smallvec",
"socket2 0.4.9",
"syn 1.0.109",
"syn 2.0.16",


@@ -111,6 +111,7 @@ serde_json = "1"
serde_with = "2.0"
sha2 = "0.10.2"
signal-hook = "0.3"
smallvec = "1.11"
socket2 = "0.5"
strum = "0.24"
strum_macros = "0.24"


@@ -51,6 +51,7 @@ serde.workspace = true
serde_json = { workspace = true, features = ["raw_value"] }
serde_with.workspace = true
signal-hook.workspace = true
+ smallvec = { workspace = true, features = ["write"] }
svg_fmt.workspace = true
sync_wrapper.workspace = true
tokio-tar.workspace = true


@@ -3,7 +3,6 @@
use crate::config::PageServerConf;
use crate::page_cache::{self, PAGE_SZ};
- use crate::tenant::blob_io::BlobWriter;
use crate::tenant::block_io::{BlockLease, BlockReader};
use crate::virtual_file::VirtualFile;
use std::cmp::min;
@@ -61,19 +60,8 @@ impl EphemeralFile {
pub(crate) fn len(&self) -> u64 {
self.len
}
- }
- /// Does the given filename look like an ephemeral file?
- pub fn is_ephemeral_file(filename: &str) -> bool {
- if let Some(rest) = filename.strip_prefix("ephemeral-") {
- rest.parse::<u32>().is_ok()
- } else {
- false
- }
- }
- impl BlobWriter for EphemeralFile {
- fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, io::Error> {
+ pub(crate) async fn write_blob(&mut self, srcbuf: &[u8]) -> Result<u64, io::Error> {
struct Writer<'a> {
ephemeral_file: &'a mut EphemeralFile,
/// The block to which the next [`push_bytes`] will write.
@@ -90,7 +78,7 @@ impl BlobWriter for EphemeralFile {
})
}
#[inline(always)]
- fn push_bytes(&mut self, src: &[u8]) -> Result<(), io::Error> {
+ async fn push_bytes(&mut self, src: &[u8]) -> Result<(), io::Error> {
let mut src_remaining = src;
while !src_remaining.is_empty() {
let dst_remaining = &mut self.ephemeral_file.mutable_tail[self.off..];
@@ -161,15 +149,15 @@ impl BlobWriter for EphemeralFile {
if srcbuf.len() < 0x80 {
// short one-byte length header
let len_buf = [srcbuf.len() as u8];
- writer.push_bytes(&len_buf)?;
+ writer.push_bytes(&len_buf).await?;
} else {
let mut len_buf = u32::to_be_bytes(srcbuf.len() as u32);
len_buf[0] |= 0x80;
- writer.push_bytes(&len_buf)?;
+ writer.push_bytes(&len_buf).await?;
}
// Write the payload
- writer.push_bytes(srcbuf)?;
+ writer.push_bytes(srcbuf).await?;
if srcbuf.len() < 0x80 {
self.len += 1;
@@ -182,6 +170,15 @@ impl BlobWriter for EphemeralFile {
}
}
+ /// Does the given filename look like an ephemeral file?
+ pub fn is_ephemeral_file(filename: &str) -> bool {
+ if let Some(rest) = filename.strip_prefix("ephemeral-") {
+ rest.parse::<u32>().is_ok()
+ } else {
+ false
+ }
+ }
impl Drop for EphemeralFile {
fn drop(&mut self) {
// drop all pages from page cache
@@ -251,7 +248,6 @@ impl BlockReader for EphemeralFile {
#[cfg(test)]
mod tests {
use super::*;
- use crate::tenant::blob_io::BlobWriter;
use crate::tenant::block_io::BlockCursor;
use rand::{thread_rng, RngCore};
use std::fs;
@@ -280,12 +276,12 @@ mod tests {
let mut file = EphemeralFile::create(conf, tenant_id, timeline_id)?;
- let pos_foo = file.write_blob(b"foo")?;
+ let pos_foo = file.write_blob(b"foo").await?;
assert_eq!(
b"foo",
file.block_cursor().read_blob(pos_foo).await?.as_slice()
);
- let pos_bar = file.write_blob(b"bar")?;
+ let pos_bar = file.write_blob(b"bar").await?;
assert_eq!(
b"foo",
file.block_cursor().read_blob(pos_foo).await?.as_slice()
@@ -298,13 +294,13 @@ mod tests {
let mut blobs = Vec::new();
for i in 0..10000 {
let data = Vec::from(format!("blob{}", i).as_bytes());
- let pos = file.write_blob(&data)?;
+ let pos = file.write_blob(&data).await?;
blobs.push((pos, data));
}
// also test with large blobs
for i in 0..100 {
let data = format!("blob{}", i).as_bytes().repeat(100);
- let pos = file.write_blob(&data)?;
+ let pos = file.write_blob(&data).await?;
blobs.push((pos, data));
}
@@ -318,7 +314,7 @@ mod tests {
let mut large_data = Vec::new();
large_data.resize(20000, 0);
thread_rng().fill_bytes(&mut large_data);
- let pos_large = file.write_blob(&large_data)?;
+ let pos_large = file.write_blob(&large_data).await?;
let result = file.block_cursor().read_blob(pos_large).await?;
assert_eq!(result, large_data);


@@ -7,14 +7,12 @@
use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::repository::{Key, Value};
- use crate::tenant::blob_io::BlobWriter;
use crate::tenant::block_io::BlockReader;
use crate::tenant::ephemeral_file::EphemeralFile;
use crate::tenant::storage_layer::{ValueReconstructResult, ValueReconstructState};
use crate::walrecord;
use anyhow::{ensure, Result};
use pageserver_api::models::InMemoryLayerInfo;
- use std::cell::RefCell;
use std::collections::HashMap;
use std::sync::OnceLock;
use tracing::*;
@@ -32,12 +30,6 @@ use tokio::sync::RwLock;
use super::{DeltaLayer, DeltaLayerWriter, Layer};
- thread_local! {
- /// A buffer for serializing object during [`InMemoryLayer::put_value`].
- /// This buffer is reused for each serialization to avoid additional malloc calls.
- static SER_BUFFER: RefCell<Vec<u8>> = RefCell::new(Vec::new());
- }
pub struct InMemoryLayer {
conf: &'static PageServerConf,
tenant_id: TenantId,
@@ -273,17 +265,17 @@ impl InMemoryLayer {
/// Adds the page version to the in-memory tree
pub async fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
trace!("put_value key {} at {}/{}", key, self.timeline_id, lsn);
- let mut inner = self.inner.write().await;
+ let inner: &mut _ = &mut *self.inner.write().await;
self.assert_writable();
let off = {
- SER_BUFFER.with(|x| -> Result<_> {
- let mut buf = x.borrow_mut();
- buf.clear();
- val.ser_into(&mut (*buf))?;
- let off = inner.file.write_blob(&buf)?;
- Ok(off)
- })?
+ // Avoid doing allocations for "small" values.
+ // In the regression test suite, the limit of 256 avoided allocations in 95% of cases:
+ // https://github.com/neondatabase/neon/pull/5056#discussion_r1301975061
+ let mut buf = smallvec::SmallVec::<[u8; 256]>::new();
+ buf.clear();
+ val.ser_into(&mut buf)?;
+ inner.file.write_blob(&buf).await?
};
let vec_map = inner.index.entry(key).or_default();


@@ -45,6 +45,7 @@ rustls = { version = "0.20", features = ["dangerous_configuration"] }
scopeguard = { version = "1" }
serde = { version = "1", features = ["alloc", "derive"] }
serde_json = { version = "1", features = ["raw_value"] }
smallvec = { version = "1", default-features = false, features = ["write"] }
socket2 = { version = "0.4", default-features = false, features = ["all"] }
tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
tokio-rustls = { version = "0.23" }