fix(remote_storage): buffering, by using streams for upload and download (#5446)

There is 8KiB of double buffering in remote_storage and in pageserver when
`tokio::io::copy` is used to read from a `BufReader<ReaderStream<_>>`.
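
To illustrate the old shape (a minimal sketch, not the actual pageserver code; the helper name is made up):

```rust
use tokio::io::{AsyncRead, BufReader};
use tokio_util::io::{ReaderStream, StreamReader};

// Sketch of the old shape: the body is chunked into Bytes by ReaderStream,
// turned back into an AsyncRead by StreamReader, re-buffered by BufReader,
// and then drained through copy()'s own internal buffer -- two layers of
// buffering for data that already arrived in whole chunks.
async fn download_to_file_old(
    body: impl AsyncRead + Unpin,
    file: &mut tokio::fs::File,
) -> std::io::Result<u64> {
    let stream = ReaderStream::new(body);
    let mut reader = BufReader::new(StreamReader::new(stream));
    tokio::io::copy(&mut reader, file).await
}
```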

Switches downloads and uploads to use `Stream<Item =
std::io::Result<Bytes>>`. The caller, and only the caller, now handles
setting up buffering. For reading, the `Stream<Item = ...>` is adapted
into an `AsyncBufRead` via `tokio_util::io::StreamReader`, so when
writing to a file we now have `tokio::io::copy_buf` reading full buffers
and writing them to a `tokio::io::BufWriter`, which handles the
buffering before dispatching to `tokio::fs::File`.
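
A minimal sketch of the new read path just described (helper name made up):

```rust
use bytes::Bytes;
use futures::Stream;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio_util::io::StreamReader;

// Sketch of the new shape: the download is a Stream<Item = io::Result<Bytes>>;
// StreamReader exposes it as AsyncBufRead, so copy_buf hands the received
// chunks to the BufWriter, the single buffering layer in front of the file.
async fn write_download_to_file(
    download: impl Stream<Item = std::io::Result<Bytes>> + Unpin,
    file: tokio::fs::File,
) -> std::io::Result<u64> {
    let mut reader = StreamReader::new(download);
    let mut writer = BufWriter::new(file);
    let copied = tokio::io::copy_buf(&mut reader, &mut writer).await?;
    writer.flush().await?;
    Ok(copied)
}
```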

Additionally implements streaming uploads for Azure. Azure downloads are
a bit nicer than before, but not by much: instead of one huge `Vec`, they
now just hold on to the N allocations received over the wire.
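
Sketched under the assumption that the Azure body arrives as a chunk stream (helper name made up):

```rust
use bytes::Bytes;
use futures::{Stream, StreamExt};

// Sketch: rather than copying every chunk into one contiguous Vec<u8>,
// the download keeps the N Bytes allocations exactly as they arrived.
async fn collect_chunks(
    mut body: impl Stream<Item = std::io::Result<Bytes>> + Unpin,
) -> std::io::Result<Vec<Bytes>> {
    let mut chunks = Vec::new();
    while let Some(chunk) = body.next().await {
        chunks.push(chunk?);
    }
    Ok(chunks)
}
```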

This PR will also make it trivial to switch reading and writing to
io_uring-based methods.

Cc: #5563.
Author:  Joonas Koivunen
Date:    2023-12-07 17:52:22 +02:00 (committed by GitHub)
Parent:  880663f6bc
Commit:  b492cedf51

15 changed files with 468 additions and 218 deletions

```diff
@@ -494,15 +494,13 @@ async fn backup_object(
         .as_ref()
         .unwrap();
 
-    let file = tokio::io::BufReader::new(
-        File::open(&source_file)
-            .await
-            .with_context(|| format!("Failed to open file {} for wal backup", source_file))?,
-    );
-
-    storage
-        .upload_storage_object(Box::new(file), size, target_file)
-        .await
+    let file = File::open(&source_file)
+        .await
+        .with_context(|| format!("Failed to open file {source_file:?} for wal backup"))?;
+
+    let file = tokio_util::io::ReaderStream::with_capacity(file, 8 * 1024);
+
+    storage.upload_storage_object(file, size, target_file).await
 }
 
 pub async fn read_object(
```
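
A caller-side sketch of what the hunk above does (function name made up): the opened file becomes the `Stream<Item = io::Result<Bytes>>` that `upload_storage_object` now consumes, with the 8 KiB chunk size chosen at the call site.

```rust
use tokio_util::io::ReaderStream;

// Sketch: turn a file into the byte-chunk stream the new upload path takes,
// picking the 8 KiB chunk size here, at the caller.
async fn file_as_chunk_stream(
    path: &std::path::Path,
) -> std::io::Result<ReaderStream<tokio::fs::File>> {
    let file = tokio::fs::File::open(path).await?;
    Ok(ReaderStream::with_capacity(file, 8 * 1024))
}
```
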
```diff
@@ -524,5 +522,9 @@ pub async fn read_object(
             format!("Failed to open WAL segment download stream for remote path {file_path:?}")
         })?;
 
-    Ok(download.download_stream)
+    let reader = tokio_util::io::StreamReader::new(download.download_stream);
+
+    let reader = tokio::io::BufReader::with_capacity(8 * 1024, reader);
+
+    Ok(Box::pin(reader))
 }
```
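
On the consumer side, a sketch of draining the reader `read_object` now returns; the exact trait-object bounds here are an assumption:

```rust
use std::pin::Pin;
use tokio::io::{AsyncBufRead, AsyncReadExt};

// Sketch: drain the buffered WAL segment reader returned above; the 8 KiB
// BufReader inside it is the only buffering between us and the stream.
async fn read_all(
    mut reader: Pin<Box<dyn AsyncBufRead + Send>>,
) -> std::io::Result<Vec<u8>> {
    let mut buf = Vec::new();
    reader.read_to_end(&mut buf).await?;
    Ok(buf)
}
```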