mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-30 11:30:37 +00:00
fix(remote_storage): buffering, by using streams for upload and download (#5446)
There is double buffering in remote_storage and in pageserver for 8KiB in using `tokio::io::copy` to read `BufReader<ReaderStream<_>>`. Switches downloads and uploads to use `Stream<Item = std::io::Result<Bytes>>`. Caller and only caller now handles setting up buffering. For reading, `Stream<Item = ...>` is also a `AsyncBufRead`, so when writing to a file, we now have `tokio::io::copy_buf` reading full buffers and writing them to `tokio::io::BufWriter` which handles the buffering before dispatching over to `tokio::fs::File`. Additionally implements streaming uploads for azure. With azure downloads are a bit nicer than before, but not much; instead of one huge vec they just hold on to N allocations we got over the wire. This PR will also make it trivial to switch reading and writing to io-uring based methods. Cc: #5563.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
//! This module provides a wrapper around a real RemoteStorage implementation that
|
||||
//! causes the first N attempts at each upload or download operatio to fail. For
|
||||
//! testing purposes.
|
||||
use bytes::Bytes;
|
||||
use futures::stream::Stream;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Mutex;
|
||||
@@ -108,7 +110,7 @@ impl RemoteStorage for UnreliableWrapper {
|
||||
|
||||
async fn upload(
|
||||
&self,
|
||||
data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||
data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
|
||||
// S3 PUT request requires the content length to be specified,
|
||||
// otherwise it starts to fail with the concurrent connection count increasing.
|
||||
data_size_bytes: usize,
|
||||
|
||||
Reference in New Issue
Block a user