mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-03 20:02:54 +00:00
feat(puffin): apply range reader (#4928)
* wip Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * feat(puffin): apply range reader Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * refactor: read_vec reduce iteration Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * refactor: simplify rangereader for vec<u8> Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * test: add unit test Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix: toml format Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
5
Cargo.lock
generated
5
Cargo.lock
generated
@@ -1934,10 +1934,13 @@ dependencies = [
|
||||
"bytes",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-test-util",
|
||||
"futures",
|
||||
"paste",
|
||||
"pin-project",
|
||||
"serde",
|
||||
"snafu 0.8.5",
|
||||
"tokio",
|
||||
"toml 0.8.19",
|
||||
"zeroize",
|
||||
]
|
||||
@@ -8973,6 +8976,8 @@ dependencies = [
|
||||
"auto_impl",
|
||||
"base64 0.21.7",
|
||||
"bitflags 2.6.0",
|
||||
"bytes",
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-runtime",
|
||||
|
||||
@@ -16,9 +16,12 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
futures.workspace = true
|
||||
paste = "1.0"
|
||||
pin-project.workspace = true
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu.workspace = true
|
||||
tokio.workspace = true
|
||||
zeroize = { version = "1.6", default-features = false, features = ["alloc"] }
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util.workspace = true
|
||||
toml.workspace = true
|
||||
|
||||
@@ -12,12 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
use std::path::Path;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{BufMut, Bytes};
|
||||
use futures::{AsyncReadExt, AsyncSeekExt};
|
||||
use futures::AsyncRead;
|
||||
use pin_project::pin_project;
|
||||
use tokio::io::{AsyncReadExt as _, AsyncSeekExt as _};
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// `Metadata` contains the metadata of a source.
|
||||
pub struct Metadata {
|
||||
@@ -61,7 +69,7 @@ pub trait RangeReader: Send + Unpin {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
|
||||
impl<R: ?Sized + RangeReader> RangeReader for &mut R {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
(*self).metadata().await
|
||||
}
|
||||
@@ -80,26 +88,212 @@ impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
|
||||
}
|
||||
}
|
||||
|
||||
/// `RangeReaderAdapter` bridges `RangeReader` and `AsyncRead + AsyncSeek`.
|
||||
pub struct RangeReaderAdapter<R>(pub R);
|
||||
/// `AsyncReadAdapter` adapts a `RangeReader` to an `AsyncRead`.
|
||||
#[pin_project]
|
||||
pub struct AsyncReadAdapter<R> {
|
||||
/// The inner `RangeReader`.
|
||||
/// Use `Mutex` to get rid of the borrow checker issue.
|
||||
inner: Arc<Mutex<R>>,
|
||||
|
||||
/// The current position from the view of the reader.
|
||||
position: u64,
|
||||
|
||||
/// The buffer for the read bytes.
|
||||
buffer: Vec<u8>,
|
||||
|
||||
/// The length of the content.
|
||||
content_length: u64,
|
||||
|
||||
/// The future for reading the next bytes.
|
||||
#[pin]
|
||||
read_fut: Option<Pin<Box<dyn Future<Output = io::Result<Bytes>> + Send>>>,
|
||||
}
|
||||
|
||||
impl<R: RangeReader + 'static> AsyncReadAdapter<R> {
|
||||
pub async fn new(inner: R) -> io::Result<Self> {
|
||||
let mut inner = inner;
|
||||
let metadata = inner.metadata().await?;
|
||||
Ok(AsyncReadAdapter {
|
||||
inner: Arc::new(Mutex::new(inner)),
|
||||
position: 0,
|
||||
buffer: Vec::new(),
|
||||
content_length: metadata.content_length,
|
||||
read_fut: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The maximum size per read for the inner reader in `AsyncReadAdapter`.
|
||||
const MAX_SIZE_PER_READ: usize = 8 * 1024 * 1024; // 8MB
|
||||
|
||||
impl<R: RangeReader + 'static> AsyncRead for AsyncReadAdapter<R> {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut [u8],
|
||||
) -> Poll<io::Result<usize>> {
|
||||
let mut this = self.as_mut().project();
|
||||
|
||||
if *this.position >= *this.content_length {
|
||||
return Poll::Ready(Ok(0));
|
||||
}
|
||||
|
||||
if !this.buffer.is_empty() {
|
||||
let to_read = this.buffer.len().min(buf.len());
|
||||
buf[..to_read].copy_from_slice(&this.buffer[..to_read]);
|
||||
this.buffer.drain(..to_read);
|
||||
*this.position += to_read as u64;
|
||||
return Poll::Ready(Ok(to_read));
|
||||
}
|
||||
|
||||
if this.read_fut.is_none() {
|
||||
let size = (*this.content_length - *this.position).min(MAX_SIZE_PER_READ as u64);
|
||||
let range = *this.position..(*this.position + size);
|
||||
let inner = this.inner.clone();
|
||||
let fut = async move {
|
||||
let mut inner = inner.lock().await;
|
||||
inner.read(range).await
|
||||
};
|
||||
|
||||
*this.read_fut = Some(Box::pin(fut));
|
||||
}
|
||||
|
||||
match this
|
||||
.read_fut
|
||||
.as_mut()
|
||||
.as_pin_mut()
|
||||
.expect("checked above")
|
||||
.poll(cx)
|
||||
{
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Ok(bytes)) => {
|
||||
*this.read_fut = None;
|
||||
|
||||
if !bytes.is_empty() {
|
||||
this.buffer.extend_from_slice(&bytes);
|
||||
self.poll_read(cx, buf)
|
||||
} else {
|
||||
Poll::Ready(Ok(0))
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
*this.read_fut = None;
|
||||
Poll::Ready(Err(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements `RangeReader` for a type that implements `AsyncRead + AsyncSeek`.
|
||||
///
|
||||
/// TODO(zhongzc): It's a temporary solution for porting the codebase from `AsyncRead + AsyncSeek` to `RangeReader`.
|
||||
/// Until the codebase is fully ported to `RangeReader`, remove this implementation.
|
||||
#[async_trait]
|
||||
impl<R: futures::AsyncRead + futures::AsyncSeek + Send + Unpin> RangeReader
|
||||
for RangeReaderAdapter<R>
|
||||
{
|
||||
impl RangeReader for Vec<u8> {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
let content_length = self.0.seek(io::SeekFrom::End(0)).await?;
|
||||
Ok(Metadata { content_length })
|
||||
Ok(Metadata {
|
||||
content_length: self.len() as u64,
|
||||
})
|
||||
}
|
||||
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
|
||||
range.end = range.end.min(self.len() as u64);
|
||||
|
||||
let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
|
||||
Ok(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// `FileReader` is a `RangeReader` for reading a file.
|
||||
pub struct FileReader {
|
||||
content_length: u64,
|
||||
position: u64,
|
||||
file: tokio::fs::File,
|
||||
}
|
||||
|
||||
impl FileReader {
|
||||
/// Creates a new `FileReader` for the file at the given path.
|
||||
pub async fn new(path: impl AsRef<Path>) -> io::Result<Self> {
|
||||
let file = tokio::fs::File::open(path).await?;
|
||||
let metadata = file.metadata().await?;
|
||||
Ok(FileReader {
|
||||
content_length: metadata.len(),
|
||||
position: 0,
|
||||
file,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RangeReader for FileReader {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
Ok(Metadata {
|
||||
content_length: self.content_length,
|
||||
})
|
||||
}
|
||||
|
||||
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
|
||||
if range.start != self.position {
|
||||
self.file.seek(io::SeekFrom::Start(range.start)).await?;
|
||||
self.position = range.start;
|
||||
}
|
||||
|
||||
range.end = range.end.min(self.content_length);
|
||||
if range.end <= self.position {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
"Start of range is out of bounds",
|
||||
));
|
||||
}
|
||||
|
||||
let mut buf = vec![0; (range.end - range.start) as usize];
|
||||
self.0.seek(io::SeekFrom::Start(range.start)).await?;
|
||||
self.0.read_exact(&mut buf).await?;
|
||||
|
||||
self.file.read_exact(&mut buf).await?;
|
||||
self.position = range.end;
|
||||
|
||||
Ok(Bytes::from(buf))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use futures::io::AsyncReadExt as _;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_async_read_adapter() {
|
||||
let data = b"hello world";
|
||||
let reader = Vec::from(data);
|
||||
let mut adapter = AsyncReadAdapter::new(reader).await.unwrap();
|
||||
|
||||
let mut buf = Vec::new();
|
||||
adapter.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_async_read_adapter_large() {
|
||||
let data = (0..20 * 1024 * 1024).map(|i| i as u8).collect::<Vec<u8>>();
|
||||
let mut adapter = AsyncReadAdapter::new(data.clone()).await.unwrap();
|
||||
|
||||
let mut buf = Vec::new();
|
||||
adapter.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_file_reader() {
|
||||
let file = create_named_temp_file();
|
||||
let path = file.path();
|
||||
let data = b"hello world";
|
||||
tokio::fs::write(path, data).await.unwrap();
|
||||
|
||||
let mut reader = FileReader::new(path).await.unwrap();
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.content_length, data.len() as u64);
|
||||
|
||||
let bytes = reader.read(0..metadata.content_length).await.unwrap();
|
||||
assert_eq!(&*bytes, data);
|
||||
|
||||
let bytes = reader.read(0..5).await.unwrap();
|
||||
assert_eq!(&*bytes, &data[..5]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,9 +80,7 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::bit_vec::prelude::*;
|
||||
use common_base::range_read::RangeReaderAdapter;
|
||||
use fst::MapBuilder;
|
||||
use futures::io::Cursor;
|
||||
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
|
||||
use prost::Message;
|
||||
|
||||
@@ -163,8 +161,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_blob_reader_metadata() {
|
||||
let blob = create_inverted_index_blob();
|
||||
let cursor = RangeReaderAdapter(Cursor::new(blob));
|
||||
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
|
||||
let mut blob_reader = InvertedIndexBlobReader::new(blob);
|
||||
|
||||
let metas = blob_reader.metadata().await.unwrap();
|
||||
assert_eq!(metas.metas.len(), 2);
|
||||
@@ -191,8 +188,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_blob_reader_fst() {
|
||||
let blob = create_inverted_index_blob();
|
||||
let cursor = RangeReaderAdapter(Cursor::new(blob));
|
||||
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
|
||||
let mut blob_reader = InvertedIndexBlobReader::new(blob);
|
||||
|
||||
let metas = blob_reader.metadata().await.unwrap();
|
||||
let meta = metas.metas.get("tag0").unwrap();
|
||||
@@ -224,8 +220,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_inverted_index_blob_reader_bitmap() {
|
||||
let blob = create_inverted_index_blob();
|
||||
let cursor = RangeReaderAdapter(Cursor::new(blob));
|
||||
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
|
||||
let mut blob_reader = InvertedIndexBlobReader::new(blob);
|
||||
|
||||
let metas = blob_reader.metadata().await.unwrap();
|
||||
let meta = metas.metas.get("tag0").unwrap();
|
||||
|
||||
@@ -113,8 +113,6 @@ impl<R: RangeReader> InvertedIndeFooterReader<R> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::range_read::RangeReaderAdapter;
|
||||
use futures::io::Cursor;
|
||||
use prost::Message;
|
||||
|
||||
use super::*;
|
||||
@@ -141,10 +139,9 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let payload_buf = create_test_payload(meta);
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
|
||||
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let metas = reader.read_payload(payload_size).await.unwrap();
|
||||
@@ -164,8 +161,7 @@ mod tests {
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
|
||||
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size_result = reader.read_payload_size().await;
|
||||
assert!(payload_size_result.is_err());
|
||||
@@ -180,10 +176,9 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let payload_buf = create_test_payload(meta);
|
||||
let mut payload_buf = create_test_payload(meta);
|
||||
let blob_size = payload_buf.len() as u64;
|
||||
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
|
||||
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
|
||||
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
|
||||
|
||||
let payload_size = reader.read_payload_size().await.unwrap();
|
||||
let payload_result = reader.read_payload(payload_size).await;
|
||||
|
||||
@@ -99,8 +99,6 @@ impl<W: AsyncWrite + Send + Unpin> InvertedIndexBlobWriter<W> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::range_read::RangeReaderAdapter;
|
||||
use futures::io::Cursor;
|
||||
use futures::stream;
|
||||
|
||||
use super::*;
|
||||
@@ -120,8 +118,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let cursor = RangeReaderAdapter(Cursor::new(blob));
|
||||
let mut reader = InvertedIndexBlobReader::new(cursor);
|
||||
let mut reader = InvertedIndexBlobReader::new(blob);
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.total_row_count, 8);
|
||||
assert_eq!(metadata.segment_row_count, 1);
|
||||
@@ -161,8 +158,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let cursor = RangeReaderAdapter(Cursor::new(blob));
|
||||
let mut reader = InvertedIndexBlobReader::new(cursor);
|
||||
let mut reader = InvertedIndexBlobReader::new(blob);
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.total_row_count, 8);
|
||||
assert_eq!(metadata.segment_row_count, 1);
|
||||
|
||||
@@ -16,7 +16,6 @@ pub mod builder;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::range_read::RangeReaderAdapter;
|
||||
use common_telemetry::warn;
|
||||
use index::inverted_index::format::reader::InvertedIndexBlobReader;
|
||||
use index::inverted_index::search::index_apply::{
|
||||
@@ -109,7 +108,6 @@ impl InvertedIndexApplier {
|
||||
self.remote_blob_reader(file_id).await?
|
||||
}
|
||||
};
|
||||
let blob = RangeReaderAdapter(blob);
|
||||
|
||||
if let Some(index_cache) = &self.inverted_index_cache {
|
||||
let mut index_reader = CachedInvertedIndexBlobReader::new(
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::BoxedError;
|
||||
use object_store::{FuturesAsyncReader, FuturesAsyncWriter, ObjectStore};
|
||||
use object_store::{FuturesAsyncWriter, ObjectStore};
|
||||
use puffin::error::{self as puffin_error, Result as PuffinResult};
|
||||
use puffin::puffin_manager::file_accessor::PuffinFileAccessor;
|
||||
use puffin::puffin_manager::fs_puffin_manager::FsPuffinManager;
|
||||
@@ -28,11 +28,11 @@ use snafu::ResultExt;
|
||||
use crate::error::{PuffinInitStagerSnafu, Result};
|
||||
use crate::metrics::{
|
||||
INDEX_PUFFIN_FLUSH_OP_TOTAL, INDEX_PUFFIN_READ_BYTES_TOTAL, INDEX_PUFFIN_READ_OP_TOTAL,
|
||||
INDEX_PUFFIN_SEEK_OP_TOTAL, INDEX_PUFFIN_WRITE_BYTES_TOTAL, INDEX_PUFFIN_WRITE_OP_TOTAL,
|
||||
INDEX_PUFFIN_WRITE_BYTES_TOTAL, INDEX_PUFFIN_WRITE_OP_TOTAL,
|
||||
};
|
||||
use crate::sst::index::store::{self, InstrumentedStore};
|
||||
|
||||
type InstrumentedAsyncRead = store::InstrumentedAsyncRead<'static, FuturesAsyncReader>;
|
||||
type InstrumentedRangeReader = store::InstrumentedRangeReader<'static>;
|
||||
type InstrumentedAsyncWrite = store::InstrumentedAsyncWrite<'static, FuturesAsyncWriter>;
|
||||
|
||||
pub(crate) type SstPuffinManager =
|
||||
@@ -115,16 +115,15 @@ impl ObjectStorePuffinFileAccessor {
|
||||
|
||||
#[async_trait]
|
||||
impl PuffinFileAccessor for ObjectStorePuffinFileAccessor {
|
||||
type Reader = InstrumentedAsyncRead;
|
||||
type Reader = InstrumentedRangeReader;
|
||||
type Writer = InstrumentedAsyncWrite;
|
||||
|
||||
async fn reader(&self, puffin_file_name: &str) -> PuffinResult<Self::Reader> {
|
||||
self.object_store
|
||||
.reader(
|
||||
.range_reader(
|
||||
puffin_file_name,
|
||||
&INDEX_PUFFIN_READ_BYTES_TOTAL,
|
||||
&INDEX_PUFFIN_READ_OP_TOTAL,
|
||||
&INDEX_PUFFIN_SEEK_OP_TOTAL,
|
||||
)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
@@ -147,9 +146,9 @@ impl PuffinFileAccessor for ObjectStorePuffinFileAccessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::range_read::RangeReader;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use futures::io::Cursor;
|
||||
use futures::AsyncReadExt;
|
||||
use object_store::services::Memory;
|
||||
use puffin::blob_metadata::CompressionCodec;
|
||||
use puffin::puffin_manager::{DirGuard, PuffinManager, PuffinReader, PuffinWriter, PutOptions};
|
||||
@@ -193,9 +192,9 @@ mod tests {
|
||||
let reader = manager.reader(file_name).await.unwrap();
|
||||
let blob_guard = reader.blob(blob_key).await.unwrap();
|
||||
let mut blob_reader = blob_guard.reader().await.unwrap();
|
||||
let mut buf = Vec::new();
|
||||
blob_reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, raw_data);
|
||||
let meta = blob_reader.metadata().await.unwrap();
|
||||
let bs = blob_reader.read(0..meta.content_length).await.unwrap();
|
||||
assert_eq!(&*bs, raw_data);
|
||||
|
||||
let dir_guard = reader.dir(dir_key).await.unwrap();
|
||||
let file = dir_guard.path().join("hello");
|
||||
|
||||
@@ -13,9 +13,13 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{BufMut, Bytes};
|
||||
use common_base::range_read::{Metadata, RangeReader};
|
||||
use futures::{AsyncRead, AsyncSeek, AsyncWrite};
|
||||
use object_store::ObjectStore;
|
||||
use pin_project::pin_project;
|
||||
@@ -51,6 +55,22 @@ impl InstrumentedStore {
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns an [`InstrumentedRangeReader`] for the given path.
|
||||
/// Metrics like the number of bytes read are recorded using the provided `IntCounter`.
|
||||
pub async fn range_reader<'a>(
|
||||
&self,
|
||||
path: &str,
|
||||
read_byte_count: &'a IntCounter,
|
||||
read_count: &'a IntCounter,
|
||||
) -> Result<InstrumentedRangeReader<'a>> {
|
||||
Ok(InstrumentedRangeReader {
|
||||
store: self.object_store.clone(),
|
||||
path: path.to_string(),
|
||||
read_byte_count,
|
||||
read_count,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns an [`InstrumentedAsyncRead`] for the given path.
|
||||
/// Metrics like the number of bytes read, read and seek operations
|
||||
/// are recorded using the provided `IntCounter`s.
|
||||
@@ -236,6 +256,56 @@ impl<W: AsyncWrite + Unpin + Send> AsyncWrite for InstrumentedAsyncWrite<'_, W>
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements `RangeReader` for `ObjectStore` and record metrics.
|
||||
pub(crate) struct InstrumentedRangeReader<'a> {
|
||||
store: ObjectStore,
|
||||
path: String,
|
||||
read_byte_count: &'a IntCounter,
|
||||
read_count: &'a IntCounter,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RangeReader for InstrumentedRangeReader<'_> {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
let stat = self.store.stat(&self.path).await?;
|
||||
Ok(Metadata {
|
||||
content_length: stat.content_length(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
let buf = self.store.reader(&self.path).await?.read(range).await?;
|
||||
self.read_byte_count.inc_by(buf.len() as _);
|
||||
self.read_count.inc_by(1);
|
||||
Ok(buf.to_bytes())
|
||||
}
|
||||
|
||||
async fn read_into(
|
||||
&mut self,
|
||||
range: Range<u64>,
|
||||
buf: &mut (impl BufMut + Send),
|
||||
) -> io::Result<()> {
|
||||
let reader = self.store.reader(&self.path).await?;
|
||||
let size = reader.read_into(buf, range).await?;
|
||||
self.read_byte_count.inc_by(size as _);
|
||||
self.read_count.inc_by(1);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
|
||||
let bufs = self
|
||||
.store
|
||||
.reader(&self.path)
|
||||
.await?
|
||||
.fetch(ranges.to_owned())
|
||||
.await?;
|
||||
let total_size: usize = bufs.iter().map(|buf| buf.len()).sum();
|
||||
self.read_byte_count.inc_by(total_size as _);
|
||||
self.read_count.inc_by(1);
|
||||
Ok(bufs.into_iter().map(|buf| buf.to_bytes()).collect())
|
||||
}
|
||||
}
|
||||
|
||||
/// A guard that increments a counter when dropped.
|
||||
struct CounterGuard<'a> {
|
||||
count: usize,
|
||||
|
||||
@@ -14,6 +14,8 @@ async-walkdir = "2.0.0"
|
||||
auto_impl = "1.2.0"
|
||||
base64.workspace = true
|
||||
bitflags.workspace = true
|
||||
bytes.workspace = true
|
||||
common-base.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-runtime.workspace = true
|
||||
|
||||
@@ -16,6 +16,7 @@ mod file;
|
||||
mod footer;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::range_read::RangeReader;
|
||||
|
||||
use crate::blob_metadata::BlobMetadata;
|
||||
use crate::error::Result;
|
||||
@@ -38,7 +39,7 @@ pub trait SyncReader<'a> {
|
||||
/// `AsyncReader` defines an asynchronous reader for puffin data.
|
||||
#[async_trait]
|
||||
pub trait AsyncReader<'a> {
|
||||
type Reader: futures::AsyncRead + futures::AsyncSeek;
|
||||
type Reader: RangeReader;
|
||||
|
||||
/// Fetches the FileMetadata.
|
||||
async fn metadata(&'a mut self) -> Result<FileMetadata>;
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::io::{self, SeekFrom};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
|
||||
use common_base::range_read::RangeReader;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::blob_metadata::BlobMetadata;
|
||||
@@ -112,7 +112,7 @@ impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader<R> {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<'a, R: AsyncRead + AsyncSeek + Unpin + Send + 'a> AsyncReader<'a> for PuffinFileReader<R> {
|
||||
impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
|
||||
type Reader = PartialReader<&'a mut R>;
|
||||
|
||||
async fn metadata(&'a mut self) -> Result<FileMetadata> {
|
||||
@@ -121,12 +121,8 @@ impl<'a, R: AsyncRead + AsyncSeek + Unpin + Send + 'a> AsyncReader<'a> for Puffi
|
||||
}
|
||||
|
||||
// check the magic
|
||||
let mut magic = [0; MAGIC_SIZE as usize];
|
||||
self.source
|
||||
.read_exact(&mut magic)
|
||||
.await
|
||||
.context(ReadSnafu)?;
|
||||
ensure!(magic == MAGIC, MagicNotMatchedSnafu);
|
||||
let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?;
|
||||
ensure!(*magic == MAGIC, MagicNotMatchedSnafu);
|
||||
|
||||
let file_size = self.get_file_size_async().await?;
|
||||
|
||||
@@ -155,13 +151,14 @@ impl<R: io::Read + io::Seek> PuffinFileReader<R> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: AsyncRead + AsyncSeek + Send + Unpin> PuffinFileReader<R> {
|
||||
impl<R: RangeReader> PuffinFileReader<R> {
|
||||
async fn get_file_size_async(&mut self) -> Result<u64> {
|
||||
let file_size = self
|
||||
.source
|
||||
.seek(SeekFrom::End(0))
|
||||
.metadata()
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
.context(ReadSnafu)?
|
||||
.content_length;
|
||||
Self::validate_file_size(file_size)?;
|
||||
Ok(file_size)
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::io::{self, Cursor, SeekFrom};
|
||||
|
||||
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
|
||||
use common_base::range_read::RangeReader;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
@@ -72,24 +72,20 @@ impl<R: io::Read + io::Seek> FooterParser<R> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: AsyncRead + AsyncSeek + Unpin> FooterParser<R> {
|
||||
impl<R: RangeReader> FooterParser<R> {
|
||||
/// Parses the footer from the IO source in a asynchronous manner.
|
||||
pub async fn parse_async(&mut self) -> Result<FileMetadata> {
|
||||
let mut parser = StageParser::new(self.file_size);
|
||||
|
||||
let mut buf = vec![];
|
||||
while let Some(byte_to_read) = parser.next_to_read() {
|
||||
buf.clear();
|
||||
let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size;
|
||||
self.source
|
||||
.seek(SeekFrom::Start(byte_to_read.offset))
|
||||
.read_into(range, &mut buf)
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
let size = byte_to_read.size as usize;
|
||||
|
||||
buf.resize(size, 0);
|
||||
let buf = &mut buf[..size];
|
||||
|
||||
self.source.read_exact(buf).await.context(ReadSnafu)?;
|
||||
parser.consume_bytes(buf)?;
|
||||
.context(ReadSnafu)?;
|
||||
parser.consume_bytes(&buf)?;
|
||||
}
|
||||
|
||||
parser.finish()
|
||||
|
||||
@@ -13,184 +13,121 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::io;
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use std::ops::Range;
|
||||
|
||||
use futures::{ready, AsyncRead, AsyncSeek};
|
||||
use async_trait::async_trait;
|
||||
use bytes::{BufMut, Bytes};
|
||||
use common_base::range_read::{Metadata, RangeReader};
|
||||
|
||||
use crate::partial_reader::position::position_after_seek;
|
||||
use crate::partial_reader::PartialReader;
|
||||
|
||||
impl<R: AsyncRead + AsyncSeek + Unpin> AsyncRead for PartialReader<R> {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut [u8],
|
||||
) -> Poll<io::Result<usize>> {
|
||||
// past end of portion
|
||||
if self.position() > self.size() {
|
||||
return Poll::Ready(Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"invalid read past the end of the portion",
|
||||
)));
|
||||
}
|
||||
|
||||
// end of portion
|
||||
if self.is_eof() {
|
||||
return Poll::Ready(Ok(0));
|
||||
}
|
||||
|
||||
// first read, seek to the correct offset
|
||||
if self.position_in_portion.is_none() {
|
||||
// seek operation
|
||||
let seek_from = io::SeekFrom::Start(self.offset);
|
||||
ready!(self.as_mut().project().source.poll_seek(cx, seek_from))?;
|
||||
|
||||
self.position_in_portion = Some(0);
|
||||
}
|
||||
|
||||
// prevent reading over the end
|
||||
let max_len = (self.size() - self.position_in_portion.unwrap()) as usize;
|
||||
let actual_len = max_len.min(buf.len());
|
||||
|
||||
// create a limited reader
|
||||
let target_buf = &mut buf[..actual_len];
|
||||
|
||||
// read operation
|
||||
let read_bytes = ready!(self.as_mut().project().source.poll_read(cx, target_buf))?;
|
||||
self.position_in_portion = Some(self.position() + read_bytes as u64);
|
||||
|
||||
Poll::Ready(Ok(read_bytes))
|
||||
#[async_trait]
|
||||
impl<R: RangeReader> RangeReader for PartialReader<R> {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
Ok(Metadata {
|
||||
content_length: self.size,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: AsyncRead + AsyncSeek + Unpin> AsyncSeek for PartialReader<R> {
|
||||
fn poll_seek(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
pos: io::SeekFrom,
|
||||
) -> Poll<io::Result<u64>> {
|
||||
let new_position = position_after_seek(pos, self.position(), self.size())?;
|
||||
let pos = io::SeekFrom::Start(self.offset + new_position);
|
||||
ready!(self.as_mut().project().source.poll_seek(cx, pos))?;
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
let absolute_range_start = self.offset + range.start;
|
||||
if absolute_range_start >= self.offset + self.size {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
"Start of range is out of bounds",
|
||||
));
|
||||
}
|
||||
let absolute_range_end = (self.offset + range.end).min(self.offset + self.size);
|
||||
let absolute_range = absolute_range_start..absolute_range_end;
|
||||
|
||||
self.position_in_portion = Some(new_position);
|
||||
Poll::Ready(Ok(new_position))
|
||||
let result = self.source.read(absolute_range.clone()).await?;
|
||||
self.position_in_portion = Some(absolute_range.end);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn read_into(
|
||||
&mut self,
|
||||
range: Range<u64>,
|
||||
buf: &mut (impl BufMut + Send),
|
||||
) -> io::Result<()> {
|
||||
let absolute_range_start = self.offset + range.start;
|
||||
if absolute_range_start >= self.offset + self.size {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
"Start of range is out of bounds",
|
||||
));
|
||||
}
|
||||
let absolute_range_end = (self.offset + range.end).min(self.offset + self.size);
|
||||
let absolute_range = absolute_range_start..absolute_range_end;
|
||||
|
||||
self.source.read_into(absolute_range.clone(), buf).await?;
|
||||
self.position_in_portion = Some(absolute_range.end);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
|
||||
let absolute_ranges = ranges
|
||||
.iter()
|
||||
.map(|range| {
|
||||
let start = self.offset + range.start;
|
||||
|
||||
if start >= self.offset + self.size {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
"Start of range is out of bounds",
|
||||
));
|
||||
}
|
||||
|
||||
let end = (self.offset + range.end).min(self.offset + self.size);
|
||||
Ok(start..end)
|
||||
})
|
||||
.collect::<io::Result<Vec<_>>>()?;
|
||||
|
||||
let results = self.source.read_vec(&absolute_ranges).await?;
|
||||
if let Some(last_range) = absolute_ranges.last() {
|
||||
self.position_in_portion = Some(last_range.end);
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::io::Cursor;
|
||||
use futures::{AsyncReadExt as _, AsyncSeekExt as _};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_all_data_in_portion() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data.clone()), 0, 100);
|
||||
let mut buf = vec![0; 100];
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 100);
|
||||
assert_eq!(buf, data);
|
||||
let mut reader = PartialReader::new(data.clone(), 0, 100);
|
||||
let buf = reader.read(0..100).await.unwrap();
|
||||
assert_eq!(*buf, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_part_of_data_in_portion() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
let mut buf = vec![0; 30];
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 30);
|
||||
assert_eq!(buf, (10..40).collect::<Vec<u8>>());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn seek_and_read_data_in_portion() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
assert_eq!(reader.seek(io::SeekFrom::Start(10)).await.unwrap(), 10);
|
||||
let mut buf = vec![0; 10];
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 10);
|
||||
assert_eq!(buf, (20..30).collect::<Vec<u8>>());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_past_end_of_portion_is_eof() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
let mut buf = vec![0; 50];
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 30);
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 0); // hit EOF
|
||||
let mut reader = PartialReader::new(data, 10, 30);
|
||||
let buf = reader.read(0..30).await.unwrap();
|
||||
assert_eq!(*buf, (10..40).collect::<Vec<u8>>());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn seek_past_end_of_portion_returns_error() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
let mut reader = PartialReader::new(data, 10, 30);
|
||||
// seeking past the portion returns an error
|
||||
assert!(reader.seek(io::SeekFrom::Start(31)).await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn seek_to_negative_position_returns_error() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
assert_eq!(reader.seek(io::SeekFrom::Start(10)).await.unwrap(), 10);
|
||||
// seeking back to the start of the portion
|
||||
assert_eq!(reader.seek(io::SeekFrom::Current(-10)).await.unwrap(), 0);
|
||||
// seeking to a negative position returns an error
|
||||
assert!(reader.seek(io::SeekFrom::Current(-1)).await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn seek_from_end_of_portion() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
let mut buf = vec![0; 10];
|
||||
// seek to 10 bytes before the end of the portion
|
||||
assert_eq!(reader.seek(io::SeekFrom::End(-10)).await.unwrap(), 20);
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 10);
|
||||
// the final 10 bytes of the portion
|
||||
assert_eq!(buf, (30..40).collect::<Vec<u8>>());
|
||||
assert!(reader.is_eof());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn seek_from_end_to_negative_position_returns_error() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data.clone()), 10, 30);
|
||||
// seeking to a negative position returns an error
|
||||
assert!(reader.seek(io::SeekFrom::End(-31)).await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn zero_length_portion_returns_zero_on_read() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 0);
|
||||
let mut buf = vec![0; 10];
|
||||
// reading a portion with zero length returns 0 bytes
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 0);
|
||||
assert!(reader.read(31..32).await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn is_eof_returns_true_at_end_of_portion() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
let mut reader = PartialReader::new(data, 10, 30);
|
||||
// we are not at the end of the portion
|
||||
assert!(!reader.is_eof());
|
||||
let mut buf = vec![0; 30];
|
||||
assert_eq!(reader.read(&mut buf).await.unwrap(), 30);
|
||||
let _ = reader.read(0..20).await.unwrap();
|
||||
// we are at the end of the portion
|
||||
assert!(reader.is_eof());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn position_resets_after_seek_to_start() {
|
||||
let data: Vec<u8> = (0..100).collect();
|
||||
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
|
||||
assert_eq!(reader.seek(io::SeekFrom::Start(10)).await.unwrap(), 10);
|
||||
assert_eq!(reader.position(), 10);
|
||||
assert_eq!(reader.seek(io::SeekFrom::Start(0)).await.unwrap(), 0);
|
||||
assert_eq!(reader.position(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,8 @@ mod tests;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::{AsyncRead, AsyncSeek};
|
||||
use common_base::range_read::RangeReader;
|
||||
use futures::AsyncRead;
|
||||
|
||||
use crate::blob_metadata::CompressionCodec;
|
||||
use crate::error::Result;
|
||||
@@ -94,7 +95,7 @@ pub trait PuffinReader {
|
||||
#[async_trait]
|
||||
#[auto_impl::auto_impl(Arc)]
|
||||
pub trait BlobGuard {
|
||||
type Reader: AsyncRead + AsyncSeek + Unpin;
|
||||
type Reader: RangeReader;
|
||||
async fn reader(&self) -> Result<Self::Reader>;
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::{AsyncRead, AsyncSeek, AsyncWrite};
|
||||
use common_base::range_read::RangeReader;
|
||||
use futures::AsyncWrite;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
@@ -21,7 +22,7 @@ use crate::error::Result;
|
||||
#[async_trait]
|
||||
#[auto_impl::auto_impl(Arc)]
|
||||
pub trait PuffinFileAccessor: Send + Sync + 'static {
|
||||
type Reader: AsyncRead + AsyncSeek + Unpin + Send + Sync;
|
||||
type Reader: RangeReader + Sync;
|
||||
type Writer: AsyncWrite + Unpin + Send;
|
||||
|
||||
/// Opens a reader for the given puffin file.
|
||||
|
||||
@@ -20,8 +20,8 @@ use async_trait::async_trait;
|
||||
pub use reader::FsPuffinReader;
|
||||
pub use writer::FsPuffinWriter;
|
||||
|
||||
use super::file_accessor::PuffinFileAccessor;
|
||||
use crate::error::Result;
|
||||
use crate::puffin_manager::file_accessor::PuffinFileAccessor;
|
||||
use crate::puffin_manager::stager::Stager;
|
||||
use crate::puffin_manager::PuffinManager;
|
||||
|
||||
|
||||
@@ -12,19 +12,21 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::task::{Context, Poll};
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
|
||||
use async_compression::futures::bufread::ZstdDecoder;
|
||||
use async_trait::async_trait;
|
||||
use bytes::{BufMut, Bytes};
|
||||
use common_base::range_read::{AsyncReadAdapter, Metadata, RangeReader};
|
||||
use futures::io::BufReader;
|
||||
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncWrite};
|
||||
use futures::{AsyncRead, AsyncWrite};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::blob_metadata::{BlobMetadata, CompressionCodec};
|
||||
use crate::error::{
|
||||
BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu,
|
||||
ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu,
|
||||
MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu,
|
||||
};
|
||||
use crate::file_format::reader::{AsyncReader, PuffinFileReader};
|
||||
use crate::partial_reader::PartialReader;
|
||||
@@ -132,11 +134,12 @@ where
|
||||
F: PuffinFileAccessor + Clone,
|
||||
{
|
||||
async fn init_blob_to_stager(
|
||||
mut reader: PuffinFileReader<F::Reader>,
|
||||
reader: PuffinFileReader<F::Reader>,
|
||||
blob_metadata: BlobMetadata,
|
||||
mut writer: BoxWriter,
|
||||
) -> Result<u64> {
|
||||
let reader = reader.blob_reader(&blob_metadata)?;
|
||||
let reader = reader.into_blob_reader(&blob_metadata);
|
||||
let reader = AsyncReadAdapter::new(reader).await.context(MetadataSnafu)?;
|
||||
let compression = blob_metadata.compression_codec;
|
||||
let size = Self::handle_decompress(reader, &mut writer, compression).await?;
|
||||
Ok(size)
|
||||
@@ -159,10 +162,12 @@ where
|
||||
.context(BlobNotFoundSnafu { blob: key })?;
|
||||
|
||||
let mut reader = file.blob_reader(blob_metadata)?;
|
||||
let mut buf = vec![];
|
||||
reader.read_to_end(&mut buf).await.context(ReadSnafu)?;
|
||||
let dir_meta: DirMetadata =
|
||||
serde_json::from_slice(buf.as_slice()).context(DeserializeJsonSnafu)?;
|
||||
let meta = reader.metadata().await.context(MetadataSnafu)?;
|
||||
let buf = reader
|
||||
.read(0..meta.content_length)
|
||||
.await
|
||||
.context(ReadSnafu)?;
|
||||
let dir_meta: DirMetadata = serde_json::from_slice(&buf).context(DeserializeJsonSnafu)?;
|
||||
|
||||
let mut tasks = vec![];
|
||||
for file_meta in dir_meta.files {
|
||||
@@ -185,8 +190,8 @@ where
|
||||
let reader = accessor.reader(&puffin_file_name).await?;
|
||||
let writer = writer_provider.writer(&file_meta.relative_path).await?;
|
||||
let task = common_runtime::spawn_global(async move {
|
||||
let mut file = PuffinFileReader::new(reader);
|
||||
let reader = file.blob_reader(&blob_meta)?;
|
||||
let reader = PuffinFileReader::new(reader).into_blob_reader(&blob_meta);
|
||||
let reader = AsyncReadAdapter::new(reader).await.context(MetadataSnafu)?;
|
||||
let compression = blob_meta.compression_codec;
|
||||
let size = Self::handle_decompress(reader, writer, compression).await?;
|
||||
Ok(size)
|
||||
@@ -256,43 +261,45 @@ impl<F: PuffinFileAccessor + Clone> BlobGuard for RandomReadBlob<F> {
|
||||
/// `Either` is a type that represents either `A` or `B`.
|
||||
///
|
||||
/// Used to:
|
||||
/// impl `AsyncRead + AsyncSeek` for `Either<A: AsyncRead + AsyncSeek, B: AsyncRead + AsyncSeek>`,
|
||||
/// impl `RangeReader` for `Either<A: RangeReader, B: RangeReader>`,
|
||||
/// impl `BlobGuard` for `Either<A: BlobGuard, B: BlobGuard>`.
|
||||
pub enum Either<A, B> {
|
||||
L(A),
|
||||
R(B),
|
||||
}
|
||||
|
||||
impl<A, B> AsyncRead for Either<A, B>
|
||||
#[async_trait]
|
||||
impl<A, B> RangeReader for Either<A, B>
|
||||
where
|
||||
A: AsyncRead + Unpin,
|
||||
B: AsyncRead + Unpin,
|
||||
A: RangeReader,
|
||||
B: RangeReader,
|
||||
{
|
||||
fn poll_read(
|
||||
self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut [u8],
|
||||
) -> Poll<std::io::Result<usize>> {
|
||||
match self.get_mut() {
|
||||
Either::L(a) => Pin::new(a).poll_read(cx, buf),
|
||||
Either::R(b) => Pin::new(b).poll_read(cx, buf),
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
match self {
|
||||
Either::L(a) => a.metadata().await,
|
||||
Either::R(b) => b.metadata().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A, B> AsyncSeek for Either<A, B>
|
||||
where
|
||||
A: AsyncSeek + Unpin,
|
||||
B: AsyncSeek + Unpin,
|
||||
{
|
||||
fn poll_seek(
|
||||
self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
pos: std::io::SeekFrom,
|
||||
) -> Poll<std::io::Result<u64>> {
|
||||
match self.get_mut() {
|
||||
Either::L(a) => Pin::new(a).poll_seek(cx, pos),
|
||||
Either::R(b) => Pin::new(b).poll_seek(cx, pos),
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
match self {
|
||||
Either::L(a) => a.read(range).await,
|
||||
Either::R(b) => b.read(range).await,
|
||||
}
|
||||
}
|
||||
async fn read_into(
|
||||
&mut self,
|
||||
range: Range<u64>,
|
||||
buf: &mut (impl BufMut + Send),
|
||||
) -> io::Result<()> {
|
||||
match self {
|
||||
Either::L(a) => a.read_into(range, buf).await,
|
||||
Either::R(b) => b.read_into(range, buf).await,
|
||||
}
|
||||
}
|
||||
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
|
||||
match self {
|
||||
Either::L(a) => a.read_vec(ranges).await,
|
||||
Either::R(b) => b.read_vec(ranges).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ use async_trait::async_trait;
|
||||
use async_walkdir::{Filtering, WalkDir};
|
||||
use base64::prelude::BASE64_URL_SAFE;
|
||||
use base64::Engine;
|
||||
use common_base::range_read::FileReader;
|
||||
use common_runtime::runtime::RuntimeTrait;
|
||||
use common_telemetry::{info, warn};
|
||||
use futures::{FutureExt, StreamExt};
|
||||
@@ -30,7 +31,7 @@ use snafu::ResultExt;
|
||||
use tokio::fs;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
use tokio::sync::mpsc::{Receiver, Sender};
|
||||
use tokio_util::compat::{Compat, TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
|
||||
use tokio_util::compat::TokioAsyncWriteCompatExt;
|
||||
|
||||
use crate::error::{
|
||||
CacheGetSnafu, CreateSnafu, MetadataSnafu, OpenSnafu, ReadSnafu, RemoveSnafu, RenameSnafu,
|
||||
@@ -427,11 +428,10 @@ pub struct FsBlobGuard {
|
||||
|
||||
#[async_trait]
|
||||
impl BlobGuard for FsBlobGuard {
|
||||
type Reader = Compat<fs::File>;
|
||||
type Reader = FileReader;
|
||||
|
||||
async fn reader(&self) -> Result<Self::Reader> {
|
||||
let file = fs::File::open(&self.path).await.context(OpenSnafu)?;
|
||||
Ok(file.compat())
|
||||
FileReader::new(&self.path).await.context(OpenSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -530,8 +530,9 @@ impl BoundedStager {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::range_read::RangeReader;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use futures::{AsyncReadExt, AsyncWriteExt};
|
||||
use futures::AsyncWriteExt;
|
||||
use tokio::io::AsyncReadExt as _;
|
||||
|
||||
use super::*;
|
||||
@@ -564,9 +565,9 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, b"hello world");
|
||||
let m = reader.metadata().await.unwrap();
|
||||
let buf = reader.read(0..m.content_length).await.unwrap();
|
||||
assert_eq!(&*buf, b"hello world");
|
||||
|
||||
let mut file = stager.must_get_file(puffin_file_name, key).await;
|
||||
let mut buf = Vec::new();
|
||||
@@ -695,9 +696,10 @@ mod tests {
|
||||
.reader()
|
||||
.await
|
||||
.unwrap();
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, b"hello world");
|
||||
|
||||
let m = reader.metadata().await.unwrap();
|
||||
let buf = reader.read(0..m.content_length).await.unwrap();
|
||||
assert_eq!(&*buf, b"hello world");
|
||||
|
||||
let dir_path = stager
|
||||
.get_dir(
|
||||
@@ -751,9 +753,9 @@ mod tests {
|
||||
stager.cache.run_pending_tasks().await;
|
||||
assert!(!stager.in_cache(puffin_file_name, blob_key));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, b"Hello world");
|
||||
let m = reader.metadata().await.unwrap();
|
||||
let buf = reader.read(0..m.content_length).await.unwrap();
|
||||
assert_eq!(&*buf, b"Hello world");
|
||||
|
||||
// Second time to get the blob, get from recycle bin
|
||||
let mut reader = stager
|
||||
@@ -772,9 +774,9 @@ mod tests {
|
||||
stager.cache.run_pending_tasks().await;
|
||||
assert!(!stager.in_cache(puffin_file_name, blob_key));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, b"Hello world");
|
||||
let m = reader.metadata().await.unwrap();
|
||||
let buf = reader.read(0..m.content_length).await.unwrap();
|
||||
assert_eq!(&*buf, b"Hello world");
|
||||
|
||||
let dir_key = "dir_key";
|
||||
let files_in_dir = [
|
||||
|
||||
@@ -16,8 +16,8 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::range_read::{FileReader, RangeReader};
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use futures::AsyncReadExt as _;
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncReadExt as _;
|
||||
use tokio_util::compat::{Compat, TokioAsyncReadCompatExt};
|
||||
@@ -93,10 +93,9 @@ async fn test_put_get_file() {
|
||||
#[tokio::test]
|
||||
async fn test_put_get_files() {
|
||||
let capicities = [1, 16, u64::MAX];
|
||||
let compression_codecs = [None, Some(CompressionCodec::Zstd)];
|
||||
|
||||
for capacity in capicities {
|
||||
let compression_codecs = [None, Some(CompressionCodec::Zstd)];
|
||||
|
||||
for compression_codec in compression_codecs {
|
||||
let (_staging_dir, stager) = new_bounded_stager("test_put_get_files_", capacity).await;
|
||||
let file_accessor = Arc::new(MockFileAccessor::new("test_put_get_files_"));
|
||||
@@ -299,9 +298,9 @@ async fn check_blob(
|
||||
) {
|
||||
let blob = puffin_reader.blob(key).await.unwrap();
|
||||
let mut reader = blob.reader().await.unwrap();
|
||||
let mut buf = Vec::new();
|
||||
reader.read_to_end(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, raw_data);
|
||||
let meta = reader.metadata().await.unwrap();
|
||||
let bs = reader.read(0..meta.content_length).await.unwrap();
|
||||
assert_eq!(&*bs, raw_data);
|
||||
|
||||
if !compressed {
|
||||
// If the blob is not compressed, it won't be exist in the stager.
|
||||
@@ -386,14 +385,13 @@ impl MockFileAccessor {
|
||||
|
||||
#[async_trait]
|
||||
impl PuffinFileAccessor for MockFileAccessor {
|
||||
type Reader = Compat<File>;
|
||||
type Reader = FileReader;
|
||||
type Writer = Compat<File>;
|
||||
|
||||
async fn reader(&self, puffin_file_name: &str) -> Result<Self::Reader> {
|
||||
let f = tokio::fs::File::open(self.tempdir.path().join(puffin_file_name))
|
||||
Ok(FileReader::new(self.tempdir.path().join(puffin_file_name))
|
||||
.await
|
||||
.unwrap();
|
||||
Ok(f.compat())
|
||||
.unwrap())
|
||||
}
|
||||
|
||||
async fn writer(&self, puffin_file_name: &str) -> Result<Self::Writer> {
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{Cursor, Read};
|
||||
use std::vec;
|
||||
|
||||
use common_base::range_read::{FileReader, RangeReader};
|
||||
use futures::io::Cursor as AsyncCursor;
|
||||
use futures::AsyncReadExt;
|
||||
use tokio_util::compat::TokioAsyncReadCompatExt;
|
||||
|
||||
use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader};
|
||||
use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter};
|
||||
@@ -38,8 +38,8 @@ fn test_read_empty_puffin_sync() {
|
||||
async fn test_read_empty_puffin_async() {
|
||||
let path = "src/tests/resources/empty-puffin-uncompressed.puffin";
|
||||
|
||||
let file = tokio::fs::File::open(path).await.unwrap();
|
||||
let mut reader = PuffinFileReader::new(file.compat());
|
||||
let reader = FileReader::new(path).await.unwrap();
|
||||
let mut reader = PuffinFileReader::new(reader);
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
assert_eq!(metadata.properties.len(), 0);
|
||||
assert_eq!(metadata.blobs.len(), 0);
|
||||
@@ -84,8 +84,8 @@ fn test_sample_metric_data_puffin_sync() {
|
||||
async fn test_sample_metric_data_puffin_async() {
|
||||
let path = "src/tests/resources/sample-metric-data-uncompressed.puffin";
|
||||
|
||||
let file = tokio::fs::File::open(path).await.unwrap();
|
||||
let mut reader = PuffinFileReader::new(file.compat());
|
||||
let reader = FileReader::new(path).await.unwrap();
|
||||
let mut reader = PuffinFileReader::new(reader);
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
|
||||
assert_eq!(metadata.properties.len(), 1);
|
||||
@@ -104,13 +104,11 @@ async fn test_sample_metric_data_puffin_async() {
|
||||
assert_eq!(metadata.blobs[1].length, 83);
|
||||
|
||||
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
|
||||
let mut buf = String::new();
|
||||
some_blob.read_to_string(&mut buf).await.unwrap();
|
||||
assert_eq!(buf, "abcdefghi");
|
||||
let buf = read_all_range(&mut some_blob).await;
|
||||
assert_eq!(&buf, b"abcdefghi");
|
||||
|
||||
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
some_other_blob.read_to_end(&mut buf).await.unwrap();
|
||||
let buf = read_all_range(&mut some_other_blob).await;
|
||||
let expected = include_bytes!("tests/resources/sample-metric-data.blob");
|
||||
assert_eq!(buf, expected);
|
||||
}
|
||||
@@ -162,8 +160,7 @@ async fn test_writer_reader_empty_async() {
|
||||
let written_bytes = writer.finish().await.unwrap();
|
||||
assert!(written_bytes > 0);
|
||||
|
||||
let mut buf = AsyncCursor::new(buf.into_inner());
|
||||
let mut reader = PuffinFileReader::new(&mut buf);
|
||||
let mut reader = PuffinFileReader::new(buf.into_inner());
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
|
||||
assert_eq!(metadata.properties.len(), 1);
|
||||
@@ -287,8 +284,7 @@ async fn test_writer_reader_async() {
|
||||
let written_bytes = writer.finish().await.unwrap();
|
||||
assert!(written_bytes > 0);
|
||||
|
||||
let mut buf = AsyncCursor::new(buf.into_inner());
|
||||
let mut reader = PuffinFileReader::new(&mut buf);
|
||||
let mut reader = PuffinFileReader::new(buf.into_inner());
|
||||
let metadata = reader.metadata().await.unwrap();
|
||||
|
||||
assert_eq!(metadata.properties.len(), 1);
|
||||
@@ -307,16 +303,20 @@ async fn test_writer_reader_async() {
|
||||
assert_eq!(metadata.blobs[1].length, 83);
|
||||
|
||||
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
some_blob.read_to_end(&mut buf).await.unwrap();
|
||||
let buf = read_all_range(&mut some_blob).await;
|
||||
assert_eq!(buf, blob1);
|
||||
|
||||
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
some_other_blob.read_to_end(&mut buf).await.unwrap();
|
||||
let buf = read_all_range(&mut some_other_blob).await;
|
||||
assert_eq!(buf, blob2);
|
||||
}
|
||||
|
||||
test_writer_reader_async(false).await;
|
||||
test_writer_reader_async(true).await;
|
||||
}
|
||||
|
||||
async fn read_all_range(reader: &mut impl RangeReader) -> Vec<u8> {
|
||||
let m = reader.metadata().await.unwrap();
|
||||
let buf = reader.read(0..m.content_length).await.unwrap();
|
||||
buf.to_vec()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user