feat(puffin): apply range reader (#4928)

* wip

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(puffin): apply range reader

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: read_vec reduce iteration

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: simplify rangereader for vec<u8>

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add unit test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: toml format

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2024-11-12 10:36:38 +08:00
committed by GitHub
parent 6248a6ccf5
commit cbf21e53a9
21 changed files with 504 additions and 307 deletions

5
Cargo.lock generated
View File

@@ -1934,10 +1934,13 @@ dependencies = [
"bytes",
"common-error",
"common-macro",
"common-test-util",
"futures",
"paste",
"pin-project",
"serde",
"snafu 0.8.5",
"tokio",
"toml 0.8.19",
"zeroize",
]
@@ -8973,6 +8976,8 @@ dependencies = [
"auto_impl",
"base64 0.21.7",
"bitflags 2.6.0",
"bytes",
"common-base",
"common-error",
"common-macro",
"common-runtime",

View File

@@ -16,9 +16,12 @@ common-error.workspace = true
common-macro.workspace = true
futures.workspace = true
paste = "1.0"
pin-project.workspace = true
serde = { version = "1.0", features = ["derive"] }
snafu.workspace = true
tokio.workspace = true
zeroize = { version = "1.6", default-features = false, features = ["alloc"] }
[dev-dependencies]
common-test-util.workspace = true
toml.workspace = true

View File

@@ -12,12 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::future::Future;
use std::io;
use std::ops::Range;
use std::path::Path;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use async_trait::async_trait;
use bytes::{BufMut, Bytes};
use futures::{AsyncReadExt, AsyncSeekExt};
use futures::AsyncRead;
use pin_project::pin_project;
use tokio::io::{AsyncReadExt as _, AsyncSeekExt as _};
use tokio::sync::Mutex;
/// `Metadata` contains the metadata of a source.
pub struct Metadata {
@@ -61,7 +69,7 @@ pub trait RangeReader: Send + Unpin {
}
#[async_trait]
impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
impl<R: ?Sized + RangeReader> RangeReader for &mut R {
async fn metadata(&mut self) -> io::Result<Metadata> {
(*self).metadata().await
}
@@ -80,26 +88,212 @@ impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
}
}
/// `RangeReaderAdapter` bridges `RangeReader` and `AsyncRead + AsyncSeek`.
pub struct RangeReaderAdapter<R>(pub R);
/// `AsyncReadAdapter` adapts a `RangeReader` to an `AsyncRead`.
#[pin_project]
pub struct AsyncReadAdapter<R> {
/// The inner `RangeReader`.
/// Use `Mutex` to get rid of the borrow checker issue.
inner: Arc<Mutex<R>>,
/// The current position from the view of the reader.
position: u64,
/// The buffer for the read bytes.
buffer: Vec<u8>,
/// The length of the content.
content_length: u64,
/// The future for reading the next bytes.
#[pin]
read_fut: Option<Pin<Box<dyn Future<Output = io::Result<Bytes>> + Send>>>,
}
impl<R: RangeReader + 'static> AsyncReadAdapter<R> {
pub async fn new(inner: R) -> io::Result<Self> {
let mut inner = inner;
let metadata = inner.metadata().await?;
Ok(AsyncReadAdapter {
inner: Arc::new(Mutex::new(inner)),
position: 0,
buffer: Vec::new(),
content_length: metadata.content_length,
read_fut: None,
})
}
}
/// The maximum size per read for the inner reader in `AsyncReadAdapter`.
const MAX_SIZE_PER_READ: usize = 8 * 1024 * 1024; // 8MB
impl<R: RangeReader + 'static> AsyncRead for AsyncReadAdapter<R> {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut [u8],
) -> Poll<io::Result<usize>> {
let mut this = self.as_mut().project();
if *this.position >= *this.content_length {
return Poll::Ready(Ok(0));
}
if !this.buffer.is_empty() {
let to_read = this.buffer.len().min(buf.len());
buf[..to_read].copy_from_slice(&this.buffer[..to_read]);
this.buffer.drain(..to_read);
*this.position += to_read as u64;
return Poll::Ready(Ok(to_read));
}
if this.read_fut.is_none() {
let size = (*this.content_length - *this.position).min(MAX_SIZE_PER_READ as u64);
let range = *this.position..(*this.position + size);
let inner = this.inner.clone();
let fut = async move {
let mut inner = inner.lock().await;
inner.read(range).await
};
*this.read_fut = Some(Box::pin(fut));
}
match this
.read_fut
.as_mut()
.as_pin_mut()
.expect("checked above")
.poll(cx)
{
Poll::Pending => Poll::Pending,
Poll::Ready(Ok(bytes)) => {
*this.read_fut = None;
if !bytes.is_empty() {
this.buffer.extend_from_slice(&bytes);
self.poll_read(cx, buf)
} else {
Poll::Ready(Ok(0))
}
}
Poll::Ready(Err(e)) => {
*this.read_fut = None;
Poll::Ready(Err(e))
}
}
}
}
/// Implements `RangeReader` for a type that implements `AsyncRead + AsyncSeek`.
///
/// TODO(zhongzc): This is a temporary solution for porting the codebase from `AsyncRead + AsyncSeek` to `RangeReader`.
/// Once the codebase is fully ported to `RangeReader`, remove this implementation.
#[async_trait]
impl<R: futures::AsyncRead + futures::AsyncSeek + Send + Unpin> RangeReader
for RangeReaderAdapter<R>
{
impl RangeReader for Vec<u8> {
async fn metadata(&mut self) -> io::Result<Metadata> {
let content_length = self.0.seek(io::SeekFrom::End(0)).await?;
Ok(Metadata { content_length })
Ok(Metadata {
content_length: self.len() as u64,
})
}
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
range.end = range.end.min(self.len() as u64);
let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
Ok(bytes)
}
}
/// `FileReader` is a `RangeReader` for reading a file.
pub struct FileReader {
content_length: u64,
position: u64,
file: tokio::fs::File,
}
impl FileReader {
/// Creates a new `FileReader` for the file at the given path.
pub async fn new(path: impl AsRef<Path>) -> io::Result<Self> {
let file = tokio::fs::File::open(path).await?;
let metadata = file.metadata().await?;
Ok(FileReader {
content_length: metadata.len(),
position: 0,
file,
})
}
}
#[async_trait]
impl RangeReader for FileReader {
async fn metadata(&mut self) -> io::Result<Metadata> {
Ok(Metadata {
content_length: self.content_length,
})
}
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
if range.start != self.position {
self.file.seek(io::SeekFrom::Start(range.start)).await?;
self.position = range.start;
}
range.end = range.end.min(self.content_length);
if range.end <= self.position {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Start of range is out of bounds",
));
}
let mut buf = vec![0; (range.end - range.start) as usize];
self.0.seek(io::SeekFrom::Start(range.start)).await?;
self.0.read_exact(&mut buf).await?;
self.file.read_exact(&mut buf).await?;
self.position = range.end;
Ok(Bytes::from(buf))
}
}
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_named_temp_file;
use futures::io::AsyncReadExt as _;
use super::*;
#[tokio::test]
async fn test_async_read_adapter() {
let data = b"hello world";
let reader = Vec::from(data);
let mut adapter = AsyncReadAdapter::new(reader).await.unwrap();
let mut buf = Vec::new();
adapter.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, data);
}
#[tokio::test]
async fn test_async_read_adapter_large() {
let data = (0..20 * 1024 * 1024).map(|i| i as u8).collect::<Vec<u8>>();
let mut adapter = AsyncReadAdapter::new(data.clone()).await.unwrap();
let mut buf = Vec::new();
adapter.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, data);
}
#[tokio::test]
async fn test_file_reader() {
let file = create_named_temp_file();
let path = file.path();
let data = b"hello world";
tokio::fs::write(path, data).await.unwrap();
let mut reader = FileReader::new(path).await.unwrap();
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.content_length, data.len() as u64);
let bytes = reader.read(0..metadata.content_length).await.unwrap();
assert_eq!(&*bytes, data);
let bytes = reader.read(0..5).await.unwrap();
assert_eq!(&*bytes, &data[..5]);
}
}

View File

@@ -80,9 +80,7 @@ impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
#[cfg(test)]
mod tests {
use common_base::bit_vec::prelude::*;
use common_base::range_read::RangeReaderAdapter;
use fst::MapBuilder;
use futures::io::Cursor;
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
use prost::Message;
@@ -163,8 +161,7 @@ mod tests {
#[tokio::test]
async fn test_inverted_index_blob_reader_metadata() {
let blob = create_inverted_index_blob();
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
let mut blob_reader = InvertedIndexBlobReader::new(blob);
let metas = blob_reader.metadata().await.unwrap();
assert_eq!(metas.metas.len(), 2);
@@ -191,8 +188,7 @@ mod tests {
#[tokio::test]
async fn test_inverted_index_blob_reader_fst() {
let blob = create_inverted_index_blob();
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
let mut blob_reader = InvertedIndexBlobReader::new(blob);
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();
@@ -224,8 +220,7 @@ mod tests {
#[tokio::test]
async fn test_inverted_index_blob_reader_bitmap() {
let blob = create_inverted_index_blob();
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
let mut blob_reader = InvertedIndexBlobReader::new(blob);
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();

View File

@@ -113,8 +113,6 @@ impl<R: RangeReader> InvertedIndeFooterReader<R> {
#[cfg(test)]
mod tests {
use common_base::range_read::RangeReaderAdapter;
use futures::io::Cursor;
use prost::Message;
use super::*;
@@ -141,10 +139,9 @@ mod tests {
..Default::default()
};
let payload_buf = create_test_payload(meta);
let mut payload_buf = create_test_payload(meta);
let blob_size = payload_buf.len() as u64;
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
let payload_size = reader.read_payload_size().await.unwrap();
let metas = reader.read_payload(payload_size).await.unwrap();
@@ -164,8 +161,7 @@ mod tests {
let mut payload_buf = create_test_payload(meta);
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
let blob_size = payload_buf.len() as u64;
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
let payload_size_result = reader.read_payload_size().await;
assert!(payload_size_result.is_err());
@@ -180,10 +176,9 @@ mod tests {
..Default::default()
};
let payload_buf = create_test_payload(meta);
let mut payload_buf = create_test_payload(meta);
let blob_size = payload_buf.len() as u64;
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size);
let payload_size = reader.read_payload_size().await.unwrap();
let payload_result = reader.read_payload(payload_size).await;

View File

@@ -99,8 +99,6 @@ impl<W: AsyncWrite + Send + Unpin> InvertedIndexBlobWriter<W> {
#[cfg(test)]
mod tests {
use common_base::range_read::RangeReaderAdapter;
use futures::io::Cursor;
use futures::stream;
use super::*;
@@ -120,8 +118,7 @@ mod tests {
.await
.unwrap();
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut reader = InvertedIndexBlobReader::new(cursor);
let mut reader = InvertedIndexBlobReader::new(blob);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
assert_eq!(metadata.segment_row_count, 1);
@@ -161,8 +158,7 @@ mod tests {
.await
.unwrap();
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut reader = InvertedIndexBlobReader::new(cursor);
let mut reader = InvertedIndexBlobReader::new(blob);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
assert_eq!(metadata.segment_row_count, 1);

View File

@@ -16,7 +16,6 @@ pub mod builder;
use std::sync::Arc;
use common_base::range_read::RangeReaderAdapter;
use common_telemetry::warn;
use index::inverted_index::format::reader::InvertedIndexBlobReader;
use index::inverted_index::search::index_apply::{
@@ -109,7 +108,6 @@ impl InvertedIndexApplier {
self.remote_blob_reader(file_id).await?
}
};
let blob = RangeReaderAdapter(blob);
if let Some(index_cache) = &self.inverted_index_cache {
let mut index_reader = CachedInvertedIndexBlobReader::new(

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use object_store::{FuturesAsyncReader, FuturesAsyncWriter, ObjectStore};
use object_store::{FuturesAsyncWriter, ObjectStore};
use puffin::error::{self as puffin_error, Result as PuffinResult};
use puffin::puffin_manager::file_accessor::PuffinFileAccessor;
use puffin::puffin_manager::fs_puffin_manager::FsPuffinManager;
@@ -28,11 +28,11 @@ use snafu::ResultExt;
use crate::error::{PuffinInitStagerSnafu, Result};
use crate::metrics::{
INDEX_PUFFIN_FLUSH_OP_TOTAL, INDEX_PUFFIN_READ_BYTES_TOTAL, INDEX_PUFFIN_READ_OP_TOTAL,
INDEX_PUFFIN_SEEK_OP_TOTAL, INDEX_PUFFIN_WRITE_BYTES_TOTAL, INDEX_PUFFIN_WRITE_OP_TOTAL,
INDEX_PUFFIN_WRITE_BYTES_TOTAL, INDEX_PUFFIN_WRITE_OP_TOTAL,
};
use crate::sst::index::store::{self, InstrumentedStore};
type InstrumentedAsyncRead = store::InstrumentedAsyncRead<'static, FuturesAsyncReader>;
type InstrumentedRangeReader = store::InstrumentedRangeReader<'static>;
type InstrumentedAsyncWrite = store::InstrumentedAsyncWrite<'static, FuturesAsyncWriter>;
pub(crate) type SstPuffinManager =
@@ -115,16 +115,15 @@ impl ObjectStorePuffinFileAccessor {
#[async_trait]
impl PuffinFileAccessor for ObjectStorePuffinFileAccessor {
type Reader = InstrumentedAsyncRead;
type Reader = InstrumentedRangeReader;
type Writer = InstrumentedAsyncWrite;
async fn reader(&self, puffin_file_name: &str) -> PuffinResult<Self::Reader> {
self.object_store
.reader(
.range_reader(
puffin_file_name,
&INDEX_PUFFIN_READ_BYTES_TOTAL,
&INDEX_PUFFIN_READ_OP_TOTAL,
&INDEX_PUFFIN_SEEK_OP_TOTAL,
)
.await
.map_err(BoxedError::new)
@@ -147,9 +146,9 @@ impl PuffinFileAccessor for ObjectStorePuffinFileAccessor {
#[cfg(test)]
mod tests {
use common_base::range_read::RangeReader;
use common_test_util::temp_dir::create_temp_dir;
use futures::io::Cursor;
use futures::AsyncReadExt;
use object_store::services::Memory;
use puffin::blob_metadata::CompressionCodec;
use puffin::puffin_manager::{DirGuard, PuffinManager, PuffinReader, PuffinWriter, PutOptions};
@@ -193,9 +192,9 @@ mod tests {
let reader = manager.reader(file_name).await.unwrap();
let blob_guard = reader.blob(blob_key).await.unwrap();
let mut blob_reader = blob_guard.reader().await.unwrap();
let mut buf = Vec::new();
blob_reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, raw_data);
let meta = blob_reader.metadata().await.unwrap();
let bs = blob_reader.read(0..meta.content_length).await.unwrap();
assert_eq!(&*bs, raw_data);
let dir_guard = reader.dir(dir_key).await.unwrap();
let file = dir_guard.path().join("hello");

View File

@@ -13,9 +13,13 @@
// limitations under the License.
use std::io;
use std::ops::Range;
use std::pin::Pin;
use std::task::{Context, Poll};
use async_trait::async_trait;
use bytes::{BufMut, Bytes};
use common_base::range_read::{Metadata, RangeReader};
use futures::{AsyncRead, AsyncSeek, AsyncWrite};
use object_store::ObjectStore;
use pin_project::pin_project;
@@ -51,6 +55,22 @@ impl InstrumentedStore {
self
}
/// Returns an [`InstrumentedRangeReader`] for the given path.
/// Metrics like the number of bytes read are recorded using the provided `IntCounter`.
pub async fn range_reader<'a>(
&self,
path: &str,
read_byte_count: &'a IntCounter,
read_count: &'a IntCounter,
) -> Result<InstrumentedRangeReader<'a>> {
Ok(InstrumentedRangeReader {
store: self.object_store.clone(),
path: path.to_string(),
read_byte_count,
read_count,
})
}
/// Returns an [`InstrumentedAsyncRead`] for the given path.
/// Metrics like the number of bytes read, read and seek operations
/// are recorded using the provided `IntCounter`s.
@@ -236,6 +256,56 @@ impl<W: AsyncWrite + Unpin + Send> AsyncWrite for InstrumentedAsyncWrite<'_, W>
}
}
/// Implements `RangeReader` for `ObjectStore` and record metrics.
pub(crate) struct InstrumentedRangeReader<'a> {
store: ObjectStore,
path: String,
read_byte_count: &'a IntCounter,
read_count: &'a IntCounter,
}
#[async_trait]
impl RangeReader for InstrumentedRangeReader<'_> {
async fn metadata(&mut self) -> io::Result<Metadata> {
let stat = self.store.stat(&self.path).await?;
Ok(Metadata {
content_length: stat.content_length(),
})
}
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
let buf = self.store.reader(&self.path).await?.read(range).await?;
self.read_byte_count.inc_by(buf.len() as _);
self.read_count.inc_by(1);
Ok(buf.to_bytes())
}
async fn read_into(
&mut self,
range: Range<u64>,
buf: &mut (impl BufMut + Send),
) -> io::Result<()> {
let reader = self.store.reader(&self.path).await?;
let size = reader.read_into(buf, range).await?;
self.read_byte_count.inc_by(size as _);
self.read_count.inc_by(1);
Ok(())
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
let bufs = self
.store
.reader(&self.path)
.await?
.fetch(ranges.to_owned())
.await?;
let total_size: usize = bufs.iter().map(|buf| buf.len()).sum();
self.read_byte_count.inc_by(total_size as _);
self.read_count.inc_by(1);
Ok(bufs.into_iter().map(|buf| buf.to_bytes()).collect())
}
}
/// A guard that increments a counter when dropped.
struct CounterGuard<'a> {
count: usize,

View File

@@ -14,6 +14,8 @@ async-walkdir = "2.0.0"
auto_impl = "1.2.0"
base64.workspace = true
bitflags.workspace = true
bytes.workspace = true
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-runtime.workspace = true

View File

@@ -16,6 +16,7 @@ mod file;
mod footer;
use async_trait::async_trait;
use common_base::range_read::RangeReader;
use crate::blob_metadata::BlobMetadata;
use crate::error::Result;
@@ -38,7 +39,7 @@ pub trait SyncReader<'a> {
/// `AsyncReader` defines an asynchronous reader for puffin data.
#[async_trait]
pub trait AsyncReader<'a> {
type Reader: futures::AsyncRead + futures::AsyncSeek;
type Reader: RangeReader;
/// Fetches the FileMetadata.
async fn metadata(&'a mut self) -> Result<FileMetadata>;

View File

@@ -15,7 +15,7 @@
use std::io::{self, SeekFrom};
use async_trait::async_trait;
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
use common_base::range_read::RangeReader;
use snafu::{ensure, ResultExt};
use crate::blob_metadata::BlobMetadata;
@@ -112,7 +112,7 @@ impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader<R> {
}
#[async_trait]
impl<'a, R: AsyncRead + AsyncSeek + Unpin + Send + 'a> AsyncReader<'a> for PuffinFileReader<R> {
impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader<R> {
type Reader = PartialReader<&'a mut R>;
async fn metadata(&'a mut self) -> Result<FileMetadata> {
@@ -121,12 +121,8 @@ impl<'a, R: AsyncRead + AsyncSeek + Unpin + Send + 'a> AsyncReader<'a> for Puffi
}
// check the magic
let mut magic = [0; MAGIC_SIZE as usize];
self.source
.read_exact(&mut magic)
.await
.context(ReadSnafu)?;
ensure!(magic == MAGIC, MagicNotMatchedSnafu);
let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?;
ensure!(*magic == MAGIC, MagicNotMatchedSnafu);
let file_size = self.get_file_size_async().await?;
@@ -155,13 +151,14 @@ impl<R: io::Read + io::Seek> PuffinFileReader<R> {
}
}
impl<R: AsyncRead + AsyncSeek + Send + Unpin> PuffinFileReader<R> {
impl<R: RangeReader> PuffinFileReader<R> {
async fn get_file_size_async(&mut self) -> Result<u64> {
let file_size = self
.source
.seek(SeekFrom::End(0))
.metadata()
.await
.context(SeekSnafu)?;
.context(ReadSnafu)?
.content_length;
Self::validate_file_size(file_size)?;
Ok(file_size)
}

View File

@@ -14,7 +14,7 @@
use std::io::{self, Cursor, SeekFrom};
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
use common_base::range_read::RangeReader;
use snafu::{ensure, ResultExt};
use crate::error::{
@@ -72,24 +72,20 @@ impl<R: io::Read + io::Seek> FooterParser<R> {
}
}
impl<R: AsyncRead + AsyncSeek + Unpin> FooterParser<R> {
impl<R: RangeReader> FooterParser<R> {
    /// Parses the footer from the IO source in an asynchronous manner.
pub async fn parse_async(&mut self) -> Result<FileMetadata> {
let mut parser = StageParser::new(self.file_size);
let mut buf = vec![];
while let Some(byte_to_read) = parser.next_to_read() {
buf.clear();
let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size;
self.source
.seek(SeekFrom::Start(byte_to_read.offset))
.read_into(range, &mut buf)
.await
.context(SeekSnafu)?;
let size = byte_to_read.size as usize;
buf.resize(size, 0);
let buf = &mut buf[..size];
self.source.read_exact(buf).await.context(ReadSnafu)?;
parser.consume_bytes(buf)?;
.context(ReadSnafu)?;
parser.consume_bytes(&buf)?;
}
parser.finish()

View File

@@ -13,184 +13,121 @@
// limitations under the License.
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};
use std::ops::Range;
use futures::{ready, AsyncRead, AsyncSeek};
use async_trait::async_trait;
use bytes::{BufMut, Bytes};
use common_base::range_read::{Metadata, RangeReader};
use crate::partial_reader::position::position_after_seek;
use crate::partial_reader::PartialReader;
impl<R: AsyncRead + AsyncSeek + Unpin> AsyncRead for PartialReader<R> {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut [u8],
) -> Poll<io::Result<usize>> {
// past end of portion
if self.position() > self.size() {
return Poll::Ready(Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid read past the end of the portion",
)));
}
// end of portion
if self.is_eof() {
return Poll::Ready(Ok(0));
}
// first read, seek to the correct offset
if self.position_in_portion.is_none() {
// seek operation
let seek_from = io::SeekFrom::Start(self.offset);
ready!(self.as_mut().project().source.poll_seek(cx, seek_from))?;
self.position_in_portion = Some(0);
}
// prevent reading over the end
let max_len = (self.size() - self.position_in_portion.unwrap()) as usize;
let actual_len = max_len.min(buf.len());
// create a limited reader
let target_buf = &mut buf[..actual_len];
// read operation
let read_bytes = ready!(self.as_mut().project().source.poll_read(cx, target_buf))?;
self.position_in_portion = Some(self.position() + read_bytes as u64);
Poll::Ready(Ok(read_bytes))
#[async_trait]
impl<R: RangeReader> RangeReader for PartialReader<R> {
async fn metadata(&mut self) -> io::Result<Metadata> {
Ok(Metadata {
content_length: self.size,
})
}
}
impl<R: AsyncRead + AsyncSeek + Unpin> AsyncSeek for PartialReader<R> {
fn poll_seek(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
pos: io::SeekFrom,
) -> Poll<io::Result<u64>> {
let new_position = position_after_seek(pos, self.position(), self.size())?;
let pos = io::SeekFrom::Start(self.offset + new_position);
ready!(self.as_mut().project().source.poll_seek(cx, pos))?;
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
let absolute_range_start = self.offset + range.start;
if absolute_range_start >= self.offset + self.size {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Start of range is out of bounds",
));
}
let absolute_range_end = (self.offset + range.end).min(self.offset + self.size);
let absolute_range = absolute_range_start..absolute_range_end;
self.position_in_portion = Some(new_position);
Poll::Ready(Ok(new_position))
let result = self.source.read(absolute_range.clone()).await?;
self.position_in_portion = Some(absolute_range.end);
Ok(result)
}
async fn read_into(
&mut self,
range: Range<u64>,
buf: &mut (impl BufMut + Send),
) -> io::Result<()> {
let absolute_range_start = self.offset + range.start;
if absolute_range_start >= self.offset + self.size {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Start of range is out of bounds",
));
}
let absolute_range_end = (self.offset + range.end).min(self.offset + self.size);
let absolute_range = absolute_range_start..absolute_range_end;
self.source.read_into(absolute_range.clone(), buf).await?;
self.position_in_portion = Some(absolute_range.end);
Ok(())
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
let absolute_ranges = ranges
.iter()
.map(|range| {
let start = self.offset + range.start;
if start >= self.offset + self.size {
return Err(io::Error::new(
io::ErrorKind::UnexpectedEof,
"Start of range is out of bounds",
));
}
let end = (self.offset + range.end).min(self.offset + self.size);
Ok(start..end)
})
.collect::<io::Result<Vec<_>>>()?;
let results = self.source.read_vec(&absolute_ranges).await?;
if let Some(last_range) = absolute_ranges.last() {
self.position_in_portion = Some(last_range.end);
}
Ok(results)
}
}
#[cfg(test)]
mod tests {
use futures::io::Cursor;
use futures::{AsyncReadExt as _, AsyncSeekExt as _};
use super::*;
#[tokio::test]
async fn read_all_data_in_portion() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data.clone()), 0, 100);
let mut buf = vec![0; 100];
assert_eq!(reader.read(&mut buf).await.unwrap(), 100);
assert_eq!(buf, data);
let mut reader = PartialReader::new(data.clone(), 0, 100);
let buf = reader.read(0..100).await.unwrap();
assert_eq!(*buf, data);
}
#[tokio::test]
async fn read_part_of_data_in_portion() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
let mut buf = vec![0; 30];
assert_eq!(reader.read(&mut buf).await.unwrap(), 30);
assert_eq!(buf, (10..40).collect::<Vec<u8>>());
}
#[tokio::test]
async fn seek_and_read_data_in_portion() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
assert_eq!(reader.seek(io::SeekFrom::Start(10)).await.unwrap(), 10);
let mut buf = vec![0; 10];
assert_eq!(reader.read(&mut buf).await.unwrap(), 10);
assert_eq!(buf, (20..30).collect::<Vec<u8>>());
}
#[tokio::test]
async fn read_past_end_of_portion_is_eof() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
let mut buf = vec![0; 50];
assert_eq!(reader.read(&mut buf).await.unwrap(), 30);
assert_eq!(reader.read(&mut buf).await.unwrap(), 0); // hit EOF
let mut reader = PartialReader::new(data, 10, 30);
let buf = reader.read(0..30).await.unwrap();
assert_eq!(*buf, (10..40).collect::<Vec<u8>>());
}
#[tokio::test]
async fn seek_past_end_of_portion_returns_error() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
let mut reader = PartialReader::new(data, 10, 30);
// seeking past the portion returns an error
assert!(reader.seek(io::SeekFrom::Start(31)).await.is_err());
}
#[tokio::test]
async fn seek_to_negative_position_returns_error() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
assert_eq!(reader.seek(io::SeekFrom::Start(10)).await.unwrap(), 10);
// seeking back to the start of the portion
assert_eq!(reader.seek(io::SeekFrom::Current(-10)).await.unwrap(), 0);
// seeking to a negative position returns an error
assert!(reader.seek(io::SeekFrom::Current(-1)).await.is_err());
}
#[tokio::test]
async fn seek_from_end_of_portion() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
let mut buf = vec![0; 10];
// seek to 10 bytes before the end of the portion
assert_eq!(reader.seek(io::SeekFrom::End(-10)).await.unwrap(), 20);
assert_eq!(reader.read(&mut buf).await.unwrap(), 10);
// the final 10 bytes of the portion
assert_eq!(buf, (30..40).collect::<Vec<u8>>());
assert!(reader.is_eof());
}
#[tokio::test]
async fn seek_from_end_to_negative_position_returns_error() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data.clone()), 10, 30);
// seeking to a negative position returns an error
assert!(reader.seek(io::SeekFrom::End(-31)).await.is_err());
}
#[tokio::test]
async fn zero_length_portion_returns_zero_on_read() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 0);
let mut buf = vec![0; 10];
// reading a portion with zero length returns 0 bytes
assert_eq!(reader.read(&mut buf).await.unwrap(), 0);
assert!(reader.read(31..32).await.is_err());
}
#[tokio::test]
async fn is_eof_returns_true_at_end_of_portion() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
let mut reader = PartialReader::new(data, 10, 30);
// we are not at the end of the portion
assert!(!reader.is_eof());
let mut buf = vec![0; 30];
assert_eq!(reader.read(&mut buf).await.unwrap(), 30);
let _ = reader.read(0..20).await.unwrap();
// we are at the end of the portion
assert!(reader.is_eof());
}
#[tokio::test]
async fn position_resets_after_seek_to_start() {
let data: Vec<u8> = (0..100).collect();
let mut reader = PartialReader::new(Cursor::new(data), 10, 30);
assert_eq!(reader.seek(io::SeekFrom::Start(10)).await.unwrap(), 10);
assert_eq!(reader.position(), 10);
assert_eq!(reader.seek(io::SeekFrom::Start(0)).await.unwrap(), 0);
assert_eq!(reader.position(), 0);
}
}

View File

@@ -22,7 +22,8 @@ mod tests;
use std::path::PathBuf;
use async_trait::async_trait;
use futures::{AsyncRead, AsyncSeek};
use common_base::range_read::RangeReader;
use futures::AsyncRead;
use crate::blob_metadata::CompressionCodec;
use crate::error::Result;
@@ -94,7 +95,7 @@ pub trait PuffinReader {
#[async_trait]
#[auto_impl::auto_impl(Arc)]
pub trait BlobGuard {
type Reader: AsyncRead + AsyncSeek + Unpin;
type Reader: RangeReader;
async fn reader(&self) -> Result<Self::Reader>;
}

View File

@@ -13,7 +13,8 @@
// limitations under the License.
use async_trait::async_trait;
use futures::{AsyncRead, AsyncSeek, AsyncWrite};
use common_base::range_read::RangeReader;
use futures::AsyncWrite;
use crate::error::Result;
@@ -21,7 +22,7 @@ use crate::error::Result;
#[async_trait]
#[auto_impl::auto_impl(Arc)]
pub trait PuffinFileAccessor: Send + Sync + 'static {
type Reader: AsyncRead + AsyncSeek + Unpin + Send + Sync;
type Reader: RangeReader + Sync;
type Writer: AsyncWrite + Unpin + Send;
/// Opens a reader for the given puffin file.

View File

@@ -20,8 +20,8 @@ use async_trait::async_trait;
pub use reader::FsPuffinReader;
pub use writer::FsPuffinWriter;
use super::file_accessor::PuffinFileAccessor;
use crate::error::Result;
use crate::puffin_manager::file_accessor::PuffinFileAccessor;
use crate::puffin_manager::stager::Stager;
use crate::puffin_manager::PuffinManager;

View File

@@ -12,19 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::pin::Pin;
use std::task::{Context, Poll};
use std::io;
use std::ops::Range;
use async_compression::futures::bufread::ZstdDecoder;
use async_trait::async_trait;
use bytes::{BufMut, Bytes};
use common_base::range_read::{AsyncReadAdapter, Metadata, RangeReader};
use futures::io::BufReader;
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncWrite};
use futures::{AsyncRead, AsyncWrite};
use snafu::{ensure, OptionExt, ResultExt};
use crate::blob_metadata::{BlobMetadata, CompressionCodec};
use crate::error::{
BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu,
ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu,
MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu,
};
use crate::file_format::reader::{AsyncReader, PuffinFileReader};
use crate::partial_reader::PartialReader;
@@ -132,11 +134,12 @@ where
F: PuffinFileAccessor + Clone,
{
async fn init_blob_to_stager(
mut reader: PuffinFileReader<F::Reader>,
reader: PuffinFileReader<F::Reader>,
blob_metadata: BlobMetadata,
mut writer: BoxWriter,
) -> Result<u64> {
let reader = reader.blob_reader(&blob_metadata)?;
let reader = reader.into_blob_reader(&blob_metadata);
let reader = AsyncReadAdapter::new(reader).await.context(MetadataSnafu)?;
let compression = blob_metadata.compression_codec;
let size = Self::handle_decompress(reader, &mut writer, compression).await?;
Ok(size)
@@ -159,10 +162,12 @@ where
.context(BlobNotFoundSnafu { blob: key })?;
let mut reader = file.blob_reader(blob_metadata)?;
let mut buf = vec![];
reader.read_to_end(&mut buf).await.context(ReadSnafu)?;
let dir_meta: DirMetadata =
serde_json::from_slice(buf.as_slice()).context(DeserializeJsonSnafu)?;
let meta = reader.metadata().await.context(MetadataSnafu)?;
let buf = reader
.read(0..meta.content_length)
.await
.context(ReadSnafu)?;
let dir_meta: DirMetadata = serde_json::from_slice(&buf).context(DeserializeJsonSnafu)?;
let mut tasks = vec![];
for file_meta in dir_meta.files {
@@ -185,8 +190,8 @@ where
let reader = accessor.reader(&puffin_file_name).await?;
let writer = writer_provider.writer(&file_meta.relative_path).await?;
let task = common_runtime::spawn_global(async move {
let mut file = PuffinFileReader::new(reader);
let reader = file.blob_reader(&blob_meta)?;
let reader = PuffinFileReader::new(reader).into_blob_reader(&blob_meta);
let reader = AsyncReadAdapter::new(reader).await.context(MetadataSnafu)?;
let compression = blob_meta.compression_codec;
let size = Self::handle_decompress(reader, writer, compression).await?;
Ok(size)
@@ -256,43 +261,45 @@ impl<F: PuffinFileAccessor + Clone> BlobGuard for RandomReadBlob<F> {
/// `Either` is a type that represents either `A` or `B`.
///
/// Used to:
/// impl `AsyncRead + AsyncSeek` for `Either<A: AsyncRead + AsyncSeek, B: AsyncRead + AsyncSeek>`,
/// impl `RangeReader` for `Either<A: RangeReader, B: RangeReader>`,
/// impl `BlobGuard` for `Either<A: BlobGuard, B: BlobGuard>`.
pub enum Either<A, B> {
L(A),
R(B),
}
impl<A, B> AsyncRead for Either<A, B>
#[async_trait]
impl<A, B> RangeReader for Either<A, B>
where
A: AsyncRead + Unpin,
B: AsyncRead + Unpin,
A: RangeReader,
B: RangeReader,
{
fn poll_read(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut [u8],
) -> Poll<std::io::Result<usize>> {
match self.get_mut() {
Either::L(a) => Pin::new(a).poll_read(cx, buf),
Either::R(b) => Pin::new(b).poll_read(cx, buf),
async fn metadata(&mut self) -> io::Result<Metadata> {
match self {
Either::L(a) => a.metadata().await,
Either::R(b) => b.metadata().await,
}
}
}
impl<A, B> AsyncSeek for Either<A, B>
where
A: AsyncSeek + Unpin,
B: AsyncSeek + Unpin,
{
fn poll_seek(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
pos: std::io::SeekFrom,
) -> Poll<std::io::Result<u64>> {
match self.get_mut() {
Either::L(a) => Pin::new(a).poll_seek(cx, pos),
Either::R(b) => Pin::new(b).poll_seek(cx, pos),
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
match self {
Either::L(a) => a.read(range).await,
Either::R(b) => b.read(range).await,
}
}
async fn read_into(
&mut self,
range: Range<u64>,
buf: &mut (impl BufMut + Send),
) -> io::Result<()> {
match self {
Either::L(a) => a.read_into(range, buf).await,
Either::R(b) => b.read_into(range, buf).await,
}
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
match self {
Either::L(a) => a.read_vec(ranges).await,
Either::R(b) => b.read_vec(ranges).await,
}
}
}

View File

@@ -21,6 +21,7 @@ use async_trait::async_trait;
use async_walkdir::{Filtering, WalkDir};
use base64::prelude::BASE64_URL_SAFE;
use base64::Engine;
use common_base::range_read::FileReader;
use common_runtime::runtime::RuntimeTrait;
use common_telemetry::{info, warn};
use futures::{FutureExt, StreamExt};
@@ -30,7 +31,7 @@ use snafu::ResultExt;
use tokio::fs;
use tokio::sync::mpsc::error::TrySendError;
use tokio::sync::mpsc::{Receiver, Sender};
use tokio_util::compat::{Compat, TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
use tokio_util::compat::TokioAsyncWriteCompatExt;
use crate::error::{
CacheGetSnafu, CreateSnafu, MetadataSnafu, OpenSnafu, ReadSnafu, RemoveSnafu, RenameSnafu,
@@ -427,11 +428,10 @@ pub struct FsBlobGuard {
#[async_trait]
impl BlobGuard for FsBlobGuard {
type Reader = Compat<fs::File>;
type Reader = FileReader;
async fn reader(&self) -> Result<Self::Reader> {
let file = fs::File::open(&self.path).await.context(OpenSnafu)?;
Ok(file.compat())
FileReader::new(&self.path).await.context(OpenSnafu)
}
}
@@ -530,8 +530,9 @@ impl BoundedStager {
#[cfg(test)]
mod tests {
use common_base::range_read::RangeReader;
use common_test_util::temp_dir::create_temp_dir;
use futures::{AsyncReadExt, AsyncWriteExt};
use futures::AsyncWriteExt;
use tokio::io::AsyncReadExt as _;
use super::*;
@@ -564,9 +565,9 @@ mod tests {
.await
.unwrap();
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, b"hello world");
let m = reader.metadata().await.unwrap();
let buf = reader.read(0..m.content_length).await.unwrap();
assert_eq!(&*buf, b"hello world");
let mut file = stager.must_get_file(puffin_file_name, key).await;
let mut buf = Vec::new();
@@ -695,9 +696,10 @@ mod tests {
.reader()
.await
.unwrap();
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, b"hello world");
let m = reader.metadata().await.unwrap();
let buf = reader.read(0..m.content_length).await.unwrap();
assert_eq!(&*buf, b"hello world");
let dir_path = stager
.get_dir(
@@ -751,9 +753,9 @@ mod tests {
stager.cache.run_pending_tasks().await;
assert!(!stager.in_cache(puffin_file_name, blob_key));
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, b"Hello world");
let m = reader.metadata().await.unwrap();
let buf = reader.read(0..m.content_length).await.unwrap();
assert_eq!(&*buf, b"Hello world");
// Second time to get the blob, get from recycle bin
let mut reader = stager
@@ -772,9 +774,9 @@ mod tests {
stager.cache.run_pending_tasks().await;
assert!(!stager.in_cache(puffin_file_name, blob_key));
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, b"Hello world");
let m = reader.metadata().await.unwrap();
let buf = reader.read(0..m.content_length).await.unwrap();
assert_eq!(&*buf, b"Hello world");
let dir_key = "dir_key";
let files_in_dir = [

View File

@@ -16,8 +16,8 @@ use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use common_base::range_read::{FileReader, RangeReader};
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use futures::AsyncReadExt as _;
use tokio::fs::File;
use tokio::io::AsyncReadExt as _;
use tokio_util::compat::{Compat, TokioAsyncReadCompatExt};
@@ -93,10 +93,9 @@ async fn test_put_get_file() {
#[tokio::test]
async fn test_put_get_files() {
let capicities = [1, 16, u64::MAX];
let compression_codecs = [None, Some(CompressionCodec::Zstd)];
for capacity in capicities {
let compression_codecs = [None, Some(CompressionCodec::Zstd)];
for compression_codec in compression_codecs {
let (_staging_dir, stager) = new_bounded_stager("test_put_get_files_", capacity).await;
let file_accessor = Arc::new(MockFileAccessor::new("test_put_get_files_"));
@@ -299,9 +298,9 @@ async fn check_blob(
) {
let blob = puffin_reader.blob(key).await.unwrap();
let mut reader = blob.reader().await.unwrap();
let mut buf = Vec::new();
reader.read_to_end(&mut buf).await.unwrap();
assert_eq!(buf, raw_data);
let meta = reader.metadata().await.unwrap();
let bs = reader.read(0..meta.content_length).await.unwrap();
assert_eq!(&*bs, raw_data);
if !compressed {
// If the blob is not compressed, it won't be exist in the stager.
@@ -386,14 +385,13 @@ impl MockFileAccessor {
#[async_trait]
impl PuffinFileAccessor for MockFileAccessor {
type Reader = Compat<File>;
type Reader = FileReader;
type Writer = Compat<File>;
async fn reader(&self, puffin_file_name: &str) -> Result<Self::Reader> {
let f = tokio::fs::File::open(self.tempdir.path().join(puffin_file_name))
Ok(FileReader::new(self.tempdir.path().join(puffin_file_name))
.await
.unwrap();
Ok(f.compat())
.unwrap())
}
async fn writer(&self, puffin_file_name: &str) -> Result<Self::Writer> {

View File

@@ -15,10 +15,10 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{Cursor, Read};
use std::vec;
use common_base::range_read::{FileReader, RangeReader};
use futures::io::Cursor as AsyncCursor;
use futures::AsyncReadExt;
use tokio_util::compat::TokioAsyncReadCompatExt;
use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader};
use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter};
@@ -38,8 +38,8 @@ fn test_read_empty_puffin_sync() {
async fn test_read_empty_puffin_async() {
let path = "src/tests/resources/empty-puffin-uncompressed.puffin";
let file = tokio::fs::File::open(path).await.unwrap();
let mut reader = PuffinFileReader::new(file.compat());
let reader = FileReader::new(path).await.unwrap();
let mut reader = PuffinFileReader::new(reader);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.properties.len(), 0);
assert_eq!(metadata.blobs.len(), 0);
@@ -84,8 +84,8 @@ fn test_sample_metric_data_puffin_sync() {
async fn test_sample_metric_data_puffin_async() {
let path = "src/tests/resources/sample-metric-data-uncompressed.puffin";
let file = tokio::fs::File::open(path).await.unwrap();
let mut reader = PuffinFileReader::new(file.compat());
let reader = FileReader::new(path).await.unwrap();
let mut reader = PuffinFileReader::new(reader);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.properties.len(), 1);
@@ -104,13 +104,11 @@ async fn test_sample_metric_data_puffin_async() {
assert_eq!(metadata.blobs[1].length, 83);
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
let mut buf = String::new();
some_blob.read_to_string(&mut buf).await.unwrap();
assert_eq!(buf, "abcdefghi");
let buf = read_all_range(&mut some_blob).await;
assert_eq!(&buf, b"abcdefghi");
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
let mut buf = Vec::new();
some_other_blob.read_to_end(&mut buf).await.unwrap();
let buf = read_all_range(&mut some_other_blob).await;
let expected = include_bytes!("tests/resources/sample-metric-data.blob");
assert_eq!(buf, expected);
}
@@ -162,8 +160,7 @@ async fn test_writer_reader_empty_async() {
let written_bytes = writer.finish().await.unwrap();
assert!(written_bytes > 0);
let mut buf = AsyncCursor::new(buf.into_inner());
let mut reader = PuffinFileReader::new(&mut buf);
let mut reader = PuffinFileReader::new(buf.into_inner());
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.properties.len(), 1);
@@ -287,8 +284,7 @@ async fn test_writer_reader_async() {
let written_bytes = writer.finish().await.unwrap();
assert!(written_bytes > 0);
let mut buf = AsyncCursor::new(buf.into_inner());
let mut reader = PuffinFileReader::new(&mut buf);
let mut reader = PuffinFileReader::new(buf.into_inner());
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.properties.len(), 1);
@@ -307,16 +303,20 @@ async fn test_writer_reader_async() {
assert_eq!(metadata.blobs[1].length, 83);
let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap();
let mut buf = Vec::new();
some_blob.read_to_end(&mut buf).await.unwrap();
let buf = read_all_range(&mut some_blob).await;
assert_eq!(buf, blob1);
let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap();
let mut buf = Vec::new();
some_other_blob.read_to_end(&mut buf).await.unwrap();
let buf = read_all_range(&mut some_other_blob).await;
assert_eq!(buf, blob2);
}
test_writer_reader_async(false).await;
test_writer_reader_async(true).await;
}
async fn read_all_range(reader: &mut impl RangeReader) -> Vec<u8> {
let m = reader.metadata().await.unwrap();
let buf = reader.read(0..m.content_length).await.unwrap();
buf.to_vec()
}