build(deps): upgrade opendal to 0.46 (#4037)

* build(deps): upgrade opendal to 0.46

Signed-off-by: tison <wander4096@gmail.com>

* migrate writes

Signed-off-by: tison <wander4096@gmail.com>

* migrate reads

Signed-off-by: tison <wander4096@gmail.com>

* fixup object safety

Signed-off-by: tison <wander4096@gmail.com>

* fixup names

Signed-off-by: tison <wander4096@gmail.com>

* fixup compilation

Signed-off-by: tison <wander4096@gmail.com>

* fixup compilation

Signed-off-by: tison <wander4096@gmail.com>

* a few Buffer to Vec

Signed-off-by: tison <wander4096@gmail.com>

* Make greptime buildable with opendal 0.46 (#5)

Signed-off-by: Xuanwo <github@xuanwo.io>

* fixup toml check

Signed-off-by: tison <wander4096@gmail.com>

* test_orc_opener

Signed-off-by: tison <wander4096@gmail.com>

* Fix lru cache (#6)

Signed-off-by: Xuanwo <github@xuanwo.io>

* clippy

Signed-off-by: tison <wander4096@gmail.com>

* improve comments

Signed-off-by: tison <wander4096@gmail.com>

* address comments

Signed-off-by: tison <wander4096@gmail.com>

* reduce buf copy

Signed-off-by: tison <wander4096@gmail.com>

* upgrade to reqwest 0.12

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
Signed-off-by: Xuanwo <github@xuanwo.io>
Co-authored-by: Xuanwo <github@xuanwo.io>
This commit is contained in:
tison
2024-05-27 17:12:23 +08:00
committed by GitHub
parent 20ce7d428d
commit f9db5ff0d6
31 changed files with 631 additions and 443 deletions

View File

@@ -112,6 +112,10 @@ impl FileCache {
self.memory_index.insert(key, value).await;
}
/// Looks up `key` in the in-memory index and returns the associated
/// [`IndexValue`], or `None` if the lookup yields nothing.
///
/// NOTE(review): the comment in `reader` says `get()` must be used "to update
/// the estimator of the cache"; presumably this lookup has the same effect —
/// confirm before relying on it as a side-effect-free peek.
pub(crate) async fn get(&self, key: IndexKey) -> Option<IndexValue> {
self.memory_index.get(&key).await
}
/// Reads a file from the cache.
pub(crate) async fn reader(&self, key: IndexKey) -> Option<Reader> {
// We must use `get()` to update the estimator of the cache.
@@ -372,7 +376,6 @@ fn parse_index_key(name: &str) -> Option<IndexKey> {
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;
use futures::AsyncReadExt;
use object_store::services::Fs;
use super::*;
@@ -451,10 +454,9 @@ mod tests {
.await;
// Read file content.
let mut reader = cache.reader(key).await.unwrap();
let mut buf = String::new();
reader.read_to_string(&mut buf).await.unwrap();
assert_eq!("hello", buf);
let reader = cache.reader(key).await.unwrap();
let buf = reader.read(..).await.unwrap().to_vec();
assert_eq!("hello", String::from_utf8(buf).unwrap());
// Get weighted size.
cache.memory_index.run_pending_tasks().await;
@@ -549,10 +551,9 @@ mod tests {
for (i, file_id) in file_ids.iter().enumerate() {
let key = IndexKey::new(region_id, *file_id, file_type);
let mut reader = cache.reader(key).await.unwrap();
let mut buf = String::new();
reader.read_to_string(&mut buf).await.unwrap();
assert_eq!(i.to_string(), buf);
let reader = cache.reader(key).await.unwrap();
let buf = reader.read(..).await.unwrap().to_vec();
assert_eq!(i.to_string(), String::from_utf8(buf).unwrap());
}
}

View File

@@ -19,6 +19,7 @@ use std::time::Duration;
use common_base::readable_size::ReadableSize;
use common_telemetry::{debug, info};
use futures::AsyncWriteExt;
use object_store::manager::ObjectStoreManagerRef;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -175,19 +176,27 @@ impl WriteCache {
}])
.start_timer();
let cached_value = self
.file_cache
.local_store()
.stat(&cache_path)
.await
.context(error::OpenDalSnafu)?;
let reader = self
.file_cache
.local_store()
.reader(&cache_path)
.await
.context(error::OpenDalSnafu)?;
.context(error::OpenDalSnafu)?
.into_futures_async_read(0..cached_value.content_length());
let mut writer = remote_store
.writer_with(upload_path)
.buffer(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.chunk(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
.concurrent(DEFAULT_WRITE_CONCURRENCY)
.await
.context(error::OpenDalSnafu)?;
.context(error::OpenDalSnafu)?
.into_futures_async_write();
let bytes_written =
futures::io::copy(reader, &mut writer)
@@ -199,7 +208,11 @@ impl WriteCache {
})?;
// Must close to upload all data.
writer.close().await.context(error::OpenDalSnafu)?;
writer.close().await.context(error::UploadSnafu {
region_id,
file_id,
file_type,
})?;
UPLOAD_BYTES_TOTAL.inc_by(bytes_written);
@@ -315,7 +328,7 @@ mod tests {
.read(&write_cache.file_cache.cache_file_path(key))
.await
.unwrap();
assert_eq!(remote_data, cache_data);
assert_eq!(remote_data.to_vec(), cache_data.to_vec());
// Check write cache contains the index key
let index_key = IndexKey::new(region_id, file_id, FileType::Puffin);
@@ -326,7 +339,7 @@ mod tests {
.read(&write_cache.file_cache.cache_file_path(index_key))
.await
.unwrap();
assert_eq!(remote_index_data, cache_index_data);
assert_eq!(remote_index_data.to_vec(), cache_index_data.to_vec());
}
#[tokio::test]

View File

@@ -489,7 +489,7 @@ impl ManifestObjectStore {
}
};
let checkpoint_metadata = CheckpointMetadata::decode(&last_checkpoint_data)?;
let checkpoint_metadata = CheckpointMetadata::decode(&last_checkpoint_data.to_vec())?;
debug!(
"Load checkpoint in path: {}, metadata: {:?}",
@@ -501,7 +501,11 @@ impl ManifestObjectStore {
#[cfg(test)]
pub async fn read_file(&self, path: &str) -> Result<Vec<u8>> {
self.object_store.read(path).await.context(OpenDalSnafu)
self.object_store
.read(path)
.await
.context(OpenDalSnafu)
.map(|v| v.to_vec())
}
#[cfg(test)]

View File

@@ -121,9 +121,17 @@ impl SstIndexApplier {
return Ok(None);
};
let Some(indexed_value) = file_cache
.get(IndexKey::new(self.region_id, file_id, FileType::Puffin))
.await
else {
return Ok(None);
};
Ok(file_cache
.reader(IndexKey::new(self.region_id, file_id, FileType::Puffin))
.await
.map(|v| v.into_futures_async_read(0..indexed_value.file_size as u64))
.map(PuffinFileReader::new))
}
@@ -190,7 +198,13 @@ mod tests {
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let mut puffin_writer = PuffinFileWriter::new(object_store.writer(&path).await.unwrap());
let mut puffin_writer = PuffinFileWriter::new(
object_store
.writer(&path)
.await
.unwrap()
.into_futures_async_write(),
);
puffin_writer
.add_blob(Blob {
blob_type: INDEX_BLOB_TYPE.to_string(),
@@ -236,7 +250,13 @@ mod tests {
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let mut puffin_writer = PuffinFileWriter::new(object_store.writer(&path).await.unwrap());
let mut puffin_writer = PuffinFileWriter::new(
object_store
.writer(&path)
.await
.unwrap()
.into_futures_async_write(),
);
puffin_writer
.add_blob(Blob {
blob_type: "invalid_blob_type".to_string(),

View File

@@ -26,6 +26,8 @@ use crate::error::{OpenDalSnafu, Result};
/// A wrapper around [`ObjectStore`] that adds instrumentation for monitoring
/// metrics such as bytes read, bytes written, and the number of seek operations.
///
/// TODO: Consider refactoring InstrumentedStore to use async in trait instead of AsyncRead.
#[derive(Clone)]
pub(crate) struct InstrumentedStore {
/// The underlying object store.
@@ -58,8 +60,14 @@ impl InstrumentedStore {
read_byte_count: &'a IntCounter,
read_count: &'a IntCounter,
seek_count: &'a IntCounter,
) -> Result<InstrumentedAsyncRead<'a, object_store::Reader>> {
let reader = self.object_store.reader(path).await.context(OpenDalSnafu)?;
) -> Result<InstrumentedAsyncRead<'a, object_store::FuturesAsyncReader>> {
let meta = self.object_store.stat(path).await.context(OpenDalSnafu)?;
let reader = self
.object_store
.reader(path)
.await
.context(OpenDalSnafu)?
.into_futures_async_read(0..meta.content_length());
Ok(InstrumentedAsyncRead::new(
reader,
read_byte_count,
@@ -77,15 +85,21 @@ impl InstrumentedStore {
write_byte_count: &'a IntCounter,
write_count: &'a IntCounter,
flush_count: &'a IntCounter,
) -> Result<InstrumentedAsyncWrite<'a, object_store::Writer>> {
) -> Result<InstrumentedAsyncWrite<'a, object_store::FuturesAsyncWriter>> {
let writer = match self.write_buffer_size {
Some(size) => self
.object_store
.writer_with(path)
.buffer(size)
.chunk(size)
.await
.context(OpenDalSnafu)?,
None => self.object_store.writer(path).await.context(OpenDalSnafu)?,
.context(OpenDalSnafu)?
.into_futures_async_write(),
None => self
.object_store
.writer(path)
.await
.context(OpenDalSnafu)?
.into_futures_async_write(),
};
Ok(InstrumentedAsyncWrite::new(
writer,

View File

@@ -121,7 +121,7 @@ async fn fetch_ranges_seq(
.read_with(&file_path)
.range(range.start..range.end)
.call()?;
Ok::<_, object_store::Error>(Bytes::from(data))
Ok::<_, object_store::Error>(data.to_bytes())
})
.collect::<object_store::Result<Vec<_>>>()
};
@@ -141,7 +141,7 @@ async fn fetch_ranges_concurrent(
let future_read = object_store.read_with(file_path);
handles.push(async move {
let data = future_read.range(range.start..range.end).await?;
Ok::<_, object_store::Error>(Bytes::from(data))
Ok::<_, object_store::Error>(data.to_bytes())
});
}
let results = futures::future::try_join_all(handles).await?;
@@ -164,7 +164,7 @@ where
}
}
// https://github.com/apache/incubator-opendal/blob/7144ab1ca2409dff0c324bfed062ce985997f8ce/core/src/raw/tokio_util.rs#L21-L23
// https://github.com/apache/opendal/blob/v0.46.0/core/src/raw/tokio_util.rs#L21-L24
/// Parse tokio error into opendal::Error.
fn new_task_join_error(e: tokio::task::JoinError) -> object_store::Error {
object_store::Error::new(ErrorKind::Unexpected, "tokio task join failed").set_source(e)

View File

@@ -85,7 +85,8 @@ impl<'a> MetadataLoader<'a> {
.read_with(path)
.range(buffer_start..file_size)
.await
.context(error::OpenDalSnafu)?;
.context(error::OpenDalSnafu)?
.to_vec();
let buffer_len = buffer.len();
let mut footer = [0; 8];
@@ -129,7 +130,8 @@ impl<'a> MetadataLoader<'a> {
.read_with(path)
.range(metadata_start..(file_size - FOOTER_SIZE as u64))
.await
.context(error::OpenDalSnafu)?;
.context(error::OpenDalSnafu)?
.to_vec();
let metadata = decode_metadata(&data).map_err(|e| {
error::InvalidParquetSnafu {

View File

@@ -16,6 +16,7 @@
use std::time::Duration;
use bytes::Bytes;
use common_telemetry::{error, info, warn};
use futures::TryStreamExt;
use object_store::util::join_path;
@@ -50,7 +51,7 @@ impl<S> RegionWorkerLoop<S> {
region
.access_layer
.object_store()
.write(&marker_path, vec![])
.write(&marker_path, Bytes::new())
.await
.context(OpenDalSnafu)
.inspect_err(|e| {