feat: improve object storage cache (#2522)

* feat: refactor object storage cache with moka

* chore: minor fixes

* fix: concurrency issues and invalidate cache after write/delete

* chore: minor changes

* fix: cargo lock

* refactor: rename

* chore: change DEFAULT_OBJECT_STORE_CACHE_SIZE to 256MiB

* fix: typo

* chore: style

* fix: toml format

* chore: toml

* fix: toml format

* Update src/object-store/src/layers/lru_cache/read_cache.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: update Cargo.toml

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: update src/object-store/Cargo.toml

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: refactor and apply suggestions

* fix: typo

* feat: adds back allow list for caching

* chore: cr suggestion

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: cr suggestion

Co-authored-by: Yingwen <realevenyag@gmail.com>

* refactor: wrap inner Accessor with Arc

* chore: remove run_pending_task in read and write path

* chore: the arc is unnecessary

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
dennis zhuang
2023-10-08 11:27:49 +08:00
committed by GitHub
parent 657542c0b8
commit ff15bc41d6
15 changed files with 499 additions and 304 deletions

View File

@@ -30,6 +30,8 @@ async fn test_object_crud(store: &ObjectStore) -> Result<()> {
// Create object handler.
// Write data into object;
let file_name = "test_file";
assert!(store.read(file_name).await.is_err());
store.write(file_name, "Hello, World!").await?;
// Read data from object;
@@ -80,6 +82,11 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> {
store.delete(p2).await?;
let entries = store.list("/").await?;
assert!(entries.is_empty());
assert!(store.read(p1).await.is_err());
assert!(store.read(p2).await.is_err());
assert!(store.read(p3).await.is_err());
Ok(())
}
@@ -210,12 +217,48 @@ async fn test_gcs_backend() -> Result<()> {
Ok(())
}
/// Exercises the shared CRUD and listing checks against a filesystem-backed
/// store that has been wrapped with an `LruCacheLayer`.
///
/// The cache itself is another `Fs` accessor rooted in its own temporary
/// directory, and the layer is created with a capacity argument of `32`.
#[tokio::test]
async fn test_file_backend_with_lru_cache() -> Result<()> {
    logging::init_default_ut_logging();

    // Backing store: separate temp directories for data and atomic writes.
    let data_dir = create_temp_dir("test_file_backend_with_lru_cache");
    let tmp_dir = create_temp_dir("test_file_backend_with_lru_cache");
    let mut store_builder = Fs::default();
    let _ = store_builder
        .root(&data_dir.path().to_string_lossy())
        .atomic_write_dir(&tmp_dir.path().to_string_lossy());
    let store = ObjectStore::new(store_builder).unwrap().finish();

    // Cache layer: one temp directory serves as both root and atomic-write dir.
    let cache_dir = create_temp_dir("test_file_backend_with_lru_cache");
    let cache_layer = {
        let cache_root = cache_dir.path().to_string_lossy();
        let mut cache_builder = Fs::default();
        let _ = cache_builder
            .root(&cache_root)
            .atomic_write_dir(&cache_root);
        let file_cache = Arc::new(cache_builder.build().unwrap());

        LruCacheLayer::new(Arc::new(file_cache.clone()), 32)
            .await
            .unwrap()
    };

    // Keep a handle to the layer so its statistics can be inspected afterwards.
    let store = store.layer(cache_layer.clone());

    test_object_crud(&store).await?;
    test_object_list(&store).await?;

    // NOTE(review): the stat tuple is presumably (entry count, cached size) — confirm
    // against `read_cache_stat`.
    let stat = cache_layer.read_cache_stat().await;
    assert_eq!(stat, (4, 0));

    Ok(())
}
/// Asserts that, for every name in `file_names`, the cache layer answers
/// `true` to both `lru_contains_key` and `contains_file`.
///
/// Panics on the first name for which either check fails.
async fn assert_lru_cache<C: Accessor + Clone>(
    cache_layer: &LruCacheLayer<C>,
    file_names: &[&str],
) {
    for name in file_names.iter() {
        // Both lookups must agree that the entry is present.
        assert!(cache_layer.lru_contains_key(name).await);
        assert!(cache_layer.contains_file(name).await);
    }
}
@@ -265,11 +308,11 @@ async fn test_object_store_cache_policy() -> Result<()> {
let _ = builder
.root(&cache_dir.path().to_string_lossy())
.atomic_write_dir(&cache_dir.path().to_string_lossy());
let cache_accessor = Arc::new(builder.build().unwrap());
let cache_store = OperatorBuilder::new(cache_accessor.clone()).finish();
let file_cache = Arc::new(builder.build().unwrap());
let cache_store = OperatorBuilder::new(file_cache.clone()).finish();
// create operator for cache dir to verify cache file
let cache_layer = LruCacheLayer::new(Arc::new(cache_accessor.clone()), 3)
let cache_layer = LruCacheLayer::new(Arc::new(file_cache.clone()), 38)
.await
.unwrap();
let store = store.layer(cache_layer.clone());
@@ -281,13 +324,14 @@ async fn test_object_store_cache_policy() -> Result<()> {
store.write(p1, "Hello, object1!").await.unwrap();
store.write(p2, "Hello, object2!").await.unwrap();
// create cache by read object
// Try to read p1 and p2
let _ = store.read_with(p1).range(0..).await?;
let _ = store.read(p1).await?;
let _ = store.read_with(p2).range(0..).await?;
let _ = store.read_with(p2).range(7..).await?;
let _ = store.read(p2).await?;
assert_eq!(cache_layer.read_cache_stat().await, (3, 38));
assert_cache_files(
&cache_store,
&[
@@ -302,13 +346,16 @@ async fn test_object_store_cache_policy() -> Result<()> {
&cache_layer,
&[
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-",
"ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-",
"ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-",
],
)
.await;
// Delete p2 file
store.delete(p2).await.unwrap();
assert_eq!(cache_layer.read_cache_stat().await, (1, 15));
assert_cache_files(
&cache_store,
&["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"],
@@ -321,12 +368,17 @@ async fn test_object_store_cache_policy() -> Result<()> {
)
.await;
assert!(store.read(p2).await.is_err());
let p3 = "test_file3";
store.write(p3, "Hello, object3!").await.unwrap();
// Try to read p3
let _ = store.read(p3).await.unwrap();
let _ = store.read_with(p3).range(0..5).await.unwrap();
// The entry count is 4, because we have the p2 `NotFound` cache.
assert_eq!(cache_layer.read_cache_stat().await, (4, 35));
assert_cache_files(
&cache_store,
&[
@@ -347,6 +399,33 @@ async fn test_object_store_cache_policy() -> Result<()> {
)
.await;
// try to read p1, p2, p3
let _ = store.read(p3).await.unwrap();
let _ = store.read_with(p3).range(0..5).await.unwrap();
assert!(store.read(p2).await.is_err());
// Read p1 with range `1..15`; the existing p1 entry with range `0..` must be evicted.
let _ = store.read_with(p1).range(1..15).await.unwrap();
assert_eq!(cache_layer.read_cache_stat().await, (4, 34));
assert_cache_files(
&cache_store,
&[
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14",
"a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-",
"a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4",
],
&["ello, object1!", "Hello, object3!", "Hello"],
)
.await?;
assert_lru_cache(
&cache_layer,
&[
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14",
"a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-",
"a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4",
],
)
.await;
let handle = metric::try_handle().unwrap();
let metric_text = handle.render();
@@ -354,14 +433,15 @@ async fn test_object_store_cache_policy() -> Result<()> {
assert!(metric_text.contains("object_store_lru_cache_miss"));
drop(cache_layer);
let cache_layer = LruCacheLayer::new(Arc::new(cache_accessor), 3)
.await
.unwrap();
// Test recover
let cache_layer = LruCacheLayer::new(Arc::new(file_cache), 38).await.unwrap();
// The p2 `NotFound` cache will not be recovered
assert_eq!(cache_layer.read_cache_stat().await, (3, 34));
assert_lru_cache(
&cache_layer,
&[
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-",
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14",
"a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-",
"a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4",
],