feat(inverted_index): inverted index cache (#4309)

* feat/inverted-index-cache:
 Update dependencies and add caching for inverted index reader

 - Updated `atomic` to 0.6.0 and `uuid` to 1.9.1 in `Cargo.lock`.
 - Added `moka` and `uuid` dependencies in `Cargo.toml`.
 - Introduced `seek_read` method in `InvertedIndexBlobReader` for common seek and read operations.
 - Added `cache.rs` module to implement caching for inverted index reader using `moka`.
 - Updated `async-compression` to 0.4.11 in `puffin/Cargo.toml`.

* feat/inverted-index-cache:
 Refactor InvertedIndexReader and Add Index Cache Support

 - Refactored `InvertedIndexReader` to include `seek_read` method and default implementations for `fst` and `bitmap`.
 - Implemented `seek_read` in `InvertedIndexBlobReader` and `CachedInvertedIndexBlobReader`.
 - Introduced `InvertedIndexCache` in `CacheManager` and `SstIndexApplier`.
 - Updated `SstIndexApplierBuilder` to accept and utilize `InvertedIndexCache`.
 - Added `From<FileId> for Uuid` implementation.

* feat/inverted-index-cache:
 Update Cargo.toml and refactor SstIndexApplier

 - Moved `uuid.workspace` entry in Cargo.toml for better organization.

* feat/inverted-index-cache:
 Refactor InvertedIndexCache to use type alias for Arc

 - Replaced `Arc<InvertedIndexCache>` with `InvertedIndexCacheRef` type alias.

* feat/inverted-index-cache:
 Add Prometheus metrics and caching improvements for inverted index

 - Introduced `prometheus` and `puffin` dependencies for metrics.

* feat/inverted-index-cache:
 Refactor InvertedIndexReader and Cache handling

 - Simplified `InvertedIndexReader` trait by removing seek-related comments.

* feat/inverted-index-cache:
 Add configurable cache sizes for inverted index metadata and content
 - Introduced `index_metadata_size` and `index_content_size` in `CacheManagerBuilder`.

* feat/inverted-index-cache:
 Refactor and optimize inverted index caching

 - Removed `metrics.rs` and integrated cache metrics into `index.rs`.

* feat/inverted-index-cache:
 Remove unused dependencies from Cargo.lock and Cargo.toml

 - Removed `moka`, `prometheus`, and `puffin` dependencies from both Cargo.lock and Cargo.toml.

* feat/inverted-index-cache:
 Replace Uuid with FileId in CachedInvertedIndexBlobReader

 - Updated `file_id` type from `Uuid` to `FileId` in `CachedInvertedIndexBlobReader` and related methods.

* feat/inverted-index-cache:
 Refactor cache configuration for inverted index

 - Moved `inverted_index_metadata_cache_size` and `inverted_index_cache_size` from `MitoConfig` to `InvertedIndexConfig`.

* feat/inverted-index-cache:
 Remove unnecessary conversion of `file_id` in `SstIndexApplier`

 - Simplified the initialization of `CachedInvertedIndexBlobReader` by removing the redundant `into()` conversion for `file_id`.
This commit is contained in:
Lei, HUANG
2024-07-08 20:36:59 +08:00
committed by GitHub
parent 4811fe83f5
commit aa4d10eef7
20 changed files with 391 additions and 56 deletions

View File

@@ -29,6 +29,7 @@ snafu.workspace = true
tantivy = { version = "0.22", features = ["zstd-compression"] }
tantivy-jieba = "0.11.0"
tokio.workspace = true
uuid.workspace = true
[dev-dependencies]
common-test-util.workspace = true

View File

@@ -12,27 +12,41 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod blob;
mod footer;
use std::sync::Arc;
use async_trait::async_trait;
use common_base::BitVec;
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::ResultExt;
use crate::inverted_index::error::Result;
use crate::inverted_index::error::{DecodeFstSnafu, Result};
pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
use crate::inverted_index::FstMap;
mod blob;
mod footer;
/// InvertedIndexReader defines an asynchronous reader of inverted index data
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader: Send {
/// Retrieve metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<InvertedIndexMetas>;
/// Reads all data to dest.
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize>;
/// Retrieve the finite state transducer (FST) map from the given offset and size.
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap>;
/// Seeks to given offset and reads data with exact size as provided.
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
/// Retrieve the bitmap from the given offset and size.
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec>;
/// Retrieves metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;
/// Loads `size` bytes starting at `offset` via [`Self::seek_read`] and decodes
/// them into a finite state transducer (FST) map.
///
/// # Errors
/// Propagates any error from `seek_read`; a decoding failure is reported
/// with the `DecodeFstSnafu` context.
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
    let raw = self.seek_read(offset, size).await?;
    let decoded = FstMap::new(raw);
    decoded.context(DecodeFstSnafu)
}
/// Loads `size` bytes starting at `offset` via [`Self::seek_read`] and wraps
/// them in a [`BitVec`].
///
/// # Errors
/// Propagates any error from `seek_read`.
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
    let bytes = self.seek_read(offset, size).await?;
    Ok(BitVec::from_vec(bytes))
}
}

View File

@@ -13,18 +13,16 @@
// limitations under the License.
use std::io::SeekFrom;
use std::sync::Arc;
use async_trait::async_trait;
use common_base::BitVec;
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
DecodeFstSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu,
};
use crate::inverted_index::error::{ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu};
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
use crate::inverted_index::format::reader::{FstMap, InvertedIndexReader};
use crate::inverted_index::format::reader::InvertedIndexReader;
use crate::inverted_index::format::MIN_BLOB_SIZE;
/// Inverted index blob reader, implements [`InvertedIndexReader`]
@@ -52,35 +50,31 @@ impl<R> InvertedIndexBlobReader<R> {
#[async_trait]
impl<R: AsyncRead + AsyncSeek + Unpin + Send> InvertedIndexReader for InvertedIndexBlobReader<R> {
async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
// Rewinds the source to its start and reads everything up to EOF into `dest`,
// returning the byte count reported by `read_to_end`.
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
    let blob_start = SeekFrom::Start(0);
    self.source.seek(blob_start).await.context(SeekSnafu)?;
    let bytes_read = self.source.read_to_end(dest).await.context(ReadSnafu)?;
    Ok(bytes_read)
}
// Seeks to `offset` (from the start of the source) and reads exactly `size` bytes.
//
// Fails with a seek error if repositioning fails, or with a read error if the
// source cannot supply `size` bytes (including hitting EOF early).
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
    self.source
        .seek(SeekFrom::Start(offset))
        .await
        .context(SeekSnafu)?;
    let mut buf = vec![0u8; size as usize];
    // A plain `read` may return after filling only part of `buf`, leaving the
    // tail zeroed and handing truncated data to the FST/bitmap decoders.
    // `read_exact` keeps reading until the buffer is full or reports an error.
    self.source.read_exact(&mut buf).await.context(ReadSnafu)?;
    Ok(buf)
}
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
let end = SeekFrom::End(0);
let blob_size = self.source.seek(end).await.context(SeekSnafu)?;
Self::validate_blob_size(blob_size)?;
let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
footer_reader.metadata().await
}
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
self.source
.seek(SeekFrom::Start(offset))
.await
.context(SeekSnafu)?;
let mut buf = vec![0u8; size as usize];
self.source.read_exact(&mut buf).await.context(ReadSnafu)?;
FstMap::new(buf).context(DecodeFstSnafu)
}
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
self.source
.seek(SeekFrom::Start(offset))
.await
.context(SeekSnafu)?;
let mut buf = vec![0u8; size as usize];
self.source.read_exact(&mut buf).await.context(ReadSnafu)?;
Ok(BitVec::from_vec(buf))
footer_reader.metadata().await.map(Arc::new)
}
}

View File

@@ -148,6 +148,8 @@ impl TryFrom<Vec<(String, Vec<Predicate>)>> for PredicatesIndexApplier {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_base::bit_vec::prelude::*;
use greptime_proto::v1::index::InvertedIndexMeta;
@@ -161,7 +163,7 @@ mod tests {
s.to_owned()
}
fn mock_metas(tags: impl IntoIterator<Item = (&'static str, u32)>) -> InvertedIndexMetas {
fn mock_metas(tags: impl IntoIterator<Item = (&'static str, u32)>) -> Arc<InvertedIndexMetas> {
let mut metas = InvertedIndexMetas {
total_row_count: 8,
segment_row_count: 1,
@@ -175,7 +177,7 @@ mod tests {
};
metas.metas.insert(s(tag), meta);
}
metas
Arc::new(metas)
}
fn key_fst_applier(value: &'static str) -> Box<dyn FstApplier> {
@@ -300,11 +302,11 @@ mod tests {
async fn test_index_applier_with_empty_index() {
let mut mock_reader = MockInvertedIndexReader::new();
mock_reader.expect_metadata().returning(move || {
Ok(InvertedIndexMetas {
Ok(Arc::new(InvertedIndexMetas {
total_row_count: 0, // No rows
segment_row_count: 1,
..Default::default()
})
}))
});
let mut mock_fst_applier = MockFstApplier::new();