mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-06 05:12:54 +00:00
feat(inverted_index): inverted index cache (#4309)
* feat/inverted-index-cache: Update dependencies and add caching for inverted index reader - Updated `atomic` to 0.6.0 and `uuid` to 1.9.1 in `Cargo.lock`. - Added `moka` and `uuid` dependencies in `Cargo.toml`. - Introduced `seek_read` method in `InvertedIndexBlobReader` for common seek and read operations. - Added `cache.rs` module to implement caching for inverted index reader using `moka`. - Updated `async-compression` to 0.4.11 in `puffin/Cargo.toml`. * feat/inverted-index-cache: Refactor InvertedIndexReader and Add Index Cache Support - Refactored `InvertedIndexReader` to include `seek_read` method and default implementations for `fst` and `bitmap`. - Implemented `seek_read` in `InvertedIndexBlobReader` and `CachedInvertedIndexBlobReader`. - Introduced `InvertedIndexCache` in `CacheManager` and `SstIndexApplier`. - Updated `SstIndexApplierBuilder` to accept and utilize `InvertedIndexCache`. - Added `From<FileId> for Uuid` implementation. * feat/inverted-index-cache: Update Cargo.toml and refactor SstIndexApplier - Moved `uuid.workspace` entry in Cargo.toml for better organization. * feat/inverted-index-cache: Refactor InvertedIndexCache to use type alias for Arc - Replaced `Arc<InvertedIndexCache>` with `InvertedIndexCacheRef` type alias. * feat/inverted-index-cache: Add Prometheus metrics and caching improvements for inverted index - Introduced `prometheus` and `puffin` dependencies for metrics. * feat/inverted-index-cache: Refactor InvertedIndexReader and Cache handling - Simplified `InvertedIndexReader` trait by removing seek-related comments. * feat/inverted-index-cache: Add configurable cache sizes for inverted index metadata and content - Introduced `index_metadata_size` and `index_content_size` in `CacheManagerBuilder`. * feat/inverted-index-cache: Refactor and optimize inverted index caching - Removed `metrics.rs` and integrated cache metrics into `index.rs`. * feat/inverted-index-cache: Remove unused dependencies from Cargo.lock and Cargo.toml - Removed `moka`, `prometheus`, and `puffin` dependencies from both Cargo.lock and Cargo.toml. * feat/inverted-index-cache: Replace Uuid with FileId in CachedInvertedIndexBlobReader - Updated `file_id` type from `Uuid` to `FileId` in `CachedInvertedIndexBlobReader` and related methods. * feat/inverted-index-cache: Refactor cache configuration for inverted index - Moved `inverted_index_metadata_cache_size` and `inverted_index_cache_size` from `MitoConfig` to `InvertedIndexConfig`. * feat/inverted-index-cache: Remove unnecessary conversion of `file_id` in `SstIndexApplier` - Simplified the initialization of `CachedInvertedIndexBlobReader` by removing the redundant `into()` conversion for `file_id`.
This commit is contained in:
@@ -29,6 +29,7 @@ snafu.workspace = true
|
||||
tantivy = { version = "0.22", features = ["zstd-compression"] }
|
||||
tantivy-jieba = "0.11.0"
|
||||
tokio.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util.workspace = true
|
||||
|
||||
@@ -12,27 +12,41 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod blob;
|
||||
mod footer;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::inverted_index::error::{DecodeFstSnafu, Result};
|
||||
pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
|
||||
use crate::inverted_index::FstMap;
|
||||
|
||||
mod blob;
|
||||
mod footer;
|
||||
|
||||
/// InvertedIndexReader defines an asynchronous reader of inverted index data
|
||||
#[mockall::automock]
|
||||
#[async_trait]
|
||||
pub trait InvertedIndexReader: Send {
|
||||
/// Retrieve metadata of all inverted indices stored within the blob.
|
||||
async fn metadata(&mut self) -> Result<InvertedIndexMetas>;
|
||||
/// Reads all data to dest.
|
||||
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize>;
|
||||
|
||||
/// Retrieve the finite state transducer (FST) map from the given offset and size.
|
||||
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap>;
|
||||
/// Seeks to given offset and reads data with exact size as provided.
|
||||
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>>;
|
||||
|
||||
/// Retrieve the bitmap from the given offset and size.
|
||||
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec>;
|
||||
/// Retrieves metadata of all inverted indices stored within the blob.
|
||||
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>>;
|
||||
|
||||
/// Retrieves the finite state transducer (FST) map from the given offset and size.
|
||||
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
|
||||
let fst_data = self.seek_read(offset, size).await?;
|
||||
FstMap::new(fst_data).context(DecodeFstSnafu)
|
||||
}
|
||||
|
||||
/// Retrieves the bitmap from the given offset and size.
|
||||
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
|
||||
self.seek_read(offset, size).await.map(BitVec::from_vec)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,18 +13,16 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::io::SeekFrom;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_base::BitVec;
|
||||
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
|
||||
use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::inverted_index::error::{
|
||||
DecodeFstSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu,
|
||||
};
|
||||
use crate::inverted_index::error::{ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu};
|
||||
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
|
||||
use crate::inverted_index::format::reader::{FstMap, InvertedIndexReader};
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::MIN_BLOB_SIZE;
|
||||
|
||||
/// Inverted index blob reader, implements [`InvertedIndexReader`]
|
||||
@@ -52,35 +50,31 @@ impl<R> InvertedIndexBlobReader<R> {
|
||||
|
||||
#[async_trait]
|
||||
impl<R: AsyncRead + AsyncSeek + Unpin + Send> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
|
||||
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
|
||||
self.source
|
||||
.seek(SeekFrom::Start(0))
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
self.source.read_to_end(dest).await.context(ReadSnafu)
|
||||
}
|
||||
|
||||
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
self.source
|
||||
.seek(SeekFrom::Start(offset))
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
let mut buf = vec![0u8; size as usize];
|
||||
self.source.read(&mut buf).await.context(ReadSnafu)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
|
||||
let end = SeekFrom::End(0);
|
||||
let blob_size = self.source.seek(end).await.context(SeekSnafu)?;
|
||||
Self::validate_blob_size(blob_size)?;
|
||||
|
||||
let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
|
||||
footer_reader.metadata().await
|
||||
}
|
||||
|
||||
async fn fst(&mut self, offset: u64, size: u32) -> Result<FstMap> {
|
||||
self.source
|
||||
.seek(SeekFrom::Start(offset))
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
let mut buf = vec![0u8; size as usize];
|
||||
self.source.read_exact(&mut buf).await.context(ReadSnafu)?;
|
||||
|
||||
FstMap::new(buf).context(DecodeFstSnafu)
|
||||
}
|
||||
|
||||
async fn bitmap(&mut self, offset: u64, size: u32) -> Result<BitVec> {
|
||||
self.source
|
||||
.seek(SeekFrom::Start(offset))
|
||||
.await
|
||||
.context(SeekSnafu)?;
|
||||
let mut buf = vec![0u8; size as usize];
|
||||
self.source.read_exact(&mut buf).await.context(ReadSnafu)?;
|
||||
|
||||
Ok(BitVec::from_vec(buf))
|
||||
footer_reader.metadata().await.map(Arc::new)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -148,6 +148,8 @@ impl TryFrom<Vec<(String, Vec<Predicate>)>> for PredicatesIndexApplier {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_base::bit_vec::prelude::*;
|
||||
use greptime_proto::v1::index::InvertedIndexMeta;
|
||||
|
||||
@@ -161,7 +163,7 @@ mod tests {
|
||||
s.to_owned()
|
||||
}
|
||||
|
||||
fn mock_metas(tags: impl IntoIterator<Item = (&'static str, u32)>) -> InvertedIndexMetas {
|
||||
fn mock_metas(tags: impl IntoIterator<Item = (&'static str, u32)>) -> Arc<InvertedIndexMetas> {
|
||||
let mut metas = InvertedIndexMetas {
|
||||
total_row_count: 8,
|
||||
segment_row_count: 1,
|
||||
@@ -175,7 +177,7 @@ mod tests {
|
||||
};
|
||||
metas.metas.insert(s(tag), meta);
|
||||
}
|
||||
metas
|
||||
Arc::new(metas)
|
||||
}
|
||||
|
||||
fn key_fst_applier(value: &'static str) -> Box<dyn FstApplier> {
|
||||
@@ -300,11 +302,11 @@ mod tests {
|
||||
async fn test_index_applier_with_empty_index() {
|
||||
let mut mock_reader = MockInvertedIndexReader::new();
|
||||
mock_reader.expect_metadata().returning(move || {
|
||||
Ok(InvertedIndexMetas {
|
||||
Ok(Arc::new(InvertedIndexMetas {
|
||||
total_row_count: 0, // No rows
|
||||
segment_row_count: 1,
|
||||
..Default::default()
|
||||
})
|
||||
}))
|
||||
});
|
||||
|
||||
let mut mock_fst_applier = MockFstApplier::new();
|
||||
|
||||
Reference in New Issue
Block a user