mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-27 18:30:38 +00:00
feat: collect read metrics for inverted index
Signed-off-by: evenyag <realevenyag@gmail.com>
This commit is contained in:
@@ -110,7 +110,10 @@ impl BloomFilterApplier {
|
||||
.map(|i| self.meta.bloom_filter_locs[i as usize])
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let bloom_filters = self.reader.bloom_filter_vec(&bloom_filter_locs, metrics).await?;
|
||||
let bloom_filters = self
|
||||
.reader
|
||||
.bloom_filter_vec(&bloom_filter_locs, metrics)
|
||||
.await?;
|
||||
|
||||
Ok((segment_locations, bloom_filters))
|
||||
}
|
||||
@@ -424,7 +427,10 @@ mod tests {
|
||||
];
|
||||
|
||||
for (predicates, search_range, expected) in cases {
|
||||
let result = applier.search(&predicates, &[search_range], None).await.unwrap();
|
||||
let result = applier
|
||||
.search(&predicates, &[search_range], None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
result, expected,
|
||||
"Expected {:?}, got {:?}",
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
@@ -29,19 +30,44 @@ pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader;
|
||||
mod blob;
|
||||
mod footer;
|
||||
|
||||
/// Metrics for inverted index read operations.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct InvertedIndexReadMetrics {
|
||||
/// Total byte size to read.
|
||||
pub total_bytes: u64,
|
||||
/// Total number of ranges to read.
|
||||
pub total_ranges: usize,
|
||||
/// Total elapsed time accumulated across read operations (both `range_read` and `read_vec`).
|
||||
pub elapsed: Duration,
|
||||
}
|
||||
|
||||
/// InvertedIndexReader defines an asynchronous reader of inverted index data
|
||||
#[mockall::automock]
|
||||
#[async_trait]
|
||||
pub trait InvertedIndexReader: Send + Sync {
|
||||
/// Seeks to given offset and reads data with exact size as provided.
|
||||
async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>>;
|
||||
async fn range_read<'a>(
|
||||
&self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<u8>>;
|
||||
|
||||
/// Reads the bytes in the given ranges.
|
||||
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
|
||||
async fn read_vec<'a>(
|
||||
&self,
|
||||
ranges: &[Range<u64>],
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<Bytes>> {
|
||||
let mut metrics = metrics;
|
||||
let mut result = Vec::with_capacity(ranges.len());
|
||||
for range in ranges {
|
||||
let data = self
|
||||
.range_read(range.start, (range.end - range.start) as u32)
|
||||
.range_read(
|
||||
range.start,
|
||||
(range.end - range.start) as u32,
|
||||
metrics.as_deref_mut(),
|
||||
)
|
||||
.await?;
|
||||
result.push(Bytes::from(data));
|
||||
}
|
||||
@@ -52,14 +78,24 @@ pub trait InvertedIndexReader: Send + Sync {
|
||||
async fn metadata(&self) -> Result<Arc<InvertedIndexMetas>>;
|
||||
|
||||
/// Retrieves the finite state transducer (FST) map from the given offset and size.
|
||||
async fn fst(&self, offset: u64, size: u32) -> Result<FstMap> {
|
||||
let fst_data = self.range_read(offset, size).await?;
|
||||
async fn fst<'a>(
|
||||
&self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<FstMap> {
|
||||
let fst_data = self.range_read(offset, size, metrics).await?;
|
||||
FstMap::new(fst_data).context(DecodeFstSnafu)
|
||||
}
|
||||
|
||||
/// Retrieves the multiple finite state transducer (FST) maps from the given ranges.
|
||||
async fn fst_vec(&mut self, ranges: &[Range<u64>]) -> Result<Vec<FstMap>> {
|
||||
self.read_vec(ranges)
|
||||
async fn fst_vec<'a>(
|
||||
&mut self,
|
||||
ranges: &[Range<u64>],
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<FstMap>> {
|
||||
let mut metrics = metrics;
|
||||
self.read_vec(ranges, metrics.as_deref_mut())
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|bytes| FstMap::new(bytes.to_vec()).context(DecodeFstSnafu))
|
||||
@@ -67,19 +103,29 @@ pub trait InvertedIndexReader: Send + Sync {
|
||||
}
|
||||
|
||||
/// Retrieves the bitmap from the given offset and size.
|
||||
async fn bitmap(&self, offset: u64, size: u32, bitmap_type: BitmapType) -> Result<Bitmap> {
|
||||
self.range_read(offset, size).await.and_then(|bytes| {
|
||||
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
|
||||
})
|
||||
async fn bitmap<'a>(
|
||||
&self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
bitmap_type: BitmapType,
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Bitmap> {
|
||||
self.range_read(offset, size, metrics)
|
||||
.await
|
||||
.and_then(|bytes| {
|
||||
Bitmap::deserialize_from(&bytes, bitmap_type).context(DecodeBitmapSnafu)
|
||||
})
|
||||
}
|
||||
|
||||
/// Retrieves the multiple bitmaps from the given ranges.
|
||||
async fn bitmap_deque(
|
||||
async fn bitmap_deque<'a>(
|
||||
&mut self,
|
||||
ranges: &[(Range<u64>, BitmapType)],
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<VecDeque<Bitmap>> {
|
||||
let mut metrics = metrics;
|
||||
let (ranges, types): (Vec<_>, Vec<_>) = ranges.iter().cloned().unzip();
|
||||
let bytes = self.read_vec(&ranges).await?;
|
||||
let bytes = self.read_vec(&ranges, metrics.as_deref_mut()).await?;
|
||||
bytes
|
||||
.into_iter()
|
||||
.zip(types)
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
@@ -23,10 +24,10 @@ use snafu::{ResultExt, ensure};
|
||||
|
||||
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
|
||||
use crate::inverted_index::format::MIN_BLOB_SIZE;
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::reader::footer::{
|
||||
DEFAULT_PREFETCH_SIZE, InvertedIndexFooterReader,
|
||||
};
|
||||
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
|
||||
|
||||
/// Inverted index blob reader, implements [`InvertedIndexReader`]
|
||||
pub struct InvertedIndexBlobReader<R> {
|
||||
@@ -53,17 +54,45 @@ impl<R> InvertedIndexBlobReader<R> {
|
||||
|
||||
#[async_trait]
|
||||
impl<R: RangeReader + Sync> InvertedIndexReader for InvertedIndexBlobReader<R> {
|
||||
async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
async fn range_read<'a>(
|
||||
&self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<u8>> {
|
||||
let start = metrics.as_ref().map(|_| Instant::now());
|
||||
|
||||
let buf = self
|
||||
.source
|
||||
.read(offset..offset + size as u64)
|
||||
.await
|
||||
.context(CommonIoSnafu)?;
|
||||
|
||||
if let Some(m) = metrics {
|
||||
m.total_bytes += size as u64;
|
||||
m.total_ranges += 1;
|
||||
m.elapsed += start.unwrap().elapsed();
|
||||
}
|
||||
|
||||
Ok(buf.into())
|
||||
}
|
||||
|
||||
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
|
||||
self.source.read_vec(ranges).await.context(CommonIoSnafu)
|
||||
async fn read_vec<'a>(
|
||||
&self,
|
||||
ranges: &[Range<u64>],
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<Bytes>> {
|
||||
let start = metrics.as_ref().map(|_| Instant::now());
|
||||
|
||||
let result = self.source.read_vec(ranges).await.context(CommonIoSnafu)?;
|
||||
|
||||
if let Some(m) = metrics {
|
||||
m.total_bytes += ranges.iter().map(|r| r.end - r.start).sum::<u64>();
|
||||
m.total_ranges += ranges.len();
|
||||
m.elapsed += start.unwrap().elapsed();
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn metadata(&self) -> Result<Arc<InvertedIndexMetas>> {
|
||||
@@ -207,6 +236,7 @@ mod tests {
|
||||
.fst(
|
||||
meta.base_offset + meta.relative_fst_offset as u64,
|
||||
meta.fst_size,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -219,6 +249,7 @@ mod tests {
|
||||
.fst(
|
||||
meta.base_offset + meta.relative_fst_offset as u64,
|
||||
meta.fst_size,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -236,12 +267,12 @@ mod tests {
|
||||
let meta = metas.metas.get("tag0").unwrap();
|
||||
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset, 26, BitmapType::Roaring)
|
||||
.bitmap(meta.base_offset, 26, BitmapType::Roaring, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring)
|
||||
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
@@ -250,12 +281,12 @@ mod tests {
|
||||
let meta = metas.metas.get("tag1").unwrap();
|
||||
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset, 26, BitmapType::Roaring)
|
||||
.bitmap(meta.base_offset, 26, BitmapType::Roaring, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
let bitmap = blob_reader
|
||||
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring)
|
||||
.bitmap(meta.base_offset + 26, 26, BitmapType::Roaring, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(bitmap, mock_bitmap());
|
||||
|
||||
@@ -198,13 +198,19 @@ mod tests {
|
||||
.fst(
|
||||
tag0.base_offset + tag0.relative_fst_offset as u64,
|
||||
tag0.fst_size,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag0.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -213,7 +219,12 @@ mod tests {
|
||||
);
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag0.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -222,7 +233,12 @@ mod tests {
|
||||
);
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag0.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -241,13 +257,19 @@ mod tests {
|
||||
.fst(
|
||||
tag1.base_offset + tag1.relative_fst_offset as u64,
|
||||
tag1.fst_size,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag1.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -256,7 +278,12 @@ mod tests {
|
||||
);
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag1.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -265,7 +292,12 @@ mod tests {
|
||||
);
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag1.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
|
||||
@@ -16,7 +16,7 @@ use greptime_proto::v1::index::{BitmapType, InvertedIndexMeta};
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
|
||||
|
||||
/// `ParallelFstValuesMapper` enables parallel mapping of multiple FST value groups to their
|
||||
/// corresponding bitmaps within an inverted index.
|
||||
@@ -33,10 +33,12 @@ impl<'a> ParallelFstValuesMapper<'a> {
|
||||
Self { reader }
|
||||
}
|
||||
|
||||
pub async fn map_values_vec(
|
||||
pub async fn map_values_vec<'b>(
|
||||
&mut self,
|
||||
value_and_meta_vec: &[(Vec<u64>, &'a InvertedIndexMeta)],
|
||||
value_and_meta_vec: &[(Vec<u64>, &'b InvertedIndexMeta)],
|
||||
metrics: Option<&mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<Bitmap>> {
|
||||
let mut metrics = metrics;
|
||||
let groups = value_and_meta_vec
|
||||
.iter()
|
||||
.map(|(values, _)| values.len())
|
||||
@@ -64,7 +66,10 @@ impl<'a> ParallelFstValuesMapper<'a> {
|
||||
}
|
||||
|
||||
common_telemetry::debug!("fetch ranges: {:?}", fetch_ranges);
|
||||
let mut bitmaps = self.reader.bitmap_deque(&fetch_ranges).await?;
|
||||
let mut bitmaps = self
|
||||
.reader
|
||||
.bitmap_deque(&fetch_ranges, metrics.as_deref_mut())
|
||||
.await?;
|
||||
let mut output = Vec::with_capacity(groups.len());
|
||||
|
||||
for counter in groups {
|
||||
@@ -95,23 +100,25 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_map_values_vec() {
|
||||
let mut mock_reader = MockInvertedIndexReader::new();
|
||||
mock_reader.expect_bitmap_deque().returning(|ranges| {
|
||||
let mut output = VecDeque::new();
|
||||
for (range, bitmap_type) in ranges {
|
||||
let offset = range.start;
|
||||
let size = range.end - range.start;
|
||||
match (offset, size, bitmap_type) {
|
||||
(1, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
|
||||
mock_reader
|
||||
.expect_bitmap_deque()
|
||||
.returning(|ranges, _metrics| {
|
||||
let mut output = VecDeque::new();
|
||||
for (range, bitmap_type) in ranges {
|
||||
let offset = range.start;
|
||||
let size = range.end - range.start;
|
||||
match (offset, size, bitmap_type) {
|
||||
(1, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
|
||||
}
|
||||
(2, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b01010101], *bitmap_type))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
(2, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b01010101], *bitmap_type))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
});
|
||||
Ok(output)
|
||||
});
|
||||
|
||||
let meta = InvertedIndexMeta {
|
||||
bitmap_type: BitmapType::Roaring.into(),
|
||||
@@ -120,13 +127,13 @@ mod tests {
|
||||
let mut values_mapper = ParallelFstValuesMapper::new(&mut mock_reader);
|
||||
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[(vec![], &meta)])
|
||||
.map_values_vec(&[(vec![], &meta)], None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result[0].count_ones(), 0);
|
||||
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[(vec![value(1, 1)], &meta)])
|
||||
.map_values_vec(&[(vec![value(1, 1)], &meta)], None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -135,7 +142,7 @@ mod tests {
|
||||
);
|
||||
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[(vec![value(2, 1)], &meta)])
|
||||
.map_values_vec(&[(vec![value(2, 1)], &meta)], None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -144,7 +151,7 @@ mod tests {
|
||||
);
|
||||
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[(vec![value(1, 1), value(2, 1)], &meta)])
|
||||
.map_values_vec(&[(vec![value(1, 1), value(2, 1)], &meta)], None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -153,7 +160,7 @@ mod tests {
|
||||
);
|
||||
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[(vec![value(2, 1), value(1, 1)], &meta)])
|
||||
.map_values_vec(&[(vec![value(2, 1), value(1, 1)], &meta)], None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -162,7 +169,10 @@ mod tests {
|
||||
);
|
||||
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[(vec![value(2, 1)], &meta), (vec![value(1, 1)], &meta)])
|
||||
.map_values_vec(
|
||||
&[(vec![value(2, 1)], &meta), (vec![value(1, 1)], &meta)],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -174,10 +184,13 @@ mod tests {
|
||||
Bitmap::from_lsb0_bytes(&[0b10101010], BitmapType::Roaring)
|
||||
);
|
||||
let result = values_mapper
|
||||
.map_values_vec(&[
|
||||
(vec![value(2, 1), value(1, 1)], &meta),
|
||||
(vec![value(1, 1)], &meta),
|
||||
])
|
||||
.map_values_vec(
|
||||
&[
|
||||
(vec![value(2, 1), value(1, 1)], &meta),
|
||||
(vec![value(1, 1)], &meta),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
|
||||
@@ -19,7 +19,7 @@ pub use predicates_apply::PredicatesIndexApplier;
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::error::Result;
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
|
||||
|
||||
/// The output of an apply operation.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
@@ -44,10 +44,11 @@ pub trait IndexApplier: Send + Sync {
|
||||
/// Applies the predefined predicates to the data read by the given index reader, returning
|
||||
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
|
||||
#[allow(unused_parens)]
|
||||
async fn apply<'a>(
|
||||
async fn apply<'a, 'b>(
|
||||
&self,
|
||||
context: SearchContext,
|
||||
reader: &mut (dyn InvertedIndexReader + 'a),
|
||||
metrics: Option<&'b mut InvertedIndexReadMetrics>,
|
||||
) -> Result<ApplyOutput>;
|
||||
|
||||
/// Returns the memory usage of the applier.
|
||||
|
||||
@@ -19,7 +19,7 @@ use greptime_proto::v1::index::InvertedIndexMetas;
|
||||
|
||||
use crate::bitmap::Bitmap;
|
||||
use crate::inverted_index::error::{IndexNotFoundSnafu, Result};
|
||||
use crate::inverted_index::format::reader::InvertedIndexReader;
|
||||
use crate::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
|
||||
use crate::inverted_index::search::fst_apply::{
|
||||
FstApplier, IntersectionFstApplier, KeysFstApplier,
|
||||
};
|
||||
@@ -43,11 +43,13 @@ pub struct PredicatesIndexApplier {
|
||||
impl IndexApplier for PredicatesIndexApplier {
|
||||
/// Applies all `FstApplier`s to the data in the inverted index reader, intersecting the individual
|
||||
/// bitmaps obtained for each index to result in a final set of indices.
|
||||
async fn apply<'a>(
|
||||
async fn apply<'a, 'b>(
|
||||
&self,
|
||||
context: SearchContext,
|
||||
reader: &mut (dyn InvertedIndexReader + 'a),
|
||||
metrics: Option<&'b mut InvertedIndexReadMetrics>,
|
||||
) -> Result<ApplyOutput> {
|
||||
let mut metrics = metrics;
|
||||
let metadata = reader.metadata().await?;
|
||||
let mut output = ApplyOutput {
|
||||
matched_segment_ids: Bitmap::new_bitvec(),
|
||||
@@ -84,7 +86,7 @@ impl IndexApplier for PredicatesIndexApplier {
|
||||
return Ok(output);
|
||||
}
|
||||
|
||||
let fsts = reader.fst_vec(&fst_ranges).await?;
|
||||
let fsts = reader.fst_vec(&fst_ranges, metrics.as_deref_mut()).await?;
|
||||
let value_and_meta_vec = fsts
|
||||
.into_iter()
|
||||
.zip(appliers)
|
||||
@@ -92,7 +94,9 @@ impl IndexApplier for PredicatesIndexApplier {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut mapper = ParallelFstValuesMapper::new(reader);
|
||||
let mut bm_vec = mapper.map_values_vec(&value_and_meta_vec).await?;
|
||||
let mut bm_vec = mapper
|
||||
.map_values_vec(&value_and_meta_vec, metrics.as_deref_mut())
|
||||
.await?;
|
||||
|
||||
let mut bitmap = bm_vec.pop().unwrap(); // SAFETY: `fst_ranges` is not empty
|
||||
for bm in bm_vec {
|
||||
@@ -222,25 +226,27 @@ mod tests {
|
||||
mock_reader
|
||||
.expect_metadata()
|
||||
.returning(|| Ok(mock_metas([("tag-0", 0)])));
|
||||
mock_reader.expect_fst_vec().returning(|_ranges| {
|
||||
mock_reader.expect_fst_vec().returning(|_ranges, _metrics| {
|
||||
Ok(vec![
|
||||
FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap(),
|
||||
])
|
||||
});
|
||||
|
||||
mock_reader.expect_bitmap_deque().returning(|arg| {
|
||||
assert_eq!(arg.len(), 1);
|
||||
let range = &arg[0].0;
|
||||
let bitmap_type = arg[0].1;
|
||||
assert_eq!(*range, 2..3);
|
||||
assert_eq!(bitmap_type, BitmapType::Roaring);
|
||||
Ok(VecDeque::from([Bitmap::from_lsb0_bytes(
|
||||
&[0b10101010],
|
||||
bitmap_type,
|
||||
)]))
|
||||
});
|
||||
mock_reader
|
||||
.expect_bitmap_deque()
|
||||
.returning(|arg, _metrics| {
|
||||
assert_eq!(arg.len(), 1);
|
||||
let range = &arg[0].0;
|
||||
let bitmap_type = arg[0].1;
|
||||
assert_eq!(*range, 2..3);
|
||||
assert_eq!(bitmap_type, BitmapType::Roaring);
|
||||
Ok(VecDeque::from([Bitmap::from_lsb0_bytes(
|
||||
&[0b10101010],
|
||||
bitmap_type,
|
||||
)]))
|
||||
});
|
||||
let output = applier
|
||||
.apply(SearchContext::default(), &mut mock_reader)
|
||||
.apply(SearchContext::default(), &mut mock_reader, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -253,13 +259,13 @@ mod tests {
|
||||
mock_reader
|
||||
.expect_metadata()
|
||||
.returning(|| Ok(mock_metas([("tag-0", 0)])));
|
||||
mock_reader.expect_fst_vec().returning(|_range| {
|
||||
mock_reader.expect_fst_vec().returning(|_range, _metrics| {
|
||||
Ok(vec![
|
||||
FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap(),
|
||||
])
|
||||
});
|
||||
let output = applier
|
||||
.apply(SearchContext::default(), &mut mock_reader)
|
||||
.apply(SearchContext::default(), &mut mock_reader, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(output.matched_segment_ids.count_ones(), 0);
|
||||
@@ -280,7 +286,7 @@ mod tests {
|
||||
mock_reader
|
||||
.expect_metadata()
|
||||
.returning(|| Ok(mock_metas([("tag-0", 0), ("tag-1", 1)])));
|
||||
mock_reader.expect_fst_vec().returning(|ranges| {
|
||||
mock_reader.expect_fst_vec().returning(|ranges, _metrics| {
|
||||
let mut output = vec![];
|
||||
for range in ranges {
|
||||
match range.start {
|
||||
@@ -293,27 +299,29 @@ mod tests {
|
||||
}
|
||||
Ok(output)
|
||||
});
|
||||
mock_reader.expect_bitmap_deque().returning(|ranges| {
|
||||
let mut output = VecDeque::new();
|
||||
for (range, bitmap_type) in ranges {
|
||||
let offset = range.start;
|
||||
let size = range.end - range.start;
|
||||
match (offset, size, bitmap_type) {
|
||||
(1, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
|
||||
mock_reader
|
||||
.expect_bitmap_deque()
|
||||
.returning(|ranges, _metrics| {
|
||||
let mut output = VecDeque::new();
|
||||
for (range, bitmap_type) in ranges {
|
||||
let offset = range.start;
|
||||
let size = range.end - range.start;
|
||||
match (offset, size, bitmap_type) {
|
||||
(1, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b10101010], *bitmap_type))
|
||||
}
|
||||
(2, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b11011011], *bitmap_type))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
(2, 1, BitmapType::Roaring) => {
|
||||
output.push_back(Bitmap::from_lsb0_bytes(&[0b11011011], *bitmap_type))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
});
|
||||
Ok(output)
|
||||
});
|
||||
|
||||
let output = applier
|
||||
.apply(SearchContext::default(), &mut mock_reader)
|
||||
.apply(SearchContext::default(), &mut mock_reader, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -334,7 +342,7 @@ mod tests {
|
||||
.returning(|| Ok(mock_metas([("tag-0", 0)])));
|
||||
|
||||
let output = applier
|
||||
.apply(SearchContext::default(), &mut mock_reader)
|
||||
.apply(SearchContext::default(), &mut mock_reader, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(output.matched_segment_ids, Bitmap::full_bitvec(8)); // full range to scan
|
||||
@@ -359,7 +367,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let output = applier
|
||||
.apply(SearchContext::default(), &mut mock_reader)
|
||||
.apply(SearchContext::default(), &mut mock_reader, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(output.matched_segment_ids.is_empty());
|
||||
@@ -385,6 +393,7 @@ mod tests {
|
||||
index_not_found_strategy: IndexNotFoundStrategy::ThrowError,
|
||||
},
|
||||
&mut mock_reader,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
assert!(matches!(result, Err(Error::IndexNotFound { .. })));
|
||||
@@ -395,6 +404,7 @@ mod tests {
|
||||
index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty,
|
||||
},
|
||||
&mut mock_reader,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -406,6 +416,7 @@ mod tests {
|
||||
index_not_found_strategy: IndexNotFoundStrategy::Ignore,
|
||||
},
|
||||
&mut mock_reader,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
91
src/mito2/src/cache/index/inverted_index.rs
vendored
91
src/mito2/src/cache/index/inverted_index.rs
vendored
@@ -14,12 +14,13 @@
|
||||
|
||||
use core::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use api::v1::index::InvertedIndexMetas;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use index::inverted_index::error::Result;
|
||||
use index::inverted_index::format::reader::InvertedIndexReader;
|
||||
use index::inverted_index::format::reader::{InvertedIndexReadMetrics, InvertedIndexReader};
|
||||
use prost::Message;
|
||||
use store_api::storage::FileId;
|
||||
|
||||
@@ -83,20 +84,42 @@ impl<R> CachedInvertedIndexBlobReader<R> {
|
||||
|
||||
#[async_trait]
|
||||
impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobReader<R> {
|
||||
async fn range_read(&self, offset: u64, size: u32) -> Result<Vec<u8>> {
|
||||
async fn range_read<'a>(
|
||||
&self,
|
||||
offset: u64,
|
||||
size: u32,
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<u8>> {
|
||||
let start = metrics.as_ref().map(|_| Instant::now());
|
||||
|
||||
let inner = &self.inner;
|
||||
self.cache
|
||||
let result = self
|
||||
.cache
|
||||
.get_or_load(
|
||||
self.file_id,
|
||||
self.blob_size,
|
||||
offset,
|
||||
size,
|
||||
move |ranges| async move { inner.read_vec(&ranges).await },
|
||||
move |ranges| async move { inner.read_vec(&ranges, None).await },
|
||||
)
|
||||
.await
|
||||
.await?;
|
||||
|
||||
if let Some(m) = metrics {
|
||||
m.total_bytes += size as u64;
|
||||
m.total_ranges += 1;
|
||||
m.elapsed += start.unwrap().elapsed();
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn read_vec(&self, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
|
||||
async fn read_vec<'a>(
|
||||
&self,
|
||||
ranges: &[Range<u64>],
|
||||
metrics: Option<&'a mut InvertedIndexReadMetrics>,
|
||||
) -> Result<Vec<Bytes>> {
|
||||
let start = metrics.as_ref().map(|_| Instant::now());
|
||||
|
||||
let mut pages = Vec::with_capacity(ranges.len());
|
||||
for range in ranges {
|
||||
let inner = &self.inner;
|
||||
@@ -107,13 +130,19 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
self.blob_size,
|
||||
range.start,
|
||||
(range.end - range.start) as u32,
|
||||
move |ranges| async move { inner.read_vec(&ranges).await },
|
||||
move |ranges| async move { inner.read_vec(&ranges, None).await },
|
||||
)
|
||||
.await?;
|
||||
|
||||
pages.push(Bytes::from(page));
|
||||
}
|
||||
|
||||
if let Some(m) = metrics {
|
||||
m.total_bytes += ranges.iter().map(|r| r.end - r.start).sum::<u64>();
|
||||
m.total_ranges += ranges.len();
|
||||
m.elapsed += start.unwrap().elapsed();
|
||||
}
|
||||
|
||||
Ok(pages)
|
||||
}
|
||||
|
||||
@@ -292,13 +321,19 @@ mod test {
|
||||
.fst(
|
||||
tag0.base_offset + tag0.relative_fst_offset as u64,
|
||||
tag0.fst_size,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst0.len(), 3);
|
||||
let [offset, size] = unpack(fst0.get(b"a").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag0.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -307,7 +342,12 @@ mod test {
|
||||
);
|
||||
let [offset, size] = unpack(fst0.get(b"b").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag0.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -316,7 +356,12 @@ mod test {
|
||||
);
|
||||
let [offset, size] = unpack(fst0.get(b"c").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag0.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag0.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -335,13 +380,19 @@ mod test {
|
||||
.fst(
|
||||
tag1.base_offset + tag1.relative_fst_offset as u64,
|
||||
tag1.fst_size,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(fst1.len(), 3);
|
||||
let [offset, size] = unpack(fst1.get(b"x").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag1.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -350,7 +401,12 @@ mod test {
|
||||
);
|
||||
let [offset, size] = unpack(fst1.get(b"y").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag1.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -359,7 +415,12 @@ mod test {
|
||||
);
|
||||
let [offset, size] = unpack(fst1.get(b"z").unwrap());
|
||||
let bitmap = cached_reader
|
||||
.bitmap(tag1.base_offset + offset as u64, size, BitmapType::Roaring)
|
||||
.bitmap(
|
||||
tag1.base_offset + offset as u64,
|
||||
size,
|
||||
BitmapType::Roaring,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -372,7 +433,7 @@ mod test {
|
||||
for _ in 0..FUZZ_REPEAT_TIMES {
|
||||
let offset = rng.random_range(0..file_size);
|
||||
let size = rng.random_range(0..file_size as u32 - offset as u32);
|
||||
let expected = cached_reader.range_read(offset, size).await.unwrap();
|
||||
let expected = cached_reader.range_read(offset, size, None).await.unwrap();
|
||||
let inner = &cached_reader.inner;
|
||||
let read = cached_reader
|
||||
.cache
|
||||
@@ -381,7 +442,7 @@ mod test {
|
||||
file_size,
|
||||
offset,
|
||||
size,
|
||||
|ranges| async move { inner.read_vec(&ranges).await },
|
||||
|ranges| async move { inner.read_vec(&ranges, None).await },
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -154,7 +154,13 @@ impl FulltextIndexApplier {
|
||||
}
|
||||
|
||||
let Some(result) = self
|
||||
.apply_fine_one_column(file_size_hint, file_id, *column_id, request, metrics.as_deref_mut())
|
||||
.apply_fine_one_column(
|
||||
file_size_hint,
|
||||
file_id,
|
||||
*column_id,
|
||||
request,
|
||||
metrics.as_deref_mut(),
|
||||
)
|
||||
.await?
|
||||
else {
|
||||
continue;
|
||||
@@ -374,6 +380,7 @@ impl FulltextIndexApplier {
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO(yingwen): Update reader metrics.
|
||||
*row_group_output = applier
|
||||
.search(&predicates, row_group_output, None)
|
||||
.await
|
||||
|
||||
@@ -723,9 +723,10 @@ mod tests {
|
||||
let backend = backend.clone();
|
||||
async move {
|
||||
match backend {
|
||||
FulltextBackend::Tantivy => {
|
||||
applier.apply_fine(region_file_id, None, None).await.unwrap()
|
||||
}
|
||||
FulltextBackend::Tantivy => applier
|
||||
.apply_fine(region_file_id, None, None)
|
||||
.await
|
||||
.unwrap(),
|
||||
FulltextBackend::Bloom => {
|
||||
let coarse_mask = coarse_mask.unwrap_or_default();
|
||||
let row_groups = (0..coarse_mask.len()).map(|i| (1, coarse_mask[i]));
|
||||
|
||||
@@ -177,13 +177,13 @@ impl InvertedIndexApplier {
|
||||
index_cache.clone(),
|
||||
);
|
||||
self.index_applier
|
||||
.apply(context, &mut index_reader)
|
||||
.apply(context, &mut index_reader, None)
|
||||
.await
|
||||
.context(ApplyInvertedIndexSnafu)
|
||||
} else {
|
||||
let mut index_reader = InvertedIndexBlobReader::new(blob);
|
||||
self.index_applier
|
||||
.apply(context, &mut index_reader)
|
||||
.apply(context, &mut index_reader, None)
|
||||
.await
|
||||
.context(ApplyInvertedIndexSnafu)
|
||||
};
|
||||
@@ -314,7 +314,7 @@ mod tests {
|
||||
|
||||
let mut mock_index_applier = MockIndexApplier::new();
|
||||
mock_index_applier.expect_memory_usage().returning(|| 100);
|
||||
mock_index_applier.expect_apply().returning(|_, _| {
|
||||
mock_index_applier.expect_apply().returning(|_, _, _| {
|
||||
Ok(ApplyOutput {
|
||||
matched_segment_ids: Bitmap::new_bitvec(),
|
||||
total_row_count: 100,
|
||||
|
||||
Reference in New Issue
Block a user