mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-10 15:22:56 +00:00
fix: out of bound during bloom search (#5625)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -42,7 +42,16 @@ impl BloomFilterApplier {
|
||||
) -> Result<Vec<Range<usize>>> {
|
||||
let rows_per_segment = self.meta.rows_per_segment as usize;
|
||||
let start_seg = search_range.start / rows_per_segment;
|
||||
let end_seg = search_range.end.div_ceil(rows_per_segment);
|
||||
let mut end_seg = search_range.end.div_ceil(rows_per_segment);
|
||||
|
||||
if end_seg == self.meta.segment_loc_indices.len() + 1 {
|
||||
// In a previous version, there was a bug where if the last segment was all null,
|
||||
// this segment would not be written into the index. This caused the slice
|
||||
// `self.meta.segment_loc_indices[start_seg..end_seg]` to go out of bounds due to
|
||||
// the missing segment. Since the `search` function does not search for nulls,
|
||||
// we can simply ignore the last segment in this buggy scenario.
|
||||
end_seg -= 1;
|
||||
}
|
||||
|
||||
let locs = &self.meta.segment_loc_indices[start_seg..end_seg];
|
||||
|
||||
|
||||
@@ -64,6 +64,9 @@ pub struct BloomFilterCreator {
|
||||
/// Storage for finalized Bloom filters.
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage,
|
||||
|
||||
/// Row count that finalized so far.
|
||||
finalized_row_count: usize,
|
||||
|
||||
/// Global memory usage of the bloom filter creator.
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
}
|
||||
@@ -96,6 +99,7 @@ impl BloomFilterCreator {
|
||||
global_memory_usage,
|
||||
global_memory_usage_threshold,
|
||||
),
|
||||
finalized_row_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,6 +140,7 @@ impl BloomFilterCreator {
|
||||
|
||||
if self.accumulated_row_count % self.rows_per_segment == 0 {
|
||||
self.finalize_segment().await?;
|
||||
self.finalized_row_count = self.accumulated_row_count;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -161,6 +166,7 @@ impl BloomFilterCreator {
|
||||
|
||||
if self.accumulated_row_count % self.rows_per_segment == 0 {
|
||||
self.finalize_segment().await?;
|
||||
self.finalized_row_count = self.accumulated_row_count;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -168,7 +174,7 @@ impl BloomFilterCreator {
|
||||
|
||||
/// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer.
|
||||
pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> {
|
||||
if !self.cur_seg_distinct_elems.is_empty() {
|
||||
if self.accumulated_row_count > self.finalized_row_count {
|
||||
self.finalize_segment().await?;
|
||||
}
|
||||
|
||||
@@ -406,4 +412,35 @@ mod tests {
|
||||
assert!(bf.contains(&b"f"));
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_final_seg_all_null() {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
2,
|
||||
Arc::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
);
|
||||
|
||||
creator
|
||||
.push_n_row_elems(4, vec![b"a".to_vec(), b"b".to_vec()])
|
||||
.await
|
||||
.unwrap();
|
||||
creator.push_row_elems(Vec::new()).await.unwrap();
|
||||
|
||||
creator.finish(&mut writer).await.unwrap();
|
||||
|
||||
let bytes = writer.into_inner();
|
||||
let total_size = bytes.len();
|
||||
let meta_size_offset = total_size - 4;
|
||||
let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap());
|
||||
|
||||
let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4];
|
||||
let meta = BloomFilterMeta::decode(meta_bytes).unwrap();
|
||||
|
||||
assert_eq!(meta.rows_per_segment, 2);
|
||||
assert_eq!(meta.segment_count, 3);
|
||||
assert_eq!(meta.row_count, 5);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user