1pub(crate) mod cache_size;
18
19pub(crate) mod file_cache;
20pub(crate) mod index;
21pub(crate) mod manifest_cache;
22#[cfg(test)]
23pub(crate) mod test_util;
24pub(crate) mod write_cache;
25
26use std::collections::{BTreeMap, HashMap};
27use std::mem;
28use std::ops::Range;
29use std::sync::{Arc, RwLock};
30
31use bytes::Bytes;
32use common_base::readable_size::ReadableSize;
33use common_telemetry::warn;
34use datatypes::arrow::buffer::BooleanBuffer;
35use datatypes::arrow::record_batch::RecordBatch;
36use datatypes::value::Value;
37use datatypes::vectors::VectorRef;
38use index::bloom_filter_index::{BloomFilterIndexCache, BloomFilterIndexCacheRef};
39use index::result_cache::IndexResultCache;
40use moka::notification::RemovalCause;
41use moka::sync::Cache;
42use object_store::ObjectStore;
43use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
44use parquet::file::metadata::{FileMetaData, PageIndexPolicy, ParquetMetaData};
45use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
46use smallvec::SmallVec;
47use snafu::{OptionExt, ResultExt};
48use store_api::metadata::RegionMetadataRef;
49use store_api::storage::{ConcreteDataType, FileId, RegionId, TimeSeriesRowSelector};
50
51use crate::cache::cache_size::parquet_meta_size;
52use crate::cache::file_cache::{FileType, IndexKey};
53use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCacheRef};
54#[cfg(feature = "vector_index")]
55use crate::cache::index::vector_index::{VectorIndexCache, VectorIndexCacheRef};
56use crate::cache::write_cache::WriteCacheRef;
57use crate::error::{InvalidMetadataSnafu, InvalidParquetSnafu, Result, UnexpectedSnafu};
58use crate::memtable::record_batch_estimated_size;
59use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
60use crate::read::Batch;
61use crate::read::range_cache::{RangeScanCacheKey, RangeScanCacheValue};
62use crate::sst::file::{RegionFileId, RegionIndexId};
63use crate::sst::parquet::PARQUET_METADATA_KEY;
64use crate::sst::parquet::read_columns::ParquetReadColumns;
65use crate::sst::parquet::reader::MetadataCacheMetrics;
66
/// Metric label value used for the SST metadata cache.
const SST_META_TYPE: &str = "sst_meta";
/// Metric label value used for the repeated-vector cache.
const VECTOR_TYPE: &str = "vector";
/// Metric label value used for the page range cache.
const PAGE_TYPE: &str = "page";
// NOTE(review): not referenced in this part of the file; presumably the metric
// label for the file cache — confirm against its users.
const FILE_TYPE: &str = "file";
// NOTE(review): not referenced in this part of the file; presumably the metric
// label for the index caches — confirm against its users.
const INDEX_TYPE: &str = "index";
/// Metric label value used for the selector result cache.
const SELECTOR_RESULT_TYPE: &str = "selector_result";
/// Metric label value used for the range scan result cache.
const RANGE_RESULT_TYPE: &str = "range_result";
/// Metric label value used for the prefilter result cache.
const PREFILTER_RESULT_TYPE: &str = "prefilter_result";
/// Byte budget handed to the default [`RangeResultMemoryLimiter`].
const RANGE_RESULT_CONCAT_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(512);
/// Number of bytes one permit stands for in the default [`RangeResultMemoryLimiter`].
const RANGE_RESULT_CONCAT_MEMORY_PERMIT: ReadableSize = ReadableSize::kb(1);
85
/// Semaphore-backed limiter that hands out memory reservations in fixed-size permits.
///
/// A byte budget is converted into a number of permits when the limiter is
/// built; callers reserve memory through `acquire`, which waits until enough
/// permits are available.
#[derive(Debug)]
pub(crate) struct RangeResultMemoryLimiter {
    /// Semaphore that holds the permits.
    semaphore: Arc<tokio::sync::Semaphore>,
    /// Number of bytes represented by a single permit (never zero).
    permit_bytes: usize,
    /// Number of permits the semaphore was created with.
    total_permits: usize,
}
92
93impl Default for RangeResultMemoryLimiter {
94 fn default() -> Self {
95 Self::new(
96 RANGE_RESULT_CONCAT_MEMORY_LIMIT.as_bytes() as usize,
97 RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize,
98 )
99 }
100}
101
102impl RangeResultMemoryLimiter {
103 pub(crate) fn new(limit_bytes: usize, permit_bytes: usize) -> Self {
104 let permit_bytes = permit_bytes.max(1);
105 let total_permits = limit_bytes
106 .div_ceil(permit_bytes)
107 .clamp(1, tokio::sync::Semaphore::MAX_PERMITS);
108 Self {
109 semaphore: Arc::new(tokio::sync::Semaphore::new(total_permits)),
110 permit_bytes,
111 total_permits,
112 }
113 }
114
115 #[cfg(test)]
116 pub(crate) fn permit_bytes(&self) -> usize {
117 self.permit_bytes
118 }
119
120 #[cfg(test)]
121 pub(crate) fn available_permits(&self) -> usize {
122 self.semaphore.available_permits()
123 }
124
125 pub(crate) async fn acquire(&self, bytes: usize) -> Result<tokio::sync::SemaphorePermit<'_>> {
126 let permits = bytes.div_ceil(self.permit_bytes).max(1);
127 if permits > self.total_permits {
128 return UnexpectedSnafu {
129 reason: format!(
130 "range result memory request of {bytes} bytes exceeds limiter capacity of {} bytes",
131 self.total_permits.saturating_mul(self.permit_bytes)
132 ),
133 }
134 .fail();
135 }
136 self.semaphore
137 .acquire_many(permits as u32)
138 .await
139 .map_err(|_| {
140 UnexpectedSnafu {
141 reason: "range result memory limiter is unexpectedly closed",
142 }
143 .build()
144 })
145 }
146}
147
/// SST metadata as stored in the SST meta cache: the parquet metadata of a
/// file together with the region metadata associated with it.
#[derive(Debug)]
pub(crate) struct CachedSstMeta {
    /// Parquet metadata with the `PARQUET_METADATA_KEY` key-value entry removed.
    parquet_metadata: Arc<ParquetMetaData>,
    /// Region metadata, either supplied by the caller or decoded from the
    /// JSON stored under `PARQUET_METADATA_KEY`.
    region_metadata: RegionMetadataRef,
    /// The larger of the region metadata's estimated size and the length of
    /// its JSON representation in the parquet file.
    region_metadata_weight: usize,
    /// Page index policy recorded when this entry was built; it decides which
    /// requested policies the entry can serve.
    page_index_policy: PageIndexPolicy,
}
159
160impl CachedSstMeta {
161 #[cfg(test)]
162 pub(crate) fn try_new(file_path: &str, parquet_metadata: ParquetMetaData) -> Result<Self> {
163 let page_index_policy = infer_loaded_page_index_policy(&parquet_metadata);
164 Self::try_new_with_page_index_policy(file_path, parquet_metadata, None, page_index_policy)
165 }
166
167 pub(crate) fn try_new_with_region_metadata(
168 file_path: &str,
169 parquet_metadata: ParquetMetaData,
170 region_metadata: Option<RegionMetadataRef>,
171 ) -> Result<Self> {
172 let page_index_policy = infer_loaded_page_index_policy(&parquet_metadata);
173 Self::try_new_with_page_index_policy(
174 file_path,
175 parquet_metadata,
176 region_metadata,
177 page_index_policy,
178 )
179 }
180
181 pub(crate) fn try_new_with_page_index_policy(
182 file_path: &str,
183 parquet_metadata: ParquetMetaData,
184 region_metadata: Option<RegionMetadataRef>,
185 page_index_policy: PageIndexPolicy,
186 ) -> Result<Self> {
187 let file_metadata = parquet_metadata.file_metadata();
188 let key_values = file_metadata
189 .key_value_metadata()
190 .context(InvalidParquetSnafu {
191 file: file_path,
192 reason: "missing key value meta",
193 })?;
194 let meta_value = key_values
195 .iter()
196 .find(|kv| kv.key == PARQUET_METADATA_KEY)
197 .with_context(|| InvalidParquetSnafu {
198 file: file_path,
199 reason: format!("key {} not found", PARQUET_METADATA_KEY),
200 })?;
201 let json = meta_value
202 .value
203 .as_ref()
204 .with_context(|| InvalidParquetSnafu {
205 file: file_path,
206 reason: format!("No value for key {}", PARQUET_METADATA_KEY),
207 })?;
208 let region_metadata = match region_metadata {
209 Some(region_metadata) => region_metadata,
210 None => Arc::new(
211 store_api::metadata::RegionMetadata::from_json(json)
212 .context(InvalidMetadataSnafu)?,
213 ),
214 };
215 let region_metadata_weight = region_metadata.estimated_size().max(json.len());
217 let parquet_metadata = Arc::new(strip_region_metadata_from_parquet(parquet_metadata));
218
219 Ok(Self {
220 parquet_metadata,
221 region_metadata,
222 region_metadata_weight,
223 page_index_policy,
224 })
225 }
226
227 pub(crate) fn parquet_metadata(&self) -> Arc<ParquetMetaData> {
228 self.parquet_metadata.clone()
229 }
230
231 pub(crate) fn region_metadata(&self) -> RegionMetadataRef {
232 self.region_metadata.clone()
233 }
234
235 fn satisfies_page_index_policy(&self, requested: PageIndexPolicy) -> bool {
236 match requested {
237 PageIndexPolicy::Skip => true,
238 PageIndexPolicy::Optional => self.page_index_policy != PageIndexPolicy::Skip,
239 PageIndexPolicy::Required => self.page_index_policy == PageIndexPolicy::Required,
240 }
241 }
242}
243
244fn infer_loaded_page_index_policy(parquet_metadata: &ParquetMetaData) -> PageIndexPolicy {
245 if parquet_metadata.column_index().is_some() || parquet_metadata.offset_index().is_some() {
246 PageIndexPolicy::Optional
247 } else {
248 PageIndexPolicy::Skip
249 }
250}
251
252fn strip_region_metadata_from_parquet(parquet_metadata: ParquetMetaData) -> ParquetMetaData {
253 let file_metadata = parquet_metadata.file_metadata();
254 let filtered_key_values = file_metadata.key_value_metadata().and_then(|key_values| {
255 let filtered = key_values
256 .iter()
257 .filter(|kv| kv.key != PARQUET_METADATA_KEY)
258 .cloned()
259 .collect::<Vec<_>>();
260 (!filtered.is_empty()).then_some(filtered)
261 });
262 let stripped_file_metadata = FileMetaData::new(
263 file_metadata.version(),
264 file_metadata.num_rows(),
265 file_metadata.created_by().map(ToString::to_string),
266 filtered_key_values,
267 file_metadata.schema_descr_ptr(),
268 file_metadata.column_orders().cloned(),
269 );
270
271 let mut builder = parquet_metadata.into_builder();
272 let row_groups = builder.take_row_groups();
273 let column_index = builder.take_column_index();
274 let offset_index = builder.take_offset_index();
275
276 parquet::file::metadata::ParquetMetaDataBuilder::new(stripped_file_metadata)
277 .set_row_groups(row_groups)
278 .set_column_index(column_index)
279 .set_offset_index(offset_index)
280 .build()
281}
282
283fn removal_cause_str(cause: RemovalCause) -> &'static str {
284 match cause {
285 RemovalCause::Expired => "expired",
286 RemovalCause::Explicit => "explicit",
287 RemovalCause::Replaced => "replaced",
288 RemovalCause::Size => "size",
289 }
290}
291
/// Hashable copy of a parquet `RowSelector`, used inside [`PrefilterKey`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct PrefilterRowSelector {
    /// Copied from `RowSelector::row_count`.
    row_count: usize,
    /// Copied from `RowSelector::skip`.
    skip: bool,
}
297
298impl From<&RowSelector> for PrefilterRowSelector {
303 fn from(selector: &RowSelector) -> Self {
304 Self {
305 row_count: selector.row_count,
306 skip: selector.skip,
307 }
308 }
309}
310
311#[derive(Debug, Clone, PartialEq, Eq, Hash)]
313pub(crate) struct PrefilterKey {
314 file_id: FileId,
315 row_group_idx: u32,
316 row_selection: Option<Arc<Vec<PrefilterRowSelector>>>,
317 schema_version: u64,
318 filter_exprs: SmallVec<[String; 1]>,
319 mem_usage: usize,
320}
321
322impl PrefilterKey {
323 pub(crate) fn row_selection_snapshot(
324 row_selection: Option<&RowSelection>,
325 ) -> Option<Arc<Vec<PrefilterRowSelector>>> {
326 row_selection.map(|selection| {
327 Arc::new(
328 selection
329 .iter()
330 .map(PrefilterRowSelector::from)
331 .collect::<Vec<_>>(),
332 )
333 })
334 }
335
336 pub(crate) fn new(
337 file_id: FileId,
338 row_group_idx: u32,
339 row_selection: Option<Arc<Vec<PrefilterRowSelector>>>,
340 schema_version: u64,
341 filter_exprs: SmallVec<[String; 1]>,
342 ) -> Self {
343 let row_selection_bytes = row_selection
344 .as_ref()
345 .map(|selection| selection.len() * mem::size_of::<PrefilterRowSelector>())
346 .unwrap_or(0);
347 let spilled_expr_bytes = if filter_exprs.spilled() {
348 filter_exprs.capacity() * mem::size_of::<String>()
349 } else {
350 0
351 };
352 let expr_bytes = filter_exprs.iter().map(|s| s.capacity()).sum::<usize>();
353
354 Self {
355 file_id,
356 row_group_idx,
357 row_selection,
358 schema_version,
359 filter_exprs,
360 mem_usage: mem::size_of::<Self>()
361 + row_selection_bytes
362 + spilled_expr_bytes
363 + expr_bytes,
364 }
365 }
366
367 fn mem_usage(&self) -> usize {
368 self.mem_usage
369 }
370}
371
372type PrefilterResultCache = Cache<PrefilterKey, Arc<BooleanBuffer>>;
373
374fn new_prefilter_result_cache(capacity: u64) -> PrefilterResultCache {
375 Cache::builder()
376 .max_capacity(capacity)
377 .weigher(prefilter_result_cache_weight)
378 .eviction_listener(|k, v, cause| {
379 let size = prefilter_result_cache_weight(&k, &v);
380 CACHE_BYTES
381 .with_label_values(&[PREFILTER_RESULT_TYPE])
382 .sub(size.into());
383 CACHE_EVICTION
384 .with_label_values(&[PREFILTER_RESULT_TYPE, removal_cause_str(cause)])
385 .inc();
386 })
387 .build()
388}
389
390fn prefilter_result_cache_weight(k: &PrefilterKey, v: &Arc<BooleanBuffer>) -> u32 {
391 (k.mem_usage() + mem::size_of::<BooleanBuffer>() + v.values().len()) as u32
392}
393
/// Selects which caches of a [`CacheManager`] an operation may use.
#[derive(Clone)]
pub enum CacheStrategy {
    /// Every accessor delegates to the wrapped cache manager.
    EnableAll(CacheManagerRef),
    /// Only the SST metadata accessors, the write cache accessor and puffin
    /// cache eviction delegate to the wrapped cache manager; every other
    /// accessor behaves as if caching were disabled.
    Compaction(CacheManagerRef),
    /// No cache is used: lookups return `None` and insertions are no-ops.
    Disabled,
}
408
409impl CacheStrategy {
410 pub(crate) async fn get_sst_meta_data(
412 &self,
413 file_id: RegionFileId,
414 metrics: &mut MetadataCacheMetrics,
415 page_index_policy: PageIndexPolicy,
416 ) -> Option<Arc<CachedSstMeta>> {
417 match self {
418 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
419 cache_manager
420 .get_sst_meta_data(file_id, metrics, page_index_policy)
421 .await
422 }
423 CacheStrategy::Disabled => {
424 metrics.cache_miss += 1;
425 None
426 }
427 }
428 }
429
430 pub(crate) fn get_sst_meta_data_from_mem_cache(
432 &self,
433 file_id: RegionFileId,
434 page_index_policy: PageIndexPolicy,
435 ) -> Option<Arc<CachedSstMeta>> {
436 match self {
437 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
438 cache_manager.get_sst_meta_data_from_mem_cache(file_id, page_index_policy)
439 }
440 CacheStrategy::Disabled => None,
441 }
442 }
443
444 pub fn get_parquet_meta_data_from_mem_cache(
446 &self,
447 file_id: RegionFileId,
448 ) -> Option<Arc<ParquetMetaData>> {
449 self.get_sst_meta_data_from_mem_cache(file_id, PageIndexPolicy::Skip)
450 .map(|metadata| metadata.parquet_metadata())
451 }
452
453 pub(crate) fn put_sst_meta_data(&self, file_id: RegionFileId, metadata: Arc<CachedSstMeta>) {
455 match self {
456 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
457 cache_manager.put_sst_meta_data(file_id, metadata);
458 }
459 CacheStrategy::Disabled => {}
460 }
461 }
462
463 pub fn put_parquet_meta_data(
465 &self,
466 file_id: RegionFileId,
467 metadata: Arc<ParquetMetaData>,
468 region_metadata: Option<RegionMetadataRef>,
469 ) {
470 match self {
471 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
472 cache_manager.put_parquet_meta_data(file_id, metadata, region_metadata);
473 }
474 CacheStrategy::Disabled => {}
475 }
476 }
477
478 pub(crate) fn get_prefilter_result(&self, key: &PrefilterKey) -> Option<Arc<BooleanBuffer>> {
481 match self {
482 CacheStrategy::EnableAll(cache_manager) => cache_manager.get_prefilter_result(key),
483 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
484 }
485 }
486
487 pub(crate) fn put_prefilter_result(&self, key: PrefilterKey, result: Arc<BooleanBuffer>) {
490 if let CacheStrategy::EnableAll(cache_manager) = self {
491 cache_manager.put_prefilter_result(key, result);
492 }
493 }
494
495 pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
497 match self {
498 CacheStrategy::EnableAll(cache_manager) => {
499 cache_manager.remove_parquet_meta_data(file_id);
500 }
501 CacheStrategy::Compaction(cache_manager) => {
502 cache_manager.remove_parquet_meta_data(file_id);
503 }
504 CacheStrategy::Disabled => {}
505 }
506 }
507
508 pub fn get_repeated_vector(
511 &self,
512 data_type: &ConcreteDataType,
513 value: &Value,
514 ) -> Option<VectorRef> {
515 match self {
516 CacheStrategy::EnableAll(cache_manager) => {
517 cache_manager.get_repeated_vector(data_type, value)
518 }
519 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
520 }
521 }
522
523 pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
526 if let CacheStrategy::EnableAll(cache_manager) = self {
527 cache_manager.put_repeated_vector(value, vector);
528 }
529 }
530
531 pub fn get_page_ranges(
534 &self,
535 file_id: FileId,
536 row_group_idx: usize,
537 ranges: &[Range<u64>],
538 ) -> Option<PageRangeLookup> {
539 match self {
540 CacheStrategy::EnableAll(cache_manager) => {
541 cache_manager.get_page_ranges(file_id, row_group_idx, ranges)
542 }
543 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
544 }
545 }
546
547 pub fn put_page_ranges(
550 &self,
551 file_id: FileId,
552 row_group_idx: usize,
553 ranges: &[Range<u64>],
554 pages: &[Bytes],
555 ) {
556 if let CacheStrategy::EnableAll(cache_manager) = self {
557 cache_manager.put_page_ranges(file_id, row_group_idx, ranges, pages);
558 }
559 }
560
561 pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
563 match self {
564 CacheStrategy::EnableAll(cache_manager) => {
565 cache_manager.evict_puffin_cache(file_id).await
566 }
567 CacheStrategy::Compaction(cache_manager) => {
568 cache_manager.evict_puffin_cache(file_id).await
569 }
570 CacheStrategy::Disabled => {}
571 }
572 }
573
574 pub fn get_selector_result(
577 &self,
578 selector_key: &SelectorResultKey,
579 ) -> Option<Arc<SelectorResultValue>> {
580 match self {
581 CacheStrategy::EnableAll(cache_manager) => {
582 cache_manager.get_selector_result(selector_key)
583 }
584 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
585 }
586 }
587
588 pub fn put_selector_result(
591 &self,
592 selector_key: SelectorResultKey,
593 result: Arc<SelectorResultValue>,
594 ) {
595 if let CacheStrategy::EnableAll(cache_manager) = self {
596 cache_manager.put_selector_result(selector_key, result);
597 }
598 }
599
600 #[allow(dead_code)]
603 pub(crate) fn get_range_result(
604 &self,
605 key: &RangeScanCacheKey,
606 ) -> Option<Arc<RangeScanCacheValue>> {
607 match self {
608 CacheStrategy::EnableAll(cache_manager) => cache_manager.get_range_result(key),
609 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
610 }
611 }
612
613 pub(crate) fn put_range_result(
616 &self,
617 key: RangeScanCacheKey,
618 result: Arc<RangeScanCacheValue>,
619 ) {
620 if let CacheStrategy::EnableAll(cache_manager) = self {
621 cache_manager.put_range_result(key, result);
622 }
623 }
624
625 pub(crate) fn has_range_result_cache(&self) -> bool {
627 match self {
628 CacheStrategy::EnableAll(cache_manager) => cache_manager.has_range_result_cache(),
629 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => false,
630 }
631 }
632
633 pub(crate) fn range_result_memory_limiter(&self) -> Option<&Arc<RangeResultMemoryLimiter>> {
634 match self {
635 CacheStrategy::EnableAll(cache_manager) => {
636 Some(cache_manager.range_result_memory_limiter())
637 }
638 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
639 }
640 }
641
642 pub(crate) fn range_result_cache_size(&self) -> Option<usize> {
643 match self {
644 CacheStrategy::EnableAll(cache_manager) => {
645 Some(cache_manager.range_result_cache_size())
646 }
647 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
648 }
649 }
650
651 pub fn write_cache(&self) -> Option<&WriteCacheRef> {
654 match self {
655 CacheStrategy::EnableAll(cache_manager) => cache_manager.write_cache(),
656 CacheStrategy::Compaction(cache_manager) => cache_manager.write_cache(),
657 CacheStrategy::Disabled => None,
658 }
659 }
660
661 pub fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
664 match self {
665 CacheStrategy::EnableAll(cache_manager) => cache_manager.inverted_index_cache(),
666 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
667 }
668 }
669
670 pub fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
673 match self {
674 CacheStrategy::EnableAll(cache_manager) => cache_manager.bloom_filter_index_cache(),
675 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
676 }
677 }
678
679 #[cfg(feature = "vector_index")]
682 pub fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
683 match self {
684 CacheStrategy::EnableAll(cache_manager) => cache_manager.vector_index_cache(),
685 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
686 }
687 }
688
689 pub fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
692 match self {
693 CacheStrategy::EnableAll(cache_manager) => cache_manager.puffin_metadata_cache(),
694 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
695 }
696 }
697
698 pub fn index_result_cache(&self) -> Option<&IndexResultCache> {
701 match self {
702 CacheStrategy::EnableAll(cache_manager) => cache_manager.index_result_cache(),
703 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
704 }
705 }
706
707 pub fn maybe_download_background(
709 &self,
710 index_key: IndexKey,
711 remote_path: String,
712 remote_store: ObjectStore,
713 file_size: u64,
714 ) {
715 if let CacheStrategy::EnableAll(cache_manager) = self
716 && let Some(write_cache) = cache_manager.write_cache()
717 {
718 write_cache.file_cache().maybe_download_background(
719 index_key,
720 remote_path,
721 remote_store,
722 file_size,
723 );
724 }
725 }
726}
727
/// Owns the caches used by the engine.
///
/// Each cache is optional; an absent cache makes the corresponding lookups
/// return `None` and insertions no-ops.
#[derive(Default)]
pub struct CacheManager {
    /// Cache for SST metadata, keyed by region id and file id.
    sst_meta_cache: Option<SstMetaCache>,
    /// Cache for repeated vectors, keyed by data type and value.
    vector_cache: Option<VectorCache>,
    /// Cache for page ranges of row groups.
    page_cache: Option<Arc<PageRangeCache>>,
    /// Write cache; its file cache is also consulted for SST metadata.
    write_cache: Option<WriteCacheRef>,
    /// Cache for inverted indexes.
    inverted_index_cache: Option<InvertedIndexCacheRef>,
    /// Cache for bloom filter indexes.
    bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
    /// Cache for vector indexes.
    #[cfg(feature = "vector_index")]
    vector_index_cache: Option<VectorIndexCacheRef>,
    /// Cache for puffin metadata.
    puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
    /// Cache for time series row selector results.
    selector_result_cache: Option<SelectorResultCache>,
    /// Cache for range scan results.
    range_result_cache: Option<RangeResultCache>,
    /// Configured size of the range result cache, as reported by
    /// `range_result_cache_size`.
    range_result_cache_size: u64,
    /// Limiter for memory reserved while handling range results.
    range_result_memory_limiter: Arc<RangeResultMemoryLimiter>,
    /// Cache for index results.
    index_result_cache: Option<IndexResultCache>,
    /// Cache for prefilter results.
    prefilter_result_cache: Option<PrefilterResultCache>,
}

/// Shared handle to a [`CacheManager`].
pub type CacheManagerRef = Arc<CacheManager>;
765
766impl CacheManager {
767 pub fn builder() -> CacheManagerBuilder {
769 CacheManagerBuilder::default()
770 }
771
772 pub(crate) async fn get_sst_meta_data(
775 &self,
776 file_id: RegionFileId,
777 metrics: &mut MetadataCacheMetrics,
778 page_index_policy: PageIndexPolicy,
779 ) -> Option<Arc<CachedSstMeta>> {
780 if let Some(metadata) = self.get_sst_meta_data_from_mem_cache(file_id, page_index_policy) {
781 metrics.mem_cache_hit += 1;
782 return Some(metadata);
783 }
784
785 let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet);
786 if let Some(write_cache) = &self.write_cache
787 && let Some(metadata) = write_cache
788 .file_cache()
789 .get_sst_meta_data(key, metrics, page_index_policy)
790 .await
791 {
792 metrics.file_cache_hit += 1;
793 self.put_sst_meta_data(file_id, metadata.clone());
794 return Some(metadata);
795 }
796
797 metrics.cache_miss += 1;
798 None
799 }
800
801 pub(crate) async fn get_parquet_meta_data(
804 &self,
805 file_id: RegionFileId,
806 metrics: &mut MetadataCacheMetrics,
807 page_index_policy: PageIndexPolicy,
808 ) -> Option<Arc<ParquetMetaData>> {
809 self.get_sst_meta_data(file_id, metrics, page_index_policy)
810 .await
811 .map(|metadata| metadata.parquet_metadata())
812 }
813
814 pub(crate) fn get_sst_meta_data_from_mem_cache(
817 &self,
818 file_id: RegionFileId,
819 page_index_policy: PageIndexPolicy,
820 ) -> Option<Arc<CachedSstMeta>> {
821 self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
822 let value = sst_meta_cache.get(&SstMetaKey(file_id.region_id(), file_id.file_id()));
823 let value =
824 value.filter(|metadata| metadata.satisfies_page_index_policy(page_index_policy));
825 update_hit_miss(value, SST_META_TYPE)
826 })
827 }
828
829 pub fn get_parquet_meta_data_from_mem_cache(
832 &self,
833 file_id: RegionFileId,
834 ) -> Option<Arc<ParquetMetaData>> {
835 self.get_sst_meta_data_from_mem_cache(file_id, PageIndexPolicy::Skip)
836 .map(|metadata| metadata.parquet_metadata())
837 }
838
839 pub(crate) fn put_sst_meta_data(&self, file_id: RegionFileId, metadata: Arc<CachedSstMeta>) {
841 if let Some(cache) = &self.sst_meta_cache {
842 let key = SstMetaKey(file_id.region_id(), file_id.file_id());
843 CACHE_BYTES
844 .with_label_values(&[SST_META_TYPE])
845 .add(meta_cache_weight(&key, &metadata).into());
846 cache.insert(key, metadata);
847 }
848 }
849
850 pub fn put_parquet_meta_data(
852 &self,
853 file_id: RegionFileId,
854 metadata: Arc<ParquetMetaData>,
855 region_metadata: Option<RegionMetadataRef>,
856 ) {
857 if self.sst_meta_cache.is_some() {
858 let file_path = format!(
859 "region_id={}, file_id={}",
860 file_id.region_id(),
861 file_id.file_id()
862 );
863 match CachedSstMeta::try_new_with_region_metadata(
864 &file_path,
865 Arc::unwrap_or_clone(metadata),
866 region_metadata,
867 ) {
868 Ok(metadata) => self.put_sst_meta_data(file_id, Arc::new(metadata)),
869 Err(err) => warn!(
870 err; "Failed to decode region metadata while caching parquet metadata, region_id: {}, file_id: {}",
871 file_id.region_id(),
872 file_id.file_id()
873 ),
874 }
875 }
876 }
877
878 pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
880 if let Some(cache) = &self.sst_meta_cache {
881 cache.remove(&SstMetaKey(file_id.region_id(), file_id.file_id()));
882 }
883 }
884
885 pub(crate) fn sst_meta_cache_weighted_size(&self) -> u64 {
887 self.sst_meta_cache
888 .as_ref()
889 .map(|cache| cache.weighted_size())
890 .unwrap_or(0)
891 }
892
893 pub(crate) fn sst_meta_cache_enabled(&self) -> bool {
895 self.sst_meta_cache.is_some()
896 }
897
898 pub fn get_repeated_vector(
900 &self,
901 data_type: &ConcreteDataType,
902 value: &Value,
903 ) -> Option<VectorRef> {
904 self.vector_cache.as_ref().and_then(|vector_cache| {
905 let value = vector_cache.get(&(data_type.clone(), value.clone()));
906 update_hit_miss(value, VECTOR_TYPE)
907 })
908 }
909
910 pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
912 if let Some(cache) = &self.vector_cache {
913 let key = (vector.data_type(), value);
914 CACHE_BYTES
915 .with_label_values(&[VECTOR_TYPE])
916 .add(vector_cache_weight(&key, &vector).into());
917 cache.insert(key, vector);
918 }
919 }
920
921 pub fn get_page_ranges(
923 &self,
924 file_id: FileId,
925 row_group_idx: usize,
926 ranges: &[Range<u64>],
927 ) -> Option<PageRangeLookup> {
928 self.page_cache.as_ref().map(|page_cache| {
929 let lookup = page_cache.lookup(file_id, row_group_idx, ranges);
930 if lookup.cached_bytes > 0 {
931 CACHE_HIT.with_label_values(&[PAGE_TYPE]).inc();
932 }
933 if !lookup.missing_ranges.is_empty() {
934 CACHE_MISS.with_label_values(&[PAGE_TYPE]).inc();
935 }
936 lookup
937 })
938 }
939
940 pub fn put_page_ranges(
942 &self,
943 file_id: FileId,
944 row_group_idx: usize,
945 ranges: &[Range<u64>],
946 pages: &[Bytes],
947 ) {
948 if let Some(cache) = &self.page_cache {
949 cache.insert_ranges(file_id, row_group_idx, ranges, pages);
950 }
951 }
952
953 pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
955 if let Some(cache) = &self.bloom_filter_index_cache {
956 cache.invalidate_file(file_id.file_id());
957 }
958
959 if let Some(cache) = &self.inverted_index_cache {
960 cache.invalidate_file(file_id.file_id());
961 }
962
963 if let Some(cache) = &self.index_result_cache {
964 cache.invalidate_file(file_id.file_id());
965 }
966
967 #[cfg(feature = "vector_index")]
968 if let Some(cache) = &self.vector_index_cache {
969 cache.invalidate_file(file_id.file_id());
970 }
971
972 if let Some(cache) = &self.puffin_metadata_cache {
973 cache.remove(&file_id.to_string());
974 }
975
976 if let Some(write_cache) = &self.write_cache {
977 write_cache
978 .remove(IndexKey::new(
979 file_id.region_id(),
980 file_id.file_id(),
981 FileType::Puffin(file_id.version),
982 ))
983 .await;
984 }
985 }
986
987 pub fn get_selector_result(
989 &self,
990 selector_key: &SelectorResultKey,
991 ) -> Option<Arc<SelectorResultValue>> {
992 self.selector_result_cache
993 .as_ref()
994 .and_then(|selector_result_cache| selector_result_cache.get(selector_key))
995 }
996
997 pub fn put_selector_result(
999 &self,
1000 selector_key: SelectorResultKey,
1001 result: Arc<SelectorResultValue>,
1002 ) {
1003 if let Some(cache) = &self.selector_result_cache {
1004 CACHE_BYTES
1005 .with_label_values(&[SELECTOR_RESULT_TYPE])
1006 .add(selector_result_cache_weight(&selector_key, &result).into());
1007 cache.insert(selector_key, result);
1008 }
1009 }
1010
1011 #[allow(dead_code)]
1013 pub(crate) fn get_range_result(
1014 &self,
1015 key: &RangeScanCacheKey,
1016 ) -> Option<Arc<RangeScanCacheValue>> {
1017 self.range_result_cache
1018 .as_ref()
1019 .and_then(|cache| update_hit_miss(cache.get(key), RANGE_RESULT_TYPE))
1020 }
1021
1022 pub(crate) fn put_range_result(
1024 &self,
1025 key: RangeScanCacheKey,
1026 result: Arc<RangeScanCacheValue>,
1027 ) {
1028 if let Some(cache) = &self.range_result_cache {
1029 CACHE_BYTES
1030 .with_label_values(&[RANGE_RESULT_TYPE])
1031 .add(range_result_cache_weight(&key, &result).into());
1032 cache.insert(key, result);
1033 }
1034 }
1035
1036 pub(crate) fn has_range_result_cache(&self) -> bool {
1038 self.range_result_cache.is_some()
1039 }
1040
1041 pub(crate) fn range_result_memory_limiter(&self) -> &Arc<RangeResultMemoryLimiter> {
1042 &self.range_result_memory_limiter
1043 }
1044
1045 pub(crate) fn range_result_cache_size(&self) -> usize {
1046 self.range_result_cache_size as usize
1047 }
1048
1049 pub(crate) fn write_cache(&self) -> Option<&WriteCacheRef> {
1051 self.write_cache.as_ref()
1052 }
1053
1054 pub(crate) fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
1055 self.inverted_index_cache.as_ref()
1056 }
1057
1058 pub(crate) fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
1059 self.bloom_filter_index_cache.as_ref()
1060 }
1061
1062 #[cfg(feature = "vector_index")]
1063 pub(crate) fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
1064 self.vector_index_cache.as_ref()
1065 }
1066
1067 pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
1068 self.puffin_metadata_cache.as_ref()
1069 }
1070
1071 pub(crate) fn index_result_cache(&self) -> Option<&IndexResultCache> {
1072 self.index_result_cache.as_ref()
1073 }
1074
1075 pub(crate) fn get_prefilter_result(&self, key: &PrefilterKey) -> Option<Arc<BooleanBuffer>> {
1076 self.prefilter_result_cache
1077 .as_ref()
1078 .and_then(|cache| update_hit_miss(cache.get(key), PREFILTER_RESULT_TYPE))
1079 }
1080
1081 pub(crate) fn put_prefilter_result(&self, key: PrefilterKey, result: Arc<BooleanBuffer>) {
1082 if let Some(cache) = &self.prefilter_result_cache {
1083 CACHE_BYTES
1084 .with_label_values(&[PREFILTER_RESULT_TYPE])
1085 .add(prefilter_result_cache_weight(&key, &result).into());
1086 cache.insert(key, result);
1087 }
1088 }
1089}
1090
1091pub fn selector_result_cache_miss() {
1093 CACHE_MISS.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
1094}
1095
1096pub fn selector_result_cache_hit() {
1098 CACHE_HIT.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
1099}
1100
/// Builder for [`CacheManager`].
///
/// Every size defaults to 0. For the caches whose construction is guarded by
/// a `!= 0` check in `build`, a size of 0 leaves the cache disabled.
#[derive(Default)]
pub struct CacheManagerBuilder {
    /// Capacity of the SST meta cache; 0 disables it.
    sst_meta_cache_size: u64,
    /// Capacity of the repeated-vector cache; 0 disables it.
    vector_cache_size: u64,
    /// Capacity of the page range cache; 0 disables it.
    page_cache_size: u64,
    /// Metadata capacity passed to the inverted and bloom filter index caches.
    index_metadata_size: u64,
    /// Content capacity passed to the index caches.
    index_content_size: u64,
    /// Content page size passed to the inverted and bloom filter index caches.
    index_content_page_size: u64,
    /// Capacity of the index result cache; 0 disables it.
    index_result_cache_size: u64,
    /// Capacity of the prefilter result cache; 0 disables it.
    prefilter_result_cache_size: u64,
    /// Capacity passed to the puffin metadata cache.
    puffin_metadata_size: u64,
    /// Optional write cache handed to the manager.
    write_cache: Option<WriteCacheRef>,
    /// Capacity of the selector result cache; 0 disables it.
    selector_result_cache_size: u64,
    /// Capacity of the range result cache.
    range_result_cache_size: u64,
}
1117
impl CacheManagerBuilder {
    /// Sets the capacity, in bytes, of the SST metadata cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`.
    pub fn sst_meta_cache_size(mut self, bytes: u64) -> Self {
        self.sst_meta_cache_size = bytes;
        self
    }

    /// Sets the capacity, in bytes, of the repeated-vector cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`.
    pub fn vector_cache_size(mut self, bytes: u64) -> Self {
        self.vector_cache_size = bytes;
        self
    }

    /// Sets the capacity, in bytes, of the page range cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`.
    pub fn page_cache_size(mut self, bytes: u64) -> Self {
        self.page_cache_size = bytes;
        self
    }

    /// Sets the optional write cache that is handed to the built manager as-is.
    pub fn write_cache(mut self, cache: Option<WriteCacheRef>) -> Self {
        self.write_cache = cache;
        self
    }

    /// Sets the index metadata size passed to both the inverted index cache
    /// and the bloom filter index cache.
    pub fn index_metadata_size(mut self, bytes: u64) -> Self {
        self.index_metadata_size = bytes;
        self
    }

    /// Sets the index content size passed to both the inverted index cache
    /// and the bloom filter index cache.
    ///
    /// With the `vector_index` feature enabled it also sizes the vector index
    /// cache, which is only created when the value is non-zero.
    pub fn index_content_size(mut self, bytes: u64) -> Self {
        self.index_content_size = bytes;
        self
    }

    /// Sets the index content page size passed to both the inverted index
    /// cache and the bloom filter index cache.
    pub fn index_content_page_size(mut self, bytes: u64) -> Self {
        self.index_content_page_size = bytes;
        self
    }

    /// Sets the capacity of the index result cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`.
    pub fn index_result_cache_size(mut self, bytes: u64) -> Self {
        self.index_result_cache_size = bytes;
        self
    }

    /// Sets the capacity of the prefilter result cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`.
    pub fn prefilter_result_cache_size(mut self, bytes: u64) -> Self {
        self.prefilter_result_cache_size = bytes;
        self
    }

    /// Sets the size passed to the puffin metadata cache.
    pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
        self.puffin_metadata_size = bytes;
        self
    }

    /// Sets the capacity, in bytes, of the selector result cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`.
    pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
        self.selector_result_cache_size = bytes;
        self
    }

    /// Sets the capacity, in bytes, of the range result cache.
    ///
    /// `build()` leaves this cache disabled when the value is `0`; the same
    /// value is also used to configure the range result memory limiter.
    pub fn range_result_cache_size(mut self, bytes: u64) -> Self {
        self.range_result_cache_size = bytes;
        self
    }

    /// Builds the [`CacheManager`] from the configured sizes.
    ///
    /// Caches guarded by a `!= 0` check below are `None` when their size is
    /// zero. The inverted index, bloom filter index and puffin metadata caches
    /// are always constructed.
    pub fn build(self) -> CacheManager {
        let sst_meta_cache = (self.sst_meta_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.sst_meta_cache_size)
                .weigher(meta_cache_weight)
                // Subtract the removed entry's weight from the bytes metric and
                // count the removal by cause.
                .eviction_listener(|k, v, cause| {
                    let size = meta_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[SST_META_TYPE])
                        .sub(size.into());
                    CACHE_EVICTION
                        .with_label_values(&[SST_META_TYPE, removal_cause_str(cause)])
                        .inc();
                })
                .build()
        });
        let vector_cache = (self.vector_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.vector_cache_size)
                .weigher(vector_cache_weight)
                // Same metric bookkeeping as the SST metadata cache.
                .eviction_listener(|k, v, cause| {
                    let size = vector_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[VECTOR_TYPE])
                        .sub(size.into());
                    CACHE_EVICTION
                        .with_label_values(&[VECTOR_TYPE, removal_cause_str(cause)])
                        .inc();
                })
                .build()
        });
        let page_cache =
            (self.page_cache_size != 0).then(|| PageRangeCache::new(self.page_cache_size));
        let inverted_index_cache = InvertedIndexCache::new(
            self.index_metadata_size,
            self.index_content_size,
            self.index_content_page_size,
        );
        // The bloom filter index cache is sized with the same three settings
        // as the inverted index cache.
        let bloom_filter_index_cache = BloomFilterIndexCache::new(
            self.index_metadata_size,
            self.index_content_size,
            self.index_content_page_size,
        );
        // The vector index cache reuses `index_content_size` as its size.
        #[cfg(feature = "vector_index")]
        let vector_index_cache = (self.index_content_size != 0)
            .then(|| Arc::new(VectorIndexCache::new(self.index_content_size)));
        let index_result_cache = (self.index_result_cache_size != 0)
            .then(|| IndexResultCache::new(self.index_result_cache_size));
        let prefilter_result_cache = (self.prefilter_result_cache_size != 0)
            .then(|| new_prefilter_result_cache(self.prefilter_result_cache_size));
        let puffin_metadata_cache =
            PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
        let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.selector_result_cache_size)
                .weigher(selector_result_cache_weight)
                // Same metric bookkeeping as the SST metadata cache.
                .eviction_listener(|k, v, cause| {
                    let size = selector_result_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[SELECTOR_RESULT_TYPE])
                        .sub(size.into());
                    CACHE_EVICTION
                        .with_label_values(&[SELECTOR_RESULT_TYPE, removal_cause_str(cause)])
                        .inc();
                })
                .build()
        });
        let range_result_cache = (self.range_result_cache_size != 0).then(|| {
            Cache::builder()
                .max_capacity(self.range_result_cache_size)
                .weigher(range_result_cache_weight)
                // Same metric bookkeeping as the SST metadata cache.
                .eviction_listener(move |k, v, cause| {
                    let size = range_result_cache_weight(&k, &v);
                    CACHE_BYTES
                        .with_label_values(&[RANGE_RESULT_TYPE])
                        .sub(size.into());
                    CACHE_EVICTION
                        .with_label_values(&[RANGE_RESULT_TYPE, removal_cause_str(cause)])
                        .inc();
                })
                .build()
        });
        CacheManager {
            sst_meta_cache,
            vector_cache,
            page_cache,
            write_cache: self.write_cache,
            inverted_index_cache: Some(Arc::new(inverted_index_cache)),
            bloom_filter_index_cache: Some(Arc::new(bloom_filter_index_cache)),
            #[cfg(feature = "vector_index")]
            vector_index_cache,
            puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
            selector_result_cache,
            range_result_cache,
            range_result_cache_size: self.range_result_cache_size,
            // The limiter is built from the range result cache size and the
            // byte size of `RANGE_RESULT_CONCAT_MEMORY_PERMIT`.
            range_result_memory_limiter: Arc::new(RangeResultMemoryLimiter::new(
                self.range_result_cache_size as usize,
                RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize,
            )),
            index_result_cache,
            prefilter_result_cache,
        }
    }
}
1297
1298fn meta_cache_weight(k: &SstMetaKey, v: &Arc<CachedSstMeta>) -> u32 {
1299 (k.estimated_size() + parquet_meta_size(&v.parquet_metadata) + v.region_metadata_weight) as u32
1301}
1302
1303fn vector_cache_weight(_k: &(ConcreteDataType, Value), v: &VectorRef) -> u32 {
1304 (mem::size_of::<ConcreteDataType>() + mem::size_of::<Value>() + v.memory_size()) as u32
1306}
1307
1308fn page_cache_weight(k: &PageFragmentKey, v: &Bytes) -> u32 {
1309 (k.estimated_size() + mem::size_of::<Bytes>() + v.len()) as u32
1310}
1311
1312fn selector_result_cache_weight(k: &SelectorResultKey, v: &Arc<SelectorResultValue>) -> u32 {
1313 (mem::size_of_val(k) + v.estimated_size()) as u32
1314}
1315
1316fn range_result_cache_weight(k: &RangeScanCacheKey, v: &Arc<RangeScanCacheValue>) -> u32 {
1317 (k.estimated_size() + v.estimated_size()) as u32
1318}
1319
1320fn update_hit_miss<T>(value: Option<T>, cache_type: &str) -> Option<T> {
1322 if value.is_some() {
1323 CACHE_HIT.with_label_values(&[cache_type]).inc();
1324 } else {
1325 CACHE_MISS.with_label_values(&[cache_type]).inc();
1326 }
1327 value
1328}
1329
/// Key of the SST metadata cache: a region id paired with a file id.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SstMetaKey(RegionId, FileId);
1333
1334impl SstMetaKey {
1335 fn estimated_size(&self) -> usize {
1337 mem::size_of::<Self>()
1338 }
1339}
1340
/// Groups page fragments by file and row group; used as the outer key of the
/// page fragment index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct PageFragmentGroupKey {
    file_id: FileId,
    row_group_idx: usize,
}
1346
/// Key of a single cached page fragment: one `start..end` range inside a row
/// group of a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PageFragmentKey {
    /// File the fragment belongs to.
    file_id: FileId,
    /// Index of the row group the fragment belongs to.
    row_group_idx: usize,
    /// Start of the fragment's range (copied from `Range::start`).
    start: u64,
    /// End of the fragment's range (copied from `Range::end`).
    end: u64,
}
1359
1360impl PageFragmentKey {
1361 fn new(file_id: FileId, row_group_idx: usize, range: &Range<u64>) -> PageFragmentKey {
1362 PageFragmentKey {
1363 file_id,
1364 row_group_idx,
1365 start: range.start,
1366 end: range.end,
1367 }
1368 }
1369
1370 fn group_key(&self) -> PageFragmentGroupKey {
1371 PageFragmentGroupKey {
1372 file_id: self.file_id,
1373 row_group_idx: self.row_group_idx,
1374 }
1375 }
1376
1377 fn estimated_size(&self) -> usize {
1379 mem::size_of::<Self>()
1380 }
1381}
1382
/// A cached slice that covers part (or all) of a requested range.
#[derive(Clone)]
pub struct PageRangePart {
    /// The sub-range this part covers.
    pub range: Range<u64>,
    /// The cached bytes for `range`.
    pub bytes: Bytes,
}
1391
/// Result of looking up a list of ranges in the page range cache.
pub struct PageRangeLookup {
    /// Cached parts, one inner `Vec` per requested range, in request order.
    pub cached_parts: Vec<Vec<PageRangePart>>,
    /// Sub-ranges of the requested ranges that were not found in the cache.
    pub missing_ranges: Vec<Range<u64>>,
    /// Number of cached parts used across all requested ranges.
    pub cached_range_count: usize,
    /// Total length of the cached parts used.
    pub cached_bytes: u64,
}
1403
1404impl PageRangeLookup {
1405 pub fn is_fully_cached(&self) -> bool {
1406 self.missing_ranges.is_empty()
1407 }
1408}
1409
/// Fragments of one (file, row group), ordered by `(start, end)`.
type PageFragmentRangeIndex = BTreeMap<(u64, u64), PageFragmentKey>;
/// Per-group fragment index, keyed by (file, row group).
type PageFragmentIndex = HashMap<PageFragmentGroupKey, PageFragmentRangeIndex>;
1412
/// Cache of page byte ranges with a side index that allows lookups by
/// overlapping range rather than by exact key.
pub struct PageRangeCache {
    /// Stores the fragment bytes, keyed by exact fragment range.
    cache: Cache<PageFragmentKey, Bytes>,
    /// Ordered index of fragment keys per (file, row group).
    index: RwLock<PageFragmentIndex>,
}
1418
impl PageRangeCache {
    /// Creates a page range cache with the given capacity.
    ///
    /// The cache is built inside `Arc::new_cyclic` so that the eviction
    /// listener can hold a `Weak` reference back to the `PageRangeCache` and
    /// drop the matching index entry when a fragment leaves the cache.
    fn new(capacity: u64) -> Arc<PageRangeCache> {
        Arc::new_cyclic(|weak_cache: &std::sync::Weak<PageRangeCache>| {
            let cache = Cache::builder()
                .max_capacity(capacity)
                .weigher(page_cache_weight)
                .eviction_listener({
                    let weak_cache = weak_cache.clone();
                    move |k, v, cause| {
                        let size = page_cache_weight(&k, &v);
                        CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
                        CACHE_EVICTION
                            .with_label_values(&[PAGE_TYPE, removal_cause_str(cause)])
                            .inc();

                        // A replaced entry keeps its index entry; every other
                        // removal cause drops it.
                        if let Some(cache) = weak_cache.upgrade()
                            && !matches!(cause, RemovalCause::Replaced)
                        {
                            cache.remove_index_entry(*k);
                        }
                    }
                })
                .build();

            PageRangeCache {
                cache,
                index: RwLock::new(HashMap::new()),
            }
        })
    }

    /// Looks up `ranges` of the given file and row group.
    ///
    /// For every requested range the result holds the cached parts that cover
    /// it (trimmed to the range and to each other so they do not overlap) and
    /// records the uncovered gaps in `missing_ranges`. Empty ranges
    /// (`start >= end`) yield an empty part list and no missing range.
    fn lookup(
        &self,
        file_id: FileId,
        row_group_idx: usize,
        ranges: &[Range<u64>],
    ) -> PageRangeLookup {
        let mut cached_parts = Vec::with_capacity(ranges.len());
        let mut missing_ranges = Vec::new();
        let mut cached_range_count = 0;
        let mut cached_bytes = 0;

        for range in ranges {
            if range.start >= range.end {
                cached_parts.push(Vec::new());
                continue;
            }

            let mut parts = Vec::new();
            let candidates = self.find_index_candidates(file_id, row_group_idx, range);
            let mut stale_keys = Vec::new();

            for fragment_key in candidates {
                if let Some(bytes) = self.cache.get(&fragment_key) {
                    // Clamp the fragment to the requested range and slice the
                    // cached bytes relative to the fragment's own start.
                    let part_start = range.start.max(fragment_key.start);
                    let part_end = range.end.min(fragment_key.end);
                    let slice_start = (part_start - fragment_key.start) as usize;
                    let slice_end = (part_end - fragment_key.start) as usize;
                    parts.push(PageRangePart {
                        range: part_start..part_end,
                        bytes: bytes.slice(slice_start..slice_end),
                    });
                } else {
                    // Indexed but no longer in the cache.
                    stale_keys.push(fragment_key);
                }
            }
            for key in stale_keys {
                self.remove_uncached_index_entry(key);
            }

            // Walk the parts in index order; `cursor` is the first offset of
            // the requested range not yet accounted for.
            let mut cursor = range.start;
            let mut compacted_parts: Vec<PageRangePart> = Vec::with_capacity(parts.len());
            for part in parts {
                // Entirely covered by earlier parts.
                if part.range.end <= cursor {
                    continue;
                }

                // Drop the prefix that earlier parts already cover.
                let part = if part.range.start < cursor {
                    let offset = (cursor - part.range.start) as usize;
                    PageRangePart {
                        range: cursor..part.range.end,
                        bytes: part.bytes.slice(offset..),
                    }
                } else {
                    part
                };

                // Gap between the covered prefix and this part.
                if cursor < part.range.start {
                    missing_ranges.push(cursor..part.range.start);
                }
                cached_bytes += part.range.end - part.range.start;
                cached_range_count += 1;
                cursor = part.range.end;
                compacted_parts.push(part);

                if cursor >= range.end {
                    break;
                }
            }

            // Tail of the range that no part covers.
            if cursor < range.end {
                missing_ranges.push(cursor..range.end);
            }
            cached_parts.push(compacted_parts);
        }

        PageRangeLookup {
            cached_parts,
            missing_ranges,
            cached_range_count,
            cached_bytes,
        }
    }

    /// Inserts `pages` for the matching `ranges` of the given file and row
    /// group.
    ///
    /// Pairs whose range is empty or whose byte length differs from the range
    /// length are skipped.
    fn insert_ranges(
        &self,
        file_id: FileId,
        row_group_idx: usize,
        ranges: &[Range<u64>],
        pages: &[Bytes],
    ) {
        for (range, bytes) in ranges.iter().zip(pages) {
            if range.start >= range.end || bytes.len() as u64 != range.end - range.start {
                continue;
            }

            let key = PageFragmentKey::new(file_id, row_group_idx, range);
            // Copy into a fresh buffer so the cached fragment does not share
            // the caller's allocation.
            let bytes = Bytes::copy_from_slice(bytes.as_ref());
            let size = page_cache_weight(&key, &bytes);
            CACHE_BYTES.with_label_values(&[PAGE_TYPE]).add(size.into());
            // The cache insert happens before the index write lock is taken.
            self.cache.insert(key, bytes);
            let mut index = self.index.write().unwrap();
            index
                .entry(key.group_key())
                .or_default()
                .insert((key.start, key.end), key);
        }
    }

    /// Returns the indexed fragment keys of the group that overlap `range`,
    /// ordered by `(start, end)`.
    fn find_index_candidates(
        &self,
        file_id: FileId,
        row_group_idx: usize,
        range: &Range<u64>,
    ) -> Vec<PageFragmentKey> {
        let group_key = PageFragmentGroupKey {
            file_id,
            row_group_idx,
        };
        let index = self.index.read().unwrap();
        index
            .get(&group_key)
            .map(|ranges| {
                ranges
                    // Keys below `(range.end, 0)` are exactly those with
                    // `start < range.end`.
                    .range(..(range.end, 0))
                    .filter_map(|(_, fragment_key)| {
                        (fragment_key.end > range.start).then_some(*fragment_key)
                    })
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Removes the index entry for `key` unless the cache contains the key
    /// once the index write lock is held.
    fn remove_uncached_index_entry(&self, key: PageFragmentKey) {
        let group_key = key.group_key();
        let mut index = self.index.write().unwrap();
        if self.cache.contains_key(&key) {
            return;
        }

        Self::remove_index_entry_locked(&mut index, group_key, key);
    }

    /// Removes the index entry for `key` unconditionally.
    fn remove_index_entry(&self, key: PageFragmentKey) {
        let group_key = key.group_key();
        let mut index = self.index.write().unwrap();
        Self::remove_index_entry_locked(&mut index, group_key, key);
    }

    /// Removes `key` from an already-locked index and drops the group once it
    /// has no entries left.
    fn remove_index_entry_locked(
        index: &mut PageFragmentIndex,
        group_key: PageFragmentGroupKey,
        key: PageFragmentKey,
    ) {
        let Some(ranges) = index.get_mut(&group_key) else {
            return;
        };

        // Only remove when the stored key equals the one being removed.
        let removed = ranges
            .get(&(key.start, key.end))
            .is_some_and(|current| current == &key);
        if removed {
            ranges.remove(&(key.start, key.end));
        }
        if ranges.is_empty() {
            index.remove(&group_key);
        }
    }
}
1618
/// Key of the selector result cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SelectorResultKey {
    /// File the result was produced from.
    pub file_id: FileId,
    /// Index of the row group inside the file.
    pub row_group_idx: usize,
    /// Time series row selector the result belongs to.
    pub selector: TimeSeriesRowSelector,
}
1629
/// Cached selector output, stored in one of two batch representations.
pub enum SelectorResult {
    /// Result held as a list of [`Batch`].
    PrimaryKey(Vec<Batch>),
    /// Result held as a list of arrow [`RecordBatch`].
    Flat(Vec<RecordBatch>),
}
1637
/// Value of the selector result cache.
pub struct SelectorResultValue {
    /// The cached batches.
    pub result: SelectorResult,
    /// The parquet read columns stored alongside the result.
    pub read_cols: ParquetReadColumns,
}
1645
1646impl SelectorResultValue {
1647 pub fn new(result: Vec<Batch>, read_cols: ParquetReadColumns) -> SelectorResultValue {
1649 SelectorResultValue {
1650 result: SelectorResult::PrimaryKey(result),
1651 read_cols,
1652 }
1653 }
1654
1655 pub fn new_flat(
1657 result: Vec<RecordBatch>,
1658 read_cols: ParquetReadColumns,
1659 ) -> SelectorResultValue {
1660 SelectorResultValue {
1661 result: SelectorResult::Flat(result),
1662 read_cols,
1663 }
1664 }
1665
1666 fn estimated_size(&self) -> usize {
1668 match &self.result {
1669 SelectorResult::PrimaryKey(batches) => {
1670 batches.iter().map(|batch| batch.memory_size()).sum()
1671 }
1672 SelectorResult::Flat(batches) => batches.iter().map(record_batch_estimated_size).sum(),
1673 }
1674 }
1675}
1676
/// Maps an SST metadata key to its cached metadata.
type SstMetaCache = Cache<SstMetaKey, Arc<CachedSstMeta>>;
/// Maps a (data type, value) pair to a cached vector.
type VectorCache = Cache<(ConcreteDataType, Value), VectorRef>;
/// Maps a selector result key to its cached result.
type SelectorResultCache = Cache<SelectorResultKey, Arc<SelectorResultValue>>;
/// Maps a range scan key to its cached value.
type RangeResultCache = Cache<RangeScanCacheKey, Arc<RangeScanCacheValue>>;
1687
1688#[cfg(test)]
1689mod tests {
1690 use std::sync::Arc;
1691
1692 use api::v1::SemanticType;
1693 use api::v1::index::{BloomFilterMeta, InvertedIndexMetas};
1694 use datatypes::schema::ColumnSchema;
1695 use datatypes::vectors::Int64Vector;
1696 use puffin::file_metadata::FileMetadata;
1697 use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
1698 use store_api::storage::ColumnId;
1699
1700 use super::*;
1701 use crate::cache::index::bloom_filter_index::Tag;
1702 use crate::cache::index::result_cache::PredicateKey;
1703 use crate::cache::test_util::{
1704 parquet_meta, sst_parquet_meta, sst_parquet_meta_with_region_metadata,
1705 };
1706 use crate::read::range_cache::{
1707 RangeScanCacheKey, RangeScanCacheValue, ScanRequestFingerprintBuilder,
1708 };
1709 use crate::read::read_columns::ReadColumns;
1710 use crate::sst::parquet::row_selection::RowGroupSelection;
1711
    /// A default `CacheManager` has its caches disabled: every `put` is a
    /// no-op and every `get` returns `None`.
    #[tokio::test]
    async fn test_disable_cache() {
        let cache = CacheManager::default();
        assert!(cache.sst_meta_cache.is_none());
        assert!(cache.vector_cache.is_none());
        assert!(cache.page_cache.is_none());

        // Parquet metadata is not retained.
        let region_id = RegionId::new(1, 1);
        let file_id = RegionFileId::new(region_id, FileId::random());
        let metadata = parquet_meta();
        let mut metrics = MetadataCacheMetrics::default();
        cache.put_parquet_meta_data(file_id, metadata, None);
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_none()
        );

        // Repeated vectors are not retained.
        let value = Value::Int64(10);
        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
        cache.put_repeated_vector(value.clone(), vector.clone());
        assert!(
            cache
                .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
                .is_none()
        );

        // Page ranges are not retained.
        cache.put_page_ranges(
            file_id.file_id(),
            1,
            &[Range { start: 0, end: 5 }],
            &[Bytes::from_static(b"abcde")],
        );
        assert!(
            cache
                .get_page_ranges(file_id.file_id(), 1, &[Range { start: 0, end: 5 }])
                .is_none()
        );

        assert!(cache.write_cache().is_none());
    }
1754
    /// Round-trips parquet metadata through the SST metadata cache: miss, put,
    /// hit, remove, miss.
    #[tokio::test]
    async fn test_parquet_meta_cache() {
        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
        let mut metrics = MetadataCacheMetrics::default();
        let region_id = RegionId::new(1, 1);
        let file_id = RegionFileId::new(region_id, FileId::random());
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_none()
        );
        let (metadata, region_metadata) = sst_parquet_meta();
        cache.put_parquet_meta_data(file_id, metadata, None);
        let cached = cache
            .get_sst_meta_data(file_id, &mut metrics, Default::default())
            .await
            .unwrap();
        assert_eq!(region_metadata, cached.region_metadata());
        // The cached parquet metadata must not carry a `PARQUET_METADATA_KEY`
        // key-value entry.
        assert!(
            cached
                .parquet_metadata()
                .file_metadata()
                .key_value_metadata()
                .is_none_or(|key_values| {
                    key_values
                        .iter()
                        .all(|key_value| key_value.key != PARQUET_METADATA_KEY)
                })
        );
        cache.remove_parquet_meta_data(file_id);
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_none()
        );
    }
1793
1794 #[tokio::test]
1795 async fn test_parquet_meta_cache_with_provided_region_metadata() {
1796 let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
1797 let mut metrics = MetadataCacheMetrics::default();
1798 let region_id = RegionId::new(1, 1);
1799 let file_id = RegionFileId::new(region_id, FileId::random());
1800 let (metadata, region_metadata) = sst_parquet_meta();
1801
1802 cache.put_parquet_meta_data(file_id, metadata, Some(region_metadata.clone()));
1803
1804 let cached = cache
1805 .get_sst_meta_data(file_id, &mut metrics, Default::default())
1806 .await
1807 .unwrap();
1808 assert!(Arc::ptr_eq(®ion_metadata, &cached.region_metadata()));
1809 }
1810
    /// Metadata cached with `PageIndexPolicy::Skip` must not satisfy an
    /// `Optional` request, while metadata cached with `Optional` satisfies
    /// both `Optional` and `Skip` requests.
    #[tokio::test]
    async fn test_parquet_meta_cache_respects_page_index_policy() {
        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
        let region_id = RegionId::new(1, 1);
        let file_id = RegionFileId::new(region_id, FileId::random());
        let (metadata, _) = sst_parquet_meta();

        let skip_metadata = Arc::new(
            CachedSstMeta::try_new_with_page_index_policy(
                "test.parquet",
                Arc::unwrap_or_clone(metadata.clone()),
                None,
                PageIndexPolicy::Skip,
            )
            .unwrap(),
        );
        cache.put_sst_meta_data(file_id, skip_metadata);

        // Cached with `Skip`, requested with `Optional`: counted as a miss.
        let mut metrics = MetadataCacheMetrics::default();
        assert!(
            cache
                .get_sst_meta_data(file_id, &mut metrics, PageIndexPolicy::Optional)
                .await
                .is_none()
        );
        assert_eq!(1, metrics.cache_miss);

        let optional_metadata = Arc::new(
            CachedSstMeta::try_new_with_page_index_policy(
                "test.parquet",
                Arc::unwrap_or_clone(metadata),
                None,
                PageIndexPolicy::Optional,
            )
            .unwrap(),
        );
        cache.put_sst_meta_data(file_id, optional_metadata);

        // Cached with `Optional`, requested with `Optional`: hit.
        let mut metrics = MetadataCacheMetrics::default();
        assert!(
            cache
                .get_sst_meta_data(file_id, &mut metrics, PageIndexPolicy::Optional)
                .await
                .is_some()
        );
        assert_eq!(1, metrics.mem_cache_hit);

        // Cached with `Optional`, requested with `Skip`: also a hit.
        let mut metrics = MetadataCacheMetrics::default();
        assert!(
            cache
                .get_sst_meta_data(file_id, &mut metrics, PageIndexPolicy::Skip)
                .await
                .is_some()
        );
        assert_eq!(1, metrics.mem_cache_hit);
    }
1867
    /// The SST metadata weight must include `region_metadata_weight`, which
    /// in turn must exceed the JSON length of the region metadata.
    #[test]
    fn test_meta_cache_weight_accounts_for_decoded_region_metadata() {
        let region_metadata = Arc::new(wide_region_metadata(128));
        let json_len = region_metadata.to_json().unwrap().len();
        let metadata = sst_parquet_meta_with_region_metadata(region_metadata.clone());
        let cached = Arc::new(
            CachedSstMeta::try_new("test.parquet", Arc::unwrap_or_clone(metadata)).unwrap(),
        );
        let key = SstMetaKey(region_metadata.region_id, FileId::random());

        assert!(cached.region_metadata_weight > json_len);
        assert_eq!(
            meta_cache_weight(&key, &cached) as usize,
            key.estimated_size()
                + parquet_meta_size(&cached.parquet_metadata)
                + cached.region_metadata_weight
        );
    }
1886
    /// A repeated vector is a miss before `put` and is returned unchanged
    /// afterwards.
    #[test]
    fn test_repeated_vector_cache() {
        let cache = CacheManager::builder().vector_cache_size(4096).build();
        let value = Value::Int64(10);
        assert!(
            cache
                .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
                .is_none()
        );
        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
        cache.put_repeated_vector(value.clone(), vector.clone());
        let cached = cache
            .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
            .unwrap();
        assert_eq!(vector, cached);
    }
1903
    /// Page range lookups report uncached ranges as missing, serve sub-ranges
    /// of a cached fragment, and split partially covered ranges into a cached
    /// part plus a missing remainder.
    #[test]
    fn test_page_cache() {
        let cache = CacheManager::builder().page_cache_size(1000).build();
        let file_id = FileId::random();
        // Nothing cached yet: the whole range is missing.
        let uncached = 0..10;
        assert_eq!(
            vec![0..10],
            cache
                .get_page_ranges(file_id, 0, std::slice::from_ref(&uncached))
                .unwrap()
                .missing_ranges
        );

        let cached = 100..500;
        cache.put_page_ranges(
            file_id,
            0,
            std::slice::from_ref(&cached),
            &[Bytes::from(vec![7; 400])],
        );

        // A sub-range of the cached fragment is fully served.
        let subrange = 200..300;
        let lookup = cache
            .get_page_ranges(file_id, 0, std::slice::from_ref(&subrange))
            .unwrap();
        assert!(lookup.is_fully_cached());
        assert_eq!(100, lookup.cached_bytes);
        assert_eq!(1, lookup.cached_parts.len());
        assert_eq!(200..300, lookup.cached_parts[0][0].range);
        assert_eq!(100, lookup.cached_parts[0][0].bytes.len());

        // A range that extends past the fragment is only partially served.
        let overlapping = 400..600;
        let lookup = cache
            .get_page_ranges(file_id, 0, std::slice::from_ref(&overlapping))
            .unwrap();
        assert!(!lookup.is_fully_cached());
        assert_eq!(100, lookup.cached_bytes);
        assert_eq!(vec![500..600], lookup.missing_ranges);
        assert_eq!(400..500, lookup.cached_parts[0][0].range);
    }
1944
    /// Inserted fragments are copied: the cached bytes equal the inserted
    /// page but live at a different address than the caller's buffer.
    #[test]
    fn test_page_cache_detaches_fragment_bytes() {
        let cache = PageRangeCache::new(1000);
        let file_id = FileId::random();
        // A small slice of a larger backing buffer.
        let backing = Bytes::from(vec![1; 1024]);
        let page = backing.slice(512..522);
        let page_ptr = page.as_ptr();
        let range = 0..10;

        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range),
            std::slice::from_ref(&page),
        );

        let lookup = cache.lookup(file_id, 0, std::slice::from_ref(&range));
        assert!(lookup.is_fully_cached());
        assert_eq!(1, lookup.cached_parts[0].len());
        assert_eq!(&page[..], &lookup.cached_parts[0][0].bytes[..]);
        assert_ne!(page_ptr, lookup.cached_parts[0][0].bytes.as_ptr());
    }
1967
    /// Re-inserting the same range replaces the bytes and keeps exactly one
    /// index entry for the fragment.
    #[test]
    fn test_page_cache_replaces_fragment() {
        let cache = PageRangeCache::new(1000);
        let file_id = FileId::random();
        let range = 0..10;

        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range),
            &[Bytes::from(vec![1; 10])],
        );
        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range),
            &[Bytes::from(vec![2; 10])],
        );
        // Flush pending cache work before inspecting the index.
        cache.cache.run_pending_tasks();
        assert_eq!(
            vec![PageFragmentKey::new(file_id, 0, &range)],
            cache.find_index_candidates(file_id, 0, &range)
        );

        let lookup = cache.lookup(file_id, 0, std::slice::from_ref(&range));
        assert!(lookup.is_fully_cached());
        assert_eq!(&vec![2; 10][..], &lookup.cached_parts[0][0].bytes[..]);
    }
1996
    /// Two disjoint ranges inserted separately for the same row group are
    /// both retained and both served.
    #[test]
    fn test_page_cache_retains_disjoint_inserts_for_same_row_group() {
        let cache = PageRangeCache::new(1000);
        let file_id = FileId::random();
        let range1 = 0..10;
        let range2 = 20..30;

        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range1),
            &[Bytes::from(vec![1; 10])],
        );
        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range2),
            &[Bytes::from(vec![2; 10])],
        );

        let lookup = cache.lookup(file_id, 0, &[range1, range2]);
        assert!(lookup.is_fully_cached());
        assert_eq!(2, lookup.cached_range_count);
        assert_eq!(&vec![1; 10][..], &lookup.cached_parts[0][0].bytes[..]);
        assert_eq!(&vec![2; 10][..], &lookup.cached_parts[1][0].bytes[..]);
    }
2023
    /// Invalidating a fragment also removes its index entry, so a later
    /// lookup reports the whole range as missing.
    #[test]
    fn test_page_cache_fragment_eviction() {
        let file_id = FileId::random();
        let range = 0..10;
        let key = PageFragmentKey::new(file_id, 0, &range);
        let page = Bytes::from(vec![1; 10]);
        // Capacity is exactly the weight of this one fragment.
        let cache = PageRangeCache::new(page_cache_weight(&key, &page) as u64);

        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range),
            &[Bytes::from(vec![1; 10])],
        );
        assert!(
            cache
                .lookup(file_id, 0, std::slice::from_ref(&range))
                .is_fully_cached()
        );

        cache.cache.invalidate(&key);
        // Flush pending cache work before inspecting the index.
        cache.cache.run_pending_tasks();
        assert!(cache.find_index_candidates(file_id, 0, &range).is_empty());

        let lookup = cache.lookup(file_id, 0, std::slice::from_ref(&range));
        assert!(!lookup.is_fully_cached());
        assert_eq!(vec![0..10], lookup.missing_ranges);
    }
2052
    /// With a capacity of 1, a 10-byte fragment is not served after pending
    /// tasks run.
    #[test]
    fn test_page_cache_rejects_oversized_fragment() {
        let cache = PageRangeCache::new(1);
        let file_id = FileId::random();
        let range = 0..10;

        cache.insert_ranges(
            file_id,
            0,
            std::slice::from_ref(&range),
            &[Bytes::from(vec![1; 10])],
        );
        cache.cache.run_pending_tasks();

        let lookup = cache.lookup(file_id, 0, std::slice::from_ref(&range));
        assert!(!lookup.is_fully_cached());
        assert_eq!(vec![0..10], lookup.missing_ranges);
    }
2071
    /// A selector result is a miss before `put` and a hit afterwards.
    #[test]
    fn test_selector_result_cache() {
        let cache = CacheManager::builder()
            .selector_result_cache_size(1000)
            .build();
        let file_id = FileId::random();
        let key = SelectorResultKey {
            file_id,
            row_group_idx: 0,
            selector: TimeSeriesRowSelector::LastRow,
        };
        assert!(cache.get_selector_result(&key).is_none());
        let result = Arc::new(SelectorResultValue::new(
            Vec::new(),
            ParquetReadColumns::from_deduped(Vec::new()),
        ));
        cache.put_selector_result(key, result);
        assert!(cache.get_selector_result(&key).is_some());
    }
2091
    /// Exercises the prefilter result cache directly and through each
    /// `CacheStrategy` variant.
    #[test]
    fn test_prefilter_result_cache() {
        // A manager built without a prefilter cache size stores nothing.
        let disabled = CacheManager::builder().build();
        let file_id = FileId::random();
        let key = PrefilterKey::new(
            file_id,
            0,
            None,
            1,
            SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()]),
        );
        let selection = Arc::new(BooleanBuffer::new_set(3));

        disabled.put_prefilter_result(key.clone(), selection.clone());
        assert!(disabled.get_prefilter_result(&key).is_none());

        let cache = Arc::new(
            CacheManager::builder()
                .prefilter_result_cache_size(1000)
                .build(),
        );
        assert!(cache.get_prefilter_result(&key).is_none());
        cache.put_prefilter_result(key.clone(), selection.clone());
        assert_eq!(
            cache.get_prefilter_result(&key).unwrap().as_ref(),
            selection.as_ref()
        );

        let enable_all = CacheStrategy::EnableAll(cache.clone());
        assert!(enable_all.get_prefilter_result(&key).is_some());

        // The compaction strategy does not return the entry even though the
        // underlying manager holds it.
        let compaction = CacheStrategy::Compaction(cache.clone());
        assert!(compaction.get_prefilter_result(&key).is_none());
        compaction.put_prefilter_result(key.clone(), selection.clone());
        assert!(cache.get_prefilter_result(&key).is_some());

        // The disabled strategy returns nothing; the manager's entry is still
        // present afterwards.
        let disabled_strategy = CacheStrategy::Disabled;
        assert!(disabled_strategy.get_prefilter_result(&key).is_none());
        disabled_strategy.put_prefilter_result(key.clone(), selection);
        assert!(cache.get_prefilter_result(&key).is_some());
    }
2133
    /// Changing any single argument of `PrefilterKey::new` must yield a key
    /// that differs from the base key.
    #[test]
    fn test_prefilter_key_distinguishes_dimensions() {
        let file_id = FileId::random();
        let row_selection = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(3)]);
        let other_row_selection =
            RowSelection::from(vec![RowSelector::skip(2), RowSelector::select(2)]);
        let row_selection = PrefilterKey::row_selection_snapshot(Some(&row_selection));
        let other_row_selection = PrefilterKey::row_selection_snapshot(Some(&other_row_selection));
        let base = PrefilterKey::new(
            file_id,
            0,
            row_selection.clone(),
            1,
            SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()]),
        );

        // Different file id.
        assert_ne!(
            base,
            PrefilterKey::new(
                FileId::random(),
                0,
                row_selection.clone(),
                1,
                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
            )
        );
        // Different second argument.
        assert_ne!(
            base,
            PrefilterKey::new(
                file_id,
                1,
                row_selection.clone(),
                1,
                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
            )
        );
        // Different row selection snapshot.
        assert_ne!(
            base,
            PrefilterKey::new(
                file_id,
                0,
                other_row_selection,
                1,
                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
            )
        );
        // Different filter expression.
        assert_ne!(
            base,
            PrefilterKey::new(
                file_id,
                0,
                row_selection.clone(),
                1,
                SmallVec::from_vec(vec!["tag_0 IN ([b])".to_string()])
            )
        );
        // Different fourth argument.
        assert_ne!(
            base,
            PrefilterKey::new(
                file_id,
                0,
                row_selection.clone(),
                2,
                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
            )
        );
        // An additional filter expression.
        let pk_group = PrefilterKey::new(
            file_id,
            0,
            row_selection,
            1,
            SmallVec::from_vec(vec![
                "tag_0 IN ([a])".to_string(),
                "tag_1 IN ([x])".to_string(),
            ]),
        );
        assert_ne!(base, pk_group);
    }
2212
    /// Exercises the range result cache directly and through each
    /// `CacheStrategy` variant.
    #[test]
    fn test_range_result_cache() {
        let cache = Arc::new(
            CacheManager::builder()
                .range_result_cache_size(1024 * 1024)
                .build(),
        );

        let key = RangeScanCacheKey {
            region_id: RegionId::new(1, 1),
            row_groups: vec![(FileId::random(), 0)],
            scan: ScanRequestFingerprintBuilder {
                read_columns: ReadColumns::from_deduped_column_ids(std::iter::empty()),
                read_column_types: vec![],
                filters: vec!["tag_0 = 1".to_string()],
                time_filters: vec![],
                series_row_selector: None,
                append_mode: false,
                filter_deleted: true,
                merge_mode: crate::region::options::MergeMode::LastRow,
                partition_expr_version: 0,
            }
            .build(),
        };
        let value = Arc::new(RangeScanCacheValue::new(Vec::new(), 0));

        assert!(cache.get_range_result(&key).is_none());
        cache.put_range_result(key.clone(), value.clone());
        assert!(cache.get_range_result(&key).is_some());

        let enable_all = CacheStrategy::EnableAll(cache.clone());
        assert!(enable_all.get_range_result(&key).is_some());

        // The compaction strategy does not return the entry even though the
        // underlying manager holds it.
        let compaction = CacheStrategy::Compaction(cache.clone());
        assert!(compaction.get_range_result(&key).is_none());
        compaction.put_range_result(key.clone(), value.clone());
        assert!(cache.get_range_result(&key).is_some());

        // The disabled strategy returns nothing; the manager's entry is still
        // present afterwards.
        let disabled = CacheStrategy::Disabled;
        assert!(disabled.get_range_result(&key).is_none());
        disabled.put_range_result(key.clone(), value);
        assert!(cache.get_range_result(&key).is_some());
    }
2256
    /// The range result cache size is exposed by the manager and determines
    /// the limiter's permit count (cache size divided by permit size, rounded
    /// up).
    #[test]
    fn test_range_result_cache_size_configures_limiter() {
        let cache_size = 3 * 1024_u64;
        let cache = CacheManager::builder()
            .range_result_cache_size(cache_size)
            .build();

        assert_eq!(cache.range_result_cache_size(), cache_size as usize);
        assert_eq!(
            cache.range_result_memory_limiter().permit_bytes(),
            RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize
        );
        assert_eq!(
            cache.range_result_memory_limiter().available_permits(),
            (cache_size as usize).div_ceil(RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize)
        );
    }
2274
    /// A request larger than the limiter's capacity fails with an error and
    /// leaves the available permits untouched.
    #[tokio::test]
    async fn range_result_memory_limiter_rejects_oversized_request() {
        let limiter = RangeResultMemoryLimiter::new(2 * 1024, 1024);
        assert_eq!(limiter.available_permits(), 2);

        let err = limiter.acquire(10 * 1024).await.unwrap_err();
        assert!(
            err.to_string().contains("exceeds limiter capacity"),
            "unexpected error: {err}"
        );
        assert_eq!(limiter.available_permits(), 2);
    }
2287
    /// A request equal to the full capacity takes every permit and returns
    /// them all when the permit is dropped.
    #[tokio::test]
    async fn range_result_memory_limiter_allows_request_up_to_capacity() {
        let limiter = RangeResultMemoryLimiter::new(2 * 1024, 1024);
        let permit = limiter.acquire(2 * 1024).await.unwrap();
        assert_eq!(limiter.available_permits(), 0);
        drop(permit);
        assert_eq!(limiter.available_permits(), 2);
    }
2296
2297 #[tokio::test]
2298 async fn test_evict_puffin_cache_clears_all_entries() {
2299 use std::collections::{BTreeMap, HashMap};
2300
2301 let cache = CacheManager::builder()
2302 .index_metadata_size(128)
2303 .index_content_size(128)
2304 .index_content_page_size(64)
2305 .index_result_cache_size(128)
2306 .puffin_metadata_size(128)
2307 .build();
2308 let cache = Arc::new(cache);
2309
2310 let region_id = RegionId::new(1, 1);
2311 let index_id = RegionIndexId::new(RegionFileId::new(region_id, FileId::random()), 0);
2312 let column_id: ColumnId = 1;
2313
2314 let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone();
2315 let inverted_cache = cache.inverted_index_cache().unwrap().clone();
2316 let result_cache = cache.index_result_cache().unwrap();
2317 let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone();
2318
2319 let bloom_key = (
2320 index_id.file_id(),
2321 index_id.version,
2322 column_id,
2323 Tag::Skipping,
2324 );
2325 bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
2326 inverted_cache.put_metadata(
2327 (index_id.file_id(), index_id.version),
2328 Arc::new(InvertedIndexMetas::default()),
2329 );
2330 let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new()));
2331 let selection = Arc::new(RowGroupSelection::default());
2332 result_cache.put(predicate.clone(), index_id.file_id(), selection);
2333 let file_id_str = index_id.to_string();
2334 let metadata = Arc::new(FileMetadata {
2335 blobs: Vec::new(),
2336 properties: HashMap::new(),
2337 });
2338 puffin_metadata_cache.put_metadata(file_id_str.clone(), metadata);
2339
2340 assert!(bloom_cache.get_metadata(bloom_key).is_some());
2341 assert!(
2342 inverted_cache
2343 .get_metadata((index_id.file_id(), index_id.version))
2344 .is_some()
2345 );
2346 assert!(result_cache.get(&predicate, index_id.file_id()).is_some());
2347 assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some());
2348
2349 cache.evict_puffin_cache(index_id).await;
2350
2351 assert!(bloom_cache.get_metadata(bloom_key).is_none());
2352 assert!(
2353 inverted_cache
2354 .get_metadata((index_id.file_id(), index_id.version))
2355 .is_none()
2356 );
2357 assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
2358 assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
2359
2360 bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
2362 inverted_cache.put_metadata(
2363 (index_id.file_id(), index_id.version),
2364 Arc::new(InvertedIndexMetas::default()),
2365 );
2366 result_cache.put(
2367 predicate.clone(),
2368 index_id.file_id(),
2369 Arc::new(RowGroupSelection::default()),
2370 );
2371 puffin_metadata_cache.put_metadata(
2372 file_id_str.clone(),
2373 Arc::new(FileMetadata {
2374 blobs: Vec::new(),
2375 properties: HashMap::new(),
2376 }),
2377 );
2378
2379 let strategy = CacheStrategy::EnableAll(cache.clone());
2380 strategy.evict_puffin_cache(index_id).await;
2381
2382 assert!(bloom_cache.get_metadata(bloom_key).is_none());
2383 assert!(
2384 inverted_cache
2385 .get_metadata((index_id.file_id(), index_id.version))
2386 .is_none()
2387 );
2388 assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
2389 assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
2390 }
2391
2392 fn wide_region_metadata(column_count: u32) -> RegionMetadata {
2393 let region_id = RegionId::new(1024, 7);
2394 let mut builder = RegionMetadataBuilder::new(region_id);
2395 let mut primary_key = Vec::new();
2396
2397 for column_id in 0..column_count {
2398 let semantic_type = if column_id < 32 {
2399 primary_key.push(column_id);
2400 SemanticType::Tag
2401 } else {
2402 SemanticType::Field
2403 };
2404 let mut column_schema = ColumnSchema::new(
2405 format!("wide_column_{column_id}"),
2406 ConcreteDataType::string_datatype(),
2407 true,
2408 );
2409 column_schema
2410 .mut_metadata()
2411 .insert(format!("cache_key_{column_id}"), "cache_value".repeat(4));
2412 builder.push_column_metadata(ColumnMetadata {
2413 column_schema,
2414 semantic_type,
2415 column_id,
2416 });
2417 }
2418
2419 builder.push_column_metadata(ColumnMetadata {
2420 column_schema: ColumnSchema::new(
2421 "ts",
2422 ConcreteDataType::timestamp_millisecond_datatype(),
2423 false,
2424 ),
2425 semantic_type: SemanticType::Timestamp,
2426 column_id: column_count,
2427 });
2428 builder.primary_key(primary_key);
2429
2430 builder.build().unwrap()
2431 }
2432}