Skip to main content

mito2/
cache.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Cache for the engine.
16
17pub(crate) mod cache_size;
18
19pub(crate) mod file_cache;
20pub(crate) mod index;
21pub(crate) mod manifest_cache;
22#[cfg(test)]
23pub(crate) mod test_util;
24pub(crate) mod write_cache;
25
26use std::mem;
27use std::ops::Range;
28use std::sync::Arc;
29
30use bytes::Bytes;
31use common_base::readable_size::ReadableSize;
32use common_telemetry::warn;
33use datatypes::arrow::buffer::BooleanBuffer;
34use datatypes::arrow::record_batch::RecordBatch;
35use datatypes::value::Value;
36use datatypes::vectors::VectorRef;
37use index::bloom_filter_index::{BloomFilterIndexCache, BloomFilterIndexCacheRef};
38use index::result_cache::IndexResultCache;
39use moka::notification::RemovalCause;
40use moka::sync::Cache;
41use object_store::ObjectStore;
42use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
43use parquet::file::metadata::{FileMetaData, PageIndexPolicy, ParquetMetaData};
44use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
45use smallvec::SmallVec;
46use snafu::{OptionExt, ResultExt};
47use store_api::metadata::RegionMetadataRef;
48use store_api::storage::{ConcreteDataType, FileId, RegionId, TimeSeriesRowSelector};
49
50use crate::cache::cache_size::parquet_meta_size;
51use crate::cache::file_cache::{FileType, IndexKey};
52use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCacheRef};
53#[cfg(feature = "vector_index")]
54use crate::cache::index::vector_index::{VectorIndexCache, VectorIndexCacheRef};
55use crate::cache::write_cache::WriteCacheRef;
56use crate::error::{InvalidMetadataSnafu, InvalidParquetSnafu, Result, UnexpectedSnafu};
57use crate::memtable::record_batch_estimated_size;
58use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
59use crate::read::Batch;
60use crate::read::range_cache::{RangeScanCacheKey, RangeScanCacheValue};
61use crate::sst::file::{RegionFileId, RegionIndexId};
62use crate::sst::parquet::PARQUET_METADATA_KEY;
63use crate::sst::parquet::read_columns::ParquetReadColumns;
64use crate::sst::parquet::reader::MetadataCacheMetrics;
65
66/// Metrics type key for sst meta.
67const SST_META_TYPE: &str = "sst_meta";
68/// Metrics type key for vector.
69const VECTOR_TYPE: &str = "vector";
70/// Metrics type key for pages.
71const PAGE_TYPE: &str = "page";
72/// Metrics type key for files on the local store.
73const FILE_TYPE: &str = "file";
74/// Metrics type key for index files (puffin) on the local store.
75const INDEX_TYPE: &str = "index";
76/// Metrics type key for selector result cache.
77const SELECTOR_RESULT_TYPE: &str = "selector_result";
78/// Metrics type key for range scan result cache.
79const RANGE_RESULT_TYPE: &str = "range_result";
80/// Metrics type key for prefilter result cache.
81const PREFILTER_RESULT_TYPE: &str = "prefilter_result";
82const RANGE_RESULT_CONCAT_MEMORY_LIMIT: ReadableSize = ReadableSize::mb(512);
83const RANGE_RESULT_CONCAT_MEMORY_PERMIT: ReadableSize = ReadableSize::kb(1);
84
85#[derive(Debug)]
86pub(crate) struct RangeResultMemoryLimiter {
87    semaphore: Arc<tokio::sync::Semaphore>,
88    permit_bytes: usize,
89    total_permits: usize,
90}
91
92impl Default for RangeResultMemoryLimiter {
93    fn default() -> Self {
94        Self::new(
95            RANGE_RESULT_CONCAT_MEMORY_LIMIT.as_bytes() as usize,
96            RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize,
97        )
98    }
99}
100
101impl RangeResultMemoryLimiter {
102    pub(crate) fn new(limit_bytes: usize, permit_bytes: usize) -> Self {
103        let permit_bytes = permit_bytes.max(1);
104        let total_permits = limit_bytes
105            .div_ceil(permit_bytes)
106            .clamp(1, tokio::sync::Semaphore::MAX_PERMITS);
107        Self {
108            semaphore: Arc::new(tokio::sync::Semaphore::new(total_permits)),
109            permit_bytes,
110            total_permits,
111        }
112    }
113
114    #[cfg(test)]
115    pub(crate) fn permit_bytes(&self) -> usize {
116        self.permit_bytes
117    }
118
119    #[cfg(test)]
120    pub(crate) fn available_permits(&self) -> usize {
121        self.semaphore.available_permits()
122    }
123
124    pub(crate) async fn acquire(&self, bytes: usize) -> Result<tokio::sync::SemaphorePermit<'_>> {
125        let permits = bytes.div_ceil(self.permit_bytes).max(1);
126        if permits > self.total_permits {
127            return UnexpectedSnafu {
128                reason: format!(
129                    "range result memory request of {bytes} bytes exceeds limiter capacity of {} bytes",
130                    self.total_permits.saturating_mul(self.permit_bytes)
131                ),
132            }
133            .fail();
134        }
135        self.semaphore
136            .acquire_many(permits as u32)
137            .await
138            .map_err(|_| {
139                UnexpectedSnafu {
140                    reason: "range result memory limiter is unexpectedly closed",
141                }
142                .build()
143            })
144    }
145}
146
147/// Cached SST metadata combines the parquet footer with the decoded region metadata.
148///
149/// The cached parquet footer strips the `greptime:metadata` JSON payload and stores the decoded
150/// [RegionMetadata] separately so readers can skip repeated deserialization work.
151#[derive(Debug)]
152pub(crate) struct CachedSstMeta {
153    parquet_metadata: Arc<ParquetMetaData>,
154    region_metadata: RegionMetadataRef,
155    region_metadata_weight: usize,
156    page_index_policy: PageIndexPolicy,
157}
158
159impl CachedSstMeta {
160    #[cfg(test)]
161    pub(crate) fn try_new(file_path: &str, parquet_metadata: ParquetMetaData) -> Result<Self> {
162        let page_index_policy = infer_loaded_page_index_policy(&parquet_metadata);
163        Self::try_new_with_page_index_policy(file_path, parquet_metadata, None, page_index_policy)
164    }
165
166    pub(crate) fn try_new_with_region_metadata(
167        file_path: &str,
168        parquet_metadata: ParquetMetaData,
169        region_metadata: Option<RegionMetadataRef>,
170    ) -> Result<Self> {
171        let page_index_policy = infer_loaded_page_index_policy(&parquet_metadata);
172        Self::try_new_with_page_index_policy(
173            file_path,
174            parquet_metadata,
175            region_metadata,
176            page_index_policy,
177        )
178    }
179
180    pub(crate) fn try_new_with_page_index_policy(
181        file_path: &str,
182        parquet_metadata: ParquetMetaData,
183        region_metadata: Option<RegionMetadataRef>,
184        page_index_policy: PageIndexPolicy,
185    ) -> Result<Self> {
186        let file_metadata = parquet_metadata.file_metadata();
187        let key_values = file_metadata
188            .key_value_metadata()
189            .context(InvalidParquetSnafu {
190                file: file_path,
191                reason: "missing key value meta",
192            })?;
193        let meta_value = key_values
194            .iter()
195            .find(|kv| kv.key == PARQUET_METADATA_KEY)
196            .with_context(|| InvalidParquetSnafu {
197                file: file_path,
198                reason: format!("key {} not found", PARQUET_METADATA_KEY),
199            })?;
200        let json = meta_value
201            .value
202            .as_ref()
203            .with_context(|| InvalidParquetSnafu {
204                file: file_path,
205                reason: format!("No value for key {}", PARQUET_METADATA_KEY),
206            })?;
207        let region_metadata = match region_metadata {
208            Some(region_metadata) => region_metadata,
209            None => Arc::new(
210                store_api::metadata::RegionMetadata::from_json(json)
211                    .context(InvalidMetadataSnafu)?,
212            ),
213        };
214        // Keep the previous JSON-byte floor and charge the decoded structures as well.
215        let region_metadata_weight = region_metadata.estimated_size().max(json.len());
216        let parquet_metadata = Arc::new(strip_region_metadata_from_parquet(parquet_metadata));
217
218        Ok(Self {
219            parquet_metadata,
220            region_metadata,
221            region_metadata_weight,
222            page_index_policy,
223        })
224    }
225
226    pub(crate) fn parquet_metadata(&self) -> Arc<ParquetMetaData> {
227        self.parquet_metadata.clone()
228    }
229
230    pub(crate) fn region_metadata(&self) -> RegionMetadataRef {
231        self.region_metadata.clone()
232    }
233
234    fn satisfies_page_index_policy(&self, requested: PageIndexPolicy) -> bool {
235        match requested {
236            PageIndexPolicy::Skip => true,
237            PageIndexPolicy::Optional => self.page_index_policy != PageIndexPolicy::Skip,
238            PageIndexPolicy::Required => self.page_index_policy == PageIndexPolicy::Required,
239        }
240    }
241}
242
243fn infer_loaded_page_index_policy(parquet_metadata: &ParquetMetaData) -> PageIndexPolicy {
244    if parquet_metadata.column_index().is_some() || parquet_metadata.offset_index().is_some() {
245        PageIndexPolicy::Optional
246    } else {
247        PageIndexPolicy::Skip
248    }
249}
250
251fn strip_region_metadata_from_parquet(parquet_metadata: ParquetMetaData) -> ParquetMetaData {
252    let file_metadata = parquet_metadata.file_metadata();
253    let filtered_key_values = file_metadata.key_value_metadata().and_then(|key_values| {
254        let filtered = key_values
255            .iter()
256            .filter(|kv| kv.key != PARQUET_METADATA_KEY)
257            .cloned()
258            .collect::<Vec<_>>();
259        (!filtered.is_empty()).then_some(filtered)
260    });
261    let stripped_file_metadata = FileMetaData::new(
262        file_metadata.version(),
263        file_metadata.num_rows(),
264        file_metadata.created_by().map(ToString::to_string),
265        filtered_key_values,
266        file_metadata.schema_descr_ptr(),
267        file_metadata.column_orders().cloned(),
268    );
269
270    let mut builder = parquet_metadata.into_builder();
271    let row_groups = builder.take_row_groups();
272    let column_index = builder.take_column_index();
273    let offset_index = builder.take_offset_index();
274
275    parquet::file::metadata::ParquetMetaDataBuilder::new(stripped_file_metadata)
276        .set_row_groups(row_groups)
277        .set_column_index(column_index)
278        .set_offset_index(offset_index)
279        .build()
280}
281
282fn removal_cause_str(cause: RemovalCause) -> &'static str {
283    match cause {
284        RemovalCause::Expired => "expired",
285        RemovalCause::Explicit => "explicit",
286        RemovalCause::Replaced => "replaced",
287        RemovalCause::Size => "size",
288    }
289}
290
291#[derive(Debug, Clone, PartialEq, Eq, Hash)]
292pub(crate) struct PrefilterRowSelector {
293    row_count: usize,
294    skip: bool,
295}
296
297// `parquet::arrow::arrow_reader::RowSelector` does not implement `Hash`, but
298// prefilter cache keys must hash the upstream row-selection snapshot. Keep a
299// local hashable mirror of the two fields that define selector semantics.
300// TODO(yingwen): Remove this mirror if upstream `RowSelector` implements `Hash`.
301impl From<&RowSelector> for PrefilterRowSelector {
302    fn from(selector: &RowSelector) -> Self {
303        Self {
304            row_count: selector.row_count,
305            skip: selector.skip,
306        }
307    }
308}
309
310/// Key for a cached prefilter result.
311#[derive(Debug, Clone, PartialEq, Eq, Hash)]
312pub(crate) struct PrefilterKey {
313    file_id: FileId,
314    row_group_idx: u32,
315    row_selection: Option<Arc<Vec<PrefilterRowSelector>>>,
316    schema_version: u64,
317    filter_exprs: SmallVec<[String; 1]>,
318    mem_usage: usize,
319}
320
321impl PrefilterKey {
322    pub(crate) fn row_selection_snapshot(
323        row_selection: Option<&RowSelection>,
324    ) -> Option<Arc<Vec<PrefilterRowSelector>>> {
325        row_selection.map(|selection| {
326            Arc::new(
327                selection
328                    .iter()
329                    .map(PrefilterRowSelector::from)
330                    .collect::<Vec<_>>(),
331            )
332        })
333    }
334
335    pub(crate) fn new(
336        file_id: FileId,
337        row_group_idx: u32,
338        row_selection: Option<Arc<Vec<PrefilterRowSelector>>>,
339        schema_version: u64,
340        filter_exprs: SmallVec<[String; 1]>,
341    ) -> Self {
342        let row_selection_bytes = row_selection
343            .as_ref()
344            .map(|selection| selection.len() * mem::size_of::<PrefilterRowSelector>())
345            .unwrap_or(0);
346        let spilled_expr_bytes = if filter_exprs.spilled() {
347            filter_exprs.capacity() * mem::size_of::<String>()
348        } else {
349            0
350        };
351        let expr_bytes = filter_exprs.iter().map(|s| s.capacity()).sum::<usize>();
352
353        Self {
354            file_id,
355            row_group_idx,
356            row_selection,
357            schema_version,
358            filter_exprs,
359            mem_usage: mem::size_of::<Self>()
360                + row_selection_bytes
361                + spilled_expr_bytes
362                + expr_bytes,
363        }
364    }
365
366    fn mem_usage(&self) -> usize {
367        self.mem_usage
368    }
369}
370
371type PrefilterResultCache = Cache<PrefilterKey, Arc<BooleanBuffer>>;
372
373fn new_prefilter_result_cache(capacity: u64) -> PrefilterResultCache {
374    Cache::builder()
375        .max_capacity(capacity)
376        .weigher(prefilter_result_cache_weight)
377        .eviction_listener(|k, v, cause| {
378            let size = prefilter_result_cache_weight(&k, &v);
379            CACHE_BYTES
380                .with_label_values(&[PREFILTER_RESULT_TYPE])
381                .sub(size.into());
382            CACHE_EVICTION
383                .with_label_values(&[PREFILTER_RESULT_TYPE, removal_cause_str(cause)])
384                .inc();
385        })
386        .build()
387}
388
389fn prefilter_result_cache_weight(k: &PrefilterKey, v: &Arc<BooleanBuffer>) -> u32 {
390    (k.mem_usage() + mem::size_of::<BooleanBuffer>() + v.values().len()) as u32
391}
392
393/// Cache strategies that may only enable a subset of caches.
394#[derive(Clone)]
395pub enum CacheStrategy {
396    /// Strategy for normal operations.
397    /// Doesn't disable any cache.
398    EnableAll(CacheManagerRef),
399    /// Strategy for compaction.
400    /// Disables some caches during compaction to avoid affecting queries.
401    /// Enables the write cache so that the compaction can read files cached
402    /// in the write cache and write the compacted files back to the write cache.
403    Compaction(CacheManagerRef),
404    /// Do not use any cache.
405    Disabled,
406}
407
408impl CacheStrategy {
409    /// Gets fused SST metadata with cache metrics tracking.
410    pub(crate) async fn get_sst_meta_data(
411        &self,
412        file_id: RegionFileId,
413        metrics: &mut MetadataCacheMetrics,
414        page_index_policy: PageIndexPolicy,
415    ) -> Option<Arc<CachedSstMeta>> {
416        match self {
417            CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
418                cache_manager
419                    .get_sst_meta_data(file_id, metrics, page_index_policy)
420                    .await
421            }
422            CacheStrategy::Disabled => {
423                metrics.cache_miss += 1;
424                None
425            }
426        }
427    }
428
429    /// Calls [CacheManager::get_sst_meta_data_from_mem_cache()].
430    pub(crate) fn get_sst_meta_data_from_mem_cache(
431        &self,
432        file_id: RegionFileId,
433        page_index_policy: PageIndexPolicy,
434    ) -> Option<Arc<CachedSstMeta>> {
435        match self {
436            CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
437                cache_manager.get_sst_meta_data_from_mem_cache(file_id, page_index_policy)
438            }
439            CacheStrategy::Disabled => None,
440        }
441    }
442
443    /// Calls [CacheManager::get_parquet_meta_data_from_mem_cache()].
444    pub fn get_parquet_meta_data_from_mem_cache(
445        &self,
446        file_id: RegionFileId,
447    ) -> Option<Arc<ParquetMetaData>> {
448        self.get_sst_meta_data_from_mem_cache(file_id, PageIndexPolicy::Skip)
449            .map(|metadata| metadata.parquet_metadata())
450    }
451
452    /// Calls [CacheManager::put_sst_meta_data()].
453    pub(crate) fn put_sst_meta_data(&self, file_id: RegionFileId, metadata: Arc<CachedSstMeta>) {
454        match self {
455            CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
456                cache_manager.put_sst_meta_data(file_id, metadata);
457            }
458            CacheStrategy::Disabled => {}
459        }
460    }
461
462    /// Calls [CacheManager::put_parquet_meta_data()].
463    pub fn put_parquet_meta_data(
464        &self,
465        file_id: RegionFileId,
466        metadata: Arc<ParquetMetaData>,
467        region_metadata: Option<RegionMetadataRef>,
468    ) {
469        match self {
470            CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
471                cache_manager.put_parquet_meta_data(file_id, metadata, region_metadata);
472            }
473            CacheStrategy::Disabled => {}
474        }
475    }
476
477    /// Calls [CacheManager::get_prefilter_result()].
478    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
479    pub(crate) fn get_prefilter_result(&self, key: &PrefilterKey) -> Option<Arc<BooleanBuffer>> {
480        match self {
481            CacheStrategy::EnableAll(cache_manager) => cache_manager.get_prefilter_result(key),
482            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
483        }
484    }
485
486    /// Calls [CacheManager::put_prefilter_result()].
487    /// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
488    pub(crate) fn put_prefilter_result(&self, key: PrefilterKey, result: Arc<BooleanBuffer>) {
489        if let CacheStrategy::EnableAll(cache_manager) = self {
490            cache_manager.put_prefilter_result(key, result);
491        }
492    }
493
494    /// Calls [CacheManager::remove_parquet_meta_data()].
495    pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
496        match self {
497            CacheStrategy::EnableAll(cache_manager) => {
498                cache_manager.remove_parquet_meta_data(file_id);
499            }
500            CacheStrategy::Compaction(cache_manager) => {
501                cache_manager.remove_parquet_meta_data(file_id);
502            }
503            CacheStrategy::Disabled => {}
504        }
505    }
506
507    /// Calls [CacheManager::get_repeated_vector()].
508    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
509    pub fn get_repeated_vector(
510        &self,
511        data_type: &ConcreteDataType,
512        value: &Value,
513    ) -> Option<VectorRef> {
514        match self {
515            CacheStrategy::EnableAll(cache_manager) => {
516                cache_manager.get_repeated_vector(data_type, value)
517            }
518            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
519        }
520    }
521
522    /// Calls [CacheManager::put_repeated_vector()].
523    /// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
524    pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
525        if let CacheStrategy::EnableAll(cache_manager) = self {
526            cache_manager.put_repeated_vector(value, vector);
527        }
528    }
529
530    /// Calls [CacheManager::get_pages()].
531    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
532    pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
533        match self {
534            CacheStrategy::EnableAll(cache_manager) => cache_manager.get_pages(page_key),
535            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
536        }
537    }
538
539    /// Calls [CacheManager::put_pages()].
540    /// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
541    pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
542        if let CacheStrategy::EnableAll(cache_manager) = self {
543            cache_manager.put_pages(page_key, pages);
544        }
545    }
546
547    /// Calls [CacheManager::evict_puffin_cache()].
548    pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
549        match self {
550            CacheStrategy::EnableAll(cache_manager) => {
551                cache_manager.evict_puffin_cache(file_id).await
552            }
553            CacheStrategy::Compaction(cache_manager) => {
554                cache_manager.evict_puffin_cache(file_id).await
555            }
556            CacheStrategy::Disabled => {}
557        }
558    }
559
560    /// Calls [CacheManager::get_selector_result()].
561    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
562    pub fn get_selector_result(
563        &self,
564        selector_key: &SelectorResultKey,
565    ) -> Option<Arc<SelectorResultValue>> {
566        match self {
567            CacheStrategy::EnableAll(cache_manager) => {
568                cache_manager.get_selector_result(selector_key)
569            }
570            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
571        }
572    }
573
574    /// Calls [CacheManager::put_selector_result()].
575    /// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
576    pub fn put_selector_result(
577        &self,
578        selector_key: SelectorResultKey,
579        result: Arc<SelectorResultValue>,
580    ) {
581        if let CacheStrategy::EnableAll(cache_manager) = self {
582            cache_manager.put_selector_result(selector_key, result);
583        }
584    }
585
586    /// Calls [CacheManager::get_range_result()].
587    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
588    #[allow(dead_code)]
589    pub(crate) fn get_range_result(
590        &self,
591        key: &RangeScanCacheKey,
592    ) -> Option<Arc<RangeScanCacheValue>> {
593        match self {
594            CacheStrategy::EnableAll(cache_manager) => cache_manager.get_range_result(key),
595            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
596        }
597    }
598
599    /// Calls [CacheManager::put_range_result()].
600    /// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
601    pub(crate) fn put_range_result(
602        &self,
603        key: RangeScanCacheKey,
604        result: Arc<RangeScanCacheValue>,
605    ) {
606        if let CacheStrategy::EnableAll(cache_manager) = self {
607            cache_manager.put_range_result(key, result);
608        }
609    }
610
611    /// Returns true if the range result cache is enabled.
612    pub(crate) fn has_range_result_cache(&self) -> bool {
613        match self {
614            CacheStrategy::EnableAll(cache_manager) => cache_manager.has_range_result_cache(),
615            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => false,
616        }
617    }
618
619    pub(crate) fn range_result_memory_limiter(&self) -> Option<&Arc<RangeResultMemoryLimiter>> {
620        match self {
621            CacheStrategy::EnableAll(cache_manager) => {
622                Some(cache_manager.range_result_memory_limiter())
623            }
624            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
625        }
626    }
627
628    pub(crate) fn range_result_cache_size(&self) -> Option<usize> {
629        match self {
630            CacheStrategy::EnableAll(cache_manager) => {
631                Some(cache_manager.range_result_cache_size())
632            }
633            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
634        }
635    }
636
637    /// Calls [CacheManager::write_cache()].
638    /// It returns None if the strategy is [CacheStrategy::Disabled].
639    pub fn write_cache(&self) -> Option<&WriteCacheRef> {
640        match self {
641            CacheStrategy::EnableAll(cache_manager) => cache_manager.write_cache(),
642            CacheStrategy::Compaction(cache_manager) => cache_manager.write_cache(),
643            CacheStrategy::Disabled => None,
644        }
645    }
646
647    /// Calls [CacheManager::index_cache()].
648    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
649    pub fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
650        match self {
651            CacheStrategy::EnableAll(cache_manager) => cache_manager.inverted_index_cache(),
652            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
653        }
654    }
655
656    /// Calls [CacheManager::bloom_filter_index_cache()].
657    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
658    pub fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
659        match self {
660            CacheStrategy::EnableAll(cache_manager) => cache_manager.bloom_filter_index_cache(),
661            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
662        }
663    }
664
665    /// Calls [CacheManager::vector_index_cache()].
666    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
667    #[cfg(feature = "vector_index")]
668    pub fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
669        match self {
670            CacheStrategy::EnableAll(cache_manager) => cache_manager.vector_index_cache(),
671            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
672        }
673    }
674
675    /// Calls [CacheManager::puffin_metadata_cache()].
676    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
677    pub fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
678        match self {
679            CacheStrategy::EnableAll(cache_manager) => cache_manager.puffin_metadata_cache(),
680            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
681        }
682    }
683
684    /// Calls [CacheManager::index_result_cache()].
685    /// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
686    pub fn index_result_cache(&self) -> Option<&IndexResultCache> {
687        match self {
688            CacheStrategy::EnableAll(cache_manager) => cache_manager.index_result_cache(),
689            CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
690        }
691    }
692
693    /// Triggers download if the strategy is [CacheStrategy::EnableAll] and write cache is available.
694    pub fn maybe_download_background(
695        &self,
696        index_key: IndexKey,
697        remote_path: String,
698        remote_store: ObjectStore,
699        file_size: u64,
700    ) {
701        if let CacheStrategy::EnableAll(cache_manager) = self
702            && let Some(write_cache) = cache_manager.write_cache()
703        {
704            write_cache.file_cache().maybe_download_background(
705                index_key,
706                remote_path,
707                remote_store,
708                file_size,
709            );
710        }
711    }
712}
713
714/// Manages cached data for the engine.
715///
716/// All caches are disabled by default.
717#[derive(Default)]
718pub struct CacheManager {
719    /// Cache for SST metadata.
720    sst_meta_cache: Option<SstMetaCache>,
721    /// Cache for vectors.
722    vector_cache: Option<VectorCache>,
723    /// Cache for SST pages.
724    page_cache: Option<PageCache>,
725    /// A Cache for writing files to object stores.
726    write_cache: Option<WriteCacheRef>,
727    /// Cache for inverted index.
728    inverted_index_cache: Option<InvertedIndexCacheRef>,
729    /// Cache for bloom filter index.
730    bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
731    /// Cache for vector index.
732    #[cfg(feature = "vector_index")]
733    vector_index_cache: Option<VectorIndexCacheRef>,
734    /// Puffin metadata cache.
735    puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
736    /// Cache for time series selectors.
737    selector_result_cache: Option<SelectorResultCache>,
738    /// Cache for range scan outputs in flat format.
739    range_result_cache: Option<RangeResultCache>,
740    /// Configured capacity for range scan outputs in flat format.
741    range_result_cache_size: u64,
742    /// Shared memory limiter for async range-result cache tasks.
743    range_result_memory_limiter: Arc<RangeResultMemoryLimiter>,
744    /// Cache for index result.
745    index_result_cache: Option<IndexResultCache>,
746    /// Cache for prefilter result.
747    prefilter_result_cache: Option<PrefilterResultCache>,
748}
749
750pub type CacheManagerRef = Arc<CacheManager>;
751
752impl CacheManager {
753    /// Returns a builder to build the cache.
754    pub fn builder() -> CacheManagerBuilder {
755        CacheManagerBuilder::default()
756    }
757
758    /// Gets fused SST metadata with metrics tracking.
759    /// Tries in-memory cache first, then file cache, updating metrics accordingly.
760    pub(crate) async fn get_sst_meta_data(
761        &self,
762        file_id: RegionFileId,
763        metrics: &mut MetadataCacheMetrics,
764        page_index_policy: PageIndexPolicy,
765    ) -> Option<Arc<CachedSstMeta>> {
766        if let Some(metadata) = self.get_sst_meta_data_from_mem_cache(file_id, page_index_policy) {
767            metrics.mem_cache_hit += 1;
768            return Some(metadata);
769        }
770
771        let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet);
772        if let Some(write_cache) = &self.write_cache
773            && let Some(metadata) = write_cache
774                .file_cache()
775                .get_sst_meta_data(key, metrics, page_index_policy)
776                .await
777        {
778            metrics.file_cache_hit += 1;
779            self.put_sst_meta_data(file_id, metadata.clone());
780            return Some(metadata);
781        }
782
783        metrics.cache_miss += 1;
784        None
785    }
786
787    /// Gets cached [ParquetMetaData] with metrics tracking.
788    /// Tries in-memory cache first, then file cache, updating metrics accordingly.
789    pub(crate) async fn get_parquet_meta_data(
790        &self,
791        file_id: RegionFileId,
792        metrics: &mut MetadataCacheMetrics,
793        page_index_policy: PageIndexPolicy,
794    ) -> Option<Arc<ParquetMetaData>> {
795        self.get_sst_meta_data(file_id, metrics, page_index_policy)
796            .await
797            .map(|metadata| metadata.parquet_metadata())
798    }
799
800    /// Gets cached fused SST metadata from in-memory cache.
801    /// This method does not perform I/O.
802    pub(crate) fn get_sst_meta_data_from_mem_cache(
803        &self,
804        file_id: RegionFileId,
805        page_index_policy: PageIndexPolicy,
806    ) -> Option<Arc<CachedSstMeta>> {
807        self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
808            let value = sst_meta_cache.get(&SstMetaKey(file_id.region_id(), file_id.file_id()));
809            let value =
810                value.filter(|metadata| metadata.satisfies_page_index_policy(page_index_policy));
811            update_hit_miss(value, SST_META_TYPE)
812        })
813    }
814
815    /// Gets cached [ParquetMetaData] from in-memory cache.
816    /// This method does not perform I/O.
817    pub fn get_parquet_meta_data_from_mem_cache(
818        &self,
819        file_id: RegionFileId,
820    ) -> Option<Arc<ParquetMetaData>> {
821        self.get_sst_meta_data_from_mem_cache(file_id, PageIndexPolicy::Skip)
822            .map(|metadata| metadata.parquet_metadata())
823    }
824
825    /// Puts fused SST metadata into the cache.
826    pub(crate) fn put_sst_meta_data(&self, file_id: RegionFileId, metadata: Arc<CachedSstMeta>) {
827        if let Some(cache) = &self.sst_meta_cache {
828            let key = SstMetaKey(file_id.region_id(), file_id.file_id());
829            CACHE_BYTES
830                .with_label_values(&[SST_META_TYPE])
831                .add(meta_cache_weight(&key, &metadata).into());
832            cache.insert(key, metadata);
833        }
834    }
835
836    /// Puts [ParquetMetaData] into the cache.
837    pub fn put_parquet_meta_data(
838        &self,
839        file_id: RegionFileId,
840        metadata: Arc<ParquetMetaData>,
841        region_metadata: Option<RegionMetadataRef>,
842    ) {
843        if self.sst_meta_cache.is_some() {
844            let file_path = format!(
845                "region_id={}, file_id={}",
846                file_id.region_id(),
847                file_id.file_id()
848            );
849            match CachedSstMeta::try_new_with_region_metadata(
850                &file_path,
851                Arc::unwrap_or_clone(metadata),
852                region_metadata,
853            ) {
854                Ok(metadata) => self.put_sst_meta_data(file_id, Arc::new(metadata)),
855                Err(err) => warn!(
856                    err; "Failed to decode region metadata while caching parquet metadata, region_id: {}, file_id: {}",
857                    file_id.region_id(),
858                    file_id.file_id()
859                ),
860            }
861        }
862    }
863
864    /// Removes [ParquetMetaData] from the cache.
865    pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
866        if let Some(cache) = &self.sst_meta_cache {
867            cache.remove(&SstMetaKey(file_id.region_id(), file_id.file_id()));
868        }
869    }
870
871    /// Returns the total weighted size of the in-memory SST meta cache.
872    pub(crate) fn sst_meta_cache_weighted_size(&self) -> u64 {
873        self.sst_meta_cache
874            .as_ref()
875            .map(|cache| cache.weighted_size())
876            .unwrap_or(0)
877    }
878
    /// Returns true if the in-memory SST meta cache is enabled.
    ///
    /// The cache is only created by [CacheManagerBuilder::build] when its configured size
    /// is non-zero.
    pub(crate) fn sst_meta_cache_enabled(&self) -> bool {
        self.sst_meta_cache.is_some()
    }
883
884    /// Gets a vector with repeated value for specific `key`.
885    pub fn get_repeated_vector(
886        &self,
887        data_type: &ConcreteDataType,
888        value: &Value,
889    ) -> Option<VectorRef> {
890        self.vector_cache.as_ref().and_then(|vector_cache| {
891            let value = vector_cache.get(&(data_type.clone(), value.clone()));
892            update_hit_miss(value, VECTOR_TYPE)
893        })
894    }
895
896    /// Puts a vector with repeated value into the cache.
897    pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
898        if let Some(cache) = &self.vector_cache {
899            let key = (vector.data_type(), value);
900            CACHE_BYTES
901                .with_label_values(&[VECTOR_TYPE])
902                .add(vector_cache_weight(&key, &vector).into());
903            cache.insert(key, vector);
904        }
905    }
906
907    /// Gets pages for the row group.
908    pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
909        self.page_cache.as_ref().and_then(|page_cache| {
910            let value = page_cache.get(page_key);
911            update_hit_miss(value, PAGE_TYPE)
912        })
913    }
914
915    /// Puts pages of the row group into the cache.
916    pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
917        if let Some(cache) = &self.page_cache {
918            CACHE_BYTES
919                .with_label_values(&[PAGE_TYPE])
920                .add(page_cache_weight(&page_key, &pages).into());
921            cache.insert(page_key, pages);
922        }
923    }
924
925    /// Evicts every puffin-related cache entry for the given file.
926    pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
927        if let Some(cache) = &self.bloom_filter_index_cache {
928            cache.invalidate_file(file_id.file_id());
929        }
930
931        if let Some(cache) = &self.inverted_index_cache {
932            cache.invalidate_file(file_id.file_id());
933        }
934
935        if let Some(cache) = &self.index_result_cache {
936            cache.invalidate_file(file_id.file_id());
937        }
938
939        #[cfg(feature = "vector_index")]
940        if let Some(cache) = &self.vector_index_cache {
941            cache.invalidate_file(file_id.file_id());
942        }
943
944        if let Some(cache) = &self.puffin_metadata_cache {
945            cache.remove(&file_id.to_string());
946        }
947
948        if let Some(write_cache) = &self.write_cache {
949            write_cache
950                .remove(IndexKey::new(
951                    file_id.region_id(),
952                    file_id.file_id(),
953                    FileType::Puffin(file_id.version),
954                ))
955                .await;
956        }
957    }
958
959    /// Gets result of for the selector.
960    pub fn get_selector_result(
961        &self,
962        selector_key: &SelectorResultKey,
963    ) -> Option<Arc<SelectorResultValue>> {
964        self.selector_result_cache
965            .as_ref()
966            .and_then(|selector_result_cache| selector_result_cache.get(selector_key))
967    }
968
969    /// Puts result of the selector into the cache.
970    pub fn put_selector_result(
971        &self,
972        selector_key: SelectorResultKey,
973        result: Arc<SelectorResultValue>,
974    ) {
975        if let Some(cache) = &self.selector_result_cache {
976            CACHE_BYTES
977                .with_label_values(&[SELECTOR_RESULT_TYPE])
978                .add(selector_result_cache_weight(&selector_key, &result).into());
979            cache.insert(selector_key, result);
980        }
981    }
982
983    /// Gets cached result for range scan.
984    #[allow(dead_code)]
985    pub(crate) fn get_range_result(
986        &self,
987        key: &RangeScanCacheKey,
988    ) -> Option<Arc<RangeScanCacheValue>> {
989        self.range_result_cache
990            .as_ref()
991            .and_then(|cache| update_hit_miss(cache.get(key), RANGE_RESULT_TYPE))
992    }
993
994    /// Puts range scan result into cache.
995    pub(crate) fn put_range_result(
996        &self,
997        key: RangeScanCacheKey,
998        result: Arc<RangeScanCacheValue>,
999    ) {
1000        if let Some(cache) = &self.range_result_cache {
1001            CACHE_BYTES
1002                .with_label_values(&[RANGE_RESULT_TYPE])
1003                .add(range_result_cache_weight(&key, &result).into());
1004            cache.insert(key, result);
1005        }
1006    }
1007
    /// Returns true if the range result cache is enabled.
    ///
    /// The cache is only created by [CacheManagerBuilder::build] when its configured size
    /// is non-zero.
    pub(crate) fn has_range_result_cache(&self) -> bool {
        self.range_result_cache.is_some()
    }
1012
    /// Returns the memory limiter associated with the range result cache.
    pub(crate) fn range_result_memory_limiter(&self) -> &Arc<RangeResultMemoryLimiter> {
        &self.range_result_memory_limiter
    }
1016
    /// Returns the configured range result cache size, converted to `usize`.
    pub(crate) fn range_result_cache_size(&self) -> usize {
        self.range_result_cache_size as usize
    }
1020
    /// Gets the write cache, or `None` if no write cache is configured.
    pub(crate) fn write_cache(&self) -> Option<&WriteCacheRef> {
        self.write_cache.as_ref()
    }
1025
    /// Gets the inverted index cache, if present.
    pub(crate) fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
        self.inverted_index_cache.as_ref()
    }
1029
    /// Gets the bloom filter index cache, if present.
    pub(crate) fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
        self.bloom_filter_index_cache.as_ref()
    }
1033
    /// Gets the vector index cache, if present.
    ///
    /// Only available with the `vector_index` feature.
    #[cfg(feature = "vector_index")]
    pub(crate) fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
        self.vector_index_cache.as_ref()
    }
1038
    /// Gets the puffin metadata cache, if present.
    pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
        self.puffin_metadata_cache.as_ref()
    }
1042
    /// Gets the index result cache, if enabled.
    pub(crate) fn index_result_cache(&self) -> Option<&IndexResultCache> {
        self.index_result_cache.as_ref()
    }
1046
1047    pub(crate) fn get_prefilter_result(&self, key: &PrefilterKey) -> Option<Arc<BooleanBuffer>> {
1048        self.prefilter_result_cache
1049            .as_ref()
1050            .and_then(|cache| update_hit_miss(cache.get(key), PREFILTER_RESULT_TYPE))
1051    }
1052
1053    pub(crate) fn put_prefilter_result(&self, key: PrefilterKey, result: Arc<BooleanBuffer>) {
1054        if let Some(cache) = &self.prefilter_result_cache {
1055            CACHE_BYTES
1056                .with_label_values(&[PREFILTER_RESULT_TYPE])
1057                .add(prefilter_result_cache_weight(&key, &result).into());
1058            cache.insert(key, result);
1059        }
1060    }
1061}
1062
1063/// Increases selector cache miss metrics.
1064pub fn selector_result_cache_miss() {
1065    CACHE_MISS.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
1066}
1067
1068/// Increases selector cache hit metrics.
1069pub fn selector_result_cache_hit() {
1070    CACHE_HIT.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
1071}
1072
/// Builder to construct a [CacheManager].
///
/// All sizes default to 0 and the write cache defaults to `None`.
#[derive(Default)]
pub struct CacheManagerBuilder {
    /// Capacity of the SST meta cache; 0 disables the cache.
    sst_meta_cache_size: u64,
    /// Capacity of the repeated vector cache; 0 disables the cache.
    vector_cache_size: u64,
    /// Capacity of the page cache; 0 disables the cache.
    page_cache_size: u64,
    /// Cache size for index metadata (shared by the inverted and bloom filter index caches).
    index_metadata_size: u64,
    /// Cache size for index content (shared by the inverted and bloom filter index caches).
    index_content_size: u64,
    /// Page size for index content (shared by the inverted and bloom filter index caches).
    index_content_page_size: u64,
    /// Capacity of the index result cache; 0 disables the cache.
    index_result_cache_size: u64,
    /// Capacity of the prefilter result cache; 0 disables the cache.
    prefilter_result_cache_size: u64,
    /// Cache size for puffin metadata.
    puffin_metadata_size: u64,
    /// Optional write cache handed over to the [CacheManager].
    write_cache: Option<WriteCacheRef>,
    /// Capacity of the selector result cache; 0 disables the cache.
    selector_result_cache_size: u64,
    /// Capacity of the range result cache; 0 disables the cache.
    range_result_cache_size: u64,
}
1089
1090impl CacheManagerBuilder {
1091    /// Sets meta cache size.
1092    pub fn sst_meta_cache_size(mut self, bytes: u64) -> Self {
1093        self.sst_meta_cache_size = bytes;
1094        self
1095    }
1096
1097    /// Sets vector cache size.
1098    pub fn vector_cache_size(mut self, bytes: u64) -> Self {
1099        self.vector_cache_size = bytes;
1100        self
1101    }
1102
1103    /// Sets page cache size.
1104    pub fn page_cache_size(mut self, bytes: u64) -> Self {
1105        self.page_cache_size = bytes;
1106        self
1107    }
1108
1109    /// Sets write cache.
1110    pub fn write_cache(mut self, cache: Option<WriteCacheRef>) -> Self {
1111        self.write_cache = cache;
1112        self
1113    }
1114
1115    /// Sets cache size for index metadata.
1116    pub fn index_metadata_size(mut self, bytes: u64) -> Self {
1117        self.index_metadata_size = bytes;
1118        self
1119    }
1120
1121    /// Sets cache size for index content.
1122    pub fn index_content_size(mut self, bytes: u64) -> Self {
1123        self.index_content_size = bytes;
1124        self
1125    }
1126
1127    /// Sets page size for index content.
1128    pub fn index_content_page_size(mut self, bytes: u64) -> Self {
1129        self.index_content_page_size = bytes;
1130        self
1131    }
1132
1133    /// Sets cache size for index result.
1134    pub fn index_result_cache_size(mut self, bytes: u64) -> Self {
1135        self.index_result_cache_size = bytes;
1136        self
1137    }
1138
1139    /// Sets cache size for prefilter result.
1140    pub fn prefilter_result_cache_size(mut self, bytes: u64) -> Self {
1141        self.prefilter_result_cache_size = bytes;
1142        self
1143    }
1144
1145    /// Sets cache size for puffin metadata.
1146    pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
1147        self.puffin_metadata_size = bytes;
1148        self
1149    }
1150
1151    /// Sets selector result cache size.
1152    pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
1153        self.selector_result_cache_size = bytes;
1154        self
1155    }
1156
1157    /// Sets range result cache size.
1158    pub fn range_result_cache_size(mut self, bytes: u64) -> Self {
1159        self.range_result_cache_size = bytes;
1160        self
1161    }
1162
1163    /// Builds the [CacheManager].
1164    pub fn build(self) -> CacheManager {
1165        let sst_meta_cache = (self.sst_meta_cache_size != 0).then(|| {
1166            Cache::builder()
1167                .max_capacity(self.sst_meta_cache_size)
1168                .weigher(meta_cache_weight)
1169                .eviction_listener(|k, v, cause| {
1170                    let size = meta_cache_weight(&k, &v);
1171                    CACHE_BYTES
1172                        .with_label_values(&[SST_META_TYPE])
1173                        .sub(size.into());
1174                    CACHE_EVICTION
1175                        .with_label_values(&[SST_META_TYPE, removal_cause_str(cause)])
1176                        .inc();
1177                })
1178                .build()
1179        });
1180        let vector_cache = (self.vector_cache_size != 0).then(|| {
1181            Cache::builder()
1182                .max_capacity(self.vector_cache_size)
1183                .weigher(vector_cache_weight)
1184                .eviction_listener(|k, v, cause| {
1185                    let size = vector_cache_weight(&k, &v);
1186                    CACHE_BYTES
1187                        .with_label_values(&[VECTOR_TYPE])
1188                        .sub(size.into());
1189                    CACHE_EVICTION
1190                        .with_label_values(&[VECTOR_TYPE, removal_cause_str(cause)])
1191                        .inc();
1192                })
1193                .build()
1194        });
1195        let page_cache = (self.page_cache_size != 0).then(|| {
1196            Cache::builder()
1197                .max_capacity(self.page_cache_size)
1198                .weigher(page_cache_weight)
1199                .eviction_listener(|k, v, cause| {
1200                    let size = page_cache_weight(&k, &v);
1201                    CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
1202                    CACHE_EVICTION
1203                        .with_label_values(&[PAGE_TYPE, removal_cause_str(cause)])
1204                        .inc();
1205                })
1206                .build()
1207        });
1208        let inverted_index_cache = InvertedIndexCache::new(
1209            self.index_metadata_size,
1210            self.index_content_size,
1211            self.index_content_page_size,
1212        );
1213        // TODO(ruihang): check if it's ok to reuse the same param with inverted index
1214        let bloom_filter_index_cache = BloomFilterIndexCache::new(
1215            self.index_metadata_size,
1216            self.index_content_size,
1217            self.index_content_page_size,
1218        );
1219        #[cfg(feature = "vector_index")]
1220        let vector_index_cache = (self.index_content_size != 0)
1221            .then(|| Arc::new(VectorIndexCache::new(self.index_content_size)));
1222        let index_result_cache = (self.index_result_cache_size != 0)
1223            .then(|| IndexResultCache::new(self.index_result_cache_size));
1224        let prefilter_result_cache = (self.prefilter_result_cache_size != 0)
1225            .then(|| new_prefilter_result_cache(self.prefilter_result_cache_size));
1226        let puffin_metadata_cache =
1227            PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
1228        let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
1229            Cache::builder()
1230                .max_capacity(self.selector_result_cache_size)
1231                .weigher(selector_result_cache_weight)
1232                .eviction_listener(|k, v, cause| {
1233                    let size = selector_result_cache_weight(&k, &v);
1234                    CACHE_BYTES
1235                        .with_label_values(&[SELECTOR_RESULT_TYPE])
1236                        .sub(size.into());
1237                    CACHE_EVICTION
1238                        .with_label_values(&[SELECTOR_RESULT_TYPE, removal_cause_str(cause)])
1239                        .inc();
1240                })
1241                .build()
1242        });
1243        let range_result_cache = (self.range_result_cache_size != 0).then(|| {
1244            Cache::builder()
1245                .max_capacity(self.range_result_cache_size)
1246                .weigher(range_result_cache_weight)
1247                .eviction_listener(move |k, v, cause| {
1248                    let size = range_result_cache_weight(&k, &v);
1249                    CACHE_BYTES
1250                        .with_label_values(&[RANGE_RESULT_TYPE])
1251                        .sub(size.into());
1252                    CACHE_EVICTION
1253                        .with_label_values(&[RANGE_RESULT_TYPE, removal_cause_str(cause)])
1254                        .inc();
1255                })
1256                .build()
1257        });
1258        CacheManager {
1259            sst_meta_cache,
1260            vector_cache,
1261            page_cache,
1262            write_cache: self.write_cache,
1263            inverted_index_cache: Some(Arc::new(inverted_index_cache)),
1264            bloom_filter_index_cache: Some(Arc::new(bloom_filter_index_cache)),
1265            #[cfg(feature = "vector_index")]
1266            vector_index_cache,
1267            puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
1268            selector_result_cache,
1269            range_result_cache,
1270            range_result_cache_size: self.range_result_cache_size,
1271            range_result_memory_limiter: Arc::new(RangeResultMemoryLimiter::new(
1272                self.range_result_cache_size as usize,
1273                RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize,
1274            )),
1275            index_result_cache,
1276            prefilter_result_cache,
1277        }
1278    }
1279}
1280
1281fn meta_cache_weight(k: &SstMetaKey, v: &Arc<CachedSstMeta>) -> u32 {
1282    // We ignore the size of `Arc`.
1283    (k.estimated_size() + parquet_meta_size(&v.parquet_metadata) + v.region_metadata_weight) as u32
1284}
1285
1286fn vector_cache_weight(_k: &(ConcreteDataType, Value), v: &VectorRef) -> u32 {
1287    // We ignore the heap size of `Value`.
1288    (mem::size_of::<ConcreteDataType>() + mem::size_of::<Value>() + v.memory_size()) as u32
1289}
1290
1291fn page_cache_weight(k: &PageKey, v: &Arc<PageValue>) -> u32 {
1292    (k.estimated_size() + v.estimated_size()) as u32
1293}
1294
1295fn selector_result_cache_weight(k: &SelectorResultKey, v: &Arc<SelectorResultValue>) -> u32 {
1296    (mem::size_of_val(k) + v.estimated_size()) as u32
1297}
1298
1299fn range_result_cache_weight(k: &RangeScanCacheKey, v: &Arc<RangeScanCacheValue>) -> u32 {
1300    (k.estimated_size() + v.estimated_size()) as u32
1301}
1302
1303/// Updates cache hit/miss metrics.
1304fn update_hit_miss<T>(value: Option<T>, cache_type: &str) -> Option<T> {
1305    if value.is_some() {
1306        CACHE_HIT.with_label_values(&[cache_type]).inc();
1307    } else {
1308        CACHE_MISS.with_label_values(&[cache_type]).inc();
1309    }
1310    value
1311}
1312
/// Cache key (region id, file id) for SST meta.
///
/// Field 0 is the region id and field 1 is the file id.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SstMetaKey(RegionId, FileId);
1316
1317impl SstMetaKey {
1318    /// Returns memory used by the key (estimated).
1319    fn estimated_size(&self) -> usize {
1320        mem::size_of::<Self>()
1321    }
1322}
1323
/// Path to column pages in the SST file.
///
/// Identifies one column of one row group in one SST file of a region.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ColumnPagePath {
    /// Region id of the SST file to cache.
    region_id: RegionId,
    /// Id of the SST file to cache.
    file_id: FileId,
    /// Index of the row group.
    row_group_idx: usize,
    /// Index of the column in the row group.
    column_idx: usize,
}
1336
/// Cache key to pages in a row group (after projection).
///
/// Different projections will have different cache keys.
/// We cache all ranges together because they may refer to the same `Bytes`.
///
/// Two keys are equal only if the file id, the row group index and the full list of
/// byte ranges are all equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PageKey {
    /// Id of the SST file to cache.
    file_id: FileId,
    /// Index of the row group.
    row_group_idx: usize,
    /// Byte ranges of the pages to cache.
    ranges: Vec<Range<u64>>,
}
1350
1351impl PageKey {
1352    /// Creates a key for a list of pages.
1353    pub fn new(file_id: FileId, row_group_idx: usize, ranges: Vec<Range<u64>>) -> PageKey {
1354        PageKey {
1355            file_id,
1356            row_group_idx,
1357            ranges,
1358        }
1359    }
1360
1361    /// Returns memory used by the key (estimated).
1362    fn estimated_size(&self) -> usize {
1363        mem::size_of::<Self>() + mem::size_of_val(self.ranges.as_slice())
1364    }
1365}
1366
/// Cached row group pages for a column.
///
/// The default value holds no pages and has a `page_size` of 0.
// We don't use enum here to make it easier to mock and use the struct.
#[derive(Default)]
pub struct PageValue {
    /// Compressed pages in the row group.
    pub compressed: Vec<Bytes>,
    /// Total size of the pages (may be larger than sum of compressed bytes due to gaps).
    pub page_size: u64,
}
1376
1377impl PageValue {
1378    /// Creates a new value from a range of compressed pages.
1379    pub fn new(bytes: Vec<Bytes>, page_size: u64) -> PageValue {
1380        PageValue {
1381            compressed: bytes,
1382            page_size,
1383        }
1384    }
1385
1386    /// Returns memory used by the value (estimated).
1387    fn estimated_size(&self) -> usize {
1388        mem::size_of::<Self>()
1389            + self.page_size as usize
1390            + self.compressed.iter().map(mem::size_of_val).sum::<usize>()
1391    }
1392}
1393
/// Cache key for time series row selector result.
///
/// Identifies the result of one selector applied to one row group of one SST file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SelectorResultKey {
    /// Id of the SST file.
    pub file_id: FileId,
    /// Index of the row group.
    pub row_group_idx: usize,
    /// Time series row selector.
    pub selector: TimeSeriesRowSelector,
}
1404
/// Result stored in the selector result cache.
///
/// The variant records which batch format the cached rows are stored in.
pub enum SelectorResult {
    /// Batches in the primary key format.
    PrimaryKey(Vec<Batch>),
    /// Record batches in the flat format.
    Flat(Vec<RecordBatch>),
}
1412
/// Cached result for time series row selector.
///
/// Only `result` contributes to the estimated size of the value.
pub struct SelectorResultValue {
    /// Batches of rows selected by the selector.
    pub result: SelectorResult,
    /// The read columns of rows.
    pub read_cols: ParquetReadColumns,
}
1420
1421impl SelectorResultValue {
1422    /// Creates a new selector result value with primary key format.
1423    pub fn new(result: Vec<Batch>, read_cols: ParquetReadColumns) -> SelectorResultValue {
1424        SelectorResultValue {
1425            result: SelectorResult::PrimaryKey(result),
1426            read_cols,
1427        }
1428    }
1429
1430    /// Creates a new selector result value with flat format.
1431    pub fn new_flat(
1432        result: Vec<RecordBatch>,
1433        read_cols: ParquetReadColumns,
1434    ) -> SelectorResultValue {
1435        SelectorResultValue {
1436            result: SelectorResult::Flat(result),
1437            read_cols,
1438        }
1439    }
1440
1441    /// Returns memory used by the value (estimated).
1442    fn estimated_size(&self) -> usize {
1443        match &self.result {
1444            SelectorResult::PrimaryKey(batches) => {
1445                batches.iter().map(|batch| batch.memory_size()).sum()
1446            }
1447            SelectorResult::Flat(batches) => batches.iter().map(record_batch_estimated_size).sum(),
1448        }
1449    }
1450}
1451
/// Maps (region id, file id) to fused SST metadata.
type SstMetaCache = Cache<SstMetaKey, Arc<CachedSstMeta>>;
/// Maps a (data type, [Value]) pair to a vector that holds this value repeatedly.
///
/// e.g. `"hello" => ["hello", "hello", "hello"]`
type VectorCache = Cache<(ConcreteDataType, Value), VectorRef>;
/// Maps a [PageKey] (file id, row group index, page byte ranges) to [PageValue].
type PageCache = Cache<PageKey, Arc<PageValue>>;
/// Maps (file id, row group id, time series row selector) to [SelectorResultValue].
type SelectorResultCache = Cache<SelectorResultKey, Arc<SelectorResultValue>>;
/// Maps partition-range scan key to cached flat batches.
type RangeResultCache = Cache<RangeScanCacheKey, Arc<RangeScanCacheValue>>;
1464
1465#[cfg(test)]
1466mod tests {
1467    use std::sync::Arc;
1468
1469    use api::v1::SemanticType;
1470    use api::v1::index::{BloomFilterMeta, InvertedIndexMetas};
1471    use datatypes::schema::ColumnSchema;
1472    use datatypes::vectors::Int64Vector;
1473    use puffin::file_metadata::FileMetadata;
1474    use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
1475    use store_api::storage::ColumnId;
1476
1477    use super::*;
1478    use crate::cache::index::bloom_filter_index::Tag;
1479    use crate::cache::index::result_cache::PredicateKey;
1480    use crate::cache::test_util::{
1481        parquet_meta, sst_parquet_meta, sst_parquet_meta_with_region_metadata,
1482    };
1483    use crate::read::range_cache::{
1484        RangeScanCacheKey, RangeScanCacheValue, ScanRequestFingerprintBuilder,
1485    };
1486    use crate::read::read_columns::ReadColumns;
1487    use crate::sst::parquet::row_selection::RowGroupSelection;
1488
1489    #[tokio::test]
1490    async fn test_disable_cache() {
1491        let cache = CacheManager::default();
1492        assert!(cache.sst_meta_cache.is_none());
1493        assert!(cache.vector_cache.is_none());
1494        assert!(cache.page_cache.is_none());
1495
1496        let region_id = RegionId::new(1, 1);
1497        let file_id = RegionFileId::new(region_id, FileId::random());
1498        let metadata = parquet_meta();
1499        let mut metrics = MetadataCacheMetrics::default();
1500        cache.put_parquet_meta_data(file_id, metadata, None);
1501        assert!(
1502            cache
1503                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
1504                .await
1505                .is_none()
1506        );
1507
1508        let value = Value::Int64(10);
1509        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
1510        cache.put_repeated_vector(value.clone(), vector.clone());
1511        assert!(
1512            cache
1513                .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
1514                .is_none()
1515        );
1516
1517        let key = PageKey::new(file_id.file_id(), 1, vec![Range { start: 0, end: 5 }]);
1518        let pages = Arc::new(PageValue::default());
1519        cache.put_pages(key.clone(), pages);
1520        assert!(cache.get_pages(&key).is_none());
1521
1522        assert!(cache.write_cache().is_none());
1523    }
1524
1525    #[tokio::test]
1526    async fn test_parquet_meta_cache() {
1527        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
1528        let mut metrics = MetadataCacheMetrics::default();
1529        let region_id = RegionId::new(1, 1);
1530        let file_id = RegionFileId::new(region_id, FileId::random());
1531        assert!(
1532            cache
1533                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
1534                .await
1535                .is_none()
1536        );
1537        let (metadata, region_metadata) = sst_parquet_meta();
1538        cache.put_parquet_meta_data(file_id, metadata, None);
1539        let cached = cache
1540            .get_sst_meta_data(file_id, &mut metrics, Default::default())
1541            .await
1542            .unwrap();
1543        assert_eq!(region_metadata, cached.region_metadata());
1544        assert!(
1545            cached
1546                .parquet_metadata()
1547                .file_metadata()
1548                .key_value_metadata()
1549                .is_none_or(|key_values| {
1550                    key_values
1551                        .iter()
1552                        .all(|key_value| key_value.key != PARQUET_METADATA_KEY)
1553                })
1554        );
1555        cache.remove_parquet_meta_data(file_id);
1556        assert!(
1557            cache
1558                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
1559                .await
1560                .is_none()
1561        );
1562    }
1563
1564    #[tokio::test]
1565    async fn test_parquet_meta_cache_with_provided_region_metadata() {
1566        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
1567        let mut metrics = MetadataCacheMetrics::default();
1568        let region_id = RegionId::new(1, 1);
1569        let file_id = RegionFileId::new(region_id, FileId::random());
1570        let (metadata, region_metadata) = sst_parquet_meta();
1571
1572        cache.put_parquet_meta_data(file_id, metadata, Some(region_metadata.clone()));
1573
1574        let cached = cache
1575            .get_sst_meta_data(file_id, &mut metrics, Default::default())
1576            .await
1577            .unwrap();
1578        assert!(Arc::ptr_eq(&region_metadata, &cached.region_metadata()));
1579    }
1580
1581    #[tokio::test]
1582    async fn test_parquet_meta_cache_respects_page_index_policy() {
1583        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
1584        let region_id = RegionId::new(1, 1);
1585        let file_id = RegionFileId::new(region_id, FileId::random());
1586        let (metadata, _) = sst_parquet_meta();
1587
1588        let skip_metadata = Arc::new(
1589            CachedSstMeta::try_new_with_page_index_policy(
1590                "test.parquet",
1591                Arc::unwrap_or_clone(metadata.clone()),
1592                None,
1593                PageIndexPolicy::Skip,
1594            )
1595            .unwrap(),
1596        );
1597        cache.put_sst_meta_data(file_id, skip_metadata);
1598
1599        let mut metrics = MetadataCacheMetrics::default();
1600        assert!(
1601            cache
1602                .get_sst_meta_data(file_id, &mut metrics, PageIndexPolicy::Optional)
1603                .await
1604                .is_none()
1605        );
1606        assert_eq!(1, metrics.cache_miss);
1607
1608        let optional_metadata = Arc::new(
1609            CachedSstMeta::try_new_with_page_index_policy(
1610                "test.parquet",
1611                Arc::unwrap_or_clone(metadata),
1612                None,
1613                PageIndexPolicy::Optional,
1614            )
1615            .unwrap(),
1616        );
1617        cache.put_sst_meta_data(file_id, optional_metadata);
1618
1619        let mut metrics = MetadataCacheMetrics::default();
1620        assert!(
1621            cache
1622                .get_sst_meta_data(file_id, &mut metrics, PageIndexPolicy::Optional)
1623                .await
1624                .is_some()
1625        );
1626        assert_eq!(1, metrics.mem_cache_hit);
1627
1628        let mut metrics = MetadataCacheMetrics::default();
1629        assert!(
1630            cache
1631                .get_sst_meta_data(file_id, &mut metrics, PageIndexPolicy::Skip)
1632                .await
1633                .is_some()
1634        );
1635        assert_eq!(1, metrics.mem_cache_hit);
1636    }
1637
1638    #[test]
1639    fn test_meta_cache_weight_accounts_for_decoded_region_metadata() {
1640        let region_metadata = Arc::new(wide_region_metadata(128));
1641        let json_len = region_metadata.to_json().unwrap().len();
1642        let metadata = sst_parquet_meta_with_region_metadata(region_metadata.clone());
1643        let cached = Arc::new(
1644            CachedSstMeta::try_new("test.parquet", Arc::unwrap_or_clone(metadata)).unwrap(),
1645        );
1646        let key = SstMetaKey(region_metadata.region_id, FileId::random());
1647
1648        assert!(cached.region_metadata_weight > json_len);
1649        assert_eq!(
1650            meta_cache_weight(&key, &cached) as usize,
1651            key.estimated_size()
1652                + parquet_meta_size(&cached.parquet_metadata)
1653                + cached.region_metadata_weight
1654        );
1655    }
1656
1657    #[test]
1658    fn test_repeated_vector_cache() {
1659        let cache = CacheManager::builder().vector_cache_size(4096).build();
1660        let value = Value::Int64(10);
1661        assert!(
1662            cache
1663                .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
1664                .is_none()
1665        );
1666        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
1667        cache.put_repeated_vector(value.clone(), vector.clone());
1668        let cached = cache
1669            .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
1670            .unwrap();
1671        assert_eq!(vector, cached);
1672    }
1673
1674    #[test]
1675    fn test_page_cache() {
1676        let cache = CacheManager::builder().page_cache_size(1000).build();
1677        let file_id = FileId::random();
1678        let key = PageKey::new(file_id, 0, vec![(0..10), (10..20)]);
1679        assert!(cache.get_pages(&key).is_none());
1680        let pages = Arc::new(PageValue::default());
1681        cache.put_pages(key.clone(), pages);
1682        assert!(cache.get_pages(&key).is_some());
1683    }
1684
1685    #[test]
1686    fn test_selector_result_cache() {
1687        let cache = CacheManager::builder()
1688            .selector_result_cache_size(1000)
1689            .build();
1690        let file_id = FileId::random();
1691        let key = SelectorResultKey {
1692            file_id,
1693            row_group_idx: 0,
1694            selector: TimeSeriesRowSelector::LastRow,
1695        };
1696        assert!(cache.get_selector_result(&key).is_none());
1697        let result = Arc::new(SelectorResultValue::new(
1698            Vec::new(),
1699            ParquetReadColumns::from_deduped(Vec::new()),
1700        ));
1701        cache.put_selector_result(key, result);
1702        assert!(cache.get_selector_result(&key).is_some());
1703    }
1704
1705    #[test]
1706    fn test_prefilter_result_cache() {
1707        let disabled = CacheManager::builder().build();
1708        let file_id = FileId::random();
1709        let key = PrefilterKey::new(
1710            file_id,
1711            0,
1712            None,
1713            1,
1714            SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()]),
1715        );
1716        let selection = Arc::new(BooleanBuffer::new_set(3));
1717
1718        disabled.put_prefilter_result(key.clone(), selection.clone());
1719        assert!(disabled.get_prefilter_result(&key).is_none());
1720
1721        let cache = Arc::new(
1722            CacheManager::builder()
1723                .prefilter_result_cache_size(1000)
1724                .build(),
1725        );
1726        assert!(cache.get_prefilter_result(&key).is_none());
1727        cache.put_prefilter_result(key.clone(), selection.clone());
1728        assert_eq!(
1729            cache.get_prefilter_result(&key).unwrap().as_ref(),
1730            selection.as_ref()
1731        );
1732
1733        let enable_all = CacheStrategy::EnableAll(cache.clone());
1734        assert!(enable_all.get_prefilter_result(&key).is_some());
1735
1736        let compaction = CacheStrategy::Compaction(cache.clone());
1737        assert!(compaction.get_prefilter_result(&key).is_none());
1738        compaction.put_prefilter_result(key.clone(), selection.clone());
1739        assert!(cache.get_prefilter_result(&key).is_some());
1740
1741        let disabled_strategy = CacheStrategy::Disabled;
1742        assert!(disabled_strategy.get_prefilter_result(&key).is_none());
1743        disabled_strategy.put_prefilter_result(key.clone(), selection);
1744        assert!(cache.get_prefilter_result(&key).is_some());
1745    }
1746
1747    #[test]
1748    fn test_prefilter_key_distinguishes_dimensions() {
1749        let file_id = FileId::random();
1750        let row_selection = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(3)]);
1751        let other_row_selection =
1752            RowSelection::from(vec![RowSelector::skip(2), RowSelector::select(2)]);
1753        let row_selection = PrefilterKey::row_selection_snapshot(Some(&row_selection));
1754        let other_row_selection = PrefilterKey::row_selection_snapshot(Some(&other_row_selection));
1755        let base = PrefilterKey::new(
1756            file_id,
1757            0,
1758            row_selection.clone(),
1759            1,
1760            SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()]),
1761        );
1762
1763        assert_ne!(
1764            base,
1765            PrefilterKey::new(
1766                FileId::random(),
1767                0,
1768                row_selection.clone(),
1769                1,
1770                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
1771            )
1772        );
1773        assert_ne!(
1774            base,
1775            PrefilterKey::new(
1776                file_id,
1777                1,
1778                row_selection.clone(),
1779                1,
1780                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
1781            )
1782        );
1783        assert_ne!(
1784            base,
1785            PrefilterKey::new(
1786                file_id,
1787                0,
1788                other_row_selection,
1789                1,
1790                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
1791            )
1792        );
1793        assert_ne!(
1794            base,
1795            PrefilterKey::new(
1796                file_id,
1797                0,
1798                row_selection.clone(),
1799                1,
1800                SmallVec::from_vec(vec!["tag_0 IN ([b])".to_string()])
1801            )
1802        );
1803        assert_ne!(
1804            base,
1805            PrefilterKey::new(
1806                file_id,
1807                0,
1808                row_selection.clone(),
1809                2,
1810                SmallVec::from_vec(vec!["tag_0 IN ([a])".to_string()])
1811            )
1812        );
1813        let pk_group = PrefilterKey::new(
1814            file_id,
1815            0,
1816            row_selection,
1817            1,
1818            SmallVec::from_vec(vec![
1819                "tag_0 IN ([a])".to_string(),
1820                "tag_1 IN ([x])".to_string(),
1821            ]),
1822        );
1823        assert_ne!(base, pk_group);
1824    }
1825
1826    #[test]
1827    fn test_range_result_cache() {
1828        let cache = Arc::new(
1829            CacheManager::builder()
1830                .range_result_cache_size(1024 * 1024)
1831                .build(),
1832        );
1833
1834        let key = RangeScanCacheKey {
1835            region_id: RegionId::new(1, 1),
1836            row_groups: vec![(FileId::random(), 0)],
1837            scan: ScanRequestFingerprintBuilder {
1838                read_columns: ReadColumns::from_deduped_column_ids(std::iter::empty()),
1839                read_column_types: vec![],
1840                filters: vec!["tag_0 = 1".to_string()],
1841                time_filters: vec![],
1842                series_row_selector: None,
1843                append_mode: false,
1844                filter_deleted: true,
1845                merge_mode: crate::region::options::MergeMode::LastRow,
1846                partition_expr_version: 0,
1847            }
1848            .build(),
1849        };
1850        let value = Arc::new(RangeScanCacheValue::new(Vec::new(), 0));
1851
1852        assert!(cache.get_range_result(&key).is_none());
1853        cache.put_range_result(key.clone(), value.clone());
1854        assert!(cache.get_range_result(&key).is_some());
1855
1856        let enable_all = CacheStrategy::EnableAll(cache.clone());
1857        assert!(enable_all.get_range_result(&key).is_some());
1858
1859        let compaction = CacheStrategy::Compaction(cache.clone());
1860        assert!(compaction.get_range_result(&key).is_none());
1861        compaction.put_range_result(key.clone(), value.clone());
1862        assert!(cache.get_range_result(&key).is_some());
1863
1864        let disabled = CacheStrategy::Disabled;
1865        assert!(disabled.get_range_result(&key).is_none());
1866        disabled.put_range_result(key.clone(), value);
1867        assert!(cache.get_range_result(&key).is_some());
1868    }
1869
1870    #[test]
1871    fn test_range_result_cache_size_configures_limiter() {
1872        let cache_size = 3 * 1024_u64;
1873        let cache = CacheManager::builder()
1874            .range_result_cache_size(cache_size)
1875            .build();
1876
1877        assert_eq!(cache.range_result_cache_size(), cache_size as usize);
1878        assert_eq!(
1879            cache.range_result_memory_limiter().permit_bytes(),
1880            RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize
1881        );
1882        assert_eq!(
1883            cache.range_result_memory_limiter().available_permits(),
1884            (cache_size as usize).div_ceil(RANGE_RESULT_CONCAT_MEMORY_PERMIT.as_bytes() as usize)
1885        );
1886    }
1887
1888    #[tokio::test]
1889    async fn range_result_memory_limiter_rejects_oversized_request() {
1890        let limiter = RangeResultMemoryLimiter::new(2 * 1024, 1024);
1891        assert_eq!(limiter.available_permits(), 2);
1892
1893        let err = limiter.acquire(10 * 1024).await.unwrap_err();
1894        assert!(
1895            err.to_string().contains("exceeds limiter capacity"),
1896            "unexpected error: {err}"
1897        );
1898        assert_eq!(limiter.available_permits(), 2);
1899    }
1900
1901    #[tokio::test]
1902    async fn range_result_memory_limiter_allows_request_up_to_capacity() {
1903        let limiter = RangeResultMemoryLimiter::new(2 * 1024, 1024);
1904        let permit = limiter.acquire(2 * 1024).await.unwrap();
1905        assert_eq!(limiter.available_permits(), 0);
1906        drop(permit);
1907        assert_eq!(limiter.available_permits(), 2);
1908    }
1909
1910    #[tokio::test]
1911    async fn test_evict_puffin_cache_clears_all_entries() {
1912        use std::collections::{BTreeMap, HashMap};
1913
1914        let cache = CacheManager::builder()
1915            .index_metadata_size(128)
1916            .index_content_size(128)
1917            .index_content_page_size(64)
1918            .index_result_cache_size(128)
1919            .puffin_metadata_size(128)
1920            .build();
1921        let cache = Arc::new(cache);
1922
1923        let region_id = RegionId::new(1, 1);
1924        let index_id = RegionIndexId::new(RegionFileId::new(region_id, FileId::random()), 0);
1925        let column_id: ColumnId = 1;
1926
1927        let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone();
1928        let inverted_cache = cache.inverted_index_cache().unwrap().clone();
1929        let result_cache = cache.index_result_cache().unwrap();
1930        let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone();
1931
1932        let bloom_key = (
1933            index_id.file_id(),
1934            index_id.version,
1935            column_id,
1936            Tag::Skipping,
1937        );
1938        bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
1939        inverted_cache.put_metadata(
1940            (index_id.file_id(), index_id.version),
1941            Arc::new(InvertedIndexMetas::default()),
1942        );
1943        let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new()));
1944        let selection = Arc::new(RowGroupSelection::default());
1945        result_cache.put(predicate.clone(), index_id.file_id(), selection);
1946        let file_id_str = index_id.to_string();
1947        let metadata = Arc::new(FileMetadata {
1948            blobs: Vec::new(),
1949            properties: HashMap::new(),
1950        });
1951        puffin_metadata_cache.put_metadata(file_id_str.clone(), metadata);
1952
1953        assert!(bloom_cache.get_metadata(bloom_key).is_some());
1954        assert!(
1955            inverted_cache
1956                .get_metadata((index_id.file_id(), index_id.version))
1957                .is_some()
1958        );
1959        assert!(result_cache.get(&predicate, index_id.file_id()).is_some());
1960        assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some());
1961
1962        cache.evict_puffin_cache(index_id).await;
1963
1964        assert!(bloom_cache.get_metadata(bloom_key).is_none());
1965        assert!(
1966            inverted_cache
1967                .get_metadata((index_id.file_id(), index_id.version))
1968                .is_none()
1969        );
1970        assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
1971        assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
1972
1973        // Refill caches and evict via CacheStrategy to ensure delegation works.
1974        bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
1975        inverted_cache.put_metadata(
1976            (index_id.file_id(), index_id.version),
1977            Arc::new(InvertedIndexMetas::default()),
1978        );
1979        result_cache.put(
1980            predicate.clone(),
1981            index_id.file_id(),
1982            Arc::new(RowGroupSelection::default()),
1983        );
1984        puffin_metadata_cache.put_metadata(
1985            file_id_str.clone(),
1986            Arc::new(FileMetadata {
1987                blobs: Vec::new(),
1988                properties: HashMap::new(),
1989            }),
1990        );
1991
1992        let strategy = CacheStrategy::EnableAll(cache.clone());
1993        strategy.evict_puffin_cache(index_id).await;
1994
1995        assert!(bloom_cache.get_metadata(bloom_key).is_none());
1996        assert!(
1997            inverted_cache
1998                .get_metadata((index_id.file_id(), index_id.version))
1999                .is_none()
2000        );
2001        assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
2002        assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
2003    }
2004
2005    fn wide_region_metadata(column_count: u32) -> RegionMetadata {
2006        let region_id = RegionId::new(1024, 7);
2007        let mut builder = RegionMetadataBuilder::new(region_id);
2008        let mut primary_key = Vec::new();
2009
2010        for column_id in 0..column_count {
2011            let semantic_type = if column_id < 32 {
2012                primary_key.push(column_id);
2013                SemanticType::Tag
2014            } else {
2015                SemanticType::Field
2016            };
2017            let mut column_schema = ColumnSchema::new(
2018                format!("wide_column_{column_id}"),
2019                ConcreteDataType::string_datatype(),
2020                true,
2021            );
2022            column_schema
2023                .mut_metadata()
2024                .insert(format!("cache_key_{column_id}"), "cache_value".repeat(4));
2025            builder.push_column_metadata(ColumnMetadata {
2026                column_schema,
2027                semantic_type,
2028                column_id,
2029            });
2030        }
2031
2032        builder.push_column_metadata(ColumnMetadata {
2033            column_schema: ColumnSchema::new(
2034                "ts",
2035                ConcreteDataType::timestamp_millisecond_datatype(),
2036                false,
2037            ),
2038            semantic_type: SemanticType::Timestamp,
2039            column_id: column_count,
2040        });
2041        builder.primary_key(primary_key);
2042
2043        builder.build().unwrap()
2044    }
2045}