1pub(crate) mod cache_size;
18
19pub(crate) mod file_cache;
20pub(crate) mod index;
21pub(crate) mod manifest_cache;
22#[cfg(test)]
23pub(crate) mod test_util;
24pub(crate) mod write_cache;
25
26use std::mem;
27use std::ops::Range;
28use std::sync::Arc;
29
30use bytes::Bytes;
31use datatypes::arrow::record_batch::RecordBatch;
32use datatypes::value::Value;
33use datatypes::vectors::VectorRef;
34use index::bloom_filter_index::{BloomFilterIndexCache, BloomFilterIndexCacheRef};
35use index::result_cache::IndexResultCache;
36use moka::notification::RemovalCause;
37use moka::sync::Cache;
38use object_store::ObjectStore;
39use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData};
40use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
41use store_api::storage::{ConcreteDataType, FileId, RegionId, TimeSeriesRowSelector};
42
43use crate::cache::cache_size::parquet_meta_size;
44use crate::cache::file_cache::{FileType, IndexKey};
45use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCacheRef};
46#[cfg(feature = "vector_index")]
47use crate::cache::index::vector_index::{VectorIndexCache, VectorIndexCacheRef};
48use crate::cache::write_cache::WriteCacheRef;
49use crate::memtable::record_batch_estimated_size;
50use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
51use crate::read::Batch;
52use crate::sst::file::{RegionFileId, RegionIndexId};
53use crate::sst::parquet::reader::MetadataCacheMetrics;
54
/// Metrics label value used for the SST (parquet) metadata cache.
const SST_META_TYPE: &str = "sst_meta";
/// Metrics label value used for the repeated-vector cache.
const VECTOR_TYPE: &str = "vector";
/// Metrics label value used for the page cache.
const PAGE_TYPE: &str = "page";
/// Label value `"file"`. Not referenced in this file; presumably used as a
/// metrics label by a submodule — TODO confirm.
const FILE_TYPE: &str = "file";
/// Label value `"index"`. Not referenced in this file; presumably used as a
/// metrics label by a submodule — TODO confirm.
const INDEX_TYPE: &str = "index";
/// Metrics label value used for the selector result cache.
const SELECTOR_RESULT_TYPE: &str = "selector_result";
67
/// Selects which caches of a [`CacheManager`] a caller is allowed to use.
///
/// The methods on this type forward to the wrapped manager or short-circuit
/// (returning `None` / doing nothing) depending on the variant.
#[derive(Clone)]
pub enum CacheStrategy {
    /// Every accessor forwards to the wrapped manager.
    EnableAll(CacheManagerRef),
    /// Only parquet metadata operations, puffin cache eviction and the write
    /// cache accessor forward to the wrapped manager; all other lookups return
    /// `None` and all other inserts are no-ops.
    Compaction(CacheManagerRef),
    /// No manager is available: lookups return `None` and inserts/removals are
    /// no-ops.
    Disabled,
}
82
83impl CacheStrategy {
84 pub(crate) async fn get_parquet_meta_data(
87 &self,
88 file_id: RegionFileId,
89 metrics: &mut MetadataCacheMetrics,
90 page_index_policy: PageIndexPolicy,
91 ) -> Option<Arc<ParquetMetaData>> {
92 match self {
93 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
94 cache_manager
95 .get_parquet_meta_data(file_id, metrics, page_index_policy)
96 .await
97 }
98 CacheStrategy::Disabled => {
99 metrics.cache_miss += 1;
100 None
101 }
102 }
103 }
104
105 pub fn get_parquet_meta_data_from_mem_cache(
107 &self,
108 file_id: RegionFileId,
109 ) -> Option<Arc<ParquetMetaData>> {
110 match self {
111 CacheStrategy::EnableAll(cache_manager) => {
112 cache_manager.get_parquet_meta_data_from_mem_cache(file_id)
113 }
114 CacheStrategy::Compaction(cache_manager) => {
115 cache_manager.get_parquet_meta_data_from_mem_cache(file_id)
116 }
117 CacheStrategy::Disabled => None,
118 }
119 }
120
121 pub fn put_parquet_meta_data(&self, file_id: RegionFileId, metadata: Arc<ParquetMetaData>) {
123 match self {
124 CacheStrategy::EnableAll(cache_manager) => {
125 cache_manager.put_parquet_meta_data(file_id, metadata);
126 }
127 CacheStrategy::Compaction(cache_manager) => {
128 cache_manager.put_parquet_meta_data(file_id, metadata);
129 }
130 CacheStrategy::Disabled => {}
131 }
132 }
133
134 pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
136 match self {
137 CacheStrategy::EnableAll(cache_manager) => {
138 cache_manager.remove_parquet_meta_data(file_id);
139 }
140 CacheStrategy::Compaction(cache_manager) => {
141 cache_manager.remove_parquet_meta_data(file_id);
142 }
143 CacheStrategy::Disabled => {}
144 }
145 }
146
147 pub fn get_repeated_vector(
150 &self,
151 data_type: &ConcreteDataType,
152 value: &Value,
153 ) -> Option<VectorRef> {
154 match self {
155 CacheStrategy::EnableAll(cache_manager) => {
156 cache_manager.get_repeated_vector(data_type, value)
157 }
158 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
159 }
160 }
161
162 pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
165 if let CacheStrategy::EnableAll(cache_manager) = self {
166 cache_manager.put_repeated_vector(value, vector);
167 }
168 }
169
170 pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
173 match self {
174 CacheStrategy::EnableAll(cache_manager) => cache_manager.get_pages(page_key),
175 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
176 }
177 }
178
179 pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
182 if let CacheStrategy::EnableAll(cache_manager) = self {
183 cache_manager.put_pages(page_key, pages);
184 }
185 }
186
187 pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
189 match self {
190 CacheStrategy::EnableAll(cache_manager) => {
191 cache_manager.evict_puffin_cache(file_id).await
192 }
193 CacheStrategy::Compaction(cache_manager) => {
194 cache_manager.evict_puffin_cache(file_id).await
195 }
196 CacheStrategy::Disabled => {}
197 }
198 }
199
200 pub fn get_selector_result(
203 &self,
204 selector_key: &SelectorResultKey,
205 ) -> Option<Arc<SelectorResultValue>> {
206 match self {
207 CacheStrategy::EnableAll(cache_manager) => {
208 cache_manager.get_selector_result(selector_key)
209 }
210 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
211 }
212 }
213
214 pub fn put_selector_result(
217 &self,
218 selector_key: SelectorResultKey,
219 result: Arc<SelectorResultValue>,
220 ) {
221 if let CacheStrategy::EnableAll(cache_manager) = self {
222 cache_manager.put_selector_result(selector_key, result);
223 }
224 }
225
226 pub fn write_cache(&self) -> Option<&WriteCacheRef> {
229 match self {
230 CacheStrategy::EnableAll(cache_manager) => cache_manager.write_cache(),
231 CacheStrategy::Compaction(cache_manager) => cache_manager.write_cache(),
232 CacheStrategy::Disabled => None,
233 }
234 }
235
236 pub fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
239 match self {
240 CacheStrategy::EnableAll(cache_manager) => cache_manager.inverted_index_cache(),
241 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
242 }
243 }
244
245 pub fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
248 match self {
249 CacheStrategy::EnableAll(cache_manager) => cache_manager.bloom_filter_index_cache(),
250 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
251 }
252 }
253
254 #[cfg(feature = "vector_index")]
257 pub fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
258 match self {
259 CacheStrategy::EnableAll(cache_manager) => cache_manager.vector_index_cache(),
260 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
261 }
262 }
263
264 pub fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
267 match self {
268 CacheStrategy::EnableAll(cache_manager) => cache_manager.puffin_metadata_cache(),
269 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
270 }
271 }
272
273 pub fn index_result_cache(&self) -> Option<&IndexResultCache> {
276 match self {
277 CacheStrategy::EnableAll(cache_manager) => cache_manager.index_result_cache(),
278 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
279 }
280 }
281
282 pub fn maybe_download_background(
284 &self,
285 index_key: IndexKey,
286 remote_path: String,
287 remote_store: ObjectStore,
288 file_size: u64,
289 ) {
290 if let CacheStrategy::EnableAll(cache_manager) = self
291 && let Some(write_cache) = cache_manager.write_cache()
292 {
293 write_cache.file_cache().maybe_download_background(
294 index_key,
295 remote_path,
296 remote_store,
297 file_size,
298 );
299 }
300 }
301}
302
/// Owns the individual caches used by this module.
///
/// Every cache is optional; the derived `Default` leaves all of them unset, in
/// which case lookups return `None` and inserts are no-ops.
#[derive(Default)]
pub struct CacheManager {
    /// In-memory cache of parquet metadata keyed by `(RegionId, FileId)`.
    sst_meta_cache: Option<SstMetaCache>,
    /// Cache of vectors keyed by `(ConcreteDataType, Value)`.
    vector_cache: Option<VectorCache>,
    /// Cache of pages keyed by [`PageKey`].
    page_cache: Option<PageCache>,
    /// Optional write cache; its file cache is consulted for parquet metadata
    /// when the in-memory metadata cache misses.
    write_cache: Option<WriteCacheRef>,
    /// Cache for inverted index data.
    inverted_index_cache: Option<InvertedIndexCacheRef>,
    /// Cache for bloom filter index data.
    bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
    /// Cache for vector index data (only with the `vector_index` feature).
    #[cfg(feature = "vector_index")]
    vector_index_cache: Option<VectorIndexCacheRef>,
    /// Cache for puffin file metadata.
    puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
    /// Cache of selector results keyed by [`SelectorResultKey`].
    selector_result_cache: Option<SelectorResultCache>,
    /// Cache for index results.
    index_result_cache: Option<IndexResultCache>,
}

/// Shared, reference-counted handle to a [`CacheManager`].
pub type CacheManagerRef = Arc<CacheManager>;
332
333impl CacheManager {
334 pub fn builder() -> CacheManagerBuilder {
336 CacheManagerBuilder::default()
337 }
338
339 pub(crate) async fn get_parquet_meta_data(
342 &self,
343 file_id: RegionFileId,
344 metrics: &mut MetadataCacheMetrics,
345 page_index_policy: PageIndexPolicy,
346 ) -> Option<Arc<ParquetMetaData>> {
347 if let Some(metadata) = self.get_parquet_meta_data_from_mem_cache(file_id) {
349 metrics.mem_cache_hit += 1;
350 return Some(metadata);
351 }
352
353 let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet);
355 if let Some(write_cache) = &self.write_cache
356 && let Some(metadata) = write_cache
357 .file_cache()
358 .get_parquet_meta_data(key, metrics, page_index_policy)
359 .await
360 {
361 metrics.file_cache_hit += 1;
362 let metadata = Arc::new(metadata);
363 self.put_parquet_meta_data(file_id, metadata.clone());
365 return Some(metadata);
366 };
367 metrics.cache_miss += 1;
368
369 None
370 }
371
372 pub fn get_parquet_meta_data_from_mem_cache(
375 &self,
376 file_id: RegionFileId,
377 ) -> Option<Arc<ParquetMetaData>> {
378 self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
380 let value = sst_meta_cache.get(&SstMetaKey(file_id.region_id(), file_id.file_id()));
381 update_hit_miss(value, SST_META_TYPE)
382 })
383 }
384
385 pub fn put_parquet_meta_data(&self, file_id: RegionFileId, metadata: Arc<ParquetMetaData>) {
387 if let Some(cache) = &self.sst_meta_cache {
388 let key = SstMetaKey(file_id.region_id(), file_id.file_id());
389 CACHE_BYTES
390 .with_label_values(&[SST_META_TYPE])
391 .add(meta_cache_weight(&key, &metadata).into());
392 cache.insert(key, metadata);
393 }
394 }
395
396 pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
398 if let Some(cache) = &self.sst_meta_cache {
399 cache.remove(&SstMetaKey(file_id.region_id(), file_id.file_id()));
400 }
401 }
402
403 pub fn get_repeated_vector(
405 &self,
406 data_type: &ConcreteDataType,
407 value: &Value,
408 ) -> Option<VectorRef> {
409 self.vector_cache.as_ref().and_then(|vector_cache| {
410 let value = vector_cache.get(&(data_type.clone(), value.clone()));
411 update_hit_miss(value, VECTOR_TYPE)
412 })
413 }
414
415 pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
417 if let Some(cache) = &self.vector_cache {
418 let key = (vector.data_type(), value);
419 CACHE_BYTES
420 .with_label_values(&[VECTOR_TYPE])
421 .add(vector_cache_weight(&key, &vector).into());
422 cache.insert(key, vector);
423 }
424 }
425
426 pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
428 self.page_cache.as_ref().and_then(|page_cache| {
429 let value = page_cache.get(page_key);
430 update_hit_miss(value, PAGE_TYPE)
431 })
432 }
433
434 pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
436 if let Some(cache) = &self.page_cache {
437 CACHE_BYTES
438 .with_label_values(&[PAGE_TYPE])
439 .add(page_cache_weight(&page_key, &pages).into());
440 cache.insert(page_key, pages);
441 }
442 }
443
444 pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
446 if let Some(cache) = &self.bloom_filter_index_cache {
447 cache.invalidate_file(file_id.file_id());
448 }
449
450 if let Some(cache) = &self.inverted_index_cache {
451 cache.invalidate_file(file_id.file_id());
452 }
453
454 if let Some(cache) = &self.index_result_cache {
455 cache.invalidate_file(file_id.file_id());
456 }
457
458 #[cfg(feature = "vector_index")]
459 if let Some(cache) = &self.vector_index_cache {
460 cache.invalidate_file(file_id.file_id());
461 }
462
463 if let Some(cache) = &self.puffin_metadata_cache {
464 cache.remove(&file_id.to_string());
465 }
466
467 if let Some(write_cache) = &self.write_cache {
468 write_cache
469 .remove(IndexKey::new(
470 file_id.region_id(),
471 file_id.file_id(),
472 FileType::Puffin(file_id.version),
473 ))
474 .await;
475 }
476 }
477
478 pub fn get_selector_result(
480 &self,
481 selector_key: &SelectorResultKey,
482 ) -> Option<Arc<SelectorResultValue>> {
483 self.selector_result_cache
484 .as_ref()
485 .and_then(|selector_result_cache| selector_result_cache.get(selector_key))
486 }
487
488 pub fn put_selector_result(
490 &self,
491 selector_key: SelectorResultKey,
492 result: Arc<SelectorResultValue>,
493 ) {
494 if let Some(cache) = &self.selector_result_cache {
495 CACHE_BYTES
496 .with_label_values(&[SELECTOR_RESULT_TYPE])
497 .add(selector_result_cache_weight(&selector_key, &result).into());
498 cache.insert(selector_key, result);
499 }
500 }
501
502 pub(crate) fn write_cache(&self) -> Option<&WriteCacheRef> {
504 self.write_cache.as_ref()
505 }
506
507 pub(crate) fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
508 self.inverted_index_cache.as_ref()
509 }
510
511 pub(crate) fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
512 self.bloom_filter_index_cache.as_ref()
513 }
514
515 #[cfg(feature = "vector_index")]
516 pub(crate) fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
517 self.vector_index_cache.as_ref()
518 }
519
520 pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
521 self.puffin_metadata_cache.as_ref()
522 }
523
524 pub(crate) fn index_result_cache(&self) -> Option<&IndexResultCache> {
525 self.index_result_cache.as_ref()
526 }
527}
528
529pub fn selector_result_cache_miss() {
531 CACHE_MISS.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
532}
533
534pub fn selector_result_cache_hit() {
536 CACHE_HIT.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
537}
538
/// Builder for [`CacheManager`].
///
/// All sizes default to zero and the write cache defaults to `None`.
#[derive(Default)]
pub struct CacheManagerBuilder {
    /// Capacity of the SST metadata cache; zero means the cache is not built.
    sst_meta_cache_size: u64,
    /// Capacity of the vector cache; zero means the cache is not built.
    vector_cache_size: u64,
    /// Capacity of the page cache; zero means the cache is not built.
    page_cache_size: u64,
    /// Passed to the inverted index and bloom filter index cache constructors.
    index_metadata_size: u64,
    /// Passed to the inverted index and bloom filter index cache constructors;
    /// also sizes the vector index cache when that feature is enabled.
    index_content_size: u64,
    /// Passed to the inverted index and bloom filter index cache constructors.
    index_content_page_size: u64,
    /// Capacity of the index result cache; zero means the cache is not built.
    index_result_cache_size: u64,
    /// Passed to the puffin metadata cache constructor.
    puffin_metadata_size: u64,
    /// Write cache handed to the built manager as-is.
    write_cache: Option<WriteCacheRef>,
    /// Capacity of the selector result cache; zero means the cache is not built.
    selector_result_cache_size: u64,
}
553
554impl CacheManagerBuilder {
555 pub fn sst_meta_cache_size(mut self, bytes: u64) -> Self {
557 self.sst_meta_cache_size = bytes;
558 self
559 }
560
561 pub fn vector_cache_size(mut self, bytes: u64) -> Self {
563 self.vector_cache_size = bytes;
564 self
565 }
566
567 pub fn page_cache_size(mut self, bytes: u64) -> Self {
569 self.page_cache_size = bytes;
570 self
571 }
572
573 pub fn write_cache(mut self, cache: Option<WriteCacheRef>) -> Self {
575 self.write_cache = cache;
576 self
577 }
578
579 pub fn index_metadata_size(mut self, bytes: u64) -> Self {
581 self.index_metadata_size = bytes;
582 self
583 }
584
585 pub fn index_content_size(mut self, bytes: u64) -> Self {
587 self.index_content_size = bytes;
588 self
589 }
590
591 pub fn index_content_page_size(mut self, bytes: u64) -> Self {
593 self.index_content_page_size = bytes;
594 self
595 }
596
597 pub fn index_result_cache_size(mut self, bytes: u64) -> Self {
599 self.index_result_cache_size = bytes;
600 self
601 }
602
603 pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
605 self.puffin_metadata_size = bytes;
606 self
607 }
608
609 pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
611 self.selector_result_cache_size = bytes;
612 self
613 }
614
615 pub fn build(self) -> CacheManager {
617 fn to_str(cause: RemovalCause) -> &'static str {
618 match cause {
619 RemovalCause::Expired => "expired",
620 RemovalCause::Explicit => "explicit",
621 RemovalCause::Replaced => "replaced",
622 RemovalCause::Size => "size",
623 }
624 }
625
626 let sst_meta_cache = (self.sst_meta_cache_size != 0).then(|| {
627 Cache::builder()
628 .max_capacity(self.sst_meta_cache_size)
629 .weigher(meta_cache_weight)
630 .eviction_listener(|k, v, cause| {
631 let size = meta_cache_weight(&k, &v);
632 CACHE_BYTES
633 .with_label_values(&[SST_META_TYPE])
634 .sub(size.into());
635 CACHE_EVICTION
636 .with_label_values(&[SST_META_TYPE, to_str(cause)])
637 .inc();
638 })
639 .build()
640 });
641 let vector_cache = (self.vector_cache_size != 0).then(|| {
642 Cache::builder()
643 .max_capacity(self.vector_cache_size)
644 .weigher(vector_cache_weight)
645 .eviction_listener(|k, v, cause| {
646 let size = vector_cache_weight(&k, &v);
647 CACHE_BYTES
648 .with_label_values(&[VECTOR_TYPE])
649 .sub(size.into());
650 CACHE_EVICTION
651 .with_label_values(&[VECTOR_TYPE, to_str(cause)])
652 .inc();
653 })
654 .build()
655 });
656 let page_cache = (self.page_cache_size != 0).then(|| {
657 Cache::builder()
658 .max_capacity(self.page_cache_size)
659 .weigher(page_cache_weight)
660 .eviction_listener(|k, v, cause| {
661 let size = page_cache_weight(&k, &v);
662 CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
663 CACHE_EVICTION
664 .with_label_values(&[PAGE_TYPE, to_str(cause)])
665 .inc();
666 })
667 .build()
668 });
669 let inverted_index_cache = InvertedIndexCache::new(
670 self.index_metadata_size,
671 self.index_content_size,
672 self.index_content_page_size,
673 );
674 let bloom_filter_index_cache = BloomFilterIndexCache::new(
676 self.index_metadata_size,
677 self.index_content_size,
678 self.index_content_page_size,
679 );
680 #[cfg(feature = "vector_index")]
681 let vector_index_cache = (self.index_content_size != 0)
682 .then(|| Arc::new(VectorIndexCache::new(self.index_content_size)));
683 let index_result_cache = (self.index_result_cache_size != 0)
684 .then(|| IndexResultCache::new(self.index_result_cache_size));
685 let puffin_metadata_cache =
686 PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
687 let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
688 Cache::builder()
689 .max_capacity(self.selector_result_cache_size)
690 .weigher(selector_result_cache_weight)
691 .eviction_listener(|k, v, cause| {
692 let size = selector_result_cache_weight(&k, &v);
693 CACHE_BYTES
694 .with_label_values(&[SELECTOR_RESULT_TYPE])
695 .sub(size.into());
696 CACHE_EVICTION
697 .with_label_values(&[SELECTOR_RESULT_TYPE, to_str(cause)])
698 .inc();
699 })
700 .build()
701 });
702 CacheManager {
703 sst_meta_cache,
704 vector_cache,
705 page_cache,
706 write_cache: self.write_cache,
707 inverted_index_cache: Some(Arc::new(inverted_index_cache)),
708 bloom_filter_index_cache: Some(Arc::new(bloom_filter_index_cache)),
709 #[cfg(feature = "vector_index")]
710 vector_index_cache,
711 puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
712 selector_result_cache,
713 index_result_cache,
714 }
715 }
716}
717
718fn meta_cache_weight(k: &SstMetaKey, v: &Arc<ParquetMetaData>) -> u32 {
719 (k.estimated_size() + parquet_meta_size(v)) as u32
721}
722
723fn vector_cache_weight(_k: &(ConcreteDataType, Value), v: &VectorRef) -> u32 {
724 (mem::size_of::<ConcreteDataType>() + mem::size_of::<Value>() + v.memory_size()) as u32
726}
727
728fn page_cache_weight(k: &PageKey, v: &Arc<PageValue>) -> u32 {
729 (k.estimated_size() + v.estimated_size()) as u32
730}
731
732fn selector_result_cache_weight(k: &SelectorResultKey, v: &Arc<SelectorResultValue>) -> u32 {
733 (mem::size_of_val(k) + v.estimated_size()) as u32
734}
735
736fn update_hit_miss<T>(value: Option<T>, cache_type: &str) -> Option<T> {
738 if value.is_some() {
739 CACHE_HIT.with_label_values(&[cache_type]).inc();
740 } else {
741 CACHE_MISS.with_label_values(&[cache_type]).inc();
742 }
743 value
744}
745
746#[derive(Debug, Clone, PartialEq, Eq, Hash)]
748struct SstMetaKey(RegionId, FileId);
749
750impl SstMetaKey {
751 fn estimated_size(&self) -> usize {
753 mem::size_of::<Self>()
754 }
755}
756
/// Identifies a column within a row group of a file in a region.
///
/// Not referenced elsewhere in this file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ColumnPagePath {
    /// Id of the region.
    region_id: RegionId,
    /// Id of the file.
    file_id: FileId,
    /// Index of the row group.
    row_group_idx: usize,
    /// Index of the column.
    column_idx: usize,
}
769
770#[derive(Debug, Clone, PartialEq, Eq, Hash)]
775pub struct PageKey {
776 file_id: FileId,
778 row_group_idx: usize,
780 ranges: Vec<Range<u64>>,
782}
783
784impl PageKey {
785 pub fn new(file_id: FileId, row_group_idx: usize, ranges: Vec<Range<u64>>) -> PageKey {
787 PageKey {
788 file_id,
789 row_group_idx,
790 ranges,
791 }
792 }
793
794 fn estimated_size(&self) -> usize {
796 mem::size_of::<Self>() + mem::size_of_val(self.ranges.as_slice())
797 }
798}
799
800#[derive(Default)]
803pub struct PageValue {
804 pub compressed: Vec<Bytes>,
806 pub page_size: u64,
808}
809
810impl PageValue {
811 pub fn new(bytes: Vec<Bytes>, page_size: u64) -> PageValue {
813 PageValue {
814 compressed: bytes,
815 page_size,
816 }
817 }
818
819 fn estimated_size(&self) -> usize {
821 mem::size_of::<Self>()
822 + self.page_size as usize
823 + self.compressed.iter().map(mem::size_of_val).sum::<usize>()
824 }
825}
826
/// Key of the selector result cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SelectorResultKey {
    /// Id of the file.
    pub file_id: FileId,
    /// Index of the row group.
    pub row_group_idx: usize,
    /// The time series row selector the result was computed for.
    pub selector: TimeSeriesRowSelector,
}
837
/// Payload of a cached selector result, in one of two representations.
pub enum SelectorResult {
    /// Result held as a list of [`Batch`]es.
    PrimaryKey(Vec<Batch>),
    /// Result held as a list of arrow [`RecordBatch`]es.
    Flat(Vec<RecordBatch>),
}
845
846pub struct SelectorResultValue {
848 pub result: SelectorResult,
850 pub projection: Vec<usize>,
852}
853
854impl SelectorResultValue {
855 pub fn new(result: Vec<Batch>, projection: Vec<usize>) -> SelectorResultValue {
857 SelectorResultValue {
858 result: SelectorResult::PrimaryKey(result),
859 projection,
860 }
861 }
862
863 pub fn new_flat(result: Vec<RecordBatch>, projection: Vec<usize>) -> SelectorResultValue {
865 SelectorResultValue {
866 result: SelectorResult::Flat(result),
867 projection,
868 }
869 }
870
871 fn estimated_size(&self) -> usize {
873 match &self.result {
874 SelectorResult::PrimaryKey(batches) => {
875 batches.iter().map(|batch| batch.memory_size()).sum()
876 }
877 SelectorResult::Flat(batches) => batches.iter().map(record_batch_estimated_size).sum(),
878 }
879 }
880}
881
/// Maps `(region id, file id)` to parquet metadata.
type SstMetaCache = Cache<SstMetaKey, Arc<ParquetMetaData>>;
/// Maps `(data type, value)` to a vector.
type VectorCache = Cache<(ConcreteDataType, Value), VectorRef>;
/// Maps a [`PageKey`] to cached pages.
type PageCache = Cache<PageKey, Arc<PageValue>>;
/// Maps a [`SelectorResultKey`] to a cached selector result.
type SelectorResultCache = Cache<SelectorResultKey, Arc<SelectorResultValue>>;
892
/// Unit tests for the cache manager and cache strategy.
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use api::v1::index::{BloomFilterMeta, InvertedIndexMetas};
    use datatypes::vectors::Int64Vector;
    use puffin::file_metadata::FileMetadata;
    use store_api::storage::ColumnId;

    use super::*;
    use crate::cache::index::bloom_filter_index::Tag;
    use crate::cache::index::result_cache::PredicateKey;
    use crate::cache::test_util::parquet_meta;
    use crate::sst::parquet::row_selection::RowGroupSelection;

    /// A default manager has no caches: every put is a no-op and every get
    /// returns `None`.
    #[tokio::test]
    async fn test_disable_cache() {
        let cache = CacheManager::default();
        assert!(cache.sst_meta_cache.is_none());
        assert!(cache.vector_cache.is_none());
        assert!(cache.page_cache.is_none());

        let region_id = RegionId::new(1, 1);
        let file_id = RegionFileId::new(region_id, FileId::random());
        let metadata = parquet_meta();
        let mut metrics = MetadataCacheMetrics::default();
        cache.put_parquet_meta_data(file_id, metadata);
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_none()
        );

        let value = Value::Int64(10);
        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
        cache.put_repeated_vector(value.clone(), vector.clone());
        assert!(
            cache
                .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
                .is_none()
        );

        let key = PageKey::new(file_id.file_id(), 1, vec![Range { start: 0, end: 5 }]);
        let pages = Arc::new(PageValue::default());
        cache.put_pages(key.clone(), pages);
        assert!(cache.get_pages(&key).is_none());

        assert!(cache.write_cache().is_none());
    }

    /// Parquet metadata can be inserted, read back, and removed.
    #[tokio::test]
    async fn test_parquet_meta_cache() {
        let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
        let mut metrics = MetadataCacheMetrics::default();
        let region_id = RegionId::new(1, 1);
        let file_id = RegionFileId::new(region_id, FileId::random());
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_none()
        );
        let metadata = parquet_meta();
        cache.put_parquet_meta_data(file_id, metadata);
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_some()
        );
        cache.remove_parquet_meta_data(file_id);
        assert!(
            cache
                .get_parquet_meta_data(file_id, &mut metrics, Default::default())
                .await
                .is_none()
        );
    }

    /// A vector put under a value is returned for the same data type and value.
    #[test]
    fn test_repeated_vector_cache() {
        let cache = CacheManager::builder().vector_cache_size(4096).build();
        let value = Value::Int64(10);
        assert!(
            cache
                .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
                .is_none()
        );
        let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
        cache.put_repeated_vector(value.clone(), vector.clone());
        let cached = cache
            .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
            .unwrap();
        assert_eq!(vector, cached);
    }

    /// Pages put under a key can be read back with an equal key.
    #[test]
    fn test_page_cache() {
        let cache = CacheManager::builder().page_cache_size(1000).build();
        let file_id = FileId::random();
        let key = PageKey::new(file_id, 0, vec![(0..10), (10..20)]);
        assert!(cache.get_pages(&key).is_none());
        let pages = Arc::new(PageValue::default());
        cache.put_pages(key.clone(), pages);
        assert!(cache.get_pages(&key).is_some());
    }

    /// A selector result put under a key can be read back with the same key.
    #[test]
    fn test_selector_result_cache() {
        let cache = CacheManager::builder()
            .selector_result_cache_size(1000)
            .build();
        let file_id = FileId::random();
        let key = SelectorResultKey {
            file_id,
            row_group_idx: 0,
            selector: TimeSeriesRowSelector::LastRow,
        };
        assert!(cache.get_selector_result(&key).is_none());
        let result = Arc::new(SelectorResultValue::new(Vec::new(), Vec::new()));
        cache.put_selector_result(key, result);
        assert!(cache.get_selector_result(&key).is_some());
    }

    /// `evict_puffin_cache` clears the bloom filter, inverted index, index
    /// result and puffin metadata entries of a file, both when called on the
    /// manager and when called through `CacheStrategy::EnableAll`.
    #[tokio::test]
    async fn test_evict_puffin_cache_clears_all_entries() {
        use std::collections::{BTreeMap, HashMap};

        let cache = CacheManager::builder()
            .index_metadata_size(128)
            .index_content_size(128)
            .index_content_page_size(64)
            .index_result_cache_size(128)
            .puffin_metadata_size(128)
            .build();
        let cache = Arc::new(cache);

        let region_id = RegionId::new(1, 1);
        let index_id = RegionIndexId::new(RegionFileId::new(region_id, FileId::random()), 0);
        let column_id: ColumnId = 1;

        let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone();
        let inverted_cache = cache.inverted_index_cache().unwrap().clone();
        let result_cache = cache.index_result_cache().unwrap();
        let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone();

        // Populate every index-related cache with one entry for the file.
        let bloom_key = (
            index_id.file_id(),
            index_id.version,
            column_id,
            Tag::Skipping,
        );
        bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
        inverted_cache.put_metadata(
            (index_id.file_id(), index_id.version),
            Arc::new(InvertedIndexMetas::default()),
        );
        let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new()));
        let selection = Arc::new(RowGroupSelection::default());
        result_cache.put(predicate.clone(), index_id.file_id(), selection);
        let file_id_str = index_id.to_string();
        let metadata = Arc::new(FileMetadata {
            blobs: Vec::new(),
            properties: HashMap::new(),
        });
        puffin_metadata_cache.put_metadata(file_id_str.clone(), metadata);

        assert!(bloom_cache.get_metadata(bloom_key).is_some());
        assert!(
            inverted_cache
                .get_metadata((index_id.file_id(), index_id.version))
                .is_some()
        );
        assert!(result_cache.get(&predicate, index_id.file_id()).is_some());
        assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some());

        // Evicting through the manager removes all of them.
        cache.evict_puffin_cache(index_id).await;

        assert!(bloom_cache.get_metadata(bloom_key).is_none());
        assert!(
            inverted_cache
                .get_metadata((index_id.file_id(), index_id.version))
                .is_none()
        );
        assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
        assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());

        // Refill the caches and evict again, this time through the strategy.
        bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
        inverted_cache.put_metadata(
            (index_id.file_id(), index_id.version),
            Arc::new(InvertedIndexMetas::default()),
        );
        result_cache.put(
            predicate.clone(),
            index_id.file_id(),
            Arc::new(RowGroupSelection::default()),
        );
        puffin_metadata_cache.put_metadata(
            file_id_str.clone(),
            Arc::new(FileMetadata {
                blobs: Vec::new(),
                properties: HashMap::new(),
            }),
        );

        let strategy = CacheStrategy::EnableAll(cache.clone());
        strategy.evict_puffin_cache(index_id).await;

        assert!(bloom_cache.get_metadata(bloom_key).is_none());
        assert!(
            inverted_cache
                .get_metadata((index_id.file_id(), index_id.version))
                .is_none()
        );
        assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
        assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
    }
}