1pub(crate) mod cache_size;
18
19pub(crate) mod file_cache;
20pub(crate) mod index;
21pub(crate) mod manifest_cache;
22#[cfg(test)]
23pub(crate) mod test_util;
24pub(crate) mod write_cache;
25
26use std::mem;
27use std::ops::Range;
28use std::sync::Arc;
29
30use bytes::Bytes;
31use common_telemetry::warn;
32use datatypes::arrow::record_batch::RecordBatch;
33use datatypes::value::Value;
34use datatypes::vectors::VectorRef;
35use index::bloom_filter_index::{BloomFilterIndexCache, BloomFilterIndexCacheRef};
36use index::result_cache::IndexResultCache;
37use moka::notification::RemovalCause;
38use moka::sync::Cache;
39use object_store::ObjectStore;
40use parquet::file::metadata::{FileMetaData, PageIndexPolicy, ParquetMetaData};
41use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef};
42use snafu::{OptionExt, ResultExt};
43use store_api::metadata::RegionMetadataRef;
44use store_api::storage::{ConcreteDataType, FileId, RegionId, TimeSeriesRowSelector};
45
46use crate::cache::cache_size::parquet_meta_size;
47use crate::cache::file_cache::{FileType, IndexKey};
48use crate::cache::index::inverted_index::{InvertedIndexCache, InvertedIndexCacheRef};
49#[cfg(feature = "vector_index")]
50use crate::cache::index::vector_index::{VectorIndexCache, VectorIndexCacheRef};
51use crate::cache::write_cache::WriteCacheRef;
52use crate::error::{InvalidMetadataSnafu, InvalidParquetSnafu, Result};
53use crate::memtable::record_batch_estimated_size;
54use crate::metrics::{CACHE_BYTES, CACHE_EVICTION, CACHE_HIT, CACHE_MISS};
55use crate::read::Batch;
56use crate::read::range_cache::{RangeScanCacheKey, RangeScanCacheValue};
57use crate::sst::file::{RegionFileId, RegionIndexId};
58use crate::sst::parquet::PARQUET_METADATA_KEY;
59use crate::sst::parquet::reader::MetadataCacheMetrics;
60
/// Metrics label value identifying the SST metadata cache.
const SST_META_TYPE: &str = "sst_meta";
/// Metrics label value identifying the repeated-vector cache.
const VECTOR_TYPE: &str = "vector";
/// Metrics label value identifying the page cache.
const PAGE_TYPE: &str = "page";
/// Label value "file"; not referenced in this part of the module.
// NOTE(review): presumably consumed by the file cache submodule — confirm.
const FILE_TYPE: &str = "file";
/// Label value "index"; not referenced in this part of the module.
// NOTE(review): presumably consumed by the index cache submodules — confirm.
const INDEX_TYPE: &str = "index";
/// Metrics label value identifying the selector result cache.
const SELECTOR_RESULT_TYPE: &str = "selector_result";
/// Metrics label value identifying the range scan result cache.
const RANGE_RESULT_TYPE: &str = "range_result";
75
/// Metadata of one SST file as stored in the SST meta cache.
///
/// Pairs the parquet metadata of the file with the region metadata that
/// belongs to it, plus a precomputed weight for the region metadata.
#[derive(Debug)]
pub(crate) struct CachedSstMeta {
    /// Parquet metadata; `try_new_with_region_metadata` stores it with the
    /// `PARQUET_METADATA_KEY` key-value entry removed.
    parquet_metadata: Arc<ParquetMetaData>,
    /// Region metadata, either supplied by the caller or decoded from the
    /// JSON found under `PARQUET_METADATA_KEY`.
    region_metadata: RegionMetadataRef,
    /// Weight charged for `region_metadata`: the larger of its estimated
    /// size and the length of its JSON encoding.
    region_metadata_weight: usize,
}
86
87impl CachedSstMeta {
88 pub(crate) fn try_new(file_path: &str, parquet_metadata: ParquetMetaData) -> Result<Self> {
89 Self::try_new_with_region_metadata(file_path, parquet_metadata, None)
90 }
91
92 pub(crate) fn try_new_with_region_metadata(
93 file_path: &str,
94 parquet_metadata: ParquetMetaData,
95 region_metadata: Option<RegionMetadataRef>,
96 ) -> Result<Self> {
97 let file_metadata = parquet_metadata.file_metadata();
98 let key_values = file_metadata
99 .key_value_metadata()
100 .context(InvalidParquetSnafu {
101 file: file_path,
102 reason: "missing key value meta",
103 })?;
104 let meta_value = key_values
105 .iter()
106 .find(|kv| kv.key == PARQUET_METADATA_KEY)
107 .with_context(|| InvalidParquetSnafu {
108 file: file_path,
109 reason: format!("key {} not found", PARQUET_METADATA_KEY),
110 })?;
111 let json = meta_value
112 .value
113 .as_ref()
114 .with_context(|| InvalidParquetSnafu {
115 file: file_path,
116 reason: format!("No value for key {}", PARQUET_METADATA_KEY),
117 })?;
118 let region_metadata = match region_metadata {
119 Some(region_metadata) => region_metadata,
120 None => Arc::new(
121 store_api::metadata::RegionMetadata::from_json(json)
122 .context(InvalidMetadataSnafu)?,
123 ),
124 };
125 let region_metadata_weight = region_metadata.estimated_size().max(json.len());
127 let parquet_metadata = Arc::new(strip_region_metadata_from_parquet(parquet_metadata));
128
129 Ok(Self {
130 parquet_metadata,
131 region_metadata,
132 region_metadata_weight,
133 })
134 }
135
136 pub(crate) fn parquet_metadata(&self) -> Arc<ParquetMetaData> {
137 self.parquet_metadata.clone()
138 }
139
140 pub(crate) fn region_metadata(&self) -> RegionMetadataRef {
141 self.region_metadata.clone()
142 }
143}
144
145fn strip_region_metadata_from_parquet(parquet_metadata: ParquetMetaData) -> ParquetMetaData {
146 let file_metadata = parquet_metadata.file_metadata();
147 let filtered_key_values = file_metadata.key_value_metadata().and_then(|key_values| {
148 let filtered = key_values
149 .iter()
150 .filter(|kv| kv.key != PARQUET_METADATA_KEY)
151 .cloned()
152 .collect::<Vec<_>>();
153 (!filtered.is_empty()).then_some(filtered)
154 });
155 let stripped_file_metadata = FileMetaData::new(
156 file_metadata.version(),
157 file_metadata.num_rows(),
158 file_metadata.created_by().map(ToString::to_string),
159 filtered_key_values,
160 file_metadata.schema_descr_ptr(),
161 file_metadata.column_orders().cloned(),
162 );
163
164 let mut builder = parquet_metadata.into_builder();
165 let row_groups = builder.take_row_groups();
166 let column_index = builder.take_column_index();
167 let offset_index = builder.take_offset_index();
168
169 parquet::file::metadata::ParquetMetaDataBuilder::new(stripped_file_metadata)
170 .set_row_groups(row_groups)
171 .set_column_index(column_index)
172 .set_offset_index(offset_index)
173 .build()
174}
175
/// Selects which caches of a [`CacheManager`] an operation is allowed to use.
#[derive(Clone)]
pub enum CacheStrategy {
    /// Every cache of the wrapped manager is used.
    EnableAll(CacheManagerRef),
    /// Only the SST metadata cache, the write cache and puffin eviction are
    /// forwarded to the wrapped manager; all other lookups return `None` and
    /// all other inserts are no-ops.
    // NOTE(review): presumably intended for compaction jobs — confirm with callers.
    Compaction(CacheManagerRef),
    /// No cache is used: lookups return `None` and inserts are no-ops.
    Disabled,
}
190
191impl CacheStrategy {
192 pub(crate) async fn get_sst_meta_data(
194 &self,
195 file_id: RegionFileId,
196 metrics: &mut MetadataCacheMetrics,
197 page_index_policy: PageIndexPolicy,
198 ) -> Option<Arc<CachedSstMeta>> {
199 match self {
200 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
201 cache_manager
202 .get_sst_meta_data(file_id, metrics, page_index_policy)
203 .await
204 }
205 CacheStrategy::Disabled => {
206 metrics.cache_miss += 1;
207 None
208 }
209 }
210 }
211
212 pub(crate) fn get_sst_meta_data_from_mem_cache(
214 &self,
215 file_id: RegionFileId,
216 ) -> Option<Arc<CachedSstMeta>> {
217 match self {
218 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
219 cache_manager.get_sst_meta_data_from_mem_cache(file_id)
220 }
221 CacheStrategy::Disabled => None,
222 }
223 }
224
225 pub fn get_parquet_meta_data_from_mem_cache(
227 &self,
228 file_id: RegionFileId,
229 ) -> Option<Arc<ParquetMetaData>> {
230 self.get_sst_meta_data_from_mem_cache(file_id)
231 .map(|metadata| metadata.parquet_metadata())
232 }
233
234 pub(crate) fn put_sst_meta_data(&self, file_id: RegionFileId, metadata: Arc<CachedSstMeta>) {
236 match self {
237 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
238 cache_manager.put_sst_meta_data(file_id, metadata);
239 }
240 CacheStrategy::Disabled => {}
241 }
242 }
243
244 pub fn put_parquet_meta_data(
246 &self,
247 file_id: RegionFileId,
248 metadata: Arc<ParquetMetaData>,
249 region_metadata: Option<RegionMetadataRef>,
250 ) {
251 match self {
252 CacheStrategy::EnableAll(cache_manager) | CacheStrategy::Compaction(cache_manager) => {
253 cache_manager.put_parquet_meta_data(file_id, metadata, region_metadata);
254 }
255 CacheStrategy::Disabled => {}
256 }
257 }
258
259 pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
261 match self {
262 CacheStrategy::EnableAll(cache_manager) => {
263 cache_manager.remove_parquet_meta_data(file_id);
264 }
265 CacheStrategy::Compaction(cache_manager) => {
266 cache_manager.remove_parquet_meta_data(file_id);
267 }
268 CacheStrategy::Disabled => {}
269 }
270 }
271
272 pub fn get_repeated_vector(
275 &self,
276 data_type: &ConcreteDataType,
277 value: &Value,
278 ) -> Option<VectorRef> {
279 match self {
280 CacheStrategy::EnableAll(cache_manager) => {
281 cache_manager.get_repeated_vector(data_type, value)
282 }
283 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
284 }
285 }
286
287 pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
290 if let CacheStrategy::EnableAll(cache_manager) = self {
291 cache_manager.put_repeated_vector(value, vector);
292 }
293 }
294
295 pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
298 match self {
299 CacheStrategy::EnableAll(cache_manager) => cache_manager.get_pages(page_key),
300 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
301 }
302 }
303
304 pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
307 if let CacheStrategy::EnableAll(cache_manager) = self {
308 cache_manager.put_pages(page_key, pages);
309 }
310 }
311
312 pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
314 match self {
315 CacheStrategy::EnableAll(cache_manager) => {
316 cache_manager.evict_puffin_cache(file_id).await
317 }
318 CacheStrategy::Compaction(cache_manager) => {
319 cache_manager.evict_puffin_cache(file_id).await
320 }
321 CacheStrategy::Disabled => {}
322 }
323 }
324
325 pub fn get_selector_result(
328 &self,
329 selector_key: &SelectorResultKey,
330 ) -> Option<Arc<SelectorResultValue>> {
331 match self {
332 CacheStrategy::EnableAll(cache_manager) => {
333 cache_manager.get_selector_result(selector_key)
334 }
335 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
336 }
337 }
338
339 pub fn put_selector_result(
342 &self,
343 selector_key: SelectorResultKey,
344 result: Arc<SelectorResultValue>,
345 ) {
346 if let CacheStrategy::EnableAll(cache_manager) = self {
347 cache_manager.put_selector_result(selector_key, result);
348 }
349 }
350
351 #[cfg_attr(not(test), allow(dead_code))]
354 pub(crate) fn get_range_result(
355 &self,
356 key: &RangeScanCacheKey,
357 ) -> Option<Arc<RangeScanCacheValue>> {
358 match self {
359 CacheStrategy::EnableAll(cache_manager) => cache_manager.get_range_result(key),
360 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
361 }
362 }
363
364 #[cfg_attr(not(test), allow(dead_code))]
367 pub(crate) fn put_range_result(
368 &self,
369 key: RangeScanCacheKey,
370 result: Arc<RangeScanCacheValue>,
371 ) {
372 if let CacheStrategy::EnableAll(cache_manager) = self {
373 cache_manager.put_range_result(key, result);
374 }
375 }
376
377 pub fn write_cache(&self) -> Option<&WriteCacheRef> {
380 match self {
381 CacheStrategy::EnableAll(cache_manager) => cache_manager.write_cache(),
382 CacheStrategy::Compaction(cache_manager) => cache_manager.write_cache(),
383 CacheStrategy::Disabled => None,
384 }
385 }
386
387 pub fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
390 match self {
391 CacheStrategy::EnableAll(cache_manager) => cache_manager.inverted_index_cache(),
392 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
393 }
394 }
395
396 pub fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
399 match self {
400 CacheStrategy::EnableAll(cache_manager) => cache_manager.bloom_filter_index_cache(),
401 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
402 }
403 }
404
405 #[cfg(feature = "vector_index")]
408 pub fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
409 match self {
410 CacheStrategy::EnableAll(cache_manager) => cache_manager.vector_index_cache(),
411 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
412 }
413 }
414
415 pub fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
418 match self {
419 CacheStrategy::EnableAll(cache_manager) => cache_manager.puffin_metadata_cache(),
420 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
421 }
422 }
423
424 pub fn index_result_cache(&self) -> Option<&IndexResultCache> {
427 match self {
428 CacheStrategy::EnableAll(cache_manager) => cache_manager.index_result_cache(),
429 CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
430 }
431 }
432
433 pub fn maybe_download_background(
435 &self,
436 index_key: IndexKey,
437 remote_path: String,
438 remote_store: ObjectStore,
439 file_size: u64,
440 ) {
441 if let CacheStrategy::EnableAll(cache_manager) = self
442 && let Some(write_cache) = cache_manager.write_cache()
443 {
444 write_cache.file_cache().maybe_download_background(
445 index_key,
446 remote_path,
447 remote_store,
448 file_size,
449 );
450 }
451 }
452}
453
/// Owner of all caches in this module.
///
/// Every cache is optional. The default value has no cache at all, so every
/// lookup misses and every insert is a no-op.
#[derive(Default)]
pub struct CacheManager {
    /// In-memory cache of SST metadata, keyed by region id and file id.
    sst_meta_cache: Option<SstMetaCache>,
    /// Cache of repeated vectors, keyed by data type and value.
    vector_cache: Option<VectorCache>,
    /// Cache of pages read from SST files.
    page_cache: Option<PageCache>,
    /// Write cache; also consulted for SST metadata on an in-memory miss.
    write_cache: Option<WriteCacheRef>,
    /// Cache for inverted index data.
    inverted_index_cache: Option<InvertedIndexCacheRef>,
    /// Cache for bloom filter index data.
    bloom_filter_index_cache: Option<BloomFilterIndexCacheRef>,
    /// Cache for vector index data (only with the `vector_index` feature).
    #[cfg(feature = "vector_index")]
    vector_index_cache: Option<VectorIndexCacheRef>,
    /// Cache for puffin file metadata.
    puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
    /// Cache for time series row selector results.
    selector_result_cache: Option<SelectorResultCache>,
    /// Cache for range scan results.
    #[cfg_attr(not(test), allow(dead_code))]
    range_result_cache: Option<RangeResultCache>,
    /// Cache for index application results.
    index_result_cache: Option<IndexResultCache>,
}
484
/// Shared, reference-counted handle to a [`CacheManager`].
pub type CacheManagerRef = Arc<CacheManager>;
486
487impl CacheManager {
488 pub fn builder() -> CacheManagerBuilder {
490 CacheManagerBuilder::default()
491 }
492
493 pub(crate) async fn get_sst_meta_data(
496 &self,
497 file_id: RegionFileId,
498 metrics: &mut MetadataCacheMetrics,
499 page_index_policy: PageIndexPolicy,
500 ) -> Option<Arc<CachedSstMeta>> {
501 if let Some(metadata) = self.get_sst_meta_data_from_mem_cache(file_id) {
502 metrics.mem_cache_hit += 1;
503 return Some(metadata);
504 }
505
506 let key = IndexKey::new(file_id.region_id(), file_id.file_id(), FileType::Parquet);
507 if let Some(write_cache) = &self.write_cache
508 && let Some(metadata) = write_cache
509 .file_cache()
510 .get_sst_meta_data(key, metrics, page_index_policy)
511 .await
512 {
513 metrics.file_cache_hit += 1;
514 self.put_sst_meta_data(file_id, metadata.clone());
515 return Some(metadata);
516 }
517
518 metrics.cache_miss += 1;
519 None
520 }
521
522 pub(crate) async fn get_parquet_meta_data(
525 &self,
526 file_id: RegionFileId,
527 metrics: &mut MetadataCacheMetrics,
528 page_index_policy: PageIndexPolicy,
529 ) -> Option<Arc<ParquetMetaData>> {
530 self.get_sst_meta_data(file_id, metrics, page_index_policy)
531 .await
532 .map(|metadata| metadata.parquet_metadata())
533 }
534
535 pub(crate) fn get_sst_meta_data_from_mem_cache(
538 &self,
539 file_id: RegionFileId,
540 ) -> Option<Arc<CachedSstMeta>> {
541 self.sst_meta_cache.as_ref().and_then(|sst_meta_cache| {
542 let value = sst_meta_cache.get(&SstMetaKey(file_id.region_id(), file_id.file_id()));
543 update_hit_miss(value, SST_META_TYPE)
544 })
545 }
546
547 pub fn get_parquet_meta_data_from_mem_cache(
550 &self,
551 file_id: RegionFileId,
552 ) -> Option<Arc<ParquetMetaData>> {
553 self.get_sst_meta_data_from_mem_cache(file_id)
554 .map(|metadata| metadata.parquet_metadata())
555 }
556
557 pub(crate) fn put_sst_meta_data(&self, file_id: RegionFileId, metadata: Arc<CachedSstMeta>) {
559 if let Some(cache) = &self.sst_meta_cache {
560 let key = SstMetaKey(file_id.region_id(), file_id.file_id());
561 CACHE_BYTES
562 .with_label_values(&[SST_META_TYPE])
563 .add(meta_cache_weight(&key, &metadata).into());
564 cache.insert(key, metadata);
565 }
566 }
567
568 pub fn put_parquet_meta_data(
570 &self,
571 file_id: RegionFileId,
572 metadata: Arc<ParquetMetaData>,
573 region_metadata: Option<RegionMetadataRef>,
574 ) {
575 if self.sst_meta_cache.is_some() {
576 let file_path = format!(
577 "region_id={}, file_id={}",
578 file_id.region_id(),
579 file_id.file_id()
580 );
581 match CachedSstMeta::try_new_with_region_metadata(
582 &file_path,
583 Arc::unwrap_or_clone(metadata),
584 region_metadata,
585 ) {
586 Ok(metadata) => self.put_sst_meta_data(file_id, Arc::new(metadata)),
587 Err(err) => warn!(
588 err; "Failed to decode region metadata while caching parquet metadata, region_id: {}, file_id: {}",
589 file_id.region_id(),
590 file_id.file_id()
591 ),
592 }
593 }
594 }
595
596 pub fn remove_parquet_meta_data(&self, file_id: RegionFileId) {
598 if let Some(cache) = &self.sst_meta_cache {
599 cache.remove(&SstMetaKey(file_id.region_id(), file_id.file_id()));
600 }
601 }
602
603 pub(crate) fn sst_meta_cache_weighted_size(&self) -> u64 {
605 self.sst_meta_cache
606 .as_ref()
607 .map(|cache| cache.weighted_size())
608 .unwrap_or(0)
609 }
610
611 pub(crate) fn sst_meta_cache_enabled(&self) -> bool {
613 self.sst_meta_cache.is_some()
614 }
615
616 pub fn get_repeated_vector(
618 &self,
619 data_type: &ConcreteDataType,
620 value: &Value,
621 ) -> Option<VectorRef> {
622 self.vector_cache.as_ref().and_then(|vector_cache| {
623 let value = vector_cache.get(&(data_type.clone(), value.clone()));
624 update_hit_miss(value, VECTOR_TYPE)
625 })
626 }
627
628 pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
630 if let Some(cache) = &self.vector_cache {
631 let key = (vector.data_type(), value);
632 CACHE_BYTES
633 .with_label_values(&[VECTOR_TYPE])
634 .add(vector_cache_weight(&key, &vector).into());
635 cache.insert(key, vector);
636 }
637 }
638
639 pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
641 self.page_cache.as_ref().and_then(|page_cache| {
642 let value = page_cache.get(page_key);
643 update_hit_miss(value, PAGE_TYPE)
644 })
645 }
646
647 pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
649 if let Some(cache) = &self.page_cache {
650 CACHE_BYTES
651 .with_label_values(&[PAGE_TYPE])
652 .add(page_cache_weight(&page_key, &pages).into());
653 cache.insert(page_key, pages);
654 }
655 }
656
657 pub async fn evict_puffin_cache(&self, file_id: RegionIndexId) {
659 if let Some(cache) = &self.bloom_filter_index_cache {
660 cache.invalidate_file(file_id.file_id());
661 }
662
663 if let Some(cache) = &self.inverted_index_cache {
664 cache.invalidate_file(file_id.file_id());
665 }
666
667 if let Some(cache) = &self.index_result_cache {
668 cache.invalidate_file(file_id.file_id());
669 }
670
671 #[cfg(feature = "vector_index")]
672 if let Some(cache) = &self.vector_index_cache {
673 cache.invalidate_file(file_id.file_id());
674 }
675
676 if let Some(cache) = &self.puffin_metadata_cache {
677 cache.remove(&file_id.to_string());
678 }
679
680 if let Some(write_cache) = &self.write_cache {
681 write_cache
682 .remove(IndexKey::new(
683 file_id.region_id(),
684 file_id.file_id(),
685 FileType::Puffin(file_id.version),
686 ))
687 .await;
688 }
689 }
690
691 pub fn get_selector_result(
693 &self,
694 selector_key: &SelectorResultKey,
695 ) -> Option<Arc<SelectorResultValue>> {
696 self.selector_result_cache
697 .as_ref()
698 .and_then(|selector_result_cache| selector_result_cache.get(selector_key))
699 }
700
701 pub fn put_selector_result(
703 &self,
704 selector_key: SelectorResultKey,
705 result: Arc<SelectorResultValue>,
706 ) {
707 if let Some(cache) = &self.selector_result_cache {
708 CACHE_BYTES
709 .with_label_values(&[SELECTOR_RESULT_TYPE])
710 .add(selector_result_cache_weight(&selector_key, &result).into());
711 cache.insert(selector_key, result);
712 }
713 }
714
715 #[cfg_attr(not(test), allow(dead_code))]
717 pub(crate) fn get_range_result(
718 &self,
719 key: &RangeScanCacheKey,
720 ) -> Option<Arc<RangeScanCacheValue>> {
721 self.range_result_cache
722 .as_ref()
723 .and_then(|cache| update_hit_miss(cache.get(key), RANGE_RESULT_TYPE))
724 }
725
726 #[cfg_attr(not(test), allow(dead_code))]
728 pub(crate) fn put_range_result(
729 &self,
730 key: RangeScanCacheKey,
731 result: Arc<RangeScanCacheValue>,
732 ) {
733 if let Some(cache) = &self.range_result_cache {
734 CACHE_BYTES
735 .with_label_values(&[RANGE_RESULT_TYPE])
736 .add(range_result_cache_weight(&key, &result).into());
737 cache.insert(key, result);
738 }
739 }
740
741 pub(crate) fn write_cache(&self) -> Option<&WriteCacheRef> {
743 self.write_cache.as_ref()
744 }
745
746 pub(crate) fn inverted_index_cache(&self) -> Option<&InvertedIndexCacheRef> {
747 self.inverted_index_cache.as_ref()
748 }
749
750 pub(crate) fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
751 self.bloom_filter_index_cache.as_ref()
752 }
753
754 #[cfg(feature = "vector_index")]
755 pub(crate) fn vector_index_cache(&self) -> Option<&VectorIndexCacheRef> {
756 self.vector_index_cache.as_ref()
757 }
758
759 pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
760 self.puffin_metadata_cache.as_ref()
761 }
762
763 pub(crate) fn index_result_cache(&self) -> Option<&IndexResultCache> {
764 self.index_result_cache.as_ref()
765 }
766}
767
768pub fn selector_result_cache_miss() {
770 CACHE_MISS.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
771}
772
773pub fn selector_result_cache_hit() {
775 CACHE_HIT.with_label_values(&[SELECTOR_RESULT_TYPE]).inc()
776}
777
/// Builder for [`CacheManager`].
///
/// All sizes default to zero. In `build`, a zero size disables the SST meta,
/// vector, page, selector result, range result and index result caches.
#[derive(Default)]
pub struct CacheManagerBuilder {
    /// Capacity of the SST meta cache.
    sst_meta_cache_size: u64,
    /// Capacity of the repeated-vector cache.
    vector_cache_size: u64,
    /// Capacity of the page cache.
    page_cache_size: u64,
    /// Metadata size passed to the inverted and bloom filter index caches.
    index_metadata_size: u64,
    /// Content size passed to the index caches.
    index_content_size: u64,
    /// Content page size passed to the inverted and bloom filter index caches.
    index_content_page_size: u64,
    /// Capacity of the index result cache.
    index_result_cache_size: u64,
    /// Size passed to the puffin metadata cache.
    puffin_metadata_size: u64,
    /// Optional write cache handed to the manager unchanged.
    write_cache: Option<WriteCacheRef>,
    /// Capacity of the selector result cache.
    selector_result_cache_size: u64,
    /// Capacity of the range result cache.
    range_result_cache_size: u64,
}
793
794impl CacheManagerBuilder {
795 pub fn sst_meta_cache_size(mut self, bytes: u64) -> Self {
797 self.sst_meta_cache_size = bytes;
798 self
799 }
800
801 pub fn vector_cache_size(mut self, bytes: u64) -> Self {
803 self.vector_cache_size = bytes;
804 self
805 }
806
807 pub fn page_cache_size(mut self, bytes: u64) -> Self {
809 self.page_cache_size = bytes;
810 self
811 }
812
813 pub fn write_cache(mut self, cache: Option<WriteCacheRef>) -> Self {
815 self.write_cache = cache;
816 self
817 }
818
819 pub fn index_metadata_size(mut self, bytes: u64) -> Self {
821 self.index_metadata_size = bytes;
822 self
823 }
824
825 pub fn index_content_size(mut self, bytes: u64) -> Self {
827 self.index_content_size = bytes;
828 self
829 }
830
831 pub fn index_content_page_size(mut self, bytes: u64) -> Self {
833 self.index_content_page_size = bytes;
834 self
835 }
836
837 pub fn index_result_cache_size(mut self, bytes: u64) -> Self {
839 self.index_result_cache_size = bytes;
840 self
841 }
842
843 pub fn puffin_metadata_size(mut self, bytes: u64) -> Self {
845 self.puffin_metadata_size = bytes;
846 self
847 }
848
849 pub fn selector_result_cache_size(mut self, bytes: u64) -> Self {
851 self.selector_result_cache_size = bytes;
852 self
853 }
854
855 pub fn range_result_cache_size(mut self, bytes: u64) -> Self {
857 self.range_result_cache_size = bytes;
858 self
859 }
860
861 pub fn build(self) -> CacheManager {
863 fn to_str(cause: RemovalCause) -> &'static str {
864 match cause {
865 RemovalCause::Expired => "expired",
866 RemovalCause::Explicit => "explicit",
867 RemovalCause::Replaced => "replaced",
868 RemovalCause::Size => "size",
869 }
870 }
871
872 let sst_meta_cache = (self.sst_meta_cache_size != 0).then(|| {
873 Cache::builder()
874 .max_capacity(self.sst_meta_cache_size)
875 .weigher(meta_cache_weight)
876 .eviction_listener(|k, v, cause| {
877 let size = meta_cache_weight(&k, &v);
878 CACHE_BYTES
879 .with_label_values(&[SST_META_TYPE])
880 .sub(size.into());
881 CACHE_EVICTION
882 .with_label_values(&[SST_META_TYPE, to_str(cause)])
883 .inc();
884 })
885 .build()
886 });
887 let vector_cache = (self.vector_cache_size != 0).then(|| {
888 Cache::builder()
889 .max_capacity(self.vector_cache_size)
890 .weigher(vector_cache_weight)
891 .eviction_listener(|k, v, cause| {
892 let size = vector_cache_weight(&k, &v);
893 CACHE_BYTES
894 .with_label_values(&[VECTOR_TYPE])
895 .sub(size.into());
896 CACHE_EVICTION
897 .with_label_values(&[VECTOR_TYPE, to_str(cause)])
898 .inc();
899 })
900 .build()
901 });
902 let page_cache = (self.page_cache_size != 0).then(|| {
903 Cache::builder()
904 .max_capacity(self.page_cache_size)
905 .weigher(page_cache_weight)
906 .eviction_listener(|k, v, cause| {
907 let size = page_cache_weight(&k, &v);
908 CACHE_BYTES.with_label_values(&[PAGE_TYPE]).sub(size.into());
909 CACHE_EVICTION
910 .with_label_values(&[PAGE_TYPE, to_str(cause)])
911 .inc();
912 })
913 .build()
914 });
915 let inverted_index_cache = InvertedIndexCache::new(
916 self.index_metadata_size,
917 self.index_content_size,
918 self.index_content_page_size,
919 );
920 let bloom_filter_index_cache = BloomFilterIndexCache::new(
922 self.index_metadata_size,
923 self.index_content_size,
924 self.index_content_page_size,
925 );
926 #[cfg(feature = "vector_index")]
927 let vector_index_cache = (self.index_content_size != 0)
928 .then(|| Arc::new(VectorIndexCache::new(self.index_content_size)));
929 let index_result_cache = (self.index_result_cache_size != 0)
930 .then(|| IndexResultCache::new(self.index_result_cache_size));
931 let puffin_metadata_cache =
932 PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES);
933 let selector_result_cache = (self.selector_result_cache_size != 0).then(|| {
934 Cache::builder()
935 .max_capacity(self.selector_result_cache_size)
936 .weigher(selector_result_cache_weight)
937 .eviction_listener(|k, v, cause| {
938 let size = selector_result_cache_weight(&k, &v);
939 CACHE_BYTES
940 .with_label_values(&[SELECTOR_RESULT_TYPE])
941 .sub(size.into());
942 CACHE_EVICTION
943 .with_label_values(&[SELECTOR_RESULT_TYPE, to_str(cause)])
944 .inc();
945 })
946 .build()
947 });
948 let range_result_cache = (self.range_result_cache_size != 0).then(|| {
949 Cache::builder()
950 .max_capacity(self.range_result_cache_size)
951 .weigher(range_result_cache_weight)
952 .eviction_listener(|k, v, cause| {
953 let size = range_result_cache_weight(&k, &v);
954 CACHE_BYTES
955 .with_label_values(&[RANGE_RESULT_TYPE])
956 .sub(size.into());
957 CACHE_EVICTION
958 .with_label_values(&[RANGE_RESULT_TYPE, to_str(cause)])
959 .inc();
960 })
961 .build()
962 });
963 CacheManager {
964 sst_meta_cache,
965 vector_cache,
966 page_cache,
967 write_cache: self.write_cache,
968 inverted_index_cache: Some(Arc::new(inverted_index_cache)),
969 bloom_filter_index_cache: Some(Arc::new(bloom_filter_index_cache)),
970 #[cfg(feature = "vector_index")]
971 vector_index_cache,
972 puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)),
973 selector_result_cache,
974 range_result_cache,
975 index_result_cache,
976 }
977 }
978}
979
980fn meta_cache_weight(k: &SstMetaKey, v: &Arc<CachedSstMeta>) -> u32 {
981 (k.estimated_size() + parquet_meta_size(&v.parquet_metadata) + v.region_metadata_weight) as u32
983}
984
985fn vector_cache_weight(_k: &(ConcreteDataType, Value), v: &VectorRef) -> u32 {
986 (mem::size_of::<ConcreteDataType>() + mem::size_of::<Value>() + v.memory_size()) as u32
988}
989
990fn page_cache_weight(k: &PageKey, v: &Arc<PageValue>) -> u32 {
991 (k.estimated_size() + v.estimated_size()) as u32
992}
993
994fn selector_result_cache_weight(k: &SelectorResultKey, v: &Arc<SelectorResultValue>) -> u32 {
995 (mem::size_of_val(k) + v.estimated_size()) as u32
996}
997
998fn range_result_cache_weight(k: &RangeScanCacheKey, v: &Arc<RangeScanCacheValue>) -> u32 {
999 (k.estimated_size() + v.estimated_size()) as u32
1000}
1001
1002fn update_hit_miss<T>(value: Option<T>, cache_type: &str) -> Option<T> {
1004 if value.is_some() {
1005 CACHE_HIT.with_label_values(&[cache_type]).inc();
1006 } else {
1007 CACHE_MISS.with_label_values(&[cache_type]).inc();
1008 }
1009 value
1010}
1011
/// Key of the SST meta cache: the region id and file id of an SST file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SstMetaKey(RegionId, FileId);
1015
1016impl SstMetaKey {
1017 fn estimated_size(&self) -> usize {
1019 mem::size_of::<Self>()
1020 }
1021}
1022
/// Location of the pages of one column chunk: region, file, row group and
/// column.
// NOTE(review): not referenced in the visible part of this module — confirm it is still used.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ColumnPagePath {
    /// Region the file belongs to.
    region_id: RegionId,
    /// Id of the SST file.
    file_id: FileId,
    /// Index of the row group within the file.
    row_group_idx: usize,
    /// Index of the column within the row group.
    column_idx: usize,
}
1035
/// Key of the page cache: a set of ranges within one row group of a file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PageKey {
    /// Id of the SST file.
    file_id: FileId,
    /// Index of the row group within the file.
    row_group_idx: usize,
    /// Ranges the cached pages were read from.
    // NOTE(review): presumably byte offsets within the file — confirm against callers.
    ranges: Vec<Range<u64>>,
}
1049
1050impl PageKey {
1051 pub fn new(file_id: FileId, row_group_idx: usize, ranges: Vec<Range<u64>>) -> PageKey {
1053 PageKey {
1054 file_id,
1055 row_group_idx,
1056 ranges,
1057 }
1058 }
1059
1060 fn estimated_size(&self) -> usize {
1062 mem::size_of::<Self>() + mem::size_of_val(self.ranges.as_slice())
1063 }
1064}
1065
/// Value of the page cache: the buffers read for a [`PageKey`].
#[derive(Default)]
pub struct PageValue {
    /// Cached buffers.
    // NOTE(review): the field name suggests these hold compressed page data — confirm.
    pub compressed: Vec<Bytes>,
    /// Size charged for the cached data when estimating the entry weight.
    pub page_size: u64,
}
1075
1076impl PageValue {
1077 pub fn new(bytes: Vec<Bytes>, page_size: u64) -> PageValue {
1079 PageValue {
1080 compressed: bytes,
1081 page_size,
1082 }
1083 }
1084
1085 fn estimated_size(&self) -> usize {
1087 mem::size_of::<Self>()
1088 + self.page_size as usize
1089 + self.compressed.iter().map(mem::size_of_val).sum::<usize>()
1090 }
1091}
1092
/// Key of the selector result cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SelectorResultKey {
    /// Id of the SST file.
    pub file_id: FileId,
    /// Index of the row group within the file.
    pub row_group_idx: usize,
    /// Time series row selector that produced the result.
    pub selector: TimeSeriesRowSelector,
}
1103
/// Batches cached for a selector, in one of two representations.
pub enum SelectorResult {
    /// Result stored as [`Batch`]es.
    PrimaryKey(Vec<Batch>),
    /// Result stored as arrow [`RecordBatch`]es.
    Flat(Vec<RecordBatch>),
}
1111
/// Value of the selector result cache.
pub struct SelectorResultValue {
    /// Cached batches.
    pub result: SelectorResult,
    /// Projection associated with the cached batches.
    // NOTE(review): presumably column indices used when reading — confirm against callers.
    pub projection: Vec<usize>,
}
1119
1120impl SelectorResultValue {
1121 pub fn new(result: Vec<Batch>, projection: Vec<usize>) -> SelectorResultValue {
1123 SelectorResultValue {
1124 result: SelectorResult::PrimaryKey(result),
1125 projection,
1126 }
1127 }
1128
1129 pub fn new_flat(result: Vec<RecordBatch>, projection: Vec<usize>) -> SelectorResultValue {
1131 SelectorResultValue {
1132 result: SelectorResult::Flat(result),
1133 projection,
1134 }
1135 }
1136
1137 fn estimated_size(&self) -> usize {
1139 match &self.result {
1140 SelectorResult::PrimaryKey(batches) => {
1141 batches.iter().map(|batch| batch.memory_size()).sum()
1142 }
1143 SelectorResult::Flat(batches) => batches.iter().map(record_batch_estimated_size).sum(),
1144 }
1145 }
1146}
1147
/// Maps an SST file key to its cached metadata.
type SstMetaCache = Cache<SstMetaKey, Arc<CachedSstMeta>>;
/// Maps a (data type, value) pair to a cached vector.
type VectorCache = Cache<(ConcreteDataType, Value), VectorRef>;
/// Maps a page key to the cached pages.
type PageCache = Cache<PageKey, Arc<PageValue>>;
/// Maps a selector key to the cached selector result.
type SelectorResultCache = Cache<SelectorResultKey, Arc<SelectorResultValue>>;
/// Maps a range scan key to the cached range scan result.
type RangeResultCache = Cache<RangeScanCacheKey, Arc<RangeScanCacheValue>>;
1160
1161#[cfg(test)]
1162mod tests {
1163 use std::sync::Arc;
1164
1165 use api::v1::SemanticType;
1166 use api::v1::index::{BloomFilterMeta, InvertedIndexMetas};
1167 use datatypes::schema::ColumnSchema;
1168 use datatypes::vectors::Int64Vector;
1169 use puffin::file_metadata::FileMetadata;
1170 use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
1171 use store_api::storage::ColumnId;
1172
1173 use super::*;
1174 use crate::cache::index::bloom_filter_index::Tag;
1175 use crate::cache::index::result_cache::PredicateKey;
1176 use crate::cache::test_util::{
1177 parquet_meta, sst_parquet_meta, sst_parquet_meta_with_region_metadata,
1178 };
1179 use crate::read::range_cache::{
1180 RangeScanCacheKey, RangeScanCacheValue, ScanRequestFingerprintBuilder,
1181 };
1182 use crate::sst::parquet::row_selection::RowGroupSelection;
1183
1184 #[tokio::test]
1185 async fn test_disable_cache() {
1186 let cache = CacheManager::default();
1187 assert!(cache.sst_meta_cache.is_none());
1188 assert!(cache.vector_cache.is_none());
1189 assert!(cache.page_cache.is_none());
1190
1191 let region_id = RegionId::new(1, 1);
1192 let file_id = RegionFileId::new(region_id, FileId::random());
1193 let metadata = parquet_meta();
1194 let mut metrics = MetadataCacheMetrics::default();
1195 cache.put_parquet_meta_data(file_id, metadata, None);
1196 assert!(
1197 cache
1198 .get_parquet_meta_data(file_id, &mut metrics, Default::default())
1199 .await
1200 .is_none()
1201 );
1202
1203 let value = Value::Int64(10);
1204 let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
1205 cache.put_repeated_vector(value.clone(), vector.clone());
1206 assert!(
1207 cache
1208 .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
1209 .is_none()
1210 );
1211
1212 let key = PageKey::new(file_id.file_id(), 1, vec![Range { start: 0, end: 5 }]);
1213 let pages = Arc::new(PageValue::default());
1214 cache.put_pages(key.clone(), pages);
1215 assert!(cache.get_pages(&key).is_none());
1216
1217 assert!(cache.write_cache().is_none());
1218 }
1219
1220 #[tokio::test]
1221 async fn test_parquet_meta_cache() {
1222 let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
1223 let mut metrics = MetadataCacheMetrics::default();
1224 let region_id = RegionId::new(1, 1);
1225 let file_id = RegionFileId::new(region_id, FileId::random());
1226 assert!(
1227 cache
1228 .get_parquet_meta_data(file_id, &mut metrics, Default::default())
1229 .await
1230 .is_none()
1231 );
1232 let (metadata, region_metadata) = sst_parquet_meta();
1233 cache.put_parquet_meta_data(file_id, metadata, None);
1234 let cached = cache
1235 .get_sst_meta_data(file_id, &mut metrics, Default::default())
1236 .await
1237 .unwrap();
1238 assert_eq!(region_metadata, cached.region_metadata());
1239 assert!(
1240 cached
1241 .parquet_metadata()
1242 .file_metadata()
1243 .key_value_metadata()
1244 .is_none_or(|key_values| {
1245 key_values
1246 .iter()
1247 .all(|key_value| key_value.key != PARQUET_METADATA_KEY)
1248 })
1249 );
1250 cache.remove_parquet_meta_data(file_id);
1251 assert!(
1252 cache
1253 .get_parquet_meta_data(file_id, &mut metrics, Default::default())
1254 .await
1255 .is_none()
1256 );
1257 }
1258
1259 #[tokio::test]
1260 async fn test_parquet_meta_cache_with_provided_region_metadata() {
1261 let cache = CacheManager::builder().sst_meta_cache_size(2000).build();
1262 let mut metrics = MetadataCacheMetrics::default();
1263 let region_id = RegionId::new(1, 1);
1264 let file_id = RegionFileId::new(region_id, FileId::random());
1265 let (metadata, region_metadata) = sst_parquet_meta();
1266
1267 cache.put_parquet_meta_data(file_id, metadata, Some(region_metadata.clone()));
1268
1269 let cached = cache
1270 .get_sst_meta_data(file_id, &mut metrics, Default::default())
1271 .await
1272 .unwrap();
1273 assert!(Arc::ptr_eq(®ion_metadata, &cached.region_metadata()));
1274 }
1275
1276 #[test]
1277 fn test_meta_cache_weight_accounts_for_decoded_region_metadata() {
1278 let region_metadata = Arc::new(wide_region_metadata(128));
1279 let json_len = region_metadata.to_json().unwrap().len();
1280 let metadata = sst_parquet_meta_with_region_metadata(region_metadata.clone());
1281 let cached = Arc::new(
1282 CachedSstMeta::try_new("test.parquet", Arc::unwrap_or_clone(metadata)).unwrap(),
1283 );
1284 let key = SstMetaKey(region_metadata.region_id, FileId::random());
1285
1286 assert!(cached.region_metadata_weight > json_len);
1287 assert_eq!(
1288 meta_cache_weight(&key, &cached) as usize,
1289 key.estimated_size()
1290 + parquet_meta_size(&cached.parquet_metadata)
1291 + cached.region_metadata_weight
1292 );
1293 }
1294
1295 #[test]
1296 fn test_repeated_vector_cache() {
1297 let cache = CacheManager::builder().vector_cache_size(4096).build();
1298 let value = Value::Int64(10);
1299 assert!(
1300 cache
1301 .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
1302 .is_none()
1303 );
1304 let vector: VectorRef = Arc::new(Int64Vector::from_slice([10, 10, 10, 10]));
1305 cache.put_repeated_vector(value.clone(), vector.clone());
1306 let cached = cache
1307 .get_repeated_vector(&ConcreteDataType::int64_datatype(), &value)
1308 .unwrap();
1309 assert_eq!(vector, cached);
1310 }
1311
1312 #[test]
1313 fn test_page_cache() {
1314 let cache = CacheManager::builder().page_cache_size(1000).build();
1315 let file_id = FileId::random();
1316 let key = PageKey::new(file_id, 0, vec![(0..10), (10..20)]);
1317 assert!(cache.get_pages(&key).is_none());
1318 let pages = Arc::new(PageValue::default());
1319 cache.put_pages(key.clone(), pages);
1320 assert!(cache.get_pages(&key).is_some());
1321 }
1322
1323 #[test]
1324 fn test_selector_result_cache() {
1325 let cache = CacheManager::builder()
1326 .selector_result_cache_size(1000)
1327 .build();
1328 let file_id = FileId::random();
1329 let key = SelectorResultKey {
1330 file_id,
1331 row_group_idx: 0,
1332 selector: TimeSeriesRowSelector::LastRow,
1333 };
1334 assert!(cache.get_selector_result(&key).is_none());
1335 let result = Arc::new(SelectorResultValue::new(Vec::new(), Vec::new()));
1336 cache.put_selector_result(key, result);
1337 assert!(cache.get_selector_result(&key).is_some());
1338 }
1339
1340 #[test]
1341 fn test_range_result_cache() {
1342 let cache = Arc::new(
1343 CacheManager::builder()
1344 .range_result_cache_size(1024 * 1024)
1345 .build(),
1346 );
1347
1348 let key = RangeScanCacheKey {
1349 region_id: RegionId::new(1, 1),
1350 row_groups: vec![(FileId::random(), 0)],
1351 scan: ScanRequestFingerprintBuilder {
1352 read_column_ids: vec![],
1353 read_column_types: vec![],
1354 filters: vec!["tag_0 = 1".to_string()],
1355 time_filters: vec![],
1356 series_row_selector: None,
1357 append_mode: false,
1358 filter_deleted: true,
1359 merge_mode: crate::region::options::MergeMode::LastRow,
1360 partition_expr_version: 0,
1361 }
1362 .build(),
1363 };
1364 let value = Arc::new(RangeScanCacheValue::new(Vec::new()));
1365
1366 assert!(cache.get_range_result(&key).is_none());
1367 cache.put_range_result(key.clone(), value.clone());
1368 assert!(cache.get_range_result(&key).is_some());
1369
1370 let enable_all = CacheStrategy::EnableAll(cache.clone());
1371 assert!(enable_all.get_range_result(&key).is_some());
1372
1373 let compaction = CacheStrategy::Compaction(cache.clone());
1374 assert!(compaction.get_range_result(&key).is_none());
1375 compaction.put_range_result(key.clone(), value.clone());
1376 assert!(cache.get_range_result(&key).is_some());
1377
1378 let disabled = CacheStrategy::Disabled;
1379 assert!(disabled.get_range_result(&key).is_none());
1380 disabled.put_range_result(key.clone(), value);
1381 assert!(cache.get_range_result(&key).is_some());
1382 }
1383
1384 #[tokio::test]
1385 async fn test_evict_puffin_cache_clears_all_entries() {
1386 use std::collections::{BTreeMap, HashMap};
1387
1388 let cache = CacheManager::builder()
1389 .index_metadata_size(128)
1390 .index_content_size(128)
1391 .index_content_page_size(64)
1392 .index_result_cache_size(128)
1393 .puffin_metadata_size(128)
1394 .build();
1395 let cache = Arc::new(cache);
1396
1397 let region_id = RegionId::new(1, 1);
1398 let index_id = RegionIndexId::new(RegionFileId::new(region_id, FileId::random()), 0);
1399 let column_id: ColumnId = 1;
1400
1401 let bloom_cache = cache.bloom_filter_index_cache().unwrap().clone();
1402 let inverted_cache = cache.inverted_index_cache().unwrap().clone();
1403 let result_cache = cache.index_result_cache().unwrap();
1404 let puffin_metadata_cache = cache.puffin_metadata_cache().unwrap().clone();
1405
1406 let bloom_key = (
1407 index_id.file_id(),
1408 index_id.version,
1409 column_id,
1410 Tag::Skipping,
1411 );
1412 bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
1413 inverted_cache.put_metadata(
1414 (index_id.file_id(), index_id.version),
1415 Arc::new(InvertedIndexMetas::default()),
1416 );
1417 let predicate = PredicateKey::new_bloom(Arc::new(BTreeMap::new()));
1418 let selection = Arc::new(RowGroupSelection::default());
1419 result_cache.put(predicate.clone(), index_id.file_id(), selection);
1420 let file_id_str = index_id.to_string();
1421 let metadata = Arc::new(FileMetadata {
1422 blobs: Vec::new(),
1423 properties: HashMap::new(),
1424 });
1425 puffin_metadata_cache.put_metadata(file_id_str.clone(), metadata);
1426
1427 assert!(bloom_cache.get_metadata(bloom_key).is_some());
1428 assert!(
1429 inverted_cache
1430 .get_metadata((index_id.file_id(), index_id.version))
1431 .is_some()
1432 );
1433 assert!(result_cache.get(&predicate, index_id.file_id()).is_some());
1434 assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_some());
1435
1436 cache.evict_puffin_cache(index_id).await;
1437
1438 assert!(bloom_cache.get_metadata(bloom_key).is_none());
1439 assert!(
1440 inverted_cache
1441 .get_metadata((index_id.file_id(), index_id.version))
1442 .is_none()
1443 );
1444 assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
1445 assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
1446
1447 bloom_cache.put_metadata(bloom_key, Arc::new(BloomFilterMeta::default()));
1449 inverted_cache.put_metadata(
1450 (index_id.file_id(), index_id.version),
1451 Arc::new(InvertedIndexMetas::default()),
1452 );
1453 result_cache.put(
1454 predicate.clone(),
1455 index_id.file_id(),
1456 Arc::new(RowGroupSelection::default()),
1457 );
1458 puffin_metadata_cache.put_metadata(
1459 file_id_str.clone(),
1460 Arc::new(FileMetadata {
1461 blobs: Vec::new(),
1462 properties: HashMap::new(),
1463 }),
1464 );
1465
1466 let strategy = CacheStrategy::EnableAll(cache.clone());
1467 strategy.evict_puffin_cache(index_id).await;
1468
1469 assert!(bloom_cache.get_metadata(bloom_key).is_none());
1470 assert!(
1471 inverted_cache
1472 .get_metadata((index_id.file_id(), index_id.version))
1473 .is_none()
1474 );
1475 assert!(result_cache.get(&predicate, index_id.file_id()).is_none());
1476 assert!(puffin_metadata_cache.get_metadata(&file_id_str).is_none());
1477 }
1478
1479 fn wide_region_metadata(column_count: u32) -> RegionMetadata {
1480 let region_id = RegionId::new(1024, 7);
1481 let mut builder = RegionMetadataBuilder::new(region_id);
1482 let mut primary_key = Vec::new();
1483
1484 for column_id in 0..column_count {
1485 let semantic_type = if column_id < 32 {
1486 primary_key.push(column_id);
1487 SemanticType::Tag
1488 } else {
1489 SemanticType::Field
1490 };
1491 let mut column_schema = ColumnSchema::new(
1492 format!("wide_column_{column_id}"),
1493 ConcreteDataType::string_datatype(),
1494 true,
1495 );
1496 column_schema
1497 .mut_metadata()
1498 .insert(format!("cache_key_{column_id}"), "cache_value".repeat(4));
1499 builder.push_column_metadata(ColumnMetadata {
1500 column_schema,
1501 semantic_type,
1502 column_id,
1503 });
1504 }
1505
1506 builder.push_column_metadata(ColumnMetadata {
1507 column_schema: ColumnSchema::new(
1508 "ts",
1509 ConcreteDataType::timestamp_millisecond_datatype(),
1510 false,
1511 ),
1512 semantic_type: SemanticType::Timestamp,
1513 column_id: column_count,
1514 });
1515 builder.primary_key(primary_key);
1516
1517 builder.build().unwrap()
1518 }
1519}