1use std::cmp;
18use std::path::Path;
19use std::time::Duration;
20
21use common_base::memory_limit::MemoryLimit;
22use common_base::readable_size::ReadableSize;
23use common_memory_manager::OnExhaustedPolicy;
24use common_stat::{get_total_cpu_cores, get_total_memory_readable};
25use common_telemetry::warn;
26use serde::{Deserialize, Serialize};
27use serde_with::serde_as;
28
29use crate::cache::file_cache::DEFAULT_INDEX_CACHE_PERCENT;
30use crate::error::Result;
31use crate::gc::GcConfig;
32use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
33
/// Lower bound (5 MB) that `MitoConfig::sanitize` enforces on `sst_write_buffer_size`
/// and that `IndexConfig::sanitize` enforces on the index `write_buffer_size`.
const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
/// Default value of `MitoConfig::max_concurrent_scan_files`.
pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 384;

/// Total system memory is divided by this factor to size the global write buffer.
const GLOBAL_WRITE_BUFFER_SIZE_FACTOR: u64 = 8;
/// Total system memory is divided by this factor to size the SST meta cache and the
/// index metadata cache.
const SST_META_CACHE_SIZE_FACTOR: u64 = 32;
/// Total system memory is divided by this factor to size the vector, selector result,
/// range result and index content/result caches.
const MEM_CACHE_SIZE_FACTOR: u64 = 16;
/// Total system memory is divided by this factor to size the page cache.
const PAGE_CACHE_SIZE_FACTOR: u64 = 8;
/// Total system memory is divided by this factor to derive the `MemoryThreshold::Auto`
/// threshold used when creating indexes.
const INDEX_CREATE_MEM_THRESHOLD_FACTOR: u64 = 16;

/// A 3 second timeout.
// NOTE(review): not referenced in this part of the file; presumably bounds an
// option-fetching call elsewhere in the crate — confirm against the callers.
pub(crate) const FETCH_OPTION_TIMEOUT: Duration = Duration::from_secs(3);
51
/// Configuration options grouped under `MitoConfig`: worker, manifest, background job,
/// buffer/cache, scan and index settings.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input is
/// taken from `MitoConfig::default()`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct MitoConfig {
    /// Number of workers. Defaults to `divide_num_cpus(2)`; `sanitize` replaces 0 with
    /// that same value.
    pub num_workers: usize,
    /// Worker channel size. Defaults to 128; `sanitize` raises 0 to 1.
    pub worker_channel_size: usize,
    /// Worker request batch size. Defaults to 64.
    pub worker_request_batch_size: usize,

    /// Manifest checkpoint distance. Defaults to 10.
    pub manifest_checkpoint_distance: u64,
    /// Experimental: removed-file count kept by the manifest. Defaults to 256.
    pub experimental_manifest_keep_removed_file_count: usize,
    /// Experimental: TTL of removed-file records kept by the manifest. Defaults to one
    /// hour. (De)serialized through `humantime_serde`.
    #[serde(with = "humantime_serde")]
    pub experimental_manifest_keep_removed_file_ttl: Duration,
    /// Whether to compress the manifest. Defaults to `false`.
    pub compress_manifest: bool,

    /// Max background index builds. Defaults to `divide_num_cpus(8)`.
    pub max_background_index_builds: usize,
    /// Max background flushes. Defaults to `divide_num_cpus(2)`; `sanitize` replaces 0
    /// with that value.
    pub max_background_flushes: usize,
    /// Max background compactions. Defaults to `divide_num_cpus(4)`; `sanitize`
    /// replaces 0 with that value.
    pub max_background_compactions: usize,
    /// Max background purges. Defaults to `get_total_cpu_cores()`; `sanitize` replaces
    /// 0 with that value.
    pub max_background_purges: usize,
    /// Experimental: memory limit for compaction. Defaults to `MemoryLimit::Unlimited`.
    pub experimental_compaction_memory_limit: MemoryLimit,
    /// Experimental: policy applied when the compaction memory limit is exhausted.
    /// Defaults to `OnExhaustedPolicy::default()`.
    pub experimental_compaction_on_exhausted: OnExhaustedPolicy,

    /// Auto flush interval. Defaults to 30 minutes. (De)serialized through
    /// `humantime_serde`.
    #[serde(with = "humantime_serde")]
    pub auto_flush_interval: Duration,
    /// Global write buffer size. Defaults to 1 GB, or to
    /// `min(total memory / 8, 1 GB)` when the total system memory is known.
    pub global_write_buffer_size: ReadableSize,
    /// Global write buffer reject size. Defaults to twice `global_write_buffer_size`;
    /// `sanitize` resets it to that when it is not strictly greater than
    /// `global_write_buffer_size`.
    pub global_write_buffer_reject_size: ReadableSize,

    /// SST meta cache size. Defaults to 128 MB, or to `min(total memory / 32, 128 MB)`
    /// when the total system memory is known.
    pub sst_meta_cache_size: ReadableSize,
    /// Vector cache size. Defaults to 512 MB, or to `min(total memory / 16, 512 MB)`
    /// when the total system memory is known.
    pub vector_cache_size: ReadableSize,
    /// Page cache size. Defaults to 512 MB, or to `total memory / 8` (uncapped) when
    /// the total system memory is known.
    pub page_cache_size: ReadableSize,
    /// Selector result cache size. Same default rule as `vector_cache_size`.
    pub selector_result_cache_size: ReadableSize,
    /// Range result cache size. Same default rule as `vector_cache_size`.
    pub range_result_cache_size: ReadableSize,
    /// Whether the write cache is enabled. Defaults to `false`.
    pub enable_write_cache: bool,
    /// Write cache path. Defaults to an empty string; `sanitize` replaces a blank
    /// value with `data_home`.
    pub write_cache_path: String,
    /// Write cache size. Defaults to 5 GB.
    pub write_cache_size: ReadableSize,
    /// Optional write cache TTL. Defaults to `None`. (De)serialized through
    /// `humantime_serde`.
    #[serde(with = "humantime_serde")]
    pub write_cache_ttl: Option<Duration>,
    /// Whether to preload the index cache. Defaults to `true`.
    pub preload_index_cache: bool,
    /// Index cache percentage. Defaults to `DEFAULT_INDEX_CACHE_PERCENT`; `sanitize`
    /// resets it to that default when it is 0 or at least 100.
    pub index_cache_percent: u8,
    /// Whether to refill the cache on read. Defaults to `true`.
    pub enable_refill_cache_on_read: bool,
    /// Manifest cache size. Defaults to 256 MB.
    pub manifest_cache_size: ReadableSize,

    /// SST write buffer size. Defaults to `DEFAULT_WRITE_BUFFER_SIZE`; `sanitize`
    /// raises it to `MULTIPART_UPLOAD_MINIMUM_SIZE` (5 MB) when smaller.
    pub sst_write_buffer_size: ReadableSize,
    /// Max concurrent scan files. Defaults to `DEFAULT_MAX_CONCURRENT_SCAN_FILES`.
    pub max_concurrent_scan_files: usize,
    /// Whether stale entries are allowed. Defaults to `false`.
    pub allow_stale_entries: bool,
    /// Memory limit for scans. Defaults to `MemoryLimit::default()`.
    pub scan_memory_limit: MemoryLimit,
    /// Policy applied when the scan memory limit is exhausted. Defaults to
    /// `OnExhaustedPolicy::Fail`.
    pub scan_memory_on_exhausted: OnExhaustedPolicy,

    /// Settings shared by all index types.
    pub index: IndexConfig,
    /// Inverted index settings.
    pub inverted_index: InvertedIndexConfig,
    /// Full-text index settings.
    pub fulltext_index: FulltextIndexConfig,
    /// Bloom filter index settings.
    pub bloom_filter_index: BloomFilterConfig,
    /// Vector index settings; only present with the `vector_index` feature.
    #[cfg(feature = "vector_index")]
    pub vector_index: VectorIndexConfig,

    /// Minimum compaction interval. Defaults to zero. (De)serialized through
    /// `humantime_serde`.
    #[serde(with = "humantime_serde")]
    pub min_compaction_interval: Duration,

    /// Whether the flat format is the default. Defaults to `true`.
    pub default_flat_format: bool,

    /// GC settings. Defaults to `GcConfig::default()`.
    pub gc: GcConfig,
}
180
181impl Default for MitoConfig {
182 fn default() -> Self {
183 let mut mito_config = MitoConfig {
184 num_workers: divide_num_cpus(2),
185 worker_channel_size: 128,
186 worker_request_batch_size: 64,
187 manifest_checkpoint_distance: 10,
188 experimental_manifest_keep_removed_file_count: 256,
189 experimental_manifest_keep_removed_file_ttl: Duration::from_secs(60 * 60),
190 compress_manifest: false,
191 max_background_index_builds: divide_num_cpus(8),
192 max_background_flushes: divide_num_cpus(2),
193 max_background_compactions: divide_num_cpus(4),
194 max_background_purges: get_total_cpu_cores(),
195 experimental_compaction_memory_limit: MemoryLimit::Unlimited,
196 experimental_compaction_on_exhausted: OnExhaustedPolicy::default(),
197 auto_flush_interval: Duration::from_secs(30 * 60),
198 global_write_buffer_size: ReadableSize::gb(1),
199 global_write_buffer_reject_size: ReadableSize::gb(2),
200 sst_meta_cache_size: ReadableSize::mb(128),
201 vector_cache_size: ReadableSize::mb(512),
202 page_cache_size: ReadableSize::mb(512),
203 selector_result_cache_size: ReadableSize::mb(512),
204 range_result_cache_size: ReadableSize::mb(512),
205 enable_write_cache: false,
206 write_cache_path: String::new(),
207 write_cache_size: ReadableSize::gb(5),
208 write_cache_ttl: None,
209 preload_index_cache: true,
210 index_cache_percent: DEFAULT_INDEX_CACHE_PERCENT,
211 enable_refill_cache_on_read: true,
212 manifest_cache_size: ReadableSize::mb(256),
213 sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
214 max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
215 allow_stale_entries: false,
216 scan_memory_limit: MemoryLimit::default(),
217 scan_memory_on_exhausted: OnExhaustedPolicy::Fail,
218 index: IndexConfig::default(),
219 inverted_index: InvertedIndexConfig::default(),
220 fulltext_index: FulltextIndexConfig::default(),
221 bloom_filter_index: BloomFilterConfig::default(),
222 #[cfg(feature = "vector_index")]
223 vector_index: VectorIndexConfig::default(),
224 min_compaction_interval: Duration::from_secs(0),
225 default_flat_format: true,
226 gc: GcConfig::default(),
227 };
228
229 if let Some(sys_memory) = get_total_memory_readable() {
231 mito_config.adjust_buffer_and_cache_size(sys_memory);
232 }
233
234 mito_config
235 }
236}
237
238impl MitoConfig {
239 pub fn sanitize(&mut self, data_home: &str) -> Result<()> {
243 if self.num_workers == 0 {
245 self.num_workers = divide_num_cpus(2);
246 }
247
248 if self.worker_channel_size == 0 {
250 warn!("Sanitize channel size 0 to 1");
251 self.worker_channel_size = 1;
252 }
253
254 if self.max_background_flushes == 0 {
255 warn!(
256 "Sanitize max background flushes 0 to {}",
257 divide_num_cpus(2)
258 );
259 self.max_background_flushes = divide_num_cpus(2);
260 }
261 if self.max_background_compactions == 0 {
262 warn!(
263 "Sanitize max background compactions 0 to {}",
264 divide_num_cpus(4)
265 );
266 self.max_background_compactions = divide_num_cpus(4);
267 }
268 if self.max_background_purges == 0 {
269 let cpu_cores = get_total_cpu_cores();
270 warn!("Sanitize max background purges 0 to {}", cpu_cores);
271 self.max_background_purges = cpu_cores;
272 }
273
274 if self.global_write_buffer_reject_size <= self.global_write_buffer_size {
275 self.global_write_buffer_reject_size = self.global_write_buffer_size * 2;
276 warn!(
277 "Sanitize global write buffer reject size to {}",
278 self.global_write_buffer_reject_size
279 );
280 }
281
282 if self.sst_write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
283 self.sst_write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
284 warn!(
285 "Sanitize sst write buffer size to {}",
286 self.sst_write_buffer_size
287 );
288 }
289
290 if self.write_cache_path.trim().is_empty() {
292 self.write_cache_path = data_home.to_string();
293 }
294
295 if self.index_cache_percent == 0 || self.index_cache_percent >= 100 {
297 warn!(
298 "Invalid index_cache_percent {}, resetting to default {}",
299 self.index_cache_percent, DEFAULT_INDEX_CACHE_PERCENT
300 );
301 self.index_cache_percent = DEFAULT_INDEX_CACHE_PERCENT;
302 }
303
304 self.index.sanitize(data_home, &self.inverted_index)?;
305
306 Ok(())
307 }
308
309 fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
310 let global_write_buffer_size = cmp::min(
312 sys_memory / GLOBAL_WRITE_BUFFER_SIZE_FACTOR,
313 ReadableSize::gb(1),
314 );
315 let global_write_buffer_reject_size = global_write_buffer_size * 2;
317 let sst_meta_cache_size = cmp::min(
319 sys_memory / SST_META_CACHE_SIZE_FACTOR,
320 ReadableSize::mb(128),
321 );
322 let mem_cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(512));
324 let page_cache_size = sys_memory / PAGE_CACHE_SIZE_FACTOR;
325
326 self.global_write_buffer_size = global_write_buffer_size;
327 self.global_write_buffer_reject_size = global_write_buffer_reject_size;
328 self.sst_meta_cache_size = sst_meta_cache_size;
329 self.vector_cache_size = mem_cache_size;
330 self.page_cache_size = page_cache_size;
331 self.selector_result_cache_size = mem_cache_size;
332 self.range_result_cache_size = mem_cache_size;
333
334 self.index.adjust_buffer_and_cache_size(sys_memory);
335 }
336
337 #[cfg(test)]
339 pub fn enable_write_cache(
340 mut self,
341 path: String,
342 size: ReadableSize,
343 ttl: Option<Duration>,
344 ) -> Self {
345 self.enable_write_cache = true;
346 self.write_cache_path = path;
347 self.write_cache_size = size;
348 self.write_cache_ttl = ttl;
349 self
350 }
351}
352
/// Index build mode. Variants are (de)serialized in snake case (`sync` / `async`).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum IndexBuildMode {
    /// Synchronous build mode; this is the default variant.
    #[default]
    Sync,
    /// Asynchronous build mode.
    Async,
}
363
/// Settings shared by all index types.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input is
/// taken from `IndexConfig::default()`.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct IndexConfig {
    /// Auxiliary path. Defaults to an empty string; `sanitize` fills an empty value
    /// from the deprecated `inverted_index.intermediate_path` when that is set, and
    /// otherwise from `<data_home>/index_intermediate`.
    pub aux_path: String,

    /// Staging size. Defaults to 2 GB.
    pub staging_size: ReadableSize,
    /// Optional staging TTL. Defaults to 7 days; `sanitize` turns a zero duration
    /// into `None`. (De)serialized through `humantime_serde`.
    #[serde(with = "humantime_serde")]
    pub staging_ttl: Option<Duration>,

    /// Index build mode. Defaults to `IndexBuildMode::default()`.
    pub build_mode: IndexBuildMode,

    /// Write buffer size. Defaults to 8 MB; `sanitize` raises it to
    /// `MULTIPART_UPLOAD_MINIMUM_SIZE` (5 MB) when smaller.
    pub write_buffer_size: ReadableSize,

    /// Metadata cache size. Defaults to 64 MB; `adjust_buffer_and_cache_size` caps it
    /// at `min(total memory / 32, 64 MB)`.
    pub metadata_cache_size: ReadableSize,
    /// Content cache size. Defaults to 128 MB; `adjust_buffer_and_cache_size` caps it
    /// at `min(total memory / 16, 128 MB)`.
    pub content_cache_size: ReadableSize,
    /// Content cache page size. Defaults to 64 KB.
    pub content_cache_page_size: ReadableSize,
    /// Result cache size. Defaults to 128 MB; `adjust_buffer_and_cache_size` caps it
    /// at `min(total memory / 16, 128 MB)`.
    pub result_cache_size: ReadableSize,
}
402
403impl Default for IndexConfig {
404 fn default() -> Self {
405 Self {
406 aux_path: String::new(),
407 staging_size: ReadableSize::gb(2),
408 staging_ttl: Some(Duration::from_secs(7 * 24 * 60 * 60)),
409 build_mode: IndexBuildMode::default(),
410 write_buffer_size: ReadableSize::mb(8),
411 metadata_cache_size: ReadableSize::mb(64),
412 content_cache_size: ReadableSize::mb(128),
413 content_cache_page_size: ReadableSize::kb(64),
414 result_cache_size: ReadableSize::mb(128),
415 }
416 }
417}
418
419impl IndexConfig {
420 pub fn sanitize(
421 &mut self,
422 data_home: &str,
423 inverted_index: &InvertedIndexConfig,
424 ) -> Result<()> {
425 #[allow(deprecated)]
426 if self.aux_path.is_empty() && !inverted_index.intermediate_path.is_empty() {
427 self.aux_path.clone_from(&inverted_index.intermediate_path);
428 warn!(
429 "`inverted_index.intermediate_path` is deprecated, use
430 `index.aux_path` instead. Set `index.aux_path` to {}",
431 &inverted_index.intermediate_path
432 )
433 }
434 if self.aux_path.is_empty() {
435 let path = Path::new(data_home).join("index_intermediate");
436 self.aux_path = path.as_os_str().to_string_lossy().to_string();
437 }
438
439 if self.write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
440 self.write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
441 warn!(
442 "Sanitize index write buffer size to {}",
443 self.write_buffer_size
444 );
445 }
446
447 if self.staging_ttl.map(|ttl| ttl.is_zero()).unwrap_or(false) {
448 self.staging_ttl = None;
449 }
450
451 Ok(())
452 }
453
454 pub fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
455 let cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(128));
456 self.result_cache_size = cmp::min(self.result_cache_size, cache_size);
457 self.content_cache_size = cmp::min(self.content_cache_size, cache_size);
458
459 let metadata_cache_size = cmp::min(
460 sys_memory / SST_META_CACHE_SIZE_FACTOR,
461 ReadableSize::mb(64),
462 );
463 self.metadata_cache_size = cmp::min(self.metadata_cache_size, metadata_cache_size);
464 }
465}
466
/// Operational mode switch used by the index configurations. Variants are
/// (de)serialized in snake case (`auto` / `disable`).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum Mode {
    /// Automatic mode; this is the default variant.
    #[default]
    Auto,
    /// The operation is disabled.
    Disable,
}
477
478impl Mode {
479 pub fn disabled(&self) -> bool {
481 matches!(self, Mode::Disable)
482 }
483
484 pub fn auto(&self) -> bool {
486 matches!(self, Mode::Auto)
487 }
488}
489
/// Memory threshold setting used when creating indexes.
///
/// The unit variants are (de)serialized in snake case (`auto` / `unlimited`); the
/// `Size` variant is untagged and is (de)serialized as a bare `ReadableSize`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum MemoryThreshold {
    /// Derive the threshold from the total system memory; this is the default
    /// variant.
    #[default]
    Auto,
    /// No threshold.
    Unlimited,
    /// An explicit threshold.
    #[serde(untagged)]
    Size(ReadableSize),
}
503
/// Inverted index settings.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input is
/// taken from `InvertedIndexConfig::default()`.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct InvertedIndexConfig {
    /// Mode for creating the index on flush. Defaults to `Mode::Auto`.
    pub create_on_flush: Mode,
    /// Mode for creating the index on compaction. Defaults to `Mode::Auto`.
    pub create_on_compaction: Mode,
    /// Mode for applying the index on query. Defaults to `Mode::Auto`.
    pub apply_on_query: Mode,

    /// Memory threshold used when creating the index. Defaults to
    /// `MemoryThreshold::Auto`; resolved to bytes by `mem_threshold_on_create()`.
    pub mem_threshold_on_create: MemoryThreshold,

    /// Deprecated path, still accepted on input but never serialized.
    /// `IndexConfig::sanitize` copies it into `index.aux_path` when that is empty.
    #[deprecated = "use [IndexConfig::aux_path] instead"]
    #[serde(skip_serializing)]
    pub intermediate_path: String,

    /// Deprecated write buffer size, still accepted on input but never serialized.
    #[deprecated = "use [IndexConfig::write_buffer_size] instead"]
    #[serde(skip_serializing)]
    pub write_buffer_size: ReadableSize,
}
527
528impl Default for InvertedIndexConfig {
529 #[allow(deprecated)]
530 fn default() -> Self {
531 Self {
532 create_on_flush: Mode::Auto,
533 create_on_compaction: Mode::Auto,
534 apply_on_query: Mode::Auto,
535 mem_threshold_on_create: MemoryThreshold::Auto,
536 write_buffer_size: ReadableSize::mb(8),
537 intermediate_path: String::new(),
538 }
539 }
540}
541
542impl InvertedIndexConfig {
543 pub fn mem_threshold_on_create(&self) -> Option<usize> {
544 match self.mem_threshold_on_create {
545 MemoryThreshold::Auto => {
546 if let Some(sys_memory) = get_total_memory_readable() {
547 Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize)
548 } else {
549 Some(ReadableSize::mb(64).as_bytes() as usize)
550 }
551 }
552 MemoryThreshold::Unlimited => None,
553 MemoryThreshold::Size(size) => Some(size.as_bytes() as usize),
554 }
555 }
556}
557
/// Full-text index settings.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input is
/// taken from `FulltextIndexConfig::default()`.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct FulltextIndexConfig {
    /// Mode for creating the index on flush. Defaults to `Mode::Auto`.
    pub create_on_flush: Mode,
    /// Mode for creating the index on compaction. Defaults to `Mode::Auto`.
    pub create_on_compaction: Mode,
    /// Mode for applying the index on query. Defaults to `Mode::Auto`.
    pub apply_on_query: Mode,
    /// Memory threshold used when creating the index. Defaults to
    /// `MemoryThreshold::Auto`; resolved to bytes by `mem_threshold_on_create()`.
    pub mem_threshold_on_create: MemoryThreshold,
    /// Whether compression is enabled. Defaults to `true`.
    pub compress: bool,
}
574
575impl Default for FulltextIndexConfig {
576 fn default() -> Self {
577 Self {
578 create_on_flush: Mode::Auto,
579 create_on_compaction: Mode::Auto,
580 apply_on_query: Mode::Auto,
581 mem_threshold_on_create: MemoryThreshold::Auto,
582 compress: true,
583 }
584 }
585}
586
587impl FulltextIndexConfig {
588 pub fn mem_threshold_on_create(&self) -> usize {
589 match self.mem_threshold_on_create {
590 MemoryThreshold::Auto => {
591 if let Some(sys_memory) = get_total_memory_readable() {
592 (sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as _
593 } else {
594 ReadableSize::mb(64).as_bytes() as _
595 }
596 }
597 MemoryThreshold::Unlimited => usize::MAX,
598 MemoryThreshold::Size(size) => size.as_bytes() as _,
599 }
600 }
601}
602
/// Bloom filter index settings.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input is
/// taken from `BloomFilterConfig::default()`.
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct BloomFilterConfig {
    /// Mode for creating the index on flush. Defaults to `Mode::Auto`.
    pub create_on_flush: Mode,
    /// Mode for creating the index on compaction. Defaults to `Mode::Auto`.
    pub create_on_compaction: Mode,
    /// Mode for applying the index on query. Defaults to `Mode::Auto`.
    pub apply_on_query: Mode,
    /// Memory threshold used when creating the index. Defaults to
    /// `MemoryThreshold::Auto`; resolved to bytes by `mem_threshold_on_create()`.
    pub mem_threshold_on_create: MemoryThreshold,
}
617
618impl Default for BloomFilterConfig {
619 fn default() -> Self {
620 Self {
621 create_on_flush: Mode::Auto,
622 create_on_compaction: Mode::Auto,
623 apply_on_query: Mode::Auto,
624 mem_threshold_on_create: MemoryThreshold::Auto,
625 }
626 }
627}
628
629impl BloomFilterConfig {
630 pub fn mem_threshold_on_create(&self) -> Option<usize> {
631 match self.mem_threshold_on_create {
632 MemoryThreshold::Auto => {
633 if let Some(sys_memory) = get_total_memory_readable() {
634 Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize)
635 } else {
636 Some(ReadableSize::mb(64).as_bytes() as usize)
637 }
638 }
639 MemoryThreshold::Unlimited => None,
640 MemoryThreshold::Size(size) => Some(size.as_bytes() as usize),
641 }
642 }
643}
644
/// Vector index settings; only compiled with the `vector_index` feature.
///
/// Because of `#[serde(default)]`, any field missing from the deserialized input is
/// taken from `VectorIndexConfig::default()`.
#[cfg(feature = "vector_index")]
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct VectorIndexConfig {
    /// Mode for creating the index on flush. Defaults to `Mode::Auto`.
    pub create_on_flush: Mode,
    /// Mode for creating the index on compaction. Defaults to `Mode::Auto`.
    pub create_on_compaction: Mode,
    /// Mode for applying the index on query. Defaults to `Mode::Auto`.
    pub apply_on_query: Mode,
    /// Memory threshold used when creating the index. Defaults to
    /// `MemoryThreshold::Auto`; resolved to bytes by `mem_threshold_on_create()`.
    pub mem_threshold_on_create: MemoryThreshold,
}
660
#[cfg(feature = "vector_index")]
impl Default for VectorIndexConfig {
    /// Returns the defaults: every mode set to `Mode::Auto` and an automatic memory
    /// threshold.
    fn default() -> Self {
        let [create_on_flush, create_on_compaction, apply_on_query] =
            [Mode::Auto, Mode::Auto, Mode::Auto];

        Self {
            create_on_flush,
            create_on_compaction,
            apply_on_query,
            mem_threshold_on_create: MemoryThreshold::Auto,
        }
    }
}
672
#[cfg(feature = "vector_index")]
impl VectorIndexConfig {
    /// Resolves the configured memory threshold to a byte count.
    ///
    /// Returns `None` for `MemoryThreshold::Unlimited`. For `MemoryThreshold::Auto`
    /// the threshold is 1/16 of the total system memory, or 64 MB when the total
    /// memory is unknown.
    pub fn mem_threshold_on_create(&self) -> Option<usize> {
        let threshold = match self.mem_threshold_on_create {
            MemoryThreshold::Unlimited => return None,
            MemoryThreshold::Size(size) => size,
            MemoryThreshold::Auto => get_total_memory_readable()
                .map(|total| total / INDEX_CREATE_MEM_THRESHOLD_FACTOR)
                .unwrap_or(ReadableSize::mb(64)),
        };

        Some(threshold.as_bytes() as usize)
    }
}
689
/// Returns the total number of CPU cores divided by `divisor`, rounded up.
///
/// Debug builds assert that both `divisor` and the reported core count are
/// non-zero.
fn divide_num_cpus(divisor: usize) -> usize {
    debug_assert!(divisor > 0);
    let cores = get_total_cpu_cores();
    debug_assert!(cores > 0);

    // Rounding up means a non-zero core count never yields zero.
    cores.div_ceil(divisor)
}