Skip to main content

mito2/
config.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Configurations.
16
17use std::cmp;
18use std::path::Path;
19use std::time::Duration;
20
21use common_base::memory_limit::MemoryLimit;
22use common_base::readable_size::ReadableSize;
23use common_memory_manager::OnExhaustedPolicy;
24use common_stat::{get_total_cpu_cores, get_total_memory_readable};
25use common_telemetry::warn;
26use serde::{Deserialize, Serialize};
27use serde_with::serde_as;
28
29use crate::cache::file_cache::DEFAULT_INDEX_CACHE_PERCENT;
30use crate::error::Result;
31use crate::gc::GcConfig;
32use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
33
34const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
35/// Default maximum number of SST files to scan concurrently.
36pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 384;
37
38// Use `1/GLOBAL_WRITE_BUFFER_SIZE_FACTOR` of OS memory as global write buffer size in default mode
39const GLOBAL_WRITE_BUFFER_SIZE_FACTOR: u64 = 8;
40/// Use `1/SST_META_CACHE_SIZE_FACTOR` of OS memory size as SST meta cache size in default mode
41const SST_META_CACHE_SIZE_FACTOR: u64 = 32;
42/// Use `1/MEM_CACHE_SIZE_FACTOR` of OS memory size as mem cache size in default mode
43const MEM_CACHE_SIZE_FACTOR: u64 = 16;
44/// Use `1/PAGE_CACHE_SIZE_FACTOR` of OS memory size as page cache size in default mode
45const PAGE_CACHE_SIZE_FACTOR: u64 = 8;
46/// Use `1/INDEX_CREATE_MEM_THRESHOLD_FACTOR` of OS memory size as mem threshold for creating index
47const INDEX_CREATE_MEM_THRESHOLD_FACTOR: u64 = 16;
48
49/// Fetch option timeout
50pub(crate) const FETCH_OPTION_TIMEOUT: Duration = Duration::from_secs(3);
51
52/// Configuration for [MitoEngine](crate::engine::MitoEngine).
53/// Before using the config, make sure to call `MitoConfig::validate()` to check if the config is valid.
54#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
55#[serde(default)]
56pub struct MitoConfig {
57    // Worker configs:
58    /// Number of region workers (default: 1/2 of cpu cores).
59    /// Sets to 0 to use the default value.
60    pub num_workers: usize,
61    /// Request channel size of each worker (default 128).
62    pub worker_channel_size: usize,
63    /// Max batch size for a worker to handle requests (default 64).
64    pub worker_request_batch_size: usize,
65
66    // Manifest configs:
67    /// Number of meta action updated to trigger a new checkpoint
68    /// for the manifest (default 10).
69    pub manifest_checkpoint_distance: u64,
70    /// Number of removed files to keep in manifest's `removed_files` field before also
71    /// remove them from `removed_files`. Mostly for debugging purpose.
72    /// If set to 0, it will only use `keep_removed_file_ttl` to decide when to remove files
73    /// from `removed_files` field.
74    pub experimental_manifest_keep_removed_file_count: usize,
75    /// How long to keep removed files in the `removed_files` field of manifest
76    /// after they are removed from manifest.
77    /// files will only be removed from `removed_files` field
78    /// if both `keep_removed_file_count` and `keep_removed_file_ttl` is reached.
79    #[serde(with = "humantime_serde")]
80    pub experimental_manifest_keep_removed_file_ttl: Duration,
81    /// Whether to compress manifest and checkpoint file by gzip (default false).
82    pub compress_manifest: bool,
83
84    // Background job configs:
85    /// Max number of running background index build jobs (default: 1/8 of cpu cores).
86    pub max_background_index_builds: usize,
87    /// Max number of running background flush jobs (default: 1/2 of cpu cores).
88    pub max_background_flushes: usize,
89    /// Max number of running background compaction jobs (default: 1/4 of cpu cores).
90    pub max_background_compactions: usize,
91    /// Max number of running background purge jobs (default: number of cpu cores).
92    pub max_background_purges: usize,
93    /// Memory budget for compaction tasks.
94    /// Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").
95    /// Setting it to 0 or "unlimited" disables the limit.
96    pub experimental_compaction_memory_limit: MemoryLimit,
97    /// Behavior when compaction cannot acquire memory from the budget.
98    pub experimental_compaction_on_exhausted: OnExhaustedPolicy,
99
100    // Flush configs:
101    /// Interval to auto flush a region if it has not flushed yet (default 30 min).
102    #[serde(with = "humantime_serde")]
103    pub auto_flush_interval: Duration,
104    /// Global write buffer size threshold to trigger flush.
105    pub global_write_buffer_size: ReadableSize,
106    /// Global write buffer size threshold to reject write requests.
107    pub global_write_buffer_reject_size: ReadableSize,
108
109    // Cache configs:
110    /// Cache size for SST metadata. Setting it to 0 to disable the cache.
111    pub sst_meta_cache_size: ReadableSize,
112    /// Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
113    pub vector_cache_size: ReadableSize,
114    /// Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
115    pub page_cache_size: ReadableSize,
116    /// Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
117    pub selector_result_cache_size: ReadableSize,
118    /// Cache size for flat range scan results. Setting it to 0 to disable the cache.
119    pub range_result_cache_size: ReadableSize,
120    /// Whether to enable the write cache.
121    pub enable_write_cache: bool,
122    /// File system path for write cache dir's root, defaults to `{data_home}`.
123    pub write_cache_path: String,
124    /// Capacity for write cache.
125    pub write_cache_size: ReadableSize,
126    /// TTL for write cache.
127    #[serde(with = "humantime_serde")]
128    pub write_cache_ttl: Option<Duration>,
129    /// Preload index (puffin) files into cache on region open (default: true).
130    pub preload_index_cache: bool,
131    /// Percentage of write cache capacity allocated for index (puffin) files (default: 20).
132    /// The remaining capacity is used for data (parquet) files.
133    /// Must be between 0 and 100 (exclusive).
134    pub index_cache_percent: u8,
135    /// Enable background downloading of files to the local cache when accessed during queries (default: true).
136    /// When enabled, files will be asynchronously downloaded to improve performance for subsequent reads.
137    pub enable_refill_cache_on_read: bool,
138    /// Capacity for manifest cache (default: 256MB).
139    pub manifest_cache_size: ReadableSize,
140
141    // Other configs:
142    /// Buffer size for SST writing.
143    pub sst_write_buffer_size: ReadableSize,
144    /// Maximum number of SST files to scan concurrently (default 384).
145    pub max_concurrent_scan_files: usize,
146    /// Whether to allow stale entries read during replay.
147    pub allow_stale_entries: bool,
148    /// Memory limit for table scans across all queries. Setting it to 0 disables the limit.
149    /// Supports absolute size (e.g., "2GB") or percentage (e.g., "50%").
150    pub scan_memory_limit: MemoryLimit,
151    /// Behavior when scan memory tracking cannot acquire memory from the budget.
152    /// `wait` means `wait(10s)`, not unlimited waiting.
153    /// Defaults to [`OnExhaustedPolicy::Fail`], which intentionally differs from
154    /// [`OnExhaustedPolicy::default()`].
155    pub scan_memory_on_exhausted: OnExhaustedPolicy,
156
157    /// Index configs.
158    pub index: IndexConfig,
159    /// Inverted index configs.
160    pub inverted_index: InvertedIndexConfig,
161    /// Full-text index configs.
162    pub fulltext_index: FulltextIndexConfig,
163    /// Bloom filter index configs.
164    pub bloom_filter_index: BloomFilterConfig,
165    /// Vector index configs (HNSW).
166    #[cfg(feature = "vector_index")]
167    pub vector_index: VectorIndexConfig,
168
169    /// Minimum time interval between two compactions.
170    /// To align with the old behavior, the default value is 0 (no restrictions).
171    #[serde(with = "humantime_serde")]
172    pub min_compaction_interval: Duration,
173
174    /// Whether to enable flat format as the default SST format.
175    /// When enabled, forces using BulkMemtable and BulkMemtableBuilder.
176    pub default_flat_format: bool,
177
178    pub gc: GcConfig,
179}
180
181impl Default for MitoConfig {
182    fn default() -> Self {
183        let mut mito_config = MitoConfig {
184            num_workers: divide_num_cpus(2),
185            worker_channel_size: 128,
186            worker_request_batch_size: 64,
187            manifest_checkpoint_distance: 10,
188            experimental_manifest_keep_removed_file_count: 256,
189            experimental_manifest_keep_removed_file_ttl: Duration::from_secs(60 * 60),
190            compress_manifest: false,
191            max_background_index_builds: divide_num_cpus(8),
192            max_background_flushes: divide_num_cpus(2),
193            max_background_compactions: divide_num_cpus(4),
194            max_background_purges: get_total_cpu_cores(),
195            experimental_compaction_memory_limit: MemoryLimit::Unlimited,
196            experimental_compaction_on_exhausted: OnExhaustedPolicy::default(),
197            auto_flush_interval: Duration::from_secs(30 * 60),
198            global_write_buffer_size: ReadableSize::gb(1),
199            global_write_buffer_reject_size: ReadableSize::gb(2),
200            sst_meta_cache_size: ReadableSize::mb(128),
201            vector_cache_size: ReadableSize::mb(512),
202            page_cache_size: ReadableSize::mb(512),
203            selector_result_cache_size: ReadableSize::mb(512),
204            range_result_cache_size: ReadableSize::mb(512),
205            enable_write_cache: false,
206            write_cache_path: String::new(),
207            write_cache_size: ReadableSize::gb(5),
208            write_cache_ttl: None,
209            preload_index_cache: true,
210            index_cache_percent: DEFAULT_INDEX_CACHE_PERCENT,
211            enable_refill_cache_on_read: true,
212            manifest_cache_size: ReadableSize::mb(256),
213            sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
214            max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
215            allow_stale_entries: false,
216            scan_memory_limit: MemoryLimit::default(),
217            scan_memory_on_exhausted: OnExhaustedPolicy::Fail,
218            index: IndexConfig::default(),
219            inverted_index: InvertedIndexConfig::default(),
220            fulltext_index: FulltextIndexConfig::default(),
221            bloom_filter_index: BloomFilterConfig::default(),
222            #[cfg(feature = "vector_index")]
223            vector_index: VectorIndexConfig::default(),
224            min_compaction_interval: Duration::from_secs(0),
225            default_flat_format: true,
226            gc: GcConfig::default(),
227        };
228
229        // Adjust buffer and cache size according to system memory if we can.
230        if let Some(sys_memory) = get_total_memory_readable() {
231            mito_config.adjust_buffer_and_cache_size(sys_memory);
232        }
233
234        mito_config
235    }
236}
237
238impl MitoConfig {
239    /// Sanitize incorrect configurations.
240    ///
241    /// Returns an error if there is a configuration that unable to sanitize.
242    pub fn sanitize(&mut self, data_home: &str) -> Result<()> {
243        // Use default value if `num_workers` is 0.
244        if self.num_workers == 0 {
245            self.num_workers = divide_num_cpus(2);
246        }
247
248        // Sanitize channel size.
249        if self.worker_channel_size == 0 {
250            warn!("Sanitize channel size 0 to 1");
251            self.worker_channel_size = 1;
252        }
253
254        if self.max_background_flushes == 0 {
255            warn!(
256                "Sanitize max background flushes 0 to {}",
257                divide_num_cpus(2)
258            );
259            self.max_background_flushes = divide_num_cpus(2);
260        }
261        if self.max_background_compactions == 0 {
262            warn!(
263                "Sanitize max background compactions 0 to {}",
264                divide_num_cpus(4)
265            );
266            self.max_background_compactions = divide_num_cpus(4);
267        }
268        if self.max_background_purges == 0 {
269            let cpu_cores = get_total_cpu_cores();
270            warn!("Sanitize max background purges 0 to {}", cpu_cores);
271            self.max_background_purges = cpu_cores;
272        }
273
274        if self.global_write_buffer_reject_size <= self.global_write_buffer_size {
275            self.global_write_buffer_reject_size = self.global_write_buffer_size * 2;
276            warn!(
277                "Sanitize global write buffer reject size to {}",
278                self.global_write_buffer_reject_size
279            );
280        }
281
282        if self.sst_write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
283            self.sst_write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
284            warn!(
285                "Sanitize sst write buffer size to {}",
286                self.sst_write_buffer_size
287            );
288        }
289
290        // Sets write cache path if it is empty.
291        if self.write_cache_path.trim().is_empty() {
292            self.write_cache_path = data_home.to_string();
293        }
294
295        // Validate index_cache_percent is within valid range (0, 100)
296        if self.index_cache_percent == 0 || self.index_cache_percent >= 100 {
297            warn!(
298                "Invalid index_cache_percent {}, resetting to default {}",
299                self.index_cache_percent, DEFAULT_INDEX_CACHE_PERCENT
300            );
301            self.index_cache_percent = DEFAULT_INDEX_CACHE_PERCENT;
302        }
303
304        self.index.sanitize(data_home, &self.inverted_index)?;
305
306        Ok(())
307    }
308
309    fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
310        // shouldn't be greater than 1G in default mode.
311        let global_write_buffer_size = cmp::min(
312            sys_memory / GLOBAL_WRITE_BUFFER_SIZE_FACTOR,
313            ReadableSize::gb(1),
314        );
315        // Use 2x of global write buffer size as global write buffer reject size.
316        let global_write_buffer_reject_size = global_write_buffer_size * 2;
317        // shouldn't be greater than 128MB in default mode.
318        let sst_meta_cache_size = cmp::min(
319            sys_memory / SST_META_CACHE_SIZE_FACTOR,
320            ReadableSize::mb(128),
321        );
322        // shouldn't be greater than 512MB in default mode.
323        let mem_cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(512));
324        let page_cache_size = sys_memory / PAGE_CACHE_SIZE_FACTOR;
325
326        self.global_write_buffer_size = global_write_buffer_size;
327        self.global_write_buffer_reject_size = global_write_buffer_reject_size;
328        self.sst_meta_cache_size = sst_meta_cache_size;
329        self.vector_cache_size = mem_cache_size;
330        self.page_cache_size = page_cache_size;
331        self.selector_result_cache_size = mem_cache_size;
332        self.range_result_cache_size = mem_cache_size;
333
334        self.index.adjust_buffer_and_cache_size(sys_memory);
335    }
336
337    /// Enable write cache.
338    #[cfg(test)]
339    pub fn enable_write_cache(
340        mut self,
341        path: String,
342        size: ReadableSize,
343        ttl: Option<Duration>,
344    ) -> Self {
345        self.enable_write_cache = true;
346        self.write_cache_path = path;
347        self.write_cache_size = size;
348        self.write_cache_ttl = ttl;
349        self
350    }
351}
352
353/// Index build mode.
354#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
355#[serde(rename_all = "snake_case")]
356pub enum IndexBuildMode {
357    /// Build index synchronously.
358    #[default]
359    Sync,
360    /// Build index asynchronously.
361    Async,
362}
363
364#[serde_as]
365#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
366#[serde(default)]
367pub struct IndexConfig {
368    /// Auxiliary directory path for the index in filesystem, used to
369    /// store intermediate files for creating the index and staging files
370    /// for searching the index, defaults to `{data_home}/index_intermediate`.
371    ///
372    /// This path contains two subdirectories:
373    /// - `__intm`: for storing intermediate files used during creating index.
374    /// - `staging`: for storing staging files used during searching index.
375    ///
376    /// The default name for this directory is `index_intermediate` for backward compatibility.
377    pub aux_path: String,
378
379    /// The max capacity of the staging directory.
380    pub staging_size: ReadableSize,
381    /// The TTL of the staging directory.
382    /// Defaults to 7 days.
383    /// Setting it to "0s" to disable TTL.
384    #[serde(with = "humantime_serde")]
385    pub staging_ttl: Option<Duration>,
386
387    /// Index Build Mode
388    pub build_mode: IndexBuildMode,
389
390    /// Write buffer size for creating the index.
391    pub write_buffer_size: ReadableSize,
392
393    /// Cache size for metadata of puffin files. Setting it to 0 to disable the cache.
394    pub metadata_cache_size: ReadableSize,
395    /// Cache size for inverted index content. Setting it to 0 to disable the cache.
396    pub content_cache_size: ReadableSize,
397    /// Page size for inverted index content.
398    pub content_cache_page_size: ReadableSize,
399    /// Cache size for index result. Setting it to 0 to disable the cache.
400    pub result_cache_size: ReadableSize,
401}
402
403impl Default for IndexConfig {
404    fn default() -> Self {
405        Self {
406            aux_path: String::new(),
407            staging_size: ReadableSize::gb(2),
408            staging_ttl: Some(Duration::from_secs(7 * 24 * 60 * 60)),
409            build_mode: IndexBuildMode::default(),
410            write_buffer_size: ReadableSize::mb(8),
411            metadata_cache_size: ReadableSize::mb(64),
412            content_cache_size: ReadableSize::mb(128),
413            content_cache_page_size: ReadableSize::kb(64),
414            result_cache_size: ReadableSize::mb(128),
415        }
416    }
417}
418
419impl IndexConfig {
420    pub fn sanitize(
421        &mut self,
422        data_home: &str,
423        inverted_index: &InvertedIndexConfig,
424    ) -> Result<()> {
425        #[allow(deprecated)]
426        if self.aux_path.is_empty() && !inverted_index.intermediate_path.is_empty() {
427            self.aux_path.clone_from(&inverted_index.intermediate_path);
428            warn!(
429                "`inverted_index.intermediate_path` is deprecated, use
430                 `index.aux_path` instead. Set `index.aux_path` to {}",
431                &inverted_index.intermediate_path
432            )
433        }
434        if self.aux_path.is_empty() {
435            let path = Path::new(data_home).join("index_intermediate");
436            self.aux_path = path.as_os_str().to_string_lossy().to_string();
437        }
438
439        if self.write_buffer_size < MULTIPART_UPLOAD_MINIMUM_SIZE {
440            self.write_buffer_size = MULTIPART_UPLOAD_MINIMUM_SIZE;
441            warn!(
442                "Sanitize index write buffer size to {}",
443                self.write_buffer_size
444            );
445        }
446
447        if self.staging_ttl.map(|ttl| ttl.is_zero()).unwrap_or(false) {
448            self.staging_ttl = None;
449        }
450
451        Ok(())
452    }
453
454    pub fn adjust_buffer_and_cache_size(&mut self, sys_memory: ReadableSize) {
455        let cache_size = cmp::min(sys_memory / MEM_CACHE_SIZE_FACTOR, ReadableSize::mb(128));
456        self.result_cache_size = cmp::min(self.result_cache_size, cache_size);
457        self.content_cache_size = cmp::min(self.content_cache_size, cache_size);
458
459        let metadata_cache_size = cmp::min(
460            sys_memory / SST_META_CACHE_SIZE_FACTOR,
461            ReadableSize::mb(64),
462        );
463        self.metadata_cache_size = cmp::min(self.metadata_cache_size, metadata_cache_size);
464    }
465}
466
467/// Operational mode for certain actions.
468#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
469#[serde(rename_all = "snake_case")]
470pub enum Mode {
471    /// The action is performed automatically based on internal criteria.
472    #[default]
473    Auto,
474    /// The action is explicitly disabled.
475    Disable,
476}
477
478impl Mode {
479    /// Whether the action is disabled.
480    pub fn disabled(&self) -> bool {
481        matches!(self, Mode::Disable)
482    }
483
484    /// Whether the action is automatic.
485    pub fn auto(&self) -> bool {
486        matches!(self, Mode::Auto)
487    }
488}
489
490/// Memory threshold for performing certain actions.
491#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
492#[serde(rename_all = "snake_case")]
493pub enum MemoryThreshold {
494    /// Automatically determine the threshold based on internal criteria.
495    #[default]
496    Auto,
497    /// Unlimited memory.
498    Unlimited,
499    /// Fixed memory threshold.
500    #[serde(untagged)]
501    Size(ReadableSize),
502}
503
504/// Configuration options for the inverted index.
505#[serde_as]
506#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
507#[serde(default)]
508pub struct InvertedIndexConfig {
509    /// Whether to create the index on flush: automatically or never.
510    pub create_on_flush: Mode,
511    /// Whether to create the index on compaction: automatically or never.
512    pub create_on_compaction: Mode,
513    /// Whether to apply the index on query: automatically or never.
514    pub apply_on_query: Mode,
515
516    /// Memory threshold for performing an external sort during index creation.
517    pub mem_threshold_on_create: MemoryThreshold,
518
519    #[deprecated = "use [IndexConfig::aux_path] instead"]
520    #[serde(skip_serializing)]
521    pub intermediate_path: String,
522
523    #[deprecated = "use [IndexConfig::write_buffer_size] instead"]
524    #[serde(skip_serializing)]
525    pub write_buffer_size: ReadableSize,
526}
527
528impl Default for InvertedIndexConfig {
529    #[allow(deprecated)]
530    fn default() -> Self {
531        Self {
532            create_on_flush: Mode::Auto,
533            create_on_compaction: Mode::Auto,
534            apply_on_query: Mode::Auto,
535            mem_threshold_on_create: MemoryThreshold::Auto,
536            write_buffer_size: ReadableSize::mb(8),
537            intermediate_path: String::new(),
538        }
539    }
540}
541
542impl InvertedIndexConfig {
543    pub fn mem_threshold_on_create(&self) -> Option<usize> {
544        match self.mem_threshold_on_create {
545            MemoryThreshold::Auto => {
546                if let Some(sys_memory) = get_total_memory_readable() {
547                    Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize)
548                } else {
549                    Some(ReadableSize::mb(64).as_bytes() as usize)
550                }
551            }
552            MemoryThreshold::Unlimited => None,
553            MemoryThreshold::Size(size) => Some(size.as_bytes() as usize),
554        }
555    }
556}
557
558/// Configuration options for the full-text index.
559#[serde_as]
560#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
561#[serde(default)]
562pub struct FulltextIndexConfig {
563    /// Whether to create the index on flush: automatically or never.
564    pub create_on_flush: Mode,
565    /// Whether to create the index on compaction: automatically or never.
566    pub create_on_compaction: Mode,
567    /// Whether to apply the index on query: automatically or never.
568    pub apply_on_query: Mode,
569    /// Memory threshold for creating the index.
570    pub mem_threshold_on_create: MemoryThreshold,
571    /// Whether to compress the index data.
572    pub compress: bool,
573}
574
575impl Default for FulltextIndexConfig {
576    fn default() -> Self {
577        Self {
578            create_on_flush: Mode::Auto,
579            create_on_compaction: Mode::Auto,
580            apply_on_query: Mode::Auto,
581            mem_threshold_on_create: MemoryThreshold::Auto,
582            compress: true,
583        }
584    }
585}
586
587impl FulltextIndexConfig {
588    pub fn mem_threshold_on_create(&self) -> usize {
589        match self.mem_threshold_on_create {
590            MemoryThreshold::Auto => {
591                if let Some(sys_memory) = get_total_memory_readable() {
592                    (sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as _
593                } else {
594                    ReadableSize::mb(64).as_bytes() as _
595                }
596            }
597            MemoryThreshold::Unlimited => usize::MAX,
598            MemoryThreshold::Size(size) => size.as_bytes() as _,
599        }
600    }
601}
602
603/// Configuration options for the bloom filter.
604#[serde_as]
605#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
606#[serde(default)]
607pub struct BloomFilterConfig {
608    /// Whether to create the index on flush: automatically or never.
609    pub create_on_flush: Mode,
610    /// Whether to create the index on compaction: automatically or never.
611    pub create_on_compaction: Mode,
612    /// Whether to apply the index on query: automatically or never.
613    pub apply_on_query: Mode,
614    /// Memory threshold for creating the index.
615    pub mem_threshold_on_create: MemoryThreshold,
616}
617
618impl Default for BloomFilterConfig {
619    fn default() -> Self {
620        Self {
621            create_on_flush: Mode::Auto,
622            create_on_compaction: Mode::Auto,
623            apply_on_query: Mode::Auto,
624            mem_threshold_on_create: MemoryThreshold::Auto,
625        }
626    }
627}
628
629impl BloomFilterConfig {
630    pub fn mem_threshold_on_create(&self) -> Option<usize> {
631        match self.mem_threshold_on_create {
632            MemoryThreshold::Auto => {
633                if let Some(sys_memory) = get_total_memory_readable() {
634                    Some((sys_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR).as_bytes() as usize)
635                } else {
636                    Some(ReadableSize::mb(64).as_bytes() as usize)
637                }
638            }
639            MemoryThreshold::Unlimited => None,
640            MemoryThreshold::Size(size) => Some(size.as_bytes() as usize),
641        }
642    }
643}
644
/// Options of the vector index (HNSW).
#[cfg(feature = "vector_index")]
#[serde_as]
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
#[serde(default)]
pub struct VectorIndexConfig {
    /// Create the index when flushing: automatically or never.
    pub create_on_flush: Mode,
    /// Create the index when compacting: automatically or never.
    pub create_on_compaction: Mode,
    /// Apply the index when querying: automatically or never.
    pub apply_on_query: Mode,
    /// Memory threshold used while creating the index.
    pub mem_threshold_on_create: MemoryThreshold,
}
660
#[cfg(feature = "vector_index")]
impl Default for VectorIndexConfig {
    /// Returns the defaults: every mode and the memory threshold are set to auto.
    fn default() -> Self {
        VectorIndexConfig {
            create_on_flush: Mode::Auto,
            create_on_compaction: Mode::Auto,
            apply_on_query: Mode::Auto,
            mem_threshold_on_create: MemoryThreshold::Auto,
        }
    }
}
672
#[cfg(feature = "vector_index")]
impl VectorIndexConfig {
    /// Resolves `mem_threshold_on_create` to a number of bytes.
    ///
    /// Returns `None` for an unlimited threshold. In auto mode the threshold
    /// is `1/INDEX_CREATE_MEM_THRESHOLD_FACTOR` of the total system memory,
    /// or 64MB if the total memory is unknown.
    pub fn mem_threshold_on_create(&self) -> Option<usize> {
        let threshold = match self.mem_threshold_on_create {
            MemoryThreshold::Unlimited => return None,
            MemoryThreshold::Size(fixed) => fixed,
            MemoryThreshold::Auto => match get_total_memory_readable() {
                Some(total_memory) => total_memory / INDEX_CREATE_MEM_THRESHOLD_FACTOR,
                None => ReadableSize::mb(64),
            },
        };

        Some(threshold.as_bytes() as usize)
    }
}
689
690/// Divide cpu num by a non-zero `divisor` and returns at least 1.
691fn divide_num_cpus(divisor: usize) -> usize {
692    debug_assert!(divisor > 0);
693    let cores = get_total_cpu_cores();
694    debug_assert!(cores > 0);
695
696    cores.div_ceil(divisor)
697}