Skip to main content

metric_engine/engine/
options.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Specific options for the metric engine to create or open a region.
16
17use std::collections::HashMap;
18
19use store_api::metric_engine_consts::{
20    MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING,
21    METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
22    METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT,
23    METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
24    METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT, METRIC_ENGINE_INDEX_TYPE_OPTION,
25    PRIMARY_KEY_ENCODING,
26};
27use store_api::mito_engine_options::{COMPACTION_TYPE, COMPACTION_TYPE_TWCS, TWCS_TIME_WINDOW};
28
/// Key prefix shared by every legacy `memtable.partition_tree.*` option.
///
/// The partition tree memtable is gone, so `set_data_region_options` first
/// extracts the legacy primary key encoding and then strips every key that
/// starts with this prefix without reporting it.
const LEGACY_PARTITION_TREE_OPTION_PREFIX: &str = "memtable.partition_tree.";
32
33use crate::error::{Error, ParseRegionOptionsSnafu, Result};
34
/// Empirically chosen segment row count for the metric data region.
///
/// The metric engine's usage pattern produces smaller indices than the mito
/// engine does. Lowering the segment row count from the default of 1024 trades
/// a somewhat larger index for more effective indexing.
const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256;

/// Compaction time window applied to metric engine data regions when the
/// caller did not specify one.
const DEFAULT_DATA_REGION_COMPACTION_TIME_WINDOW: &str = "1d";
43
44/// Physical region options.
45#[derive(Debug, Clone, Copy, PartialEq)]
46pub struct PhysicalRegionOptions {
47    pub index: IndexOptions,
48}
49
/// Kind of index to use for auto created columns.
#[derive(Debug, Clone, Copy, Default, PartialEq)]
pub enum IndexOptions {
    /// No index; this is the default when no index type is configured.
    #[default]
    None,
    /// Inverted index.
    Inverted,
    /// Skipping index with its tuning parameters.
    Skipping {
        /// Granularity of the skipping index.
        granularity: u32,
        /// False positive rate of the skipping index. The option parser in
        /// this module only accepts values in `(0, 1]`.
        false_positive_rate: f64,
    },
}
61
62/// Sets data region specific options.
63pub fn set_data_region_options(
64    options: &mut HashMap<String, String>,
65    sparse_primary_key_encoding_if_absent: bool,
66) {
67    options.remove(METRIC_ENGINE_INDEX_TYPE_OPTION);
68    options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION);
69    options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION);
70    options.insert(
71        "index.inverted_index.segment_row_count".to_string(),
72        SEG_ROW_COUNT_FOR_DATA_REGION.to_string(),
73    );
74
75    // Extract primary key encoding from the legacy nested key before dropping
76    // all `memtable.partition_tree.*` keys.
77    let legacy_encoding = options.remove(MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING);
78    options.retain(|k, _| !k.starts_with(LEGACY_PARTITION_TREE_OPTION_PREFIX));
79
80    // Set memtable options for the data region. Bulk memtable produces
81    // flat-encoded ranges, so the SST format must be flat to match.
82    options.insert("memtable.type".to_string(), "bulk".to_string());
83    options.insert("sst_format".to_string(), "flat".to_string());
84
85    // Decide the top-level primary key encoding: caller-supplied top-level key wins,
86    // then extracted legacy value, then the `sparse` default if requested.
87    if !options.contains_key(PRIMARY_KEY_ENCODING) {
88        if let Some(encoding) = legacy_encoding {
89            options.insert(PRIMARY_KEY_ENCODING.to_string(), encoding);
90        } else if sparse_primary_key_encoding_if_absent {
91            options.insert(PRIMARY_KEY_ENCODING.to_string(), "sparse".to_string());
92        }
93    }
94
95    if !options.contains_key(TWCS_TIME_WINDOW) {
96        options.insert(
97            COMPACTION_TYPE.to_string(),
98            COMPACTION_TYPE_TWCS.to_string(),
99        );
100        options.insert(
101            TWCS_TIME_WINDOW.to_string(),
102            DEFAULT_DATA_REGION_COMPACTION_TIME_WINDOW.to_string(),
103        );
104    }
105}
106
107impl TryFrom<&HashMap<String, String>> for PhysicalRegionOptions {
108    type Error = Error;
109
110    fn try_from(value: &HashMap<String, String>) -> Result<Self> {
111        let index = match value
112            .get(METRIC_ENGINE_INDEX_TYPE_OPTION)
113            .map(|s| s.to_lowercase())
114        {
115            Some(ref index_type) if index_type == "inverted" => Ok(IndexOptions::Inverted),
116            Some(ref index_type) if index_type == "skipping" => {
117                let granularity = value
118                    .get(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION)
119                    .map_or(
120                        Ok(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT),
121                        |g| {
122                            g.parse().map_err(|_| {
123                                ParseRegionOptionsSnafu {
124                                    reason: format!("Invalid granularity: {}", g),
125                                }
126                                .build()
127                            })
128                        },
129                    )?;
130                let false_positive_rate = value
131                    .get(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION)
132                    .map_or(
133                        Ok(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT),
134                        |f| {
135                            f.parse().ok().filter(|f| *f > 0.0 && *f <= 1.0).ok_or(
136                                ParseRegionOptionsSnafu {
137                                    reason: format!("Invalid false positive rate: {}", f),
138                                }
139                                .build(),
140                            )
141                        },
142                    )?;
143                Ok(IndexOptions::Skipping {
144                    granularity,
145                    false_positive_rate,
146                })
147            }
148            Some(index_type) => ParseRegionOptionsSnafu {
149                reason: format!("Invalid index type: {}", index_type),
150            }
151            .fail(),
152            None => Ok(IndexOptions::default()),
153        }?;
154
155        Ok(PhysicalRegionOptions { index })
156    }
157}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned option map from borrowed key/value pairs.
    fn options_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Returns the value stored under `key`, borrowed as `&str`.
    fn option<'a>(options: &'a HashMap<String, String>, key: &str) -> Option<&'a str> {
        options.get(key).map(String::as_str)
    }

    #[test]
    fn test_set_data_region_options_should_remove_metric_engine_options() {
        let mut options = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "inverted"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "102400",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "0.01",
            ),
        ]);
        set_data_region_options(&mut options, false);

        for key in [
            METRIC_ENGINE_INDEX_TYPE_OPTION,
            METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
            METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
        ] {
            assert_eq!(options.get(key), None);
        }
    }

    #[test]
    fn test_deserialize_physical_region_options_from_hashmap() {
        // Skipping index parameters are ignored for an inverted index.
        let options = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "inverted"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "102400",
            ),
        ]);
        let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
        assert_eq!(physical_region_options.index, IndexOptions::Inverted);

        let options = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "skipping"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "102400",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "0.01",
            ),
        ]);
        let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
        assert_eq!(
            physical_region_options.index,
            IndexOptions::Skipping {
                granularity: 102400,
                false_positive_rate: 0.01,
            }
        );
    }

    #[test]
    fn test_physical_region_options_defaults_to_no_index() {
        let options = HashMap::new();
        let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
        assert_eq!(physical_region_options.index, IndexOptions::None);
    }

    #[test]
    fn test_physical_region_options_index_type_ignores_case() {
        let options = options_from(&[(METRIC_ENGINE_INDEX_TYPE_OPTION, "INVERTED")]);
        let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
        assert_eq!(physical_region_options.index, IndexOptions::Inverted);
    }

    #[test]
    fn test_physical_region_options_skipping_uses_default_parameters() {
        let options = options_from(&[(METRIC_ENGINE_INDEX_TYPE_OPTION, "skipping")]);
        let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
        assert_eq!(
            physical_region_options.index,
            IndexOptions::Skipping {
                granularity: METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT,
                false_positive_rate:
                    METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT,
            }
        );

        // The upper bound of the false positive rate is inclusive.
        let options = options_from(&[
            (METRIC_ENGINE_INDEX_TYPE_OPTION, "skipping"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "1",
            ),
        ]);
        let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
        assert_eq!(
            physical_region_options.index,
            IndexOptions::Skipping {
                granularity: METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT,
                false_positive_rate: 1.0,
            }
        );
    }

    #[test]
    fn test_physical_region_options_rejects_invalid_values() {
        let unknown_index_type = options_from(&[(METRIC_ENGINE_INDEX_TYPE_OPTION, "btree")]);
        assert!(PhysicalRegionOptions::try_from(&unknown_index_type).is_err());

        for (key, bad_value) in [
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
                "not-a-number",
            ),
            (METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION, "-1"),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "0",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "1.5",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "NaN",
            ),
            (
                METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
                "not-a-number",
            ),
        ] {
            let options = options_from(&[
                (METRIC_ENGINE_INDEX_TYPE_OPTION, "skipping"),
                (key, bad_value),
            ]);
            assert!(
                PhysicalRegionOptions::try_from(&options).is_err(),
                "`{}` = `{}` should be rejected",
                key,
                bad_value
            );
        }
    }

    #[test]
    fn test_set_data_region_options_default_compaction_time_window() {
        // The default time window is set when none is specified.
        let mut options = HashMap::new();
        set_data_region_options(&mut options, false);

        assert_eq!(option(&options, "memtable.type"), Some("bulk"));
        assert_eq!(option(&options, "sst_format"), Some("flat"));
        assert_eq!(option(&options, COMPACTION_TYPE), Some(COMPACTION_TYPE_TWCS));
        assert_eq!(option(&options, TWCS_TIME_WINDOW), Some("1d"));
    }

    #[test]
    fn test_set_data_region_options_sparse_primary_key_encoding() {
        let mut options = HashMap::new();
        set_data_region_options(&mut options, true);

        assert_eq!(option(&options, "memtable.type"), Some("bulk"));
        assert_eq!(option(&options, "sst_format"), Some("flat"));
        assert_eq!(option(&options, PRIMARY_KEY_ENCODING), Some("sparse"));
        assert!(!options.contains_key(MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING));
    }

    #[test]
    fn test_set_data_region_options_migrates_legacy_partition_tree_options() {
        let mut options = options_from(&[
            ("memtable.type", "partition_tree"),
            (MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING, "sparse"),
            ("memtable.partition_tree.index_max_keys_per_shard", "2048"),
        ]);
        set_data_region_options(&mut options, false);

        assert_eq!(option(&options, "memtable.type"), Some("bulk"));
        assert_eq!(option(&options, "sst_format"), Some("flat"));
        assert_eq!(option(&options, PRIMARY_KEY_ENCODING), Some("sparse"));
        // All legacy partition-tree-specific keys should be stripped.
        assert!(!options.contains_key(MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING));
        assert!(!options.contains_key("memtable.partition_tree.index_max_keys_per_shard"));
    }

    #[test]
    fn test_set_data_region_options_legacy_encoding_precedence() {
        // The legacy value wins over the `sparse` default.
        let mut options = options_from(&[(MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING, "dense")]);
        set_data_region_options(&mut options, true);
        assert_eq!(option(&options, PRIMARY_KEY_ENCODING), Some("dense"));

        // A top-level value wins over the legacy value.
        let mut options = options_from(&[
            (PRIMARY_KEY_ENCODING, "dense"),
            (MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING, "sparse"),
        ]);
        set_data_region_options(&mut options, false);
        assert_eq!(option(&options, PRIMARY_KEY_ENCODING), Some("dense"));
        assert!(!options.contains_key(MEMTABLE_PARTITION_TREE_PRIMARY_KEY_ENCODING));

        // Without any source and without the flag, no encoding is set.
        let mut options = HashMap::new();
        set_data_region_options(&mut options, false);
        assert!(!options.contains_key(PRIMARY_KEY_ENCODING));
    }

    #[test]
    fn test_set_data_region_options_preserves_existing_top_level_encoding() {
        let mut options = options_from(&[(PRIMARY_KEY_ENCODING, "dense")]);
        // Sparse flag is on but caller already specified dense.
        set_data_region_options(&mut options, true);

        assert_eq!(option(&options, PRIMARY_KEY_ENCODING), Some("dense"));
    }

    #[test]
    fn test_set_data_region_options_respects_user_compaction_time_window() {
        // A user-specified time window is preserved.
        let mut options =
            options_from(&[(TWCS_TIME_WINDOW, "2h"), (COMPACTION_TYPE, "twcs")]);
        set_data_region_options(&mut options, false);

        assert_eq!(option(&options, TWCS_TIME_WINDOW), Some("2h"));
        // The accompanying compaction type is left untouched as well.
        assert_eq!(option(&options, COMPACTION_TYPE), Some("twcs"));
    }
}