Skip to main content

metric_engine/engine/
state.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Internal states of metric engine
16
17use std::collections::{HashMap, HashSet};
18
19use api::v1::SemanticType;
20use common_telemetry::warn;
21use common_time::timestamp::TimeUnit;
22use snafu::OptionExt;
23use store_api::codec::PrimaryKeyEncoding;
24use store_api::metadata::ColumnMetadata;
25use store_api::storage::RegionId;
26
27use crate::engine::options::PhysicalRegionOptions;
28use crate::error::{PhysicalRegionNotFoundSnafu, Result};
29use crate::metrics::LOGICAL_REGION_COUNT;
30use crate::utils::to_data_region_id;
31
32pub struct PhysicalRegionState {
33    logical_regions: HashSet<RegionId>,
34    physical_columns: HashMap<String, ColumnMetadata>,
35    /// Name of the time index column, cached at region load so that the write
36    /// path doesn't have to scan `physical_columns` for the timestamp on every
37    /// row batch. The time index is fixed at region creation and never
38    /// changes, so this stays in sync with `physical_columns`.
39    time_index_column_name: String,
40    /// Name of the field column. Metric regions have exactly one field column
41    /// verified at creation time, so the write path can validate completeness
42    /// without consulting per-logical-region metadata.
43    field_column_name: String,
44    primary_key_encoding: PrimaryKeyEncoding,
45    options: PhysicalRegionOptions,
46    time_index_unit: TimeUnit,
47}
48
49impl PhysicalRegionState {
50    pub fn new(
51        physical_columns: HashMap<String, ColumnMetadata>,
52        primary_key_encoding: PrimaryKeyEncoding,
53        options: PhysicalRegionOptions,
54        time_index_unit: TimeUnit,
55    ) -> Self {
56        // Safety: a valid physical region always has exactly one time index
57        // column; callers validate this before reaching here (see
58        // `create_data_region_request` and the open path).
59        let time_index_column_name = physical_columns
60            .iter()
61            .find(|(_, meta)| meta.semantic_type == SemanticType::Timestamp)
62            .map(|(name, _)| name.clone())
63            .unwrap_or_default();
64        let field_column_name = physical_columns
65            .iter()
66            .find(|(_, meta)| meta.semantic_type == SemanticType::Field)
67            .map(|(name, _)| name.clone())
68            .unwrap_or_default();
69        Self {
70            logical_regions: HashSet::new(),
71            physical_columns,
72            time_index_column_name,
73            field_column_name,
74            primary_key_encoding,
75            options,
76            time_index_unit,
77        }
78    }
79
80    /// Returns a reference to the logical region ids.
81    pub fn logical_regions(&self) -> &HashSet<RegionId> {
82        &self.logical_regions
83    }
84
85    /// Returns a reference to the physical columns.
86    pub fn physical_columns(&self) -> &HashMap<String, ColumnMetadata> {
87        &self.physical_columns
88    }
89
90    /// Returns the cached name of the time index column.
91    pub fn time_index_column_name(&self) -> &str {
92        &self.time_index_column_name
93    }
94
95    /// Returns the cached name of the field column.
96    pub fn field_column_name(&self) -> &str {
97        &self.field_column_name
98    }
99
100    /// Returns a reference to the physical region options.
101    pub fn options(&self) -> &PhysicalRegionOptions {
102        &self.options
103    }
104
105    /// Removes a logical region id from the physical region state.
106    /// Returns true if the logical region id was present.
107    pub fn remove_logical_region(&mut self, logical_region_id: RegionId) -> bool {
108        self.logical_regions.remove(&logical_region_id)
109    }
110}
111
112/// Internal states of metric engine
113#[derive(Default)]
114pub(crate) struct MetricEngineState {
115    /// Physical regions states.
116    physical_regions: HashMap<RegionId, PhysicalRegionState>,
117    /// Mapping from logical region id to physical region id.
118    logical_regions: HashMap<RegionId, RegionId>,
119    /// Cache for the column metadata of logical regions.
120    /// The column order is the same with the order in the metadata, which is
121    /// alphabetically ordered on column name.
122    logical_columns: HashMap<RegionId, Vec<ColumnMetadata>>,
123}
124
125impl MetricEngineState {
126    pub fn add_physical_region(
127        &mut self,
128        physical_region_id: RegionId,
129        physical_columns: HashMap<String, ColumnMetadata>,
130        primary_key_encoding: PrimaryKeyEncoding,
131        options: PhysicalRegionOptions,
132        time_index_unit: TimeUnit,
133    ) {
134        let physical_region_id = to_data_region_id(physical_region_id);
135        self.physical_regions.insert(
136            physical_region_id,
137            PhysicalRegionState::new(
138                physical_columns,
139                primary_key_encoding,
140                options,
141                time_index_unit,
142            ),
143        );
144    }
145
146    /// # Panic
147    /// if the physical region does not exist
148    pub fn add_physical_columns(
149        &mut self,
150        physical_region_id: RegionId,
151        physical_columns: impl IntoIterator<Item = (String, ColumnMetadata)>,
152    ) {
153        let physical_region_id = to_data_region_id(physical_region_id);
154        let state = self.physical_regions.get_mut(&physical_region_id).unwrap();
155        for (col, meta) in physical_columns {
156            // The time index is fixed at region creation and alter cannot add
157            // a new one; keep the cached name in sync defensively.
158            debug_assert_ne!(
159                meta.semantic_type,
160                SemanticType::Timestamp,
161                "unexpected time index column {col} added to an existing physical region"
162            );
163            if meta.semantic_type == SemanticType::Field {
164                warn!(
165                    "Unexpected field column {col} added to physical region {physical_region_id}; cached field column remains {}",
166                    state.field_column_name
167                );
168            }
169            state.physical_columns.insert(col, meta);
170        }
171    }
172
173    /// # Panic
174    /// if the physical region does not exist
175    pub fn add_logical_regions(
176        &mut self,
177        physical_region_id: RegionId,
178        logical_region_ids: impl IntoIterator<Item = RegionId>,
179    ) {
180        let physical_region_id = to_data_region_id(physical_region_id);
181        let state = self.physical_regions.get_mut(&physical_region_id).unwrap();
182        for logical_region_id in logical_region_ids {
183            state.logical_regions.insert(logical_region_id);
184            self.logical_regions
185                .insert(logical_region_id, physical_region_id);
186        }
187    }
188
189    pub fn invalid_logical_regions_cache(
190        &mut self,
191        logical_region_ids: impl IntoIterator<Item = RegionId>,
192    ) {
193        for logical_region_id in logical_region_ids {
194            self.logical_columns.remove(&logical_region_id);
195        }
196    }
197
198    /// # Panic
199    /// if the physical region does not exist
200    pub fn add_logical_region(
201        &mut self,
202        physical_region_id: RegionId,
203        logical_region_id: RegionId,
204    ) {
205        let physical_region_id = to_data_region_id(physical_region_id);
206        self.physical_regions
207            .get_mut(&physical_region_id)
208            .unwrap()
209            .logical_regions
210            .insert(logical_region_id);
211        self.logical_regions
212            .insert(logical_region_id, physical_region_id);
213    }
214
215    /// Replace the logical columns of the logical region with given columns.
216    pub fn set_logical_columns(
217        &mut self,
218        logical_region_id: RegionId,
219        columns: Vec<ColumnMetadata>,
220    ) {
221        self.logical_columns.insert(logical_region_id, columns);
222    }
223
224    pub fn get_physical_region_id(&self, logical_region_id: RegionId) -> Option<RegionId> {
225        self.logical_regions.get(&logical_region_id).copied()
226    }
227
228    pub fn logical_columns(&self) -> &HashMap<RegionId, Vec<ColumnMetadata>> {
229        &self.logical_columns
230    }
231
232    pub fn physical_region_states(&self) -> &HashMap<RegionId, PhysicalRegionState> {
233        &self.physical_regions
234    }
235
236    pub fn exist_physical_region(&self, physical_region_id: RegionId) -> bool {
237        self.physical_regions.contains_key(&physical_region_id)
238    }
239
240    pub fn physical_region_time_index_unit(
241        &self,
242        physical_region_id: RegionId,
243    ) -> Option<TimeUnit> {
244        self.physical_regions
245            .get(&physical_region_id)
246            .map(|state| state.time_index_unit)
247    }
248
249    pub fn get_primary_key_encoding(
250        &self,
251        physical_region_id: RegionId,
252    ) -> Option<PrimaryKeyEncoding> {
253        self.physical_regions
254            .get(&physical_region_id)
255            .map(|state| state.primary_key_encoding)
256    }
257
258    pub fn logical_regions(&self) -> &HashMap<RegionId, RegionId> {
259        &self.logical_regions
260    }
261
262    /// Remove all data that are related to the physical region id.
263    pub fn remove_physical_region(&mut self, physical_region_id: RegionId) -> Result<()> {
264        let physical_region_id = to_data_region_id(physical_region_id);
265
266        let logical_regions = &self
267            .physical_regions
268            .get(&physical_region_id)
269            .context(PhysicalRegionNotFoundSnafu {
270                region_id: physical_region_id,
271            })?
272            .logical_regions;
273
274        LOGICAL_REGION_COUNT.sub(logical_regions.len() as i64);
275
276        for logical_region in logical_regions {
277            self.logical_regions.remove(logical_region);
278        }
279        self.physical_regions.remove(&physical_region_id);
280        Ok(())
281    }
282
283    /// Remove all data that are related to the logical region id.
284    pub fn remove_logical_region(&mut self, logical_region_id: RegionId) -> Result<()> {
285        let physical_region_id = self.logical_regions.remove(&logical_region_id).context(
286            PhysicalRegionNotFoundSnafu {
287                region_id: logical_region_id,
288            },
289        )?;
290
291        self.physical_regions
292            .get_mut(&physical_region_id)
293            .unwrap() // Safety: physical_region_id is got from physical_regions
294            .remove_logical_region(logical_region_id);
295
296        self.logical_columns.remove(&logical_region_id);
297
298        Ok(())
299    }
300
301    pub fn is_logical_region_exist(&self, logical_region_id: RegionId) -> bool {
302        self.logical_regions().contains_key(&logical_region_id)
303    }
304}