Skip to main content

metric_engine/engine/
create.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15mod extract_new_columns;
16
17use std::collections::{HashMap, HashSet};
18
19use api::v1::SemanticType;
20use common_telemetry::info;
21use common_time::{FOREVER, Timestamp};
22use datatypes::data_type::ConcreteDataType;
23use datatypes::schema::{ColumnSchema, SkippingIndexOptions};
24use datatypes::value::Value;
25use mito2::engine::MITO_ENGINE_NAME;
26use snafu::{OptionExt, ResultExt, ensure};
27use store_api::metadata::ColumnMetadata;
28use store_api::metric_engine_consts::{
29    ALTER_PHYSICAL_EXTENSION_KEY, DATA_REGION_SUBDIR, DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
30    DATA_SCHEMA_TSID_COLUMN_NAME, LOGICAL_TABLE_METADATA_KEY, METADATA_REGION_SUBDIR,
31    METADATA_SCHEMA_KEY_COLUMN_INDEX, METADATA_SCHEMA_KEY_COLUMN_NAME,
32    METADATA_SCHEMA_TIMESTAMP_COLUMN_INDEX, METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
33    METADATA_SCHEMA_VALUE_COLUMN_INDEX, METADATA_SCHEMA_VALUE_COLUMN_NAME,
34    is_metric_engine_internal_column,
35};
36use store_api::mito_engine_options::{TTL_KEY, WAL_OPTIONS_KEY};
37use store_api::region_engine::RegionEngine;
38use store_api::region_request::{AffectedRows, PathType, RegionCreateRequest, RegionRequest};
39use store_api::storage::RegionId;
40use store_api::storage::consts::ReservedColumnId;
41
42use crate::engine::MetricEngineInner;
43use crate::engine::create::extract_new_columns::extract_new_columns;
44use crate::engine::options::{PhysicalRegionOptions, set_data_region_options};
45use crate::error::{
46    ColumnTypeMismatchSnafu, ConflictRegionOptionSnafu, CreateMitoRegionSnafu,
47    InternalColumnOccupiedSnafu, InvalidMetadataSnafu, MissingRegionOptionSnafu,
48    MultipleFieldColumnSnafu, NoFieldColumnSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu,
49    Result, SerializeColumnMetadataSnafu, UnexpectedRequestSnafu,
50};
51use crate::metrics::PHYSICAL_REGION_COUNT;
52use crate::utils::{
53    self, append_manifest_info, encode_manifest_info_to_extensions, to_data_region_id,
54    to_metadata_region_id,
55};
56
/// Granularity passed to `SkippingIndexOptions::new_unchecked` when [`table_id_col`]
/// builds the skipping index of the internal table id column.
const DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY: u32 = 1024;
/// False positive rate passed to `SkippingIndexOptions::new_unchecked` for the
/// bloom-filter skipping index that [`table_id_col`] puts on the table id column.
const DEFAULT_TABLE_ID_SKIPPING_INDEX_FALSE_POSITIVE_RATE: f64 = 0.01;
59
60impl MetricEngineInner {
61    pub async fn create_regions(
62        &self,
63        mut requests: Vec<(RegionId, RegionCreateRequest)>,
64        extension_return_value: &mut HashMap<String, Vec<u8>>,
65    ) -> Result<AffectedRows> {
66        if requests.is_empty() {
67            return Ok(0);
68        }
69
70        for (_, request) in requests.iter() {
71            Self::verify_region_create_request(request)?;
72        }
73
74        let first_request = &requests.first().unwrap().1;
75        if first_request.is_physical_table() {
76            ensure!(
77                requests.len() == 1,
78                UnexpectedRequestSnafu {
79                    reason: "Physical table must be created with single request".to_string(),
80                }
81            );
82            let (region_id, request) = requests.pop().unwrap();
83            self.create_physical_region(region_id, request, extension_return_value)
84                .await?;
85
86            return Ok(0);
87        } else if first_request
88            .options
89            .contains_key(LOGICAL_TABLE_METADATA_KEY)
90        {
91            if requests.len() == 1 {
92                let request = &requests.first().unwrap().1;
93                let physical_region_id = parse_physical_region_id(request)?;
94                let mut manifest_infos = Vec::with_capacity(1);
95                self.create_logical_regions(physical_region_id, requests, extension_return_value)
96                    .await?;
97                append_manifest_info(&self.mito, physical_region_id, &mut manifest_infos);
98                encode_manifest_info_to_extensions(&manifest_infos, extension_return_value)?;
99            } else {
100                let grouped_requests =
101                    group_create_logical_region_requests_by_physical_region_id(requests)?;
102                let mut manifest_infos = Vec::with_capacity(grouped_requests.len());
103                for (physical_region_id, requests) in grouped_requests {
104                    self.create_logical_regions(
105                        physical_region_id,
106                        requests,
107                        extension_return_value,
108                    )
109                    .await?;
110                    append_manifest_info(&self.mito, physical_region_id, &mut manifest_infos);
111                }
112                encode_manifest_info_to_extensions(&manifest_infos, extension_return_value)?;
113            }
114        } else {
115            return MissingRegionOptionSnafu {}.fail();
116        }
117
118        Ok(0)
119    }
120
121    /// Initialize a physical metric region at given region id.
122    async fn create_physical_region(
123        &self,
124        region_id: RegionId,
125        request: RegionCreateRequest,
126        extension_return_value: &mut HashMap<String, Vec<u8>>,
127    ) -> Result<()> {
128        let physical_region_options = PhysicalRegionOptions::try_from(&request.options)?;
129        let (data_region_id, metadata_region_id) = Self::transform_region_id(region_id);
130
131        // create metadata region
132        let create_metadata_region_request = self.create_request_for_metadata_region(&request);
133        self.mito
134            .handle_request(
135                metadata_region_id,
136                RegionRequest::Create(create_metadata_region_request),
137            )
138            .await
139            .with_context(|_| CreateMitoRegionSnafu {
140                region_type: METADATA_REGION_SUBDIR,
141            })?;
142
143        // create data region
144        let create_data_region_request = self.create_request_for_data_region(&request);
145        let physical_columns = create_data_region_request
146            .column_metadatas
147            .iter()
148            .map(|metadata| (metadata.column_schema.name.clone(), metadata.clone()))
149            .collect::<HashMap<_, _>>();
150        let time_index_unit = create_data_region_request
151            .column_metadatas
152            .iter()
153            .find_map(|metadata| {
154                if metadata.semantic_type == SemanticType::Timestamp {
155                    metadata
156                        .column_schema
157                        .data_type
158                        .as_timestamp()
159                        .map(|data_type| data_type.unit())
160                } else {
161                    None
162                }
163            })
164            .context(UnexpectedRequestSnafu {
165                reason: "No time index column found",
166            })?;
167        let response = self
168            .mito
169            .handle_request(
170                data_region_id,
171                RegionRequest::Create(create_data_region_request),
172            )
173            .await
174            .with_context(|_| CreateMitoRegionSnafu {
175                region_type: DATA_REGION_SUBDIR,
176            })?;
177        let primary_key_encoding = self.mito.get_primary_key_encoding(data_region_id).context(
178            PhysicalRegionNotFoundSnafu {
179                region_id: data_region_id,
180            },
181        )?;
182        extension_return_value.extend(response.extensions);
183
184        info!(
185            "Created physical metric region {region_id}, primary key encoding={primary_key_encoding}, physical_region_options={physical_region_options:?}"
186        );
187        PHYSICAL_REGION_COUNT.inc();
188
189        // remember this table
190        self.state.write().unwrap().add_physical_region(
191            data_region_id,
192            physical_columns,
193            primary_key_encoding,
194            physical_region_options,
195            time_index_unit,
196        );
197
198        Ok(())
199    }
200
201    /// Create multiple logical regions on the same physical region.
202    async fn create_logical_regions(
203        &self,
204        physical_region_id: RegionId,
205        requests: Vec<(RegionId, RegionCreateRequest)>,
206        extension_return_value: &mut HashMap<String, Vec<u8>>,
207    ) -> Result<()> {
208        let data_region_id = utils::to_data_region_id(physical_region_id);
209
210        let unit = self
211            .state
212            .read()
213            .unwrap()
214            .physical_region_time_index_unit(physical_region_id)
215            .context(PhysicalRegionNotFoundSnafu {
216                region_id: data_region_id,
217            })?;
218        // Checks the time index unit of each request.
219        for (_, request) in &requests {
220            // Safety: verify_region_create_request() ensures that the request is valid.
221            let time_index_column = request
222                .column_metadatas
223                .iter()
224                .find(|col| col.semantic_type == SemanticType::Timestamp)
225                .unwrap();
226            let request_unit = time_index_column
227                .column_schema
228                .data_type
229                .as_timestamp()
230                .unwrap()
231                .unit();
232            ensure!(
233                request_unit == unit,
234                UnexpectedRequestSnafu {
235                    reason: format!(
236                        "Metric has differenttime unit ({:?}) than the physical region ({:?})",
237                        request_unit, unit
238                    ),
239                }
240            );
241        }
242
243        // Filters out the requests that the logical region already exists
244        let requests = {
245            let state = self.state.read().unwrap();
246            let mut skipped = Vec::with_capacity(requests.len());
247            let mut kept_requests = Vec::with_capacity(requests.len());
248
249            for (region_id, request) in requests {
250                if state.is_logical_region_exist(region_id) {
251                    skipped.push(region_id);
252                } else {
253                    kept_requests.push((region_id, request));
254                }
255            }
256
257            // log skipped regions
258            if !skipped.is_empty() {
259                info!(
260                    "Skipped creating logical regions {skipped:?} because they already exist",
261                    skipped = skipped
262                );
263            }
264            kept_requests
265        };
266
267        // Finds new columns to add to physical region
268        let mut new_column_names = HashSet::new();
269        let mut new_columns = Vec::new();
270
271        let index_option = {
272            let state = &self.state.read().unwrap();
273            let region_state = state
274                .physical_region_states()
275                .get(&data_region_id)
276                .with_context(|| PhysicalRegionNotFoundSnafu {
277                    region_id: data_region_id,
278                })?;
279            let physical_columns = region_state.physical_columns();
280
281            extract_new_columns(
282                &requests,
283                physical_columns,
284                &mut new_column_names,
285                &mut new_columns,
286            )?;
287            region_state.options().index
288        };
289
290        // TODO(weny): we dont need to pass a mutable new_columns here.
291        self.data_region
292            .add_columns(data_region_id, new_columns, index_option)
293            .await?;
294
295        let physical_columns = self.data_region.physical_columns(data_region_id).await?;
296        let physical_schema_map = physical_columns
297            .iter()
298            .map(|metadata| (metadata.column_schema.name.as_str(), metadata))
299            .collect::<HashMap<_, _>>();
300        let logical_regions = requests
301            .iter()
302            .map(|(region_id, _)| *region_id)
303            .collect::<Vec<_>>();
304        let logical_region_columns = requests.iter().map(|(region_id, request)| {
305            (
306                *region_id,
307                request
308                    .column_metadatas
309                    .iter()
310                    .map(|metadata| {
311                        // Safety: previous steps ensure the physical region exist
312                        let column_metadata = *physical_schema_map
313                            .get(metadata.column_schema.name.as_str())
314                            .unwrap();
315                        (metadata.column_schema.name.as_str(), column_metadata)
316                    })
317                    .collect::<HashMap<_, _>>(),
318            )
319        });
320
321        let new_add_columns = new_column_names.iter().map(|name| {
322            // Safety: previous steps ensure the physical region exist
323            let column_metadata = *physical_schema_map.get(name).unwrap();
324            (name.to_string(), column_metadata.clone())
325        });
326
327        extension_return_value.insert(
328            ALTER_PHYSICAL_EXTENSION_KEY.to_string(),
329            ColumnMetadata::encode_list(&physical_columns).context(SerializeColumnMetadataSnafu)?,
330        );
331
332        // Writes logical regions metadata to metadata region
333        self.metadata_region
334            .add_logical_regions(physical_region_id, true, logical_region_columns)
335            .await?;
336
337        {
338            let mut state = self.state.write().unwrap();
339            state.add_physical_columns(data_region_id, new_add_columns);
340            state.add_logical_regions(physical_region_id, logical_regions.clone());
341        }
342        for logical_region_id in logical_regions {
343            self.metadata_region
344                .open_logical_region(logical_region_id)
345                .await;
346        }
347
348        Ok(())
349    }
350
351    /// Check if
352    /// - internal columns are not occupied
353    /// - required table option is present ([PHYSICAL_TABLE_METADATA_KEY] or
354    ///   [LOGICAL_TABLE_METADATA_KEY])
355    fn verify_region_create_request(request: &RegionCreateRequest) -> Result<()> {
356        request.validate().context(InvalidMetadataSnafu)?;
357
358        let name_to_index = request
359            .column_metadatas
360            .iter()
361            .enumerate()
362            .map(|(idx, metadata)| (metadata.column_schema.name.clone(), idx))
363            .collect::<HashMap<String, usize>>();
364
365        let table_id_col_def = request.column_metadatas.iter().any(is_metric_name_col);
366        let tsid_col_def = request.column_metadatas.iter().any(is_tsid_col);
367
368        // check if internal columns are not occupied or defined in the request
369        ensure!(
370            !name_to_index.contains_key(DATA_SCHEMA_TABLE_ID_COLUMN_NAME) || table_id_col_def,
371            InternalColumnOccupiedSnafu {
372                column: DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
373            }
374        );
375        ensure!(
376            !name_to_index.contains_key(DATA_SCHEMA_TSID_COLUMN_NAME) || tsid_col_def,
377            InternalColumnOccupiedSnafu {
378                column: DATA_SCHEMA_TSID_COLUMN_NAME,
379            }
380        );
381
382        // check if required table option is present
383        ensure!(
384            request.is_physical_table() || request.options.contains_key(LOGICAL_TABLE_METADATA_KEY),
385            MissingRegionOptionSnafu {}
386        );
387        ensure!(
388            !(request.is_physical_table()
389                && request.options.contains_key(LOGICAL_TABLE_METADATA_KEY)),
390            ConflictRegionOptionSnafu {}
391        );
392
393        // check if only one field column is declared, and all tag columns are string
394        let mut field_col: Option<&ColumnMetadata> = None;
395        for col in &request.column_metadatas {
396            // Verified in above steps.
397            if is_metric_engine_internal_column(&col.column_schema.name) {
398                continue;
399            }
400            match col.semantic_type {
401                SemanticType::Tag => ensure!(
402                    col.column_schema.data_type == ConcreteDataType::string_datatype(),
403                    ColumnTypeMismatchSnafu {
404                        expect: ConcreteDataType::string_datatype(),
405                        actual: col.column_schema.data_type.clone(),
406                    }
407                ),
408                SemanticType::Field => {
409                    if let Some(field_col) = field_col {
410                        MultipleFieldColumnSnafu {
411                            previous: field_col.column_schema.name.clone(),
412                            current: col.column_schema.name.clone(),
413                        }
414                        .fail()?;
415                    }
416                    field_col = Some(col)
417                }
418                SemanticType::Timestamp => {}
419            }
420        }
421        let field_col = field_col.context(NoFieldColumnSnafu)?;
422
423        // make sure the field column is float64 type
424        ensure!(
425            field_col.column_schema.data_type == ConcreteDataType::float64_datatype(),
426            ColumnTypeMismatchSnafu {
427                expect: ConcreteDataType::float64_datatype(),
428                actual: field_col.column_schema.data_type.clone(),
429            }
430        );
431
432        Ok(())
433    }
434
435    /// Build data region id and metadata region id from the given region id.
436    ///
437    /// Return value: (data_region_id, metadata_region_id)
438    fn transform_region_id(region_id: RegionId) -> (RegionId, RegionId) {
439        (
440            to_data_region_id(region_id),
441            to_metadata_region_id(region_id),
442        )
443    }
444
445    /// Build [RegionCreateRequest] for metadata region
446    ///
447    /// This method will append [METADATA_REGION_SUBDIR] to the given `region_dir`.
448    pub fn create_request_for_metadata_region(
449        &self,
450        request: &RegionCreateRequest,
451    ) -> RegionCreateRequest {
452        // ts TIME INDEX DEFAULT 0
453        let timestamp_column_metadata = ColumnMetadata {
454            column_id: METADATA_SCHEMA_TIMESTAMP_COLUMN_INDEX as _,
455            semantic_type: SemanticType::Timestamp,
456            column_schema: ColumnSchema::new(
457                METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
458                ConcreteDataType::timestamp_millisecond_datatype(),
459                false,
460            )
461            .with_default_constraint(Some(datatypes::schema::ColumnDefaultConstraint::Value(
462                Value::Timestamp(Timestamp::new_millisecond(0)),
463            )))
464            .unwrap(),
465        };
466        // key STRING PRIMARY KEY
467        let key_column_metadata = ColumnMetadata {
468            column_id: METADATA_SCHEMA_KEY_COLUMN_INDEX as _,
469            semantic_type: SemanticType::Tag,
470            column_schema: ColumnSchema::new(
471                METADATA_SCHEMA_KEY_COLUMN_NAME,
472                ConcreteDataType::string_datatype(),
473                false,
474            ),
475        };
476        // val STRING
477        let value_column_metadata = ColumnMetadata {
478            column_id: METADATA_SCHEMA_VALUE_COLUMN_INDEX as _,
479            semantic_type: SemanticType::Field,
480            column_schema: ColumnSchema::new(
481                METADATA_SCHEMA_VALUE_COLUMN_NAME,
482                ConcreteDataType::string_datatype(),
483                true,
484            ),
485        };
486
487        let options = region_options_for_metadata_region(&request.options);
488        RegionCreateRequest {
489            engine: MITO_ENGINE_NAME.to_string(),
490            column_metadatas: vec![
491                timestamp_column_metadata,
492                key_column_metadata,
493                value_column_metadata,
494            ],
495            primary_key: vec![METADATA_SCHEMA_KEY_COLUMN_INDEX as _],
496            options,
497            table_dir: request.table_dir.clone(),
498            path_type: PathType::Metadata,
499            partition_expr_json: Some("".to_string()),
500            requirements: request.requirements,
501        }
502    }
503
504    /// Convert [RegionCreateRequest] for data region.
505    ///
506    /// All tag columns in the original request will be converted to value columns.
507    /// Those columns real semantic type is stored in metadata region.
508    ///
509    /// This will also add internal columns to the request.
510    pub fn create_request_for_data_region(
511        &self,
512        request: &RegionCreateRequest,
513    ) -> RegionCreateRequest {
514        let mut data_region_request = request.clone();
515        let mut primary_key = vec![ReservedColumnId::table_id(), ReservedColumnId::tsid()];
516
517        data_region_request.table_dir = request.table_dir.clone();
518        data_region_request.path_type = PathType::Data;
519
520        let table_id_col_def = request.column_metadatas.iter().any(is_metric_name_col);
521        let tsid_col_def = request.column_metadatas.iter().any(is_tsid_col);
522
523        // change nullability for tag columns
524        data_region_request
525            .column_metadatas
526            .iter_mut()
527            .for_each(|metadata| {
528                if metadata.semantic_type == SemanticType::Tag
529                    && !is_metric_name_col(metadata)
530                    && !is_tsid_col(metadata)
531                {
532                    metadata.column_schema.set_nullable();
533                    primary_key.push(metadata.column_id);
534                }
535            });
536
537        // add internal columns if not defined in the request
538        if !table_id_col_def {
539            data_region_request.column_metadatas.push(table_id_col());
540        }
541        if !tsid_col_def {
542            data_region_request.column_metadatas.push(tsid_col());
543        }
544        data_region_request.primary_key = primary_key;
545
546        // set data region options
547        set_data_region_options(
548            &mut data_region_request.options,
549            self.config.sparse_primary_key_encoding,
550        );
551
552        data_region_request
553    }
554}
555
556fn table_id_col() -> ColumnMetadata {
557    ColumnMetadata {
558        column_id: ReservedColumnId::table_id(),
559        semantic_type: SemanticType::Tag,
560        column_schema: ColumnSchema::new(
561            DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
562            ConcreteDataType::uint32_datatype(),
563            false,
564        )
565        .with_skipping_options(SkippingIndexOptions::new_unchecked(
566            DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY,
567            DEFAULT_TABLE_ID_SKIPPING_INDEX_FALSE_POSITIVE_RATE,
568            datatypes::schema::SkippingIndexType::BloomFilter,
569        ))
570        .unwrap(),
571    }
572}
573
574fn tsid_col() -> ColumnMetadata {
575    ColumnMetadata {
576        column_id: ReservedColumnId::tsid(),
577        semantic_type: SemanticType::Tag,
578        column_schema: ColumnSchema::new(
579            DATA_SCHEMA_TSID_COLUMN_NAME,
580            ConcreteDataType::uint64_datatype(),
581            false,
582        )
583        .with_inverted_index(false),
584    }
585}
586
587/// Returns true if the column is the metric name column.
588pub(crate) fn is_metric_name_col(column: &ColumnMetadata) -> bool {
589    column.column_id == ReservedColumnId::table_id()
590        && column.semantic_type == SemanticType::Tag
591        && column.column_schema.data_type == ConcreteDataType::uint32_datatype()
592        && column.column_schema.name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME
593        && !column.column_schema.is_nullable()
594}
595
596/// Returns true if the column is the tsid column.
597pub(crate) fn is_tsid_col(column: &ColumnMetadata) -> bool {
598    column.column_id == ReservedColumnId::tsid()
599        && column.semantic_type == SemanticType::Tag
600        && column.column_schema.data_type == ConcreteDataType::uint64_datatype()
601        && column.column_schema.name == DATA_SCHEMA_TSID_COLUMN_NAME
602        && !column.column_schema.is_nullable()
603}
604
605/// Groups the create logical region requests by physical region id.
606fn group_create_logical_region_requests_by_physical_region_id(
607    requests: Vec<(RegionId, RegionCreateRequest)>,
608) -> Result<HashMap<RegionId, Vec<(RegionId, RegionCreateRequest)>>> {
609    let mut result = HashMap::with_capacity(requests.len());
610    for (region_id, request) in requests {
611        let physical_region_id = parse_physical_region_id(&request)?;
612        result
613            .entry(physical_region_id)
614            .or_insert_with(Vec::new)
615            .push((region_id, request));
616    }
617
618    Ok(result)
619}
620
621/// Parses the physical region id from the request.
622fn parse_physical_region_id(request: &RegionCreateRequest) -> Result<RegionId> {
623    let physical_region_id_raw = request
624        .options
625        .get(LOGICAL_TABLE_METADATA_KEY)
626        .ok_or(MissingRegionOptionSnafu {}.build())?;
627
628    let physical_region_id: RegionId = physical_region_id_raw
629        .parse::<u64>()
630        .with_context(|_| ParseRegionIdSnafu {
631            raw: physical_region_id_raw,
632        })?
633        .into();
634
635    Ok(physical_region_id)
636}
637
638/// Creates the region options for metadata region in metric engine.
639pub(crate) fn region_options_for_metadata_region(
640    original: &HashMap<String, String>,
641) -> HashMap<String, String> {
642    let mut metadata_region_options = HashMap::new();
643    metadata_region_options.insert(TTL_KEY.to_string(), FOREVER.to_string());
644
645    if let Some(wal_options) = original.get(WAL_OPTIONS_KEY) {
646        metadata_region_options.insert(WAL_OPTIONS_KEY.to_string(), wal_options.clone());
647    }
648
649    metadata_region_options
650}
651
652#[cfg(test)]
653mod test {
654    use common_meta::ddl::test_util::assert_column_name_and_id;
655    use common_meta::ddl::utils::{parse_column_metadatas, parse_manifest_infos_from_extensions};
656    use common_query::prelude::{greptime_timestamp, greptime_value};
657    use store_api::metric_engine_consts::{METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY};
658    use store_api::region_request::{BatchRegionDdlRequest, RegionRequirements};
659
660    use super::*;
661    use crate::config::EngineConfig;
662    use crate::engine::MetricEngine;
663    use crate::test_util::{TestEnv, create_logical_region_request};
664
665    #[test]
666    fn test_internal_column_metadata() {
667        let table_id_col = table_id_col();
668        let tsid_col = tsid_col();
669        assert!(is_metric_name_col(&table_id_col));
670        assert!(is_tsid_col(&tsid_col));
671    }
672
673    #[test]
674    fn test_verify_region_create_request() {
675        // internal column is occupied
676        let request = RegionCreateRequest {
677            column_metadatas: vec![
678                ColumnMetadata {
679                    column_id: 0,
680                    semantic_type: SemanticType::Timestamp,
681                    column_schema: ColumnSchema::new(
682                        METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
683                        ConcreteDataType::timestamp_millisecond_datatype(),
684                        false,
685                    ),
686                },
687                ColumnMetadata {
688                    column_id: 1,
689                    semantic_type: SemanticType::Tag,
690                    column_schema: ColumnSchema::new(
691                        DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
692                        ConcreteDataType::uint32_datatype(),
693                        false,
694                    ),
695                },
696            ],
697            table_dir: "test_dir".to_string(),
698            path_type: PathType::Bare,
699            engine: METRIC_ENGINE_NAME.to_string(),
700            primary_key: vec![],
701            options: HashMap::new(),
702            partition_expr_json: Some("".to_string()),
703            requirements: RegionRequirements::object_storage(),
704        };
705        let result = MetricEngineInner::verify_region_create_request(&request);
706        assert!(result.is_err());
707        assert_eq!(
708            result.unwrap_err().to_string(),
709            "Internal column __table_id is reserved".to_string()
710        );
711
712        // allow reserved internal columns when defined properly
713        let request = RegionCreateRequest {
714            column_metadatas: vec![
715                ColumnMetadata {
716                    column_id: 0,
717                    semantic_type: SemanticType::Timestamp,
718                    column_schema: ColumnSchema::new(
719                        METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
720                        ConcreteDataType::timestamp_millisecond_datatype(),
721                        false,
722                    ),
723                },
724                ColumnMetadata {
725                    column_id: 1,
726                    semantic_type: SemanticType::Tag,
727                    column_schema: ColumnSchema::new(
728                        "column1".to_string(),
729                        ConcreteDataType::string_datatype(),
730                        false,
731                    ),
732                },
733                ColumnMetadata {
734                    column_id: 2,
735                    semantic_type: SemanticType::Field,
736                    column_schema: ColumnSchema::new(
737                        "column2".to_string(),
738                        ConcreteDataType::float64_datatype(),
739                        false,
740                    ),
741                },
742                table_id_col(),
743                tsid_col(),
744            ],
745            table_dir: "test_dir".to_string(),
746            path_type: PathType::Bare,
747            engine: METRIC_ENGINE_NAME.to_string(),
748            primary_key: vec![],
749            options: [(PHYSICAL_TABLE_METADATA_KEY.to_string(), String::new())]
750                .into_iter()
751                .collect(),
752            partition_expr_json: Some("".to_string()),
753            requirements: Default::default(),
754        };
755        MetricEngineInner::verify_region_create_request(&request).unwrap();
756
757        // valid request
758        let request = RegionCreateRequest {
759            column_metadatas: vec![
760                ColumnMetadata {
761                    column_id: 0,
762                    semantic_type: SemanticType::Timestamp,
763                    column_schema: ColumnSchema::new(
764                        METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
765                        ConcreteDataType::timestamp_millisecond_datatype(),
766                        false,
767                    ),
768                },
769                ColumnMetadata {
770                    column_id: 1,
771                    semantic_type: SemanticType::Tag,
772                    column_schema: ColumnSchema::new(
773                        "column1".to_string(),
774                        ConcreteDataType::string_datatype(),
775                        false,
776                    ),
777                },
778                ColumnMetadata {
779                    column_id: 2,
780                    semantic_type: SemanticType::Field,
781                    column_schema: ColumnSchema::new(
782                        "column2".to_string(),
783                        ConcreteDataType::float64_datatype(),
784                        false,
785                    ),
786                },
787            ],
788            table_dir: "test_dir".to_string(),
789            path_type: PathType::Bare,
790            engine: METRIC_ENGINE_NAME.to_string(),
791            primary_key: vec![],
792            options: [(PHYSICAL_TABLE_METADATA_KEY.to_string(), String::new())]
793                .into_iter()
794                .collect(),
795            partition_expr_json: Some("".to_string()),
796            requirements: Default::default(),
797        };
798        MetricEngineInner::verify_region_create_request(&request).unwrap();
799    }
800
801    #[test]
802    fn test_verify_region_create_request_options() {
803        let mut request = RegionCreateRequest {
804            column_metadatas: vec![
805                ColumnMetadata {
806                    column_id: 0,
807                    semantic_type: SemanticType::Timestamp,
808                    column_schema: ColumnSchema::new(
809                        METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
810                        ConcreteDataType::timestamp_millisecond_datatype(),
811                        false,
812                    ),
813                },
814                ColumnMetadata {
815                    column_id: 1,
816                    semantic_type: SemanticType::Field,
817                    column_schema: ColumnSchema::new(
818                        "val".to_string(),
819                        ConcreteDataType::float64_datatype(),
820                        false,
821                    ),
822                },
823            ],
824            table_dir: "test_dir".to_string(),
825            path_type: PathType::Bare,
826            engine: METRIC_ENGINE_NAME.to_string(),
827            primary_key: vec![],
828            options: HashMap::new(),
829            partition_expr_json: Some("".to_string()),
830            requirements: Default::default(),
831        };
832        MetricEngineInner::verify_region_create_request(&request).unwrap_err();
833
834        let mut options = HashMap::new();
835        options.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
836        request.options.clone_from(&options);
837        MetricEngineInner::verify_region_create_request(&request).unwrap();
838
839        options.insert(LOGICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
840        request.options.clone_from(&options);
841        MetricEngineInner::verify_region_create_request(&request).unwrap_err();
842
843        options.remove(PHYSICAL_TABLE_METADATA_KEY).unwrap();
844        request.options = options;
845        MetricEngineInner::verify_region_create_request(&request).unwrap();
846    }
847
848    #[tokio::test]
849    async fn test_create_request_for_physical_regions() {
850        // original request
851        let options: HashMap<_, _> = [
852            ("ttl".to_string(), "60m".to_string()),
853            ("skip_wal".to_string(), "true".to_string()),
854        ]
855        .into_iter()
856        .collect();
857        let request = RegionCreateRequest {
858            engine: METRIC_ENGINE_NAME.to_string(),
859            column_metadatas: vec![
860                ColumnMetadata {
861                    column_id: 0,
862                    semantic_type: SemanticType::Timestamp,
863                    column_schema: ColumnSchema::new(
864                        "timestamp",
865                        ConcreteDataType::timestamp_millisecond_datatype(),
866                        false,
867                    ),
868                },
869                ColumnMetadata {
870                    column_id: 1,
871                    semantic_type: SemanticType::Tag,
872                    column_schema: ColumnSchema::new(
873                        "tag",
874                        ConcreteDataType::string_datatype(),
875                        false,
876                    ),
877                },
878            ],
879            primary_key: vec![0],
880            options,
881            table_dir: "/test_dir".to_string(),
882            path_type: PathType::Bare,
883            partition_expr_json: Some("".to_string()),
884            requirements: RegionRequirements::object_storage(),
885        };
886
887        // set up
888        let env = TestEnv::new().await;
889        let engine = MetricEngine::try_new(env.mito(), EngineConfig::default()).unwrap();
890        let engine_inner = engine.inner;
891
892        // check create data region request
893        let data_region_request = engine_inner.create_request_for_data_region(&request);
894        assert_eq!(data_region_request.table_dir, "/test_dir".to_string());
895        assert_eq!(data_region_request.path_type, PathType::Data);
896        assert_eq!(data_region_request.column_metadatas.len(), 4);
897        assert_eq!(
898            data_region_request.primary_key,
899            vec![ReservedColumnId::table_id(), ReservedColumnId::tsid(), 1]
900        );
901        assert!(data_region_request.options.contains_key("ttl"));
902        assert_eq!(
903            data_region_request.requirements,
904            RegionRequirements::object_storage()
905        );
906
907        // check create metadata region request
908        let metadata_region_request = engine_inner.create_request_for_metadata_region(&request);
909        assert_eq!(metadata_region_request.table_dir, "/test_dir".to_string());
910        assert_eq!(metadata_region_request.path_type, PathType::Metadata);
911        assert_eq!(
912            metadata_region_request.options.get("ttl").unwrap(),
913            "forever"
914        );
915        assert!(!metadata_region_request.options.contains_key("skip_wal"));
916        assert_eq!(
917            metadata_region_request.requirements,
918            RegionRequirements::object_storage()
919        );
920    }
921
922    #[tokio::test]
923    async fn test_create_request_for_physical_regions_with_internal_columns() {
924        let options: HashMap<_, _> = [
925            ("ttl".to_string(), "60m".to_string()),
926            ("skip_wal".to_string(), "true".to_string()),
927        ]
928        .into_iter()
929        .collect();
930        let request = RegionCreateRequest {
931            engine: METRIC_ENGINE_NAME.to_string(),
932            column_metadatas: vec![
933                ColumnMetadata {
934                    column_id: 0,
935                    semantic_type: SemanticType::Timestamp,
936                    column_schema: ColumnSchema::new(
937                        "timestamp",
938                        ConcreteDataType::timestamp_millisecond_datatype(),
939                        false,
940                    ),
941                },
942                ColumnMetadata {
943                    column_id: 1,
944                    semantic_type: SemanticType::Tag,
945                    column_schema: ColumnSchema::new(
946                        "tag",
947                        ConcreteDataType::string_datatype(),
948                        false,
949                    ),
950                },
951                ColumnMetadata {
952                    column_id: 2,
953                    semantic_type: SemanticType::Field,
954                    column_schema: ColumnSchema::new(
955                        "value",
956                        ConcreteDataType::float64_datatype(),
957                        false,
958                    ),
959                },
960                table_id_col(),
961                tsid_col(),
962            ],
963            primary_key: vec![0],
964            options,
965            table_dir: "/test_dir".to_string(),
966            path_type: PathType::Bare,
967            partition_expr_json: Some("".to_string()),
968            requirements: Default::default(),
969        };
970
971        let env = TestEnv::new().await;
972        let engine = MetricEngine::try_new(env.mito(), EngineConfig::default()).unwrap();
973        let engine_inner = engine.inner;
974
975        let data_region_request = engine_inner.create_request_for_data_region(&request);
976        assert_eq!(data_region_request.column_metadatas.len(), 5);
977        assert_eq!(
978            data_region_request.primary_key,
979            vec![ReservedColumnId::table_id(), ReservedColumnId::tsid(), 1]
980        );
981
982        let table_id_count = data_region_request
983            .column_metadatas
984            .iter()
985            .filter(|metadata| metadata.column_schema.name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME)
986            .count();
987        let tsid_count = data_region_request
988            .column_metadatas
989            .iter()
990            .filter(|metadata| metadata.column_schema.name == DATA_SCHEMA_TSID_COLUMN_NAME)
991            .count();
992        assert_eq!(table_id_count, 1);
993        assert_eq!(tsid_count, 1);
994
995        let tag_metadata = data_region_request
996            .column_metadatas
997            .iter()
998            .find(|metadata| metadata.column_schema.name == "tag")
999            .unwrap();
1000        assert!(tag_metadata.column_schema.is_nullable());
1001
1002        let table_id_metadata = data_region_request
1003            .column_metadatas
1004            .iter()
1005            .find(|metadata| metadata.column_schema.name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME)
1006            .unwrap();
1007        assert!(is_metric_name_col(table_id_metadata));
1008
1009        let tsid_metadata = data_region_request
1010            .column_metadatas
1011            .iter()
1012            .find(|metadata| metadata.column_schema.name == DATA_SCHEMA_TSID_COLUMN_NAME)
1013            .unwrap();
1014        assert!(is_tsid_col(tsid_metadata));
1015    }
1016
1017    #[tokio::test]
1018    async fn test_create_logical_regions() {
1019        let env = TestEnv::new().await;
1020        let engine = env.metric();
1021        let physical_region_id1 = RegionId::new(1024, 0);
1022        let physical_region_id2 = RegionId::new(1024, 1);
1023        let logical_region_id1 = RegionId::new(1025, 0);
1024        let logical_region_id2 = RegionId::new(1025, 1);
1025        env.create_physical_region(physical_region_id1, "/test_dir1", vec![])
1026            .await;
1027        env.create_physical_region(physical_region_id2, "/test_dir2", vec![])
1028            .await;
1029
1030        let region_create_request1 =
1031            create_logical_region_request(&["job"], physical_region_id1, "logical1");
1032        let region_create_request2 =
1033            create_logical_region_request(&["job"], physical_region_id2, "logical2");
1034
1035        let response = engine
1036            .handle_batch_ddl_requests(BatchRegionDdlRequest::Create(vec![
1037                (logical_region_id1, region_create_request1),
1038                (logical_region_id2, region_create_request2),
1039            ]))
1040            .await
1041            .unwrap();
1042
1043        let manifest_infos = parse_manifest_infos_from_extensions(&response.extensions).unwrap();
1044        assert_eq!(manifest_infos.len(), 2);
1045        let region_ids = manifest_infos.into_iter().map(|i| i.0).collect::<Vec<_>>();
1046        assert!(region_ids.contains(&physical_region_id1));
1047        assert!(region_ids.contains(&physical_region_id2));
1048
1049        let column_metadatas =
1050            parse_column_metadatas(&response.extensions, ALTER_PHYSICAL_EXTENSION_KEY).unwrap();
1051        assert_column_name_and_id(
1052            &column_metadatas,
1053            &[
1054                (greptime_timestamp(), 0),
1055                (greptime_value(), 1),
1056                ("__table_id", ReservedColumnId::table_id()),
1057                ("__tsid", ReservedColumnId::tsid()),
1058                ("job", 2),
1059            ],
1060        );
1061    }
1062}