Skip to main content

common_grpc_expr/
util.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashSet;
16
17use api::v1::column_data_type_extension::TypeExt;
18use api::v1::column_def::contains_fulltext;
19use api::v1::{
20    AddColumn, AddColumns, Column, ColumnDataType, ColumnDataTypeExtension, ColumnDef,
21    ColumnOptions, ColumnSchema, CreateTableExpr, JsonTypeExtension, SemanticType,
22};
23use datatypes::schema::Schema;
24use snafu::{OptionExt, ResultExt, ensure};
25use table::metadata::TableId;
26use table::table_reference::TableReference;
27
28use crate::error::{
29    self, DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu,
30    InvalidFulltextIndexColumnTypeSnafu, MissingTimestampColumnSnafu, Result,
31    UnknownColumnDataTypeSnafu,
32};
/// A borrowed description of a single column, independent of whether it came
/// from a [`Column`] or a [`ColumnSchema`] (see the `From` impls in this file).
pub struct ColumnExpr<'a> {
    /// Name of the column.
    pub column_name: &'a str,
    /// Raw data type code; decoded with `ColumnDataType::try_from` when needed.
    pub datatype: i32,
    /// Raw semantic type code; compared against `SemanticType` variants cast to `i32`.
    pub semantic_type: i32,
    /// Optional extension that refines `datatype`.
    pub datatype_extension: &'a Option<ColumnDataTypeExtension>,
    /// Optional column options, copied verbatim into the generated `ColumnDef`.
    pub options: &'a Option<ColumnOptions>,
}
40
41impl<'a> ColumnExpr<'a> {
42    #[inline]
43    pub fn from_columns(columns: &'a [Column]) -> Vec<Self> {
44        columns.iter().map(Self::from).collect()
45    }
46
47    #[inline]
48    pub fn from_column_schemas(schemas: &'a [ColumnSchema]) -> Vec<Self> {
49        schemas.iter().map(Self::from).collect()
50    }
51}
52
53impl<'a> From<&'a Column> for ColumnExpr<'a> {
54    fn from(column: &'a Column) -> Self {
55        Self {
56            column_name: &column.column_name,
57            datatype: column.datatype,
58            semantic_type: column.semantic_type,
59            datatype_extension: &column.datatype_extension,
60            options: &column.options,
61        }
62    }
63}
64
65impl<'a> From<&'a ColumnSchema> for ColumnExpr<'a> {
66    fn from(schema: &'a ColumnSchema) -> Self {
67        Self {
68            column_name: &schema.column_name,
69            datatype: schema.datatype,
70            semantic_type: schema.semantic_type,
71            datatype_extension: &schema.datatype_extension,
72            options: &schema.options,
73        }
74    }
75}
76
77fn infer_column_datatype(
78    datatype: i32,
79    datatype_extension: &Option<ColumnDataTypeExtension>,
80) -> Result<ColumnDataType> {
81    let column_type =
82        ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?;
83
84    if matches!(&column_type, ColumnDataType::Binary)
85        && let Some(ext) = datatype_extension
86    {
87        let type_ext = ext
88            .type_ext
89            .as_ref()
90            .context(error::MissingFieldSnafu { field: "type_ext" })?;
91        if *type_ext == TypeExt::JsonType(JsonTypeExtension::JsonBinary.into()) {
92            return Ok(ColumnDataType::Json);
93        }
94    }
95
96    Ok(column_type)
97}
98
99pub fn build_create_table_expr(
100    table_id: Option<TableId>,
101    table_name: &TableReference<'_>,
102    column_exprs: Vec<ColumnExpr>,
103    engine: &str,
104    desc: &str,
105) -> Result<CreateTableExpr> {
106    // Check for duplicate names. If found, raise an error.
107    //
108    // The introduction of hashset incurs additional memory overhead
109    // but achieves a time complexity of O(1).
110    //
111    // The separate iteration over `column_exprs` is because the CPU prefers
112    // smaller loops, and avoid cloning String.
113    let mut distinct_names = HashSet::with_capacity(column_exprs.len());
114    for ColumnExpr { column_name, .. } in &column_exprs {
115        ensure!(
116            distinct_names.insert(*column_name),
117            DuplicatedColumnNameSnafu { name: *column_name }
118        );
119    }
120
121    let mut column_defs = Vec::with_capacity(column_exprs.len());
122    let mut primary_keys = Vec::with_capacity(column_exprs.len());
123    let mut time_index = None;
124
125    for expr in column_exprs {
126        let ColumnExpr {
127            column_name,
128            datatype,
129            semantic_type,
130            datatype_extension,
131            options,
132        } = expr;
133
134        let mut is_nullable = true;
135        match semantic_type {
136            v if v == SemanticType::Tag as i32 => primary_keys.push(column_name.to_owned()),
137            v if v == SemanticType::Timestamp as i32 => {
138                ensure!(
139                    time_index.is_none(),
140                    DuplicatedTimestampColumnSnafu {
141                        exists: time_index.as_ref().unwrap(),
142                        duplicated: column_name,
143                    }
144                );
145                time_index = Some(column_name.to_owned());
146                // Timestamp column must not be null.
147                is_nullable = false;
148            }
149            _ => {}
150        }
151
152        let column_type = infer_column_datatype(datatype, datatype_extension)?;
153
154        ensure!(
155            !contains_fulltext(options) || column_type == ColumnDataType::String,
156            InvalidFulltextIndexColumnTypeSnafu {
157                column_name,
158                column_type,
159            }
160        );
161
162        column_defs.push(ColumnDef {
163            name: column_name.to_owned(),
164            data_type: datatype,
165            is_nullable,
166            default_constraint: vec![],
167            semantic_type,
168            comment: String::new(),
169            datatype_extension: datatype_extension.clone(),
170            options: options.clone(),
171        });
172    }
173
174    let time_index = time_index.context(MissingTimestampColumnSnafu {
175        msg: format!("table is {}", table_name.table),
176    })?;
177
178    Ok(CreateTableExpr {
179        catalog_name: table_name.catalog.to_string(),
180        schema_name: table_name.schema.to_string(),
181        table_name: table_name.table.to_string(),
182        desc: desc.to_string(),
183        column_defs,
184        time_index,
185        primary_keys,
186        create_if_not_exists: true,
187        table_options: Default::default(),
188        table_id: table_id.map(|id| api::v1::TableId { id }),
189        engine: engine.to_string(),
190    })
191}
192
193/// Find columns that are not present in the schema and return them as `AddColumns`
194/// for adding columns automatically.
195/// It always sets `add_if_not_exists` to `true` for now.
196pub fn extract_new_columns(
197    schema: &Schema,
198    column_exprs: Vec<ColumnExpr>,
199) -> Result<Option<AddColumns>> {
200    let columns_to_add = column_exprs
201        .into_iter()
202        .filter(|expr| schema.column_schema_by_name(expr.column_name).is_none())
203        .map(|expr| {
204            let column_def = Some(ColumnDef {
205                name: expr.column_name.to_string(),
206                data_type: expr.datatype,
207                is_nullable: true,
208                default_constraint: vec![],
209                semantic_type: expr.semantic_type,
210                comment: String::new(),
211                datatype_extension: expr.datatype_extension.clone(),
212                options: expr.options.clone(),
213            });
214            AddColumn {
215                column_def,
216                location: None,
217                add_if_not_exists: true,
218            }
219        })
220        .collect::<Vec<_>>();
221
222    if columns_to_add.is_empty() {
223        Ok(None)
224    } else {
225        let mut distinct_names = HashSet::with_capacity(columns_to_add.len());
226        for add_column in &columns_to_add {
227            let name = add_column.column_def.as_ref().unwrap().name.as_str();
228            ensure!(
229                distinct_names.insert(name),
230                DuplicatedColumnNameSnafu { name }
231            );
232        }
233
234        Ok(Some(AddColumns {
235            add_columns: columns_to_add,
236        }))
237    }
238}
239#[cfg(test)]
240mod tests {
241    use std::sync::Arc;
242    use std::{assert_eq, vec};
243
244    use api::helper::ColumnDataTypeWrapper;
245    use api::v1::column::Values;
246    use api::v1::column_data_type_extension::TypeExt;
247    use api::v1::column_def::{options_from_fulltext, options_from_skipping};
248    use api::v1::{
249        Column, ColumnDataType, ColumnDataTypeExtension, Decimal128, DecimalTypeExtension,
250        IntervalMonthDayNano, SemanticType,
251    };
252    use common_catalog::consts::MITO_ENGINE;
253    use common_time::interval::IntervalUnit;
254    use common_time::timestamp::TimeUnit;
255    use datatypes::data_type::ConcreteDataType;
256    use datatypes::schema::{ColumnSchema, FulltextOptions, SchemaBuilder, SkippingIndexOptions};
257    use snafu::ResultExt;
258
259    use super::*;
260    use crate::error;
261    use crate::error::ColumnDataTypeSnafu;
262
263    #[inline]
264    fn build_column_schema(
265        column_name: &str,
266        datatype: i32,
267        nullable: bool,
268    ) -> error::Result<ColumnSchema> {
269        let datatype_wrapper =
270            ColumnDataTypeWrapper::try_new(datatype, None).context(ColumnDataTypeSnafu)?;
271
272        Ok(ColumnSchema::new(
273            column_name,
274            datatype_wrapper.into(),
275            nullable,
276        ))
277    }
278
279    fn build_create_expr_from_insertion(
280        catalog_name: &str,
281        schema_name: &str,
282        table_id: Option<TableId>,
283        table_name: &str,
284        columns: &[Column],
285        engine: &str,
286    ) -> Result<CreateTableExpr> {
287        let table_name = TableReference::full(catalog_name, schema_name, table_name);
288        let column_exprs = ColumnExpr::from_columns(columns);
289        build_create_table_expr(
290            table_id,
291            &table_name,
292            column_exprs,
293            engine,
294            "Created on insertion",
295        )
296    }
297
298    fn build_proto_column_schema(
299        column_name: &str,
300        datatype: ColumnDataType,
301        semantic_type: SemanticType,
302        options: Option<ColumnOptions>,
303    ) -> api::v1::ColumnSchema {
304        api::v1::ColumnSchema {
305            column_name: column_name.to_string(),
306            datatype: datatype as i32,
307            semantic_type: semantic_type as i32,
308            options,
309            ..Default::default()
310        }
311    }
312
313    #[test]
314    fn test_build_create_table_request() {
315        let table_id = Some(10);
316        let table_name = "test_metric";
317
318        assert!(
319            build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE)
320                .is_err()
321        );
322
323        let insert_batch = mock_insert_batch();
324
325        let create_expr = build_create_expr_from_insertion(
326            "",
327            "",
328            table_id,
329            table_name,
330            &insert_batch.0,
331            MITO_ENGINE,
332        )
333        .unwrap();
334
335        assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
336        assert_eq!(table_name, create_expr.table_name);
337        assert_eq!("Created on insertion".to_string(), create_expr.desc);
338        assert_eq!(
339            vec![create_expr.column_defs[0].name.clone()],
340            create_expr.primary_keys
341        );
342
343        let column_defs = create_expr.column_defs;
344        assert_eq!(column_defs[5].name, create_expr.time_index);
345        assert_eq!(7, column_defs.len());
346
347        assert_eq!(
348            ConcreteDataType::string_datatype(),
349            ConcreteDataType::from(
350                ColumnDataTypeWrapper::try_new(
351                    column_defs
352                        .iter()
353                        .find(|c| c.name == "host")
354                        .unwrap()
355                        .data_type,
356                    None
357                )
358                .unwrap()
359            )
360        );
361
362        assert_eq!(
363            ConcreteDataType::float64_datatype(),
364            ConcreteDataType::from(
365                ColumnDataTypeWrapper::try_new(
366                    column_defs
367                        .iter()
368                        .find(|c| c.name == "cpu")
369                        .unwrap()
370                        .data_type,
371                    None
372                )
373                .unwrap()
374            )
375        );
376
377        assert_eq!(
378            ConcreteDataType::float64_datatype(),
379            ConcreteDataType::from(
380                ColumnDataTypeWrapper::try_new(
381                    column_defs
382                        .iter()
383                        .find(|c| c.name == "memory")
384                        .unwrap()
385                        .data_type,
386                    None
387                )
388                .unwrap()
389            )
390        );
391
392        assert_eq!(
393            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
394            ConcreteDataType::from(
395                ColumnDataTypeWrapper::try_new(
396                    column_defs
397                        .iter()
398                        .find(|c| c.name == "time")
399                        .unwrap()
400                        .data_type,
401                    None
402                )
403                .unwrap()
404            )
405        );
406
407        assert_eq!(
408            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
409            ConcreteDataType::from(
410                ColumnDataTypeWrapper::try_new(
411                    column_defs
412                        .iter()
413                        .find(|c| c.name == "interval")
414                        .unwrap()
415                        .data_type,
416                    None
417                )
418                .unwrap()
419            )
420        );
421
422        assert_eq!(
423            ConcreteDataType::timestamp_millisecond_datatype(),
424            ConcreteDataType::from(
425                ColumnDataTypeWrapper::try_new(
426                    column_defs
427                        .iter()
428                        .find(|c| c.name == "ts")
429                        .unwrap()
430                        .data_type,
431                    None
432                )
433                .unwrap()
434            )
435        );
436
437        let decimal_column = column_defs.iter().find(|c| c.name == "decimals").unwrap();
438        assert_eq!(
439            ConcreteDataType::decimal128_datatype(38, 10),
440            ConcreteDataType::from(
441                ColumnDataTypeWrapper::try_new(
442                    decimal_column.data_type,
443                    decimal_column.datatype_extension.clone(),
444                )
445                .unwrap()
446            )
447        );
448    }
449
450    #[test]
451    fn test_find_new_columns() {
452        let mut columns = Vec::with_capacity(1);
453        let cpu_column = build_column_schema("cpu", 10, true).unwrap();
454        let ts_column = build_column_schema("ts", 15, false)
455            .unwrap()
456            .with_time_index(true);
457        columns.push(cpu_column);
458        columns.push(ts_column);
459
460        let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
461
462        assert!(
463            extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
464                .unwrap()
465                .is_none()
466        );
467
468        let insert_batch = mock_insert_batch();
469
470        let add_columns = extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
471            .unwrap()
472            .unwrap();
473
474        assert_eq!(5, add_columns.add_columns.len());
475        let host_column = &add_columns.add_columns[0];
476        assert_eq!(
477            ConcreteDataType::string_datatype(),
478            ConcreteDataType::from(
479                ColumnDataTypeWrapper::try_new(
480                    host_column.column_def.as_ref().unwrap().data_type,
481                    None
482                )
483                .unwrap()
484            )
485        );
486        assert!(host_column.add_if_not_exists);
487
488        let memory_column = &add_columns.add_columns[1];
489        assert_eq!(
490            ConcreteDataType::float64_datatype(),
491            ConcreteDataType::from(
492                ColumnDataTypeWrapper::try_new(
493                    memory_column.column_def.as_ref().unwrap().data_type,
494                    None
495                )
496                .unwrap()
497            )
498        );
499        assert!(host_column.add_if_not_exists);
500
501        let time_column = &add_columns.add_columns[2];
502        assert_eq!(
503            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
504            ConcreteDataType::from(
505                ColumnDataTypeWrapper::try_new(
506                    time_column.column_def.as_ref().unwrap().data_type,
507                    None
508                )
509                .unwrap()
510            )
511        );
512        assert!(host_column.add_if_not_exists);
513
514        let interval_column = &add_columns.add_columns[3];
515        assert_eq!(
516            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
517            ConcreteDataType::from(
518                ColumnDataTypeWrapper::try_new(
519                    interval_column.column_def.as_ref().unwrap().data_type,
520                    None
521                )
522                .unwrap()
523            )
524        );
525        assert!(host_column.add_if_not_exists);
526
527        let decimal_column = &add_columns.add_columns[4];
528        assert_eq!(
529            ConcreteDataType::decimal128_datatype(38, 10),
530            ConcreteDataType::from(
531                ColumnDataTypeWrapper::try_new(
532                    decimal_column.column_def.as_ref().unwrap().data_type,
533                    decimal_column
534                        .column_def
535                        .as_ref()
536                        .unwrap()
537                        .datatype_extension
538                        .clone()
539                )
540                .unwrap()
541            )
542        );
543        assert!(host_column.add_if_not_exists);
544    }
545
546    #[test]
547    fn test_build_create_table_expr_allows_skipping_index_on_int_column() {
548        let table_name = TableReference::full("", "", "test_metric");
549        let column_schemas = vec![
550            build_proto_column_schema(
551                "value",
552                ColumnDataType::Int64,
553                SemanticType::Field,
554                options_from_skipping(&SkippingIndexOptions::default()).unwrap(),
555            ),
556            build_proto_column_schema(
557                "ts",
558                ColumnDataType::TimestampMillisecond,
559                SemanticType::Timestamp,
560                None,
561            ),
562        ];
563
564        let result = build_create_table_expr(
565            None,
566            &table_name,
567            ColumnExpr::from_column_schemas(&column_schemas),
568            MITO_ENGINE,
569            "Created on insertion",
570        );
571
572        assert!(result.is_ok());
573    }
574
575    #[test]
576    fn test_build_create_table_expr_rejects_fulltext_index_on_non_string_column() {
577        let table_name = TableReference::full("", "", "test_metric");
578        let column_schemas = vec![
579            build_proto_column_schema(
580                "value",
581                ColumnDataType::Int64,
582                SemanticType::Field,
583                options_from_fulltext(&FulltextOptions {
584                    enable: true,
585                    ..Default::default()
586                })
587                .unwrap(),
588            ),
589            build_proto_column_schema(
590                "ts",
591                ColumnDataType::TimestampMillisecond,
592                SemanticType::Timestamp,
593                None,
594            ),
595        ];
596
597        let result = build_create_table_expr(
598            None,
599            &table_name,
600            ColumnExpr::from_column_schemas(&column_schemas),
601            MITO_ENGINE,
602            "Created on insertion",
603        );
604
605        assert!(result.is_err());
606    }
607
608    fn mock_insert_batch() -> (Vec<Column>, u32) {
609        let row_count = 2;
610
611        let host_vals = Values {
612            string_values: vec!["host1".to_string(), "host2".to_string()],
613            ..Default::default()
614        };
615        let host_column = Column {
616            column_name: "host".to_string(),
617            semantic_type: SemanticType::Tag as i32,
618            values: Some(host_vals),
619            null_mask: vec![0],
620            datatype: ColumnDataType::String as i32,
621            ..Default::default()
622        };
623
624        let cpu_vals = Values {
625            f64_values: vec![0.31],
626            ..Default::default()
627        };
628        let cpu_column = Column {
629            column_name: "cpu".to_string(),
630            semantic_type: SemanticType::Field as i32,
631            values: Some(cpu_vals),
632            null_mask: vec![2],
633            datatype: ColumnDataType::Float64 as i32,
634            ..Default::default()
635        };
636
637        let mem_vals = Values {
638            f64_values: vec![0.1],
639            ..Default::default()
640        };
641        let mem_column = Column {
642            column_name: "memory".to_string(),
643            semantic_type: SemanticType::Field as i32,
644            values: Some(mem_vals),
645            null_mask: vec![1],
646            datatype: ColumnDataType::Float64 as i32,
647            ..Default::default()
648        };
649
650        let time_vals = Values {
651            time_millisecond_values: vec![100, 101],
652            ..Default::default()
653        };
654        let time_column = Column {
655            column_name: "time".to_string(),
656            semantic_type: SemanticType::Field as i32,
657            values: Some(time_vals),
658            null_mask: vec![0],
659            datatype: ColumnDataType::TimeMillisecond as i32,
660            ..Default::default()
661        };
662
663        let interval1 = IntervalMonthDayNano {
664            months: 1,
665            days: 2,
666            nanoseconds: 3,
667        };
668        let interval2 = IntervalMonthDayNano {
669            months: 4,
670            days: 5,
671            nanoseconds: 6,
672        };
673        let interval_vals = Values {
674            interval_month_day_nano_values: vec![interval1, interval2],
675            ..Default::default()
676        };
677        let interval_column = Column {
678            column_name: "interval".to_string(),
679            semantic_type: SemanticType::Field as i32,
680            values: Some(interval_vals),
681            null_mask: vec![0],
682            datatype: ColumnDataType::IntervalMonthDayNano as i32,
683            ..Default::default()
684        };
685
686        let ts_vals = Values {
687            timestamp_millisecond_values: vec![100, 101],
688            ..Default::default()
689        };
690        let ts_column = Column {
691            column_name: "ts".to_string(),
692            semantic_type: SemanticType::Timestamp as i32,
693            values: Some(ts_vals),
694            null_mask: vec![0],
695            datatype: ColumnDataType::TimestampMillisecond as i32,
696            ..Default::default()
697        };
698        let decimal_vals = Values {
699            decimal128_values: vec![Decimal128 { hi: 0, lo: 123 }, Decimal128 { hi: 0, lo: 456 }],
700            ..Default::default()
701        };
702        let decimal_column = Column {
703            column_name: "decimals".to_string(),
704            semantic_type: SemanticType::Field as i32,
705            values: Some(decimal_vals),
706            null_mask: vec![0],
707            datatype: ColumnDataType::Decimal128 as i32,
708            datatype_extension: Some(ColumnDataTypeExtension {
709                type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
710                    precision: 38,
711                    scale: 10,
712                })),
713            }),
714            options: None,
715        };
716
717        (
718            vec![
719                host_column,
720                cpu_column,
721                mem_column,
722                time_column,
723                interval_column,
724                ts_column,
725                decimal_column,
726            ],
727            row_count,
728        )
729    }
730}