1use std::collections::HashSet;
16
17use api::v1::column_data_type_extension::TypeExt;
18use api::v1::column_def::contains_fulltext;
19use api::v1::{
20 AddColumn, AddColumns, Column, ColumnDataType, ColumnDataTypeExtension, ColumnDef,
21 ColumnOptions, ColumnSchema, CreateTableExpr, JsonTypeExtension, SemanticType,
22};
23use datatypes::schema::Schema;
24use snafu::{OptionExt, ResultExt, ensure};
25use table::metadata::TableId;
26use table::table_reference::TableReference;
27
28use crate::error::{
29 self, DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu,
30 InvalidFulltextIndexColumnTypeSnafu, MissingTimestampColumnSnafu, Result,
31 UnknownColumnDataTypeSnafu,
32};
33pub struct ColumnExpr<'a> {
34 pub column_name: &'a str,
35 pub datatype: i32,
36 pub semantic_type: i32,
37 pub datatype_extension: &'a Option<ColumnDataTypeExtension>,
38 pub options: &'a Option<ColumnOptions>,
39}
40
41impl<'a> ColumnExpr<'a> {
42 #[inline]
43 pub fn from_columns(columns: &'a [Column]) -> Vec<Self> {
44 columns.iter().map(Self::from).collect()
45 }
46
47 #[inline]
48 pub fn from_column_schemas(schemas: &'a [ColumnSchema]) -> Vec<Self> {
49 schemas.iter().map(Self::from).collect()
50 }
51}
52
53impl<'a> From<&'a Column> for ColumnExpr<'a> {
54 fn from(column: &'a Column) -> Self {
55 Self {
56 column_name: &column.column_name,
57 datatype: column.datatype,
58 semantic_type: column.semantic_type,
59 datatype_extension: &column.datatype_extension,
60 options: &column.options,
61 }
62 }
63}
64
65impl<'a> From<&'a ColumnSchema> for ColumnExpr<'a> {
66 fn from(schema: &'a ColumnSchema) -> Self {
67 Self {
68 column_name: &schema.column_name,
69 datatype: schema.datatype,
70 semantic_type: schema.semantic_type,
71 datatype_extension: &schema.datatype_extension,
72 options: &schema.options,
73 }
74 }
75}
76
77fn infer_column_datatype(
78 datatype: i32,
79 datatype_extension: &Option<ColumnDataTypeExtension>,
80) -> Result<ColumnDataType> {
81 let column_type =
82 ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?;
83
84 if matches!(&column_type, ColumnDataType::Binary)
85 && let Some(ext) = datatype_extension
86 {
87 let type_ext = ext
88 .type_ext
89 .as_ref()
90 .context(error::MissingFieldSnafu { field: "type_ext" })?;
91 if *type_ext == TypeExt::JsonType(JsonTypeExtension::JsonBinary.into()) {
92 return Ok(ColumnDataType::Json);
93 }
94 }
95
96 Ok(column_type)
97}
98
99pub fn build_create_table_expr(
100 table_id: Option<TableId>,
101 table_name: &TableReference<'_>,
102 column_exprs: Vec<ColumnExpr>,
103 engine: &str,
104 desc: &str,
105) -> Result<CreateTableExpr> {
106 let mut distinct_names = HashSet::with_capacity(column_exprs.len());
114 for ColumnExpr { column_name, .. } in &column_exprs {
115 ensure!(
116 distinct_names.insert(*column_name),
117 DuplicatedColumnNameSnafu { name: *column_name }
118 );
119 }
120
121 let mut column_defs = Vec::with_capacity(column_exprs.len());
122 let mut primary_keys = Vec::with_capacity(column_exprs.len());
123 let mut time_index = None;
124
125 for expr in column_exprs {
126 let ColumnExpr {
127 column_name,
128 datatype,
129 semantic_type,
130 datatype_extension,
131 options,
132 } = expr;
133
134 let mut is_nullable = true;
135 match semantic_type {
136 v if v == SemanticType::Tag as i32 => primary_keys.push(column_name.to_owned()),
137 v if v == SemanticType::Timestamp as i32 => {
138 ensure!(
139 time_index.is_none(),
140 DuplicatedTimestampColumnSnafu {
141 exists: time_index.as_ref().unwrap(),
142 duplicated: column_name,
143 }
144 );
145 time_index = Some(column_name.to_owned());
146 is_nullable = false;
148 }
149 _ => {}
150 }
151
152 let column_type = infer_column_datatype(datatype, datatype_extension)?;
153
154 ensure!(
155 !contains_fulltext(options) || column_type == ColumnDataType::String,
156 InvalidFulltextIndexColumnTypeSnafu {
157 column_name,
158 column_type,
159 }
160 );
161
162 column_defs.push(ColumnDef {
163 name: column_name.to_owned(),
164 data_type: datatype,
165 is_nullable,
166 default_constraint: vec![],
167 semantic_type,
168 comment: String::new(),
169 datatype_extension: datatype_extension.clone(),
170 options: options.clone(),
171 });
172 }
173
174 let time_index = time_index.context(MissingTimestampColumnSnafu {
175 msg: format!("table is {}", table_name.table),
176 })?;
177
178 Ok(CreateTableExpr {
179 catalog_name: table_name.catalog.to_string(),
180 schema_name: table_name.schema.to_string(),
181 table_name: table_name.table.to_string(),
182 desc: desc.to_string(),
183 column_defs,
184 time_index,
185 primary_keys,
186 create_if_not_exists: true,
187 table_options: Default::default(),
188 table_id: table_id.map(|id| api::v1::TableId { id }),
189 engine: engine.to_string(),
190 })
191}
192
193pub fn extract_new_columns(
197 schema: &Schema,
198 column_exprs: Vec<ColumnExpr>,
199) -> Result<Option<AddColumns>> {
200 let columns_to_add = column_exprs
201 .into_iter()
202 .filter(|expr| schema.column_schema_by_name(expr.column_name).is_none())
203 .map(|expr| {
204 let column_def = Some(ColumnDef {
205 name: expr.column_name.to_string(),
206 data_type: expr.datatype,
207 is_nullable: true,
208 default_constraint: vec![],
209 semantic_type: expr.semantic_type,
210 comment: String::new(),
211 datatype_extension: expr.datatype_extension.clone(),
212 options: expr.options.clone(),
213 });
214 AddColumn {
215 column_def,
216 location: None,
217 add_if_not_exists: true,
218 }
219 })
220 .collect::<Vec<_>>();
221
222 if columns_to_add.is_empty() {
223 Ok(None)
224 } else {
225 let mut distinct_names = HashSet::with_capacity(columns_to_add.len());
226 for add_column in &columns_to_add {
227 let name = add_column.column_def.as_ref().unwrap().name.as_str();
228 ensure!(
229 distinct_names.insert(name),
230 DuplicatedColumnNameSnafu { name }
231 );
232 }
233
234 Ok(Some(AddColumns {
235 add_columns: columns_to_add,
236 }))
237 }
238}
239#[cfg(test)]
240mod tests {
241 use std::sync::Arc;
242 use std::{assert_eq, vec};
243
244 use api::helper::ColumnDataTypeWrapper;
245 use api::v1::column::Values;
246 use api::v1::column_data_type_extension::TypeExt;
247 use api::v1::column_def::{options_from_fulltext, options_from_skipping};
248 use api::v1::{
249 Column, ColumnDataType, ColumnDataTypeExtension, Decimal128, DecimalTypeExtension,
250 IntervalMonthDayNano, SemanticType,
251 };
252 use common_catalog::consts::MITO_ENGINE;
253 use common_time::interval::IntervalUnit;
254 use common_time::timestamp::TimeUnit;
255 use datatypes::data_type::ConcreteDataType;
256 use datatypes::schema::{ColumnSchema, FulltextOptions, SchemaBuilder, SkippingIndexOptions};
257 use snafu::ResultExt;
258
259 use super::*;
260 use crate::error;
261 use crate::error::ColumnDataTypeSnafu;
262
263 #[inline]
264 fn build_column_schema(
265 column_name: &str,
266 datatype: i32,
267 nullable: bool,
268 ) -> error::Result<ColumnSchema> {
269 let datatype_wrapper =
270 ColumnDataTypeWrapper::try_new(datatype, None).context(ColumnDataTypeSnafu)?;
271
272 Ok(ColumnSchema::new(
273 column_name,
274 datatype_wrapper.into(),
275 nullable,
276 ))
277 }
278
279 fn build_create_expr_from_insertion(
280 catalog_name: &str,
281 schema_name: &str,
282 table_id: Option<TableId>,
283 table_name: &str,
284 columns: &[Column],
285 engine: &str,
286 ) -> Result<CreateTableExpr> {
287 let table_name = TableReference::full(catalog_name, schema_name, table_name);
288 let column_exprs = ColumnExpr::from_columns(columns);
289 build_create_table_expr(
290 table_id,
291 &table_name,
292 column_exprs,
293 engine,
294 "Created on insertion",
295 )
296 }
297
298 fn build_proto_column_schema(
299 column_name: &str,
300 datatype: ColumnDataType,
301 semantic_type: SemanticType,
302 options: Option<ColumnOptions>,
303 ) -> api::v1::ColumnSchema {
304 api::v1::ColumnSchema {
305 column_name: column_name.to_string(),
306 datatype: datatype as i32,
307 semantic_type: semantic_type as i32,
308 options,
309 ..Default::default()
310 }
311 }
312
/// Building a create-table expression from the mock insert batch yields the
/// expected identifiers, primary key, time index and per-column data types.
#[test]
fn test_build_create_table_request() {
    let table_id = Some(10);
    let table_name = "test_metric";

    // Without any column there is no time index, so the build must fail.
    let empty = build_create_expr_from_insertion("", "", table_id, table_name, &[], MITO_ENGINE);
    assert!(empty.is_err());

    let (columns, _row_count) = mock_insert_batch();
    let CreateTableExpr {
        table_id: created_id,
        table_name: created_name,
        desc,
        column_defs,
        time_index,
        primary_keys,
        ..
    } = build_create_expr_from_insertion("", "", table_id, table_name, &columns, MITO_ENGINE)
        .unwrap();

    assert_eq!(table_id, created_id.map(|x| x.id));
    assert_eq!(table_name, created_name);
    assert_eq!("Created on insertion".to_string(), desc);
    assert_eq!(vec![column_defs[0].name.clone()], primary_keys);
    assert_eq!(column_defs[5].name, time_index);
    assert_eq!(7, column_defs.len());

    // Resolves the concrete type of a column that carries no datatype extension.
    let plain_type_of = |name: &str| {
        let def = column_defs.iter().find(|c| c.name == name).unwrap();
        ConcreteDataType::from(ColumnDataTypeWrapper::try_new(def.data_type, None).unwrap())
    };

    let expected_types = [
        ("host", ConcreteDataType::string_datatype()),
        ("cpu", ConcreteDataType::float64_datatype()),
        ("memory", ConcreteDataType::float64_datatype()),
        (
            "time",
            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
        ),
        (
            "interval",
            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
        ),
        ("ts", ConcreteDataType::timestamp_millisecond_datatype()),
    ];
    for (name, expected) in expected_types {
        assert_eq!(expected, plain_type_of(name));
    }

    // The decimal column needs its extension to recover precision and scale.
    let decimal_column = column_defs.iter().find(|c| c.name == "decimals").unwrap();
    let decimal_type = ConcreteDataType::from(
        ColumnDataTypeWrapper::try_new(
            decimal_column.data_type,
            decimal_column.datatype_extension.clone(),
        )
        .unwrap(),
    );
    assert_eq!(ConcreteDataType::decimal128_datatype(38, 10), decimal_type);
}
449
/// Only the columns missing from the schema are reported, in input order, each
/// with its own name, data type and `add_if_not_exists` flag.
#[test]
fn test_find_new_columns() {
    let cpu_column = build_column_schema("cpu", 10, true).unwrap();
    let ts_column = build_column_schema("ts", 15, false)
        .unwrap()
        .with_time_index(true);
    let columns = vec![cpu_column, ts_column];

    let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());

    // No input columns means nothing to add.
    assert!(
        extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
            .unwrap()
            .is_none()
    );

    let insert_batch = mock_insert_batch();

    let add_columns = extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
        .unwrap()
        .unwrap();

    // `cpu` and `ts` already exist in the schema; the other five columns of the
    // mock batch are new and must be reported in batch order.
    let expected = [
        ("host", ConcreteDataType::string_datatype()),
        ("memory", ConcreteDataType::float64_datatype()),
        (
            "time",
            ConcreteDataType::time_datatype(TimeUnit::Millisecond),
        ),
        (
            "interval",
            ConcreteDataType::interval_datatype(IntervalUnit::MonthDayNano),
        ),
        ("decimals", ConcreteDataType::decimal128_datatype(38, 10)),
    ];
    assert_eq!(5, add_columns.add_columns.len());

    for (add_column, (expected_name, expected_type)) in
        add_columns.add_columns.iter().zip(expected)
    {
        let column_def = add_column.column_def.as_ref().unwrap();
        assert_eq!(expected_name, column_def.name);
        // Columns without an extension carry `None` here, which matches passing
        // `None` explicitly; the decimal column needs its extension.
        assert_eq!(
            expected_type,
            ConcreteDataType::from(
                ColumnDataTypeWrapper::try_new(
                    column_def.data_type,
                    column_def.datatype_extension.clone(),
                )
                .unwrap()
            )
        );
        // Check the flag on the column under test, not only on the first one.
        assert!(add_column.add_if_not_exists);
    }
}
545
/// A skipping index on an `Int64` field column is accepted.
#[test]
fn test_build_create_table_expr_allows_skipping_index_on_int_column() {
    let skipping_options = options_from_skipping(&SkippingIndexOptions::default()).unwrap();
    let value_column = build_proto_column_schema(
        "value",
        ColumnDataType::Int64,
        SemanticType::Field,
        skipping_options,
    );
    let ts_column = build_proto_column_schema(
        "ts",
        ColumnDataType::TimestampMillisecond,
        SemanticType::Timestamp,
        None,
    );
    let column_schemas = [value_column, ts_column];

    let table_name = TableReference::full("", "", "test_metric");
    let result = build_create_table_expr(
        None,
        &table_name,
        ColumnExpr::from_column_schemas(&column_schemas),
        MITO_ENGINE,
        "Created on insertion",
    );

    assert!(result.is_ok());
}
574
/// An enabled fulltext index on an `Int64` field column is rejected.
#[test]
fn test_build_create_table_expr_rejects_fulltext_index_on_non_string_column() {
    let fulltext_options = options_from_fulltext(&FulltextOptions {
        enable: true,
        ..Default::default()
    })
    .unwrap();
    let value_column = build_proto_column_schema(
        "value",
        ColumnDataType::Int64,
        SemanticType::Field,
        fulltext_options,
    );
    let ts_column = build_proto_column_schema(
        "ts",
        ColumnDataType::TimestampMillisecond,
        SemanticType::Timestamp,
        None,
    );
    let column_schemas = [value_column, ts_column];

    let table_name = TableReference::full("", "", "test_metric");
    let result = build_create_table_expr(
        None,
        &table_name,
        ColumnExpr::from_column_schemas(&column_schemas),
        MITO_ENGINE,
        "Created on insertion",
    );

    assert!(result.is_err());
}
607
608 fn mock_insert_batch() -> (Vec<Column>, u32) {
609 let row_count = 2;
610
611 let host_vals = Values {
612 string_values: vec!["host1".to_string(), "host2".to_string()],
613 ..Default::default()
614 };
615 let host_column = Column {
616 column_name: "host".to_string(),
617 semantic_type: SemanticType::Tag as i32,
618 values: Some(host_vals),
619 null_mask: vec![0],
620 datatype: ColumnDataType::String as i32,
621 ..Default::default()
622 };
623
624 let cpu_vals = Values {
625 f64_values: vec![0.31],
626 ..Default::default()
627 };
628 let cpu_column = Column {
629 column_name: "cpu".to_string(),
630 semantic_type: SemanticType::Field as i32,
631 values: Some(cpu_vals),
632 null_mask: vec![2],
633 datatype: ColumnDataType::Float64 as i32,
634 ..Default::default()
635 };
636
637 let mem_vals = Values {
638 f64_values: vec![0.1],
639 ..Default::default()
640 };
641 let mem_column = Column {
642 column_name: "memory".to_string(),
643 semantic_type: SemanticType::Field as i32,
644 values: Some(mem_vals),
645 null_mask: vec![1],
646 datatype: ColumnDataType::Float64 as i32,
647 ..Default::default()
648 };
649
650 let time_vals = Values {
651 time_millisecond_values: vec![100, 101],
652 ..Default::default()
653 };
654 let time_column = Column {
655 column_name: "time".to_string(),
656 semantic_type: SemanticType::Field as i32,
657 values: Some(time_vals),
658 null_mask: vec![0],
659 datatype: ColumnDataType::TimeMillisecond as i32,
660 ..Default::default()
661 };
662
663 let interval1 = IntervalMonthDayNano {
664 months: 1,
665 days: 2,
666 nanoseconds: 3,
667 };
668 let interval2 = IntervalMonthDayNano {
669 months: 4,
670 days: 5,
671 nanoseconds: 6,
672 };
673 let interval_vals = Values {
674 interval_month_day_nano_values: vec![interval1, interval2],
675 ..Default::default()
676 };
677 let interval_column = Column {
678 column_name: "interval".to_string(),
679 semantic_type: SemanticType::Field as i32,
680 values: Some(interval_vals),
681 null_mask: vec![0],
682 datatype: ColumnDataType::IntervalMonthDayNano as i32,
683 ..Default::default()
684 };
685
686 let ts_vals = Values {
687 timestamp_millisecond_values: vec![100, 101],
688 ..Default::default()
689 };
690 let ts_column = Column {
691 column_name: "ts".to_string(),
692 semantic_type: SemanticType::Timestamp as i32,
693 values: Some(ts_vals),
694 null_mask: vec![0],
695 datatype: ColumnDataType::TimestampMillisecond as i32,
696 ..Default::default()
697 };
698 let decimal_vals = Values {
699 decimal128_values: vec![Decimal128 { hi: 0, lo: 123 }, Decimal128 { hi: 0, lo: 456 }],
700 ..Default::default()
701 };
702 let decimal_column = Column {
703 column_name: "decimals".to_string(),
704 semantic_type: SemanticType::Field as i32,
705 values: Some(decimal_vals),
706 null_mask: vec![0],
707 datatype: ColumnDataType::Decimal128 as i32,
708 datatype_extension: Some(ColumnDataTypeExtension {
709 type_ext: Some(TypeExt::DecimalType(DecimalTypeExtension {
710 precision: 38,
711 scale: 10,
712 })),
713 }),
714 options: None,
715 };
716
717 (
718 vec![
719 host_column,
720 cpu_column,
721 mem_column,
722 time_column,
723 interval_column,
724 ts_column,
725 decimal_column,
726 ],
727 row_count,
728 )
729 }
730}