diff --git a/src/metric-engine/src/data_region.rs b/src/metric-engine/src/data_region.rs index 9207d0f107..1dd1b53faa 100644 --- a/src/metric-engine/src/data_region.rs +++ b/src/metric-engine/src/data_region.rs @@ -25,7 +25,7 @@ use store_api::region_request::{ AddColumn, AffectedRows, AlterKind, RegionAlterRequest, RegionPutRequest, RegionRequest, }; use store_api::storage::consts::ReservedColumnId; -use store_api::storage::RegionId; +use store_api::storage::{ConcreteDataType, RegionId}; use crate::error::{ ColumnTypeMismatchSnafu, MitoReadOperationSnafu, MitoWriteOperationSnafu, Result, @@ -128,7 +128,8 @@ impl DataRegion { if c.semantic_type == SemanticType::Tag { if !c.column_schema.data_type.is_string() { return ColumnTypeMismatchSnafu { - column_type: c.column_schema.data_type.clone(), + expect: ConcreteDataType::string_datatype(), + actual: c.column_schema.data_type.clone(), } .fail(); } diff --git a/src/metric-engine/src/engine/create.rs b/src/metric-engine/src/engine/create.rs index a7e3c5c364..c71375299c 100644 --- a/src/metric-engine/src/engine/create.rs +++ b/src/metric-engine/src/engine/create.rs @@ -43,9 +43,11 @@ use crate::engine::options::{ }; use crate::engine::MetricEngineInner; use crate::error::{ - ColumnNotFoundSnafu, ConflictRegionOptionSnafu, CreateMitoRegionSnafu, - InternalColumnOccupiedSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu, - ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu, + AddingFieldColumnSnafu, ColumnNotFoundSnafu, ColumnTypeMismatchSnafu, + ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu, + InvalidMetadataSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu, + MultipleFieldColumnSnafu, NoFieldColumnSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, + Result, SerializeColumnMetadataSnafu, }; use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT}; use crate::utils::{to_data_region_id, to_metadata_region_id}; @@ -191,6 +193,14 @@ impl MetricEngineInner { })?; for col in &request.column_metadatas { if !physical_columns.contains(&col.column_schema.name) { + // Multi-field on physical table is explicit forbidden at present + // TODO(ruihang): support multi-field on both logical and physical column + ensure!( + col.semantic_type != SemanticType::Field, + AddingFieldColumnSnafu { + name: col.column_schema.name.clone() + } + ); new_columns.push(col.clone()); } else { existing_columns.push(col.column_schema.name.clone()); @@ -290,6 +300,8 @@ impl MetricEngineInner { /// - required table option is present ([PHYSICAL_TABLE_METADATA_KEY] or /// [LOGICAL_TABLE_METADATA_KEY]) fn verify_region_create_request(request: &RegionCreateRequest) -> Result<()> { + request.validate().context(InvalidMetadataSnafu)?; + let name_to_index = request .column_metadatas .iter() @@ -323,6 +335,41 @@ impl MetricEngineInner { ConflictRegionOptionSnafu {} ); + // check if only one field column is declared, and all tag columns are string + let mut field_col: Option<&ColumnMetadata> = None; + for col in &request.column_metadatas { + match col.semantic_type { + SemanticType::Tag => ensure!( + col.column_schema.data_type == ConcreteDataType::string_datatype(), + ColumnTypeMismatchSnafu { + expect: ConcreteDataType::string_datatype(), + actual: col.column_schema.data_type.clone(), + } + ), + SemanticType::Field => { + if field_col.is_some() { + MultipleFieldColumnSnafu { + previous: field_col.unwrap().column_schema.name.clone(), + current: col.column_schema.name.clone(), + } + .fail()?; + } + field_col = Some(col) + } + SemanticType::Timestamp => {} + } + } + let field_col = field_col.context(NoFieldColumnSnafu)?; + + // make sure the field column is float64 type + ensure!( + field_col.column_schema.data_type == ConcreteDataType::float64_datatype(), + ColumnTypeMismatchSnafu { + expect: ConcreteDataType::float64_datatype(), + actual: field_col.column_schema.data_type.clone(), + } + ); + Ok(()) } @@ -531,6 +578,15 @@ mod test { false, ), }, + ColumnMetadata { + column_id: 2, + semantic_type: SemanticType::Field, + column_schema: ColumnSchema::new( + "column2".to_string(), + ConcreteDataType::float64_datatype(), + false, + ), + }, ], region_dir: "test_dir".to_string(), engine: METRIC_ENGINE_NAME.to_string(), @@ -539,37 +595,51 @@ mod test { .into_iter() .collect(), }; - let result = MetricEngineInner::verify_region_create_request(&request); - assert!(result.is_ok()); + MetricEngineInner::verify_region_create_request(&request).unwrap(); } #[test] fn test_verify_region_create_request_options() { let mut request = RegionCreateRequest { - column_metadatas: vec![], + column_metadatas: vec![ + ColumnMetadata { + column_id: 0, + semantic_type: SemanticType::Timestamp, + column_schema: ColumnSchema::new( + METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME, + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ), + }, + ColumnMetadata { + column_id: 1, + semantic_type: SemanticType::Field, + column_schema: ColumnSchema::new( + "val".to_string(), + ConcreteDataType::float64_datatype(), + false, + ), + }, + ], region_dir: "test_dir".to_string(), engine: METRIC_ENGINE_NAME.to_string(), primary_key: vec![], options: HashMap::new(), }; - let result = MetricEngineInner::verify_region_create_request(&request); - assert!(result.is_err()); + MetricEngineInner::verify_region_create_request(&request).unwrap_err(); let mut options = HashMap::new(); options.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "value".to_string()); request.options.clone_from(&options); - let result = MetricEngineInner::verify_region_create_request(&request); - assert!(result.is_ok()); + MetricEngineInner::verify_region_create_request(&request).unwrap(); options.insert(LOGICAL_TABLE_METADATA_KEY.to_string(), "value".to_string()); request.options.clone_from(&options); - let result = MetricEngineInner::verify_region_create_request(&request); - assert!(result.is_err()); + MetricEngineInner::verify_region_create_request(&request).unwrap_err(); options.remove(PHYSICAL_TABLE_METADATA_KEY).unwrap(); request.options = options; - let result = MetricEngineInner::verify_region_create_request(&request); - assert!(result.is_ok()); + MetricEngineInner::verify_region_create_request(&request).unwrap(); } #[tokio::test] diff --git a/src/metric-engine/src/error.rs b/src/metric-engine/src/error.rs index b256894729..81e680dfd0 100644 --- a/src/metric-engine/src/error.rs +++ b/src/metric-engine/src/error.rs @@ -133,9 +133,10 @@ pub enum Error { location: Location, }, - #[snafu(display("Column type mismatch. Expect string, got {:?}", column_type))] + #[snafu(display("Column type mismatch. Expect {:?}, got {:?}", expect, actual))] ColumnTypeMismatch { - column_type: ConcreteDataType, + expect: ConcreteDataType, + actual: ConcreteDataType, location: Location, }, @@ -169,6 +170,19 @@ pub enum Error { request: RegionRequest, location: Location, }, + + #[snafu(display("Multiple field column found: {} and {}", previous, current))] + MultipleFieldColumn { + previous: String, + current: String, + location: Location, + }, + + #[snafu(display("Adding field column {} to physical table", name))] + AddingFieldColumn { name: String, location: Location }, + + #[snafu(display("No field column found"))] + NoFieldColumn { location: Location }, } pub type Result = std::result::Result; @@ -182,7 +196,10 @@ impl ErrorExt for Error { | MissingRegionOption { .. } | ConflictRegionOption { .. } | ColumnTypeMismatch { .. } - | PhysicalRegionBusy { .. } => StatusCode::InvalidArguments, + | PhysicalRegionBusy { .. } + | MultipleFieldColumn { .. } + | NoFieldColumn { .. } + | AddingFieldColumn { .. } => StatusCode::InvalidArguments, ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => { StatusCode::Unsupported diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index 0d013ac7b3..2da459419d 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -210,9 +210,9 @@ pub fn create_logical_region_request( ), }, ]; - for tag in tags { + for (bias, tag) in tags.iter().enumerate() { column_metadatas.push(ColumnMetadata { - column_id: 2, + column_id: 2 + bias as ColumnId, semantic_type: SemanticType::Tag, column_schema: ColumnSchema::new( tag.to_string(), diff --git a/src/operator/src/expr_factory.rs b/src/operator/src/expr_factory.rs index 42d8ee3187..cc950f6ba7 100644 --- a/src/operator/src/expr_factory.rs +++ b/src/operator/src/expr_factory.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use api::helper::ColumnDataTypeWrapper; use api::v1::alter_expr::Kind; @@ -31,7 +31,7 @@ use query::sql::{ }; use session::context::QueryContextRef; use session::table_name::table_idents_to_full_name; -use snafu::{ensure, ResultExt}; +use snafu::{ensure, OptionExt, ResultExt}; use sql::ast::{ColumnDef, ColumnOption, TableConstraint}; use sql::statements::alter::{AlterTable, AlterTableOperation}; use sql::statements::create::{CreateExternalTable, CreateTable, TIME_INDEX}; @@ -214,9 +214,72 @@ pub fn create_to_expr(create: &CreateTable, query_ctx: QueryContextRef) -> Resul table_id: None, engine: create.engine.to_string(), }; + + validate_create_expr(&expr)?; Ok(expr) } +/// Validate the [`CreateTableExpr`] request. +pub fn validate_create_expr(create: &CreateTableExpr) -> Result<()> { + // construct column list + let mut column_to_indices = HashMap::with_capacity(create.column_defs.len()); + for (idx, column) in create.column_defs.iter().enumerate() { + if let Some(indices) = column_to_indices.get(&column.name) { + return InvalidSqlSnafu { + err_msg: format!( + "column name `{}` is duplicated at index {} and {}", + column.name, indices, idx + ), + } + .fail(); + } + column_to_indices.insert(&column.name, idx); + } + + // verify time_index exists + let _ = column_to_indices + .get(&create.time_index) + .with_context(|| InvalidSqlSnafu { + err_msg: format!( + "column name `{}` is not found in column list", + create.time_index + ), + })?; + + // verify primary_key exists + for pk in &create.primary_keys { + let _ = column_to_indices + .get(&pk) + .with_context(|| InvalidSqlSnafu { + err_msg: format!("column name `{}` is not found in column list", pk), + })?; + } + + // construct primary_key set + let mut pk_set = HashSet::new(); + for pk in &create.primary_keys { + if !pk_set.insert(pk) { + return InvalidSqlSnafu { + err_msg: format!("column name `{}` is duplicated in primary keys", pk), + } + .fail(); + } + } + + // verify time index is not primary key + if pk_set.contains(&create.time_index) { + return InvalidSqlSnafu { + err_msg: format!( + "column name `{}` is both primary key and time index", + create.time_index + ), + } + .fail(); + } + + Ok(()) +} + fn find_primary_keys( columns: &[ColumnDef], constraints: &[TableConstraint], @@ -457,6 +520,33 @@ mod tests { ); } + #[test] + fn test_invalid_create_to_expr() { + let cases = [ + // duplicate column declaration + "CREATE TABLE monitor (host STRING primary key, ts TIMESTAMP TIME INDEX, some_column text, some_column string);", + // duplicate primary key + "CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, some_column STRING, PRIMARY KEY (some_column, host, some_column));", + // time index is primary key + "CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, PRIMARY KEY (host, ts));" + ]; + + for sql in cases { + let stmt = ParserContext::create_with_dialect( + sql, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap() + .pop() + .unwrap(); + let Statement::CreateTable(create_table) = stmt else { + unreachable!() + }; + create_to_expr(&create_table, QueryContext::arc()).unwrap_err(); + } + } + #[test] fn test_create_to_expr_with_default_timestamp_value() { let sql = "CREATE TABLE monitor (v double,ts TIMESTAMP default '2024-01-30T00:01:01',TIME INDEX (ts)) engine=mito;"; diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs index b98c3951d5..687db126f0 100644 --- a/src/store-api/src/region_request.rs +++ b/src/store-api/src/region_request.rs @@ -247,6 +247,53 @@ pub struct RegionCreateRequest { pub region_dir: String, } +impl RegionCreateRequest { + /// Checks whether the request is valid, returns an error if it is invalid. + pub fn validate(&self) -> Result<()> { + // time index must exist + ensure!( + self.column_metadatas + .iter() + .any(|x| x.semantic_type == SemanticType::Timestamp), + InvalidRegionRequestSnafu { + region_id: RegionId::new(0, 0), + err: "missing timestamp column in create region request".to_string(), + } + ); + + // build column id to indices + let mut column_id_to_indices = HashMap::with_capacity(self.column_metadatas.len()); + for (i, c) in self.column_metadatas.iter().enumerate() { + if let Some(previous) = column_id_to_indices.insert(c.column_id, i) { + return InvalidRegionRequestSnafu { + region_id: RegionId::new(0, 0), + err: format!( + "duplicate column id {} (at position {} and {}) in create region request", + c.column_id, previous, i + ), + } + .fail(); + } + } + + // primary key must exist + for column_id in &self.primary_key { + ensure!( + column_id_to_indices.contains_key(column_id), + InvalidRegionRequestSnafu { + region_id: RegionId::new(0, 0), + err: format!( + "missing primary key column {} in create region request", + column_id + ), + } + ); + } + + Ok(()) + } +} + #[derive(Debug, Clone, Default)] pub struct RegionDropRequest {} @@ -965,4 +1012,46 @@ mod tests { metadata.schema_version = 1; request.validate(&metadata).unwrap(); } + + #[test] + fn test_validate_create_region() { + let column_metadatas = vec![ + ColumnMetadata { + column_schema: ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ), + semantic_type: SemanticType::Timestamp, + column_id: 1, + }, + ColumnMetadata { + column_schema: ColumnSchema::new( + "tag_0", + ConcreteDataType::string_datatype(), + true, + ), + semantic_type: SemanticType::Tag, + column_id: 2, + }, + ColumnMetadata { + column_schema: ColumnSchema::new( + "field_0", + ConcreteDataType::string_datatype(), + true, + ), + semantic_type: SemanticType::Field, + column_id: 3, + }, + ]; + let create = RegionCreateRequest { + engine: "mito".to_string(), + column_metadatas, + primary_key: vec![3, 4], + options: HashMap::new(), + region_dir: "path".to_string(), + }; + + assert!(create.validate().is_err()); + } } diff --git a/tests-fuzz/targets/fuzz_create_table.rs b/tests-fuzz/targets/fuzz_create_table.rs index 60bcb1628b..ae43e6d696 100644 --- a/tests-fuzz/targets/fuzz_create_table.rs +++ b/tests-fuzz/targets/fuzz_create_table.rs @@ -61,34 +61,19 @@ impl Arbitrary<'_> for FuzzInput { fn generate_expr(input: FuzzInput) -> Result { let mut rng = ChaChaRng::seed_from_u64(input.seed); - let metric_engine = rng.gen_bool(0.5); let if_not_exists = rng.gen_bool(0.5); - if metric_engine { - let create_table_generator = CreateTableExprGeneratorBuilder::default() - .name_generator(Box::new(MappedGenerator::new( - WordGenerator, - merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map), - ))) - .columns(input.columns) - .engine("metric") - .if_not_exists(if_not_exists) - .with_clause([("physical_metric_table".to_string(), "".to_string())]) - .build() - .unwrap(); - create_table_generator.generate(&mut rng) - } else { - let create_table_generator = CreateTableExprGeneratorBuilder::default() - .name_generator(Box::new(MappedGenerator::new( - WordGenerator, - merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map), - ))) - .columns(input.columns) - .engine("mito") - .if_not_exists(if_not_exists) - .build() - .unwrap(); - create_table_generator.generate(&mut rng) - } + + let create_table_generator = CreateTableExprGeneratorBuilder::default() + .name_generator(Box::new(MappedGenerator::new( + WordGenerator, + merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map), + ))) + .columns(input.columns) + .engine("mito") + .if_not_exists(if_not_exists) + .build() + .unwrap(); + create_table_generator.generate(&mut rng) } async fn execute_create_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> { diff --git a/tests/cases/standalone/common/create/create_metric_table.result b/tests/cases/standalone/common/create/create_metric_table.result index f844c5cbd5..37a59598ef 100644 --- a/tests/cases/standalone/common/create/create_metric_table.result +++ b/tests/cases/standalone/common/create/create_metric_table.result @@ -20,6 +20,11 @@ DESC TABLE phy; | val | Float64 | | YES | | FIELD | +--------+----------------------+-----+------+---------+---------------+ +-- create table with duplicate column def +CREATE TABLE t1(ts timestamp time index, val double, host text, host string) engine=metric with ("on_physical_table" = "phy"); + +Error: 1004(InvalidArguments), Invalid SQL, error: column name `host` is duplicated at index 2 and 3 + CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy"); Affected Rows: 0 @@ -28,6 +33,21 @@ CREATE TABLE t2 (ts timestamp time index, job string primary key, val double) en Affected Rows: 0 +-- create logical table with different data type on field column +CREATE TABLE t3 (ts timestamp time index, val string, host string, primary key (host)) engine=metric with ("on_physical_table" = "phy"); + +Error: 1004(InvalidArguments), Column type mismatch. Expect Float64(Float64Type), got String(StringType) + +-- create logical table with different data type on tag column +CREATE TABLE t4 (ts timestamp time index, val double, host double, primary key (host)) engine=metric with ("on_physical_table" = "phy"); + +Error: 1004(InvalidArguments), Column type mismatch. Expect String(StringType), got Float64(Float64Type) + +-- create logical table with different column name on field column +CREATE TABLE t5 (ts timestamp time index, valval double, host string primary key) engine = metric with ("on_physical_table" = "phy"); + +Error: 1004(InvalidArguments), Adding field column valval to physical table + SELECT table_catalog, table_schema, table_name, table_type, engine FROM information_schema.tables WHERE engine = 'metric' order by table_name; +---------------+--------------+------------+------------+--------+ @@ -126,18 +146,10 @@ Affected Rows: 0 -- fuzz test case https://github.com/GreptimeTeam/greptimedb/issues/3612 CREATE TABLE `auT`( incidunt TIMESTAMP(3) TIME INDEX, - `QuaErAT` BOOLEAN, - `REPREHenDERIt` BOOLEAN DEFAULT true, - `Et` INT NULL, - `AutEM` INT, - esse DOUBLE, - `Tempore` BOOLEAN, - `reruM` BOOLEAN, - `eRrOR` BOOLEAN NULL, - `cOMmodi` BOOLEAN, - `PERfERENdIS` DOUBLE, - `eSt` FLOAT DEFAULT 0.70978713, - PRIMARY KEY(`cOMmodi`, `PERfERENdIS`, esse) + `REPREHenDERIt` double DEFAULT 0.70978713, + `cOMmodi` STRING, + `PERfERENdIS` STRING, + PRIMARY KEY(`cOMmodi`, `PERfERENdIS`) ) ENGINE = metric with ("physical_metric_table" = ""); Affected Rows: 0 @@ -148,17 +160,9 @@ DESC TABLE `auT`; | Column | Type | Key | Null | Default | Semantic Type | +---------------+----------------------+-----+------+------------+---------------+ | incidunt | TimestampMillisecond | PRI | NO | | TIMESTAMP | -| QuaErAT | Boolean | | YES | | FIELD | -| REPREHenDERIt | Boolean | | YES | true | FIELD | -| Et | Int32 | | YES | | FIELD | -| AutEM | Int32 | | YES | | FIELD | -| esse | Float64 | PRI | YES | | TAG | -| Tempore | Boolean | | YES | | FIELD | -| reruM | Boolean | | YES | | FIELD | -| eRrOR | Boolean | | YES | | FIELD | -| cOMmodi | Boolean | PRI | YES | | TAG | -| PERfERENdIS | Float64 | PRI | YES | | TAG | -| eSt | Float32 | | YES | 0.70978713 | FIELD | +| REPREHenDERIt | Float64 | | YES | 0.70978713 | FIELD | +| cOMmodi | String | PRI | YES | | TAG | +| PERfERENdIS | String | PRI | YES | | TAG | +---------------+----------------------+-----+------+------------+---------------+ DROP TABLE `auT`; diff --git a/tests/cases/standalone/common/create/create_metric_table.sql b/tests/cases/standalone/common/create/create_metric_table.sql index af1acdac05..a444986e9e 100644 --- a/tests/cases/standalone/common/create/create_metric_table.sql +++ b/tests/cases/standalone/common/create/create_metric_table.sql @@ -4,10 +4,22 @@ SHOW TABLES; DESC TABLE phy; +-- create table with duplicate column def +CREATE TABLE t1(ts timestamp time index, val double, host text, host string) engine=metric with ("on_physical_table" = "phy"); + CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine = metric with ("on_physical_table" = "phy"); CREATE TABLE t2 (ts timestamp time index, job string primary key, val double) engine = metric with ("on_physical_table" = "phy"); +-- create logical table with different data type on field column +CREATE TABLE t3 (ts timestamp time index, val string, host string, primary key (host)) engine=metric with ("on_physical_table" = "phy"); + +-- create logical table with different data type on tag column +CREATE TABLE t4 (ts timestamp time index, val double, host double, primary key (host)) engine=metric with ("on_physical_table" = "phy"); + +-- create logical table with different column name on field column +CREATE TABLE t5 (ts timestamp time index, valval double, host string primary key) engine = metric with ("on_physical_table" = "phy"); + SELECT table_catalog, table_schema, table_name, table_type, engine FROM information_schema.tables WHERE engine = 'metric' order by table_name; DESC TABLE phy; @@ -38,18 +50,10 @@ DROP TABLE phy2; -- fuzz test case https://github.com/GreptimeTeam/greptimedb/issues/3612 CREATE TABLE `auT`( incidunt TIMESTAMP(3) TIME INDEX, - `QuaErAT` BOOLEAN, - `REPREHenDERIt` BOOLEAN DEFAULT true, - `Et` INT NULL, - `AutEM` INT, - esse DOUBLE, - `Tempore` BOOLEAN, - `reruM` BOOLEAN, - `eRrOR` BOOLEAN NULL, - `cOMmodi` BOOLEAN, - `PERfERENdIS` DOUBLE, - `eSt` FLOAT DEFAULT 0.70978713, - PRIMARY KEY(`cOMmodi`, `PERfERENdIS`, esse) + `REPREHenDERIt` double DEFAULT 0.70978713, + `cOMmodi` STRING, + `PERfERENdIS` STRING, + PRIMARY KEY(`cOMmodi`, `PERfERENdIS`) ) ENGINE = metric with ("physical_metric_table" = ""); DESC TABLE `auT`;