feat: add validate method to CreateExpr (#3772)

* feat: add validate method to CreateExpr

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness reproducer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* verify region create request

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix existing test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add tailing empty line

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more validation

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* disable metric table fuzz

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* minor refactor

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2024-04-24 15:29:10 +08:00
committed by GitHub
parent 659d34a170
commit df01ac05a1
9 changed files with 345 additions and 85 deletions

View File

@@ -25,7 +25,7 @@ use store_api::region_request::{
AddColumn, AffectedRows, AlterKind, RegionAlterRequest, RegionPutRequest, RegionRequest,
};
use store_api::storage::consts::ReservedColumnId;
use store_api::storage::RegionId;
use store_api::storage::{ConcreteDataType, RegionId};
use crate::error::{
ColumnTypeMismatchSnafu, MitoReadOperationSnafu, MitoWriteOperationSnafu, Result,
@@ -128,7 +128,8 @@ impl DataRegion {
if c.semantic_type == SemanticType::Tag {
if !c.column_schema.data_type.is_string() {
return ColumnTypeMismatchSnafu {
column_type: c.column_schema.data_type.clone(),
expect: ConcreteDataType::string_datatype(),
actual: c.column_schema.data_type.clone(),
}
.fail();
}

View File

@@ -43,9 +43,11 @@ use crate::engine::options::{
};
use crate::engine::MetricEngineInner;
use crate::error::{
ColumnNotFoundSnafu, ConflictRegionOptionSnafu, CreateMitoRegionSnafu,
InternalColumnOccupiedSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu,
ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu,
AddingFieldColumnSnafu, ColumnNotFoundSnafu, ColumnTypeMismatchSnafu,
ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu,
InvalidMetadataSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu,
MultipleFieldColumnSnafu, NoFieldColumnSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu,
Result, SerializeColumnMetadataSnafu,
};
use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT};
use crate::utils::{to_data_region_id, to_metadata_region_id};
@@ -191,6 +193,14 @@ impl MetricEngineInner {
})?;
for col in &request.column_metadatas {
if !physical_columns.contains(&col.column_schema.name) {
// Multi-field on physical table is explicit forbidden at present
// TODO(ruihang): support multi-field on both logical and physical column
ensure!(
col.semantic_type != SemanticType::Field,
AddingFieldColumnSnafu {
name: col.column_schema.name.clone()
}
);
new_columns.push(col.clone());
} else {
existing_columns.push(col.column_schema.name.clone());
@@ -290,6 +300,8 @@ impl MetricEngineInner {
/// - required table option is present ([PHYSICAL_TABLE_METADATA_KEY] or
/// [LOGICAL_TABLE_METADATA_KEY])
fn verify_region_create_request(request: &RegionCreateRequest) -> Result<()> {
request.validate().context(InvalidMetadataSnafu)?;
let name_to_index = request
.column_metadatas
.iter()
@@ -323,6 +335,41 @@ impl MetricEngineInner {
ConflictRegionOptionSnafu {}
);
// check if only one field column is declared, and all tag columns are string
let mut field_col: Option<&ColumnMetadata> = None;
for col in &request.column_metadatas {
match col.semantic_type {
SemanticType::Tag => ensure!(
col.column_schema.data_type == ConcreteDataType::string_datatype(),
ColumnTypeMismatchSnafu {
expect: ConcreteDataType::string_datatype(),
actual: col.column_schema.data_type.clone(),
}
),
SemanticType::Field => {
if field_col.is_some() {
MultipleFieldColumnSnafu {
previous: field_col.unwrap().column_schema.name.clone(),
current: col.column_schema.name.clone(),
}
.fail()?;
}
field_col = Some(col)
}
SemanticType::Timestamp => {}
}
}
let field_col = field_col.context(NoFieldColumnSnafu)?;
// make sure the field column is float64 type
ensure!(
field_col.column_schema.data_type == ConcreteDataType::float64_datatype(),
ColumnTypeMismatchSnafu {
expect: ConcreteDataType::float64_datatype(),
actual: field_col.column_schema.data_type.clone(),
}
);
Ok(())
}
@@ -531,6 +578,15 @@ mod test {
false,
),
},
ColumnMetadata {
column_id: 2,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new(
"column2".to_string(),
ConcreteDataType::float64_datatype(),
false,
),
},
],
region_dir: "test_dir".to_string(),
engine: METRIC_ENGINE_NAME.to_string(),
@@ -539,37 +595,51 @@ mod test {
.into_iter()
.collect(),
};
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
MetricEngineInner::verify_region_create_request(&request).unwrap();
}
#[test]
fn test_verify_region_create_request_options() {
let mut request = RegionCreateRequest {
column_metadatas: vec![],
column_metadatas: vec![
ColumnMetadata {
column_id: 0,
semantic_type: SemanticType::Timestamp,
column_schema: ColumnSchema::new(
METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
},
ColumnMetadata {
column_id: 1,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new(
"val".to_string(),
ConcreteDataType::float64_datatype(),
false,
),
},
],
region_dir: "test_dir".to_string(),
engine: METRIC_ENGINE_NAME.to_string(),
primary_key: vec![],
options: HashMap::new(),
};
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_err());
MetricEngineInner::verify_region_create_request(&request).unwrap_err();
let mut options = HashMap::new();
options.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
request.options.clone_from(&options);
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
MetricEngineInner::verify_region_create_request(&request).unwrap();
options.insert(LOGICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
request.options.clone_from(&options);
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_err());
MetricEngineInner::verify_region_create_request(&request).unwrap_err();
options.remove(PHYSICAL_TABLE_METADATA_KEY).unwrap();
request.options = options;
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
MetricEngineInner::verify_region_create_request(&request).unwrap();
}
#[tokio::test]

View File

@@ -133,9 +133,10 @@ pub enum Error {
location: Location,
},
#[snafu(display("Column type mismatch. Expect string, got {:?}", column_type))]
#[snafu(display("Column type mismatch. Expect {:?}, got {:?}", expect, actual))]
ColumnTypeMismatch {
column_type: ConcreteDataType,
expect: ConcreteDataType,
actual: ConcreteDataType,
location: Location,
},
@@ -169,6 +170,19 @@ pub enum Error {
request: RegionRequest,
location: Location,
},
#[snafu(display("Multiple field column found: {} and {}", previous, current))]
MultipleFieldColumn {
previous: String,
current: String,
location: Location,
},
#[snafu(display("Adding field column {} to physical table", name))]
AddingFieldColumn { name: String, location: Location },
#[snafu(display("No field column found"))]
NoFieldColumn { location: Location },
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -182,7 +196,10 @@ impl ErrorExt for Error {
| MissingRegionOption { .. }
| ConflictRegionOption { .. }
| ColumnTypeMismatch { .. }
| PhysicalRegionBusy { .. } => StatusCode::InvalidArguments,
| PhysicalRegionBusy { .. }
| MultipleFieldColumn { .. }
| NoFieldColumn { .. }
| AddingFieldColumn { .. } => StatusCode::InvalidArguments,
ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => {
StatusCode::Unsupported

View File

@@ -210,9 +210,9 @@ pub fn create_logical_region_request(
),
},
];
for tag in tags {
for (bias, tag) in tags.iter().enumerate() {
column_metadatas.push(ColumnMetadata {
column_id: 2,
column_id: 2 + bias as ColumnId,
semantic_type: SemanticType::Tag,
column_schema: ColumnSchema::new(
tag.to_string(),

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use api::helper::ColumnDataTypeWrapper;
use api::v1::alter_expr::Kind;
@@ -31,7 +31,7 @@ use query::sql::{
};
use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name;
use snafu::{ensure, ResultExt};
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{ColumnDef, ColumnOption, TableConstraint};
use sql::statements::alter::{AlterTable, AlterTableOperation};
use sql::statements::create::{CreateExternalTable, CreateTable, TIME_INDEX};
@@ -214,9 +214,72 @@ pub fn create_to_expr(create: &CreateTable, query_ctx: QueryContextRef) -> Resul
table_id: None,
engine: create.engine.to_string(),
};
validate_create_expr(&expr)?;
Ok(expr)
}
/// Validate the [`CreateTableExpr`] request.
pub fn validate_create_expr(create: &CreateTableExpr) -> Result<()> {
// construct column list
let mut column_to_indices = HashMap::with_capacity(create.column_defs.len());
for (idx, column) in create.column_defs.iter().enumerate() {
if let Some(indices) = column_to_indices.get(&column.name) {
return InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is duplicated at index {} and {}",
column.name, indices, idx
),
}
.fail();
}
column_to_indices.insert(&column.name, idx);
}
// verify time_index exists
let _ = column_to_indices
.get(&create.time_index)
.with_context(|| InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is not found in column list",
create.time_index
),
})?;
// verify primary_key exists
for pk in &create.primary_keys {
let _ = column_to_indices
.get(&pk)
.with_context(|| InvalidSqlSnafu {
err_msg: format!("column name `{}` is not found in column list", pk),
})?;
}
// construct primary_key set
let mut pk_set = HashSet::new();
for pk in &create.primary_keys {
if !pk_set.insert(pk) {
return InvalidSqlSnafu {
err_msg: format!("column name `{}` is duplicated in primary keys", pk),
}
.fail();
}
}
// verify time index is not primary key
if pk_set.contains(&create.time_index) {
return InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is both primary key and time index",
create.time_index
),
}
.fail();
}
Ok(())
}
fn find_primary_keys(
columns: &[ColumnDef],
constraints: &[TableConstraint],
@@ -457,6 +520,33 @@ mod tests {
);
}
#[test]
fn test_invalid_create_to_expr() {
let cases = [
// duplicate column declaration
"CREATE TABLE monitor (host STRING primary key, ts TIMESTAMP TIME INDEX, some_column text, some_column string);",
// duplicate primary key
"CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, some_column STRING, PRIMARY KEY (some_column, host, some_column));",
// time index is primary key
"CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, PRIMARY KEY (host, ts));"
];
for sql in cases {
let stmt = ParserContext::create_with_dialect(
sql,
&GreptimeDbDialect {},
ParseOptions::default(),
)
.unwrap()
.pop()
.unwrap();
let Statement::CreateTable(create_table) = stmt else {
unreachable!()
};
create_to_expr(&create_table, QueryContext::arc()).unwrap_err();
}
}
#[test]
fn test_create_to_expr_with_default_timestamp_value() {
let sql = "CREATE TABLE monitor (v double,ts TIMESTAMP default '2024-01-30T00:01:01',TIME INDEX (ts)) engine=mito;";

View File

@@ -247,6 +247,53 @@ pub struct RegionCreateRequest {
pub region_dir: String,
}
impl RegionCreateRequest {
/// Checks whether the request is valid, returns an error if it is invalid.
pub fn validate(&self) -> Result<()> {
// time index must exist
ensure!(
self.column_metadatas
.iter()
.any(|x| x.semantic_type == SemanticType::Timestamp),
InvalidRegionRequestSnafu {
region_id: RegionId::new(0, 0),
err: "missing timestamp column in create region request".to_string(),
}
);
// build column id to indices
let mut column_id_to_indices = HashMap::with_capacity(self.column_metadatas.len());
for (i, c) in self.column_metadatas.iter().enumerate() {
if let Some(previous) = column_id_to_indices.insert(c.column_id, i) {
return InvalidRegionRequestSnafu {
region_id: RegionId::new(0, 0),
err: format!(
"duplicate column id {} (at position {} and {}) in create region request",
c.column_id, previous, i
),
}
.fail();
}
}
// primary key must exist
for column_id in &self.primary_key {
ensure!(
column_id_to_indices.contains_key(column_id),
InvalidRegionRequestSnafu {
region_id: RegionId::new(0, 0),
err: format!(
"missing primary key column {} in create region request",
column_id
),
}
);
}
Ok(())
}
}
#[derive(Debug, Clone, Default)]
pub struct RegionDropRequest {}
@@ -965,4 +1012,46 @@ mod tests {
metadata.schema_version = 1;
request.validate(&metadata).unwrap();
}
#[test]
fn test_validate_create_region() {
let column_metadatas = vec![
ColumnMetadata {
column_schema: ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
semantic_type: SemanticType::Timestamp,
column_id: 1,
},
ColumnMetadata {
column_schema: ColumnSchema::new(
"tag_0",
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Tag,
column_id: 2,
},
ColumnMetadata {
column_schema: ColumnSchema::new(
"field_0",
ConcreteDataType::string_datatype(),
true,
),
semantic_type: SemanticType::Field,
column_id: 3,
},
];
let create = RegionCreateRequest {
engine: "mito".to_string(),
column_metadatas,
primary_key: vec![3, 4],
options: HashMap::new(),
region_dir: "path".to_string(),
};
assert!(create.validate().is_err());
}
}