feat(fuzz): add validator for inserted rows (#3932)

* feat(fuzz): add validator for inserted rows

* fix: compatibility with mysql types

* feat(fuzz): add datetime and date type in mysql for row validator
This commit is contained in:
Yohan Wal
2024-05-15 15:05:51 +08:00
committed by GitHub
parent 09129a911e
commit cfae276d37
11 changed files with 373 additions and 14 deletions

1
Cargo.lock generated
View File

@@ -10608,6 +10608,7 @@ version = "0.7.2"
dependencies = [
"arbitrary",
"async-trait",
"chrono",
"common-error",
"common-macro",
"common-query",

View File

@@ -17,6 +17,7 @@ unstable = ["nix"]
[dependencies]
arbitrary = { version = "1.3.0", features = ["derive"] }
async-trait = { workspace = true }
chrono = { workspace = true }
common-error = { workspace = true }
common-macro = { workspace = true }
common-query = { workspace = true }

View File

@@ -20,11 +20,12 @@ pub mod select_expr;
use std::fmt;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use rand::Rng;
use crate::error::Error;
use crate::ir::create_expr::ColumnOption;
use crate::ir::{AlterTableExpr, CreateTableExpr};
use crate::ir::{AlterTableExpr, CreateTableExpr, Ident};
pub type CreateTableExprGenerator<R> =
Box<dyn Generator<CreateTableExpr, R, Error = Error> + Sync + Send>;
@@ -36,6 +37,9 @@ pub type ColumnOptionGenerator<R> = Box<dyn Fn(&mut R, &ConcreteDataType) -> Vec
pub type ConcreteDataTypeGenerator<R> = Box<dyn Random<ConcreteDataType, R>>;
/// A boxed function that produces a random [Value] for the given [ConcreteDataType].
///
/// The arguments are the random number generator, the data type to generate a value
/// for, and an optional [Ident] generator that the function may use as a source of
/// random strings.
pub type ValueGenerator<R> =
Box<dyn Fn(&mut R, &ConcreteDataType, Option<&dyn Random<Ident, R>>) -> Value>;
pub trait Generator<T, R: Rng> {
type Error: Sync + Send + fmt::Debug;

View File

@@ -22,7 +22,7 @@ use rand::Rng;
use crate::context::TableContextRef;
use crate::error::{Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{Generator, Random};
use crate::generator::{Generator, Random, ValueGenerator};
use crate::ir::insert_expr::{InsertIntoExpr, RowValue};
use crate::ir::{generate_random_value, Ident};
@@ -37,6 +37,8 @@ pub struct InsertExprGenerator<R: Rng + 'static> {
rows: usize,
#[builder(default = "Box::new(WordGenerator)")]
word_generator: Box<dyn Random<Ident, R>>,
#[builder(default = "Box::new(generate_random_value)")]
value_generator: ValueGenerator<R>,
#[builder(default)]
_phantom: PhantomData<R>,
}
@@ -81,7 +83,7 @@ impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
continue;
}
row.push(RowValue::Value(generate_random_value(
row.push(RowValue::Value((self.value_generator)(
rng,
&column.column_type,
Some(self.word_generator.as_ref()),
@@ -93,11 +95,8 @@ impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
Ok(InsertIntoExpr {
table_name: self.table_ctx.name.to_string(),
columns: if self.omit_column_list {
vec![]
} else {
values_columns
},
omit_column_list: self.omit_column_list,
columns: values_columns,
values_list,
})
}

View File

@@ -65,10 +65,21 @@ lazy_static! {
];
pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
vec![ConcreteDataType::string_datatype()];
pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
// MySQL only permits fractional seconds with up to microseconds (6 digits) precision.
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
];
}
impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
impl_random!(
ConcreteDataType,
MySQLTsColumnTypeGenerator,
MYSQL_TS_DATA_TYPES
);
impl_random!(
ConcreteDataType,
PartibleColumnTypeGenerator,
@@ -82,6 +93,7 @@ impl_random!(
pub struct ColumnTypeGenerator;
pub struct TsColumnTypeGenerator;
pub struct MySQLTsColumnTypeGenerator;
pub struct PartibleColumnTypeGenerator;
pub struct StringColumnTypeGenerator;
@@ -110,6 +122,31 @@ pub fn generate_random_value<R: Rng>(
}
}
/// Produces a random [Value] of the requested `datatype` for use against MySQL.
///
/// String values are taken from `random_str` when one is supplied; otherwise a single
/// random character is used. Timestamps are delegated to
/// `generate_random_timestamp_for_mysql`.
///
/// # Panics
///
/// Panics (via `unimplemented!`) for any data type that is not handled below.
pub fn generate_random_value_for_mysql<R: Rng>(
    rng: &mut R,
    datatype: &ConcreteDataType,
    random_str: Option<&dyn Random<Ident, R>>,
) -> Value {
    match datatype {
        ConcreteDataType::Boolean(_) => Value::from(rng.gen::<bool>()),
        ConcreteDataType::Int16(_) => Value::from(rng.gen::<i16>()),
        ConcreteDataType::Int32(_) => Value::from(rng.gen::<i32>()),
        ConcreteDataType::Int64(_) => Value::from(rng.gen::<i64>()),
        ConcreteDataType::Float32(_) => Value::from(rng.gen::<f32>()),
        ConcreteDataType::Float64(_) => Value::from(rng.gen::<f64>()),
        ConcreteDataType::String(_) => {
            if let Some(random) = random_str {
                Value::from(random.gen(rng).value)
            } else {
                Value::from(rng.gen::<char>().to_string())
            }
        }
        ConcreteDataType::Date(_) => generate_random_date(rng),
        ConcreteDataType::DateTime(_) => generate_random_datetime(rng),
        ConcreteDataType::Timestamp(ts_type) => {
            generate_random_timestamp_for_mysql(rng, *ts_type)
        }
        _ => unimplemented!("unsupported type: {datatype}"),
    }
}
fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
let v = match ts_type {
TimestampType::Second(_) => {
@@ -140,6 +177,37 @@ fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Val
Value::from(v)
}
/// Generates a random timestamp [Value] of the given unit within the range used for MySQL.
///
/// MySQL supports timestamps from '1970-01-01 00:00:01.000000' to
/// '2038-01-19 03:14:07.499999'; the bounds below express that range in each unit.
fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    let timestamp = match ts_type {
        TimestampType::Second(_) => Timestamp::new_second(rng.gen_range(1..=2_147_483_647)),
        TimestampType::Millisecond(_) => {
            Timestamp::new_millisecond(rng.gen_range(1000..=2_147_483_647_499))
        }
        TimestampType::Microsecond(_) => {
            Timestamp::new_microsecond(rng.gen_range(1_000_000..=2_147_483_647_499_999))
        }
        TimestampType::Nanosecond(_) => {
            Timestamp::new_nanosecond(rng.gen_range(1_000_000_000..=2_147_483_647_499_999_000))
        }
    };
    Value::from(timestamp)
}
fn generate_random_datetime<R: Rng>(rng: &mut R) -> Value {
let min = i64::from(Timestamp::MIN_MILLISECOND);
let max = i64::from(Timestamp::MAX_MILLISECOND);
@@ -258,6 +326,14 @@ impl Column {
)
})
}
/// Returns the value of the first [ColumnOption::DefaultValue] option of this column,
/// or `None` when the column has no such option.
pub fn default_value(&self) -> Option<&Value> {
    for option in self.options.iter() {
        if let ColumnOption::DefaultValue(value) = option {
            return Some(value);
        }
    }
    None
}
}
/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use std::fmt::{Debug, Display};
use datatypes::value::Value;
@@ -20,17 +20,28 @@ use crate::ir::Column;
/// An `INSERT INTO` expression: the target table, the columns and the rows to insert.
pub struct InsertIntoExpr {
/// Name of the table the rows are inserted into.
pub table_name: String,
/// Whether the column list is left out when the expression is translated to SQL.
pub omit_column_list: bool,
/// Columns the values in `values_list` belong to.
pub columns: Vec<Column>,
/// Rows to insert.
pub values_list: Vec<RowValues>,
}
pub type RowValues = Vec<RowValue>;
/// A single cell of an inserted row.
#[derive(PartialEq, PartialOrd, Clone)]
pub enum RowValue {
/// A concrete value.
Value(Value),
/// The `DEFAULT` keyword, i.e. no explicit value is given for the cell.
Default,
}
impl RowValue {
    /// Compares two concrete row values by the partial ordering of the wrapped [Value]s.
    ///
    /// # Panics
    ///
    /// Panics when either side is not a [RowValue::Value], since such cells carry no
    /// value to compare.
    pub fn cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        if let (RowValue::Value(lhs), RowValue::Value(rhs)) = (self, other) {
            return lhs.partial_cmp(rhs);
        }
        panic!("Invalid comparison: {:?} and {:?}", self, other)
    }
}
impl Display for RowValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -46,3 +57,38 @@ impl Display for RowValue {
}
}
}
impl Debug for RowValue {
    /// Formats the cell as a SQL literal: `NULL`, `DEFAULT`, a single-quoted literal
    /// for string/timestamp/datetime/date values, or the bare value otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RowValue::Default => write!(f, "DEFAULT"),
            RowValue::Value(Value::Null) => write!(f, "NULL"),
            RowValue::Value(
                quoted @ (Value::String(_)
                | Value::Timestamp(_)
                | Value::DateTime(_)
                | Value::Date(_)),
            ) => write!(f, "'{}'", quoted),
            RowValue::Value(plain) => write!(f, "{}", plain),
        }
    }
}
#[cfg(test)]
mod tests {
    use common_time::Timestamp;
    use datatypes::value::Value;

    use crate::ir::insert_expr::RowValue;

    /// A timestamp far in the past must order before one far in the future.
    #[test]
    fn test_value_cmp() {
        let earlier = RowValue::Value(Value::Timestamp(
            Timestamp::from_str_utc("-39988-01-31 01:21:12.848697+0000").unwrap(),
        ));
        let later = RowValue::Value(Value::Timestamp(
            Timestamp::from_str_utc("+12970-09-22 08:40:58.392839+0000").unwrap(),
        ));

        let ordering = earlier.cmp(&later);
        assert_eq!(ordering, Some(std::cmp::Ordering::Less));
    }
}

View File

@@ -33,7 +33,7 @@ impl DslTranslator<InsertIntoExpr, String> for InsertIntoExprTranslator {
impl InsertIntoExprTranslator {
fn format_columns(input: &InsertIntoExpr) -> String {
if input.columns.is_empty() {
if input.omit_column_list {
"".to_string()
} else {
let list = input

View File

@@ -13,3 +13,4 @@
// limitations under the License.
pub mod column;
pub mod row;

View File

@@ -0,0 +1,162 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime as ChronoDateTime, NaiveDate, NaiveDateTime, Utc};
use common_time::date::Date;
use common_time::{DateTime, Timestamp};
use datatypes::value::Value;
use snafu::{ensure, ResultExt};
use sqlx::database::HasArguments;
use sqlx::{
Column, ColumnIndex, Database, Decode, Encode, Executor, IntoArguments, Row, Type, TypeInfo,
ValueRef,
};
use crate::error::{self, Result};
use crate::ir::insert_expr::{RowValue, RowValues};
/// Asserts fetched_rows are equal to rows
pub fn assert_eq<'a, DB>(
columns: &[crate::ir::Column],
fetched_rows: &'a [<DB as Database>::Row],
rows: &[RowValues],
) -> Result<()>
where
DB: Database,
usize: ColumnIndex<<DB as Database>::Row>,
bool: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
i8: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
i16: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
i32: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
i64: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
f32: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
f64: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
String: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
Vec<u8>: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
ChronoDateTime<Utc>: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
NaiveDateTime: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
NaiveDate: sqlx::Type<DB> + sqlx::Decode<'a, DB>,
{
ensure!(
fetched_rows.len() == rows.len(),
error::AssertSnafu {
reason: format!(
"Expected values length: {}, got: {}",
rows.len(),
fetched_rows.len(),
)
}
);
for (idx, fetched_row) in fetched_rows.iter().enumerate() {
let row = &rows[idx];
ensure!(
fetched_row.len() == row.len(),
error::AssertSnafu {
reason: format!(
"Expected row length: {}, got: {}",
row.len(),
fetched_row.len(),
)
}
);
for (idx, value) in row.iter().enumerate() {
let fetched_value = if fetched_row.try_get_raw(idx).unwrap().is_null() {
RowValue::Value(Value::Null)
} else {
let value_type = fetched_row.column(idx).type_info().name();
match value_type {
"BOOL" | "BOOLEAN" => RowValue::Value(Value::Boolean(
fetched_row.try_get::<bool, usize>(idx).unwrap(),
)),
"TINYINT" => {
RowValue::Value(Value::Int8(fetched_row.try_get::<i8, usize>(idx).unwrap()))
}
"SMALLINT" => RowValue::Value(Value::Int16(
fetched_row.try_get::<i16, usize>(idx).unwrap(),
)),
"INT" => RowValue::Value(Value::Int32(
fetched_row.try_get::<i32, usize>(idx).unwrap(),
)),
"BIGINT" => RowValue::Value(Value::Int64(
fetched_row.try_get::<i64, usize>(idx).unwrap(),
)),
"FLOAT" => RowValue::Value(Value::Float32(datatypes::value::OrderedFloat(
fetched_row.try_get::<f32, usize>(idx).unwrap(),
))),
"DOUBLE" => RowValue::Value(Value::Float64(datatypes::value::OrderedFloat(
fetched_row.try_get::<f64, usize>(idx).unwrap(),
))),
"VARCHAR" | "CHAR" | "TEXT" => RowValue::Value(Value::String(
fetched_row.try_get::<String, usize>(idx).unwrap().into(),
)),
"VARBINARY" | "BINARY" | "BLOB" => RowValue::Value(Value::Binary(
fetched_row.try_get::<Vec<u8>, usize>(idx).unwrap().into(),
)),
"TIMESTAMP" => RowValue::Value(Value::Timestamp(
Timestamp::from_chrono_datetime(
fetched_row
.try_get::<ChronoDateTime<Utc>, usize>(idx)
.unwrap()
.naive_utc(),
)
.unwrap(),
)),
"DATETIME" => RowValue::Value(Value::DateTime(DateTime::from(
fetched_row.try_get::<NaiveDateTime, usize>(idx).unwrap(),
))),
"DATE" => RowValue::Value(Value::Date(Date::from(
fetched_row.try_get::<NaiveDate, usize>(idx).unwrap(),
))),
_ => panic!("Unsupported type: {}", value_type),
}
};
let value = match value {
// In MySQL, boolean is stored as TINYINT(1)
RowValue::Value(Value::Boolean(v)) => RowValue::Value(Value::Int8(*v as i8)),
RowValue::Default => match columns[idx].default_value().unwrap().clone() {
Value::Boolean(v) => RowValue::Value(Value::Int8(v as i8)),
default_value => RowValue::Value(default_value),
},
_ => value.clone(),
};
ensure!(
value == fetched_value,
error::AssertSnafu {
reason: format!("Expected value: {:?}, got: {:?}", value, fetched_value)
}
)
}
}
Ok(())
}
/// Returns all [RowEntry] of the `table_name`.
pub async fn fetch_values<'a, DB, E>(e: E, sql: &'a str) -> Result<Vec<<DB as Database>::Row>>
where
DB: Database,
<DB as HasArguments<'a>>::Arguments: IntoArguments<'a, DB>,
for<'c> E: 'a + Executor<'c, Database = DB>,
for<'c> String: Decode<'c, DB> + Type<DB>,
for<'c> String: Encode<'c, DB> + Type<DB>,
{
sqlx::query(sql)
.fetch_all(e)
.await
.context(error::ExecuteQuerySnafu { sql })
}

View File

@@ -32,11 +32,14 @@ use tests_fuzz::fake::{
use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder;
use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder;
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr};
use tests_fuzz::ir::{
generate_random_value_for_mysql, CreateTableExpr, InsertIntoExpr, MySQLTsColumnTypeGenerator,
};
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections_via_env, Connections};
use tests_fuzz::validator;
struct FuzzContext {
greptime: Pool<MySql>,
@@ -80,6 +83,7 @@ fn generate_create_expr<R: Rng + 'static>(
)))
.columns(input.columns)
.engine("mito")
.ts_column_type_generator(Box::new(MySQLTsColumnTypeGenerator))
.build()
.unwrap();
create_table_generator.generate(rng)
@@ -96,6 +100,7 @@ fn generate_insert_expr<R: Rng + 'static>(
.table_ctx(table_ctx)
.omit_column_list(omit_column_list)
.rows(input.rows)
.value_generator(Box::new(generate_random_value_for_mysql))
.build()
.unwrap();
insert_generator.generate(rng)
@@ -135,7 +140,37 @@ async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
}
);
// TODO: Validate inserted rows
// Validate inserted rows
let ts_column_idx = create_expr
.columns
.iter()
.position(|c| c.is_time_index())
.unwrap();
let ts_column_name = create_expr.columns[ts_column_idx].name.clone();
let ts_column_idx_in_insert = insert_expr
.columns
.iter()
.position(|c| c.name == ts_column_name)
.unwrap();
let column_list = insert_expr
.columns
.iter()
.map(|c| c.name.to_string())
.collect::<Vec<_>>()
.join(", ")
.to_string();
let select_sql = format!(
"SELECT {} FROM {} ORDER BY {}",
column_list, create_expr.table_name, ts_column_name
);
let fetched_rows = validator::row::fetch_values(&ctx.greptime, select_sql.as_str()).await?;
let mut expected_rows = insert_expr.values_list;
expected_rows.sort_by(|a, b| {
a[ts_column_idx_in_insert]
.cmp(&b[ts_column_idx_in_insert])
.unwrap()
});
validator::row::assert_eq::<MySql>(&insert_expr.columns, &fetched_rows, &expected_rows)?;
// Cleans up
let sql = format!("DROP TABLE {}", create_expr.table_name);

View File

@@ -34,11 +34,12 @@ use tests_fuzz::generator::create_expr::{
};
use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder;
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr};
use tests_fuzz::ir::{generate_random_value_for_mysql, CreateTableExpr, InsertIntoExpr};
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections_via_env, Connections};
use tests_fuzz::validator;
struct FuzzContext {
greptime: Pool<MySql>,
@@ -107,6 +108,7 @@ fn generate_insert_expr<R: Rng + 'static>(
.omit_column_list(false)
.table_ctx(table_ctx)
.rows(input.rows)
.value_generator(Box::new(generate_random_value_for_mysql))
.build()
.unwrap();
insert_generator.generate(rng)
@@ -160,7 +162,39 @@ async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
}
);
// TODO: Validate inserted rows
// Validate inserted rows
let ts_column_idx = create_logical_table_expr
.columns
.iter()
.position(|c| c.is_time_index())
.unwrap();
let ts_column_name = create_logical_table_expr.columns[ts_column_idx]
.name
.clone();
let ts_column_idx_in_insert = insert_expr
.columns
.iter()
.position(|c| c.name == ts_column_name)
.unwrap();
let column_list = insert_expr
.columns
.iter()
.map(|c| c.name.to_string())
.collect::<Vec<_>>()
.join(", ")
.to_string();
let select_sql = format!(
"SELECT {} FROM {} ORDER BY {}",
column_list, create_logical_table_expr.table_name, ts_column_name
);
let fetched_rows = validator::row::fetch_values(&ctx.greptime, select_sql.as_str()).await?;
let mut expected_rows = insert_expr.values_list;
expected_rows.sort_by(|a, b| {
a[ts_column_idx_in_insert]
.cmp(&b[ts_column_idx_in_insert])
.unwrap()
});
validator::row::assert_eq::<MySql>(&insert_expr.columns, &fetched_rows, &expected_rows)?;
// Clean up logical table
let sql = format!("DROP TABLE {}", create_logical_table_expr.table_name);