diff --git a/Cargo.lock b/Cargo.lock index c2b4dc0b5c..a8ca9d963b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10219,6 +10219,7 @@ dependencies = [ "common-query", "common-runtime", "common-telemetry", + "common-time", "datatypes", "derive_builder 0.12.0", "dotenv", diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index 5cf789eb54..a63dc60bab 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -18,6 +18,7 @@ common-macro = { workspace = true } common-query = { workspace = true } common-runtime = { workspace = true } common-telemetry = { workspace = true } +common-time = { workspace = true } datatypes = { workspace = true } derive_builder = { workspace = true } dotenv = "0.15" @@ -39,13 +40,6 @@ sqlx = { version = "0.6", features = [ ] } [dev-dependencies] -dotenv = "0.15" -sqlx = { version = "0.6", features = [ - "runtime-tokio-rustls", - "mysql", - "postgres", - "chrono", -] } tokio = { workspace = true } [[bin]] @@ -54,3 +48,10 @@ path = "targets/fuzz_create_table.rs" test = false bench = false doc = false + +[[bin]] +name = "fuzz_insert" +path = "targets/fuzz_insert.rs" +test = false +bench = false +doc = false diff --git a/tests-fuzz/src/generator/insert_expr.rs b/tests-fuzz/src/generator/insert_expr.rs index f626b236f3..ec4ba4f615 100644 --- a/tests-fuzz/src/generator/insert_expr.rs +++ b/tests-fuzz/src/generator/insert_expr.rs @@ -14,6 +14,7 @@ use std::marker::PhantomData; +use datatypes::value::Value; use derive_builder::Builder; use rand::seq::SliceRandom; use rand::Rng; @@ -22,7 +23,7 @@ use crate::context::TableContextRef; use crate::error::{Error, Result}; use crate::fake::WordGenerator; use crate::generator::{Generator, Random}; -use crate::ir::insert_expr::InsertIntoExpr; +use crate::ir::insert_expr::{InsertIntoExpr, RowValue}; use crate::ir::{generate_random_value, Ident}; /// Generates [InsertIntoExpr]. @@ -41,30 +42,64 @@ pub struct InsertExprGenerator { impl Generator for InsertExprGenerator { type Error = Error; - /// Generates the [CreateTableExpr]. + /// Generates the [InsertIntoExpr]. fn generate(&self, rng: &mut R) -> Result { - let mut columns = self.table_ctx.columns.clone(); - columns.shuffle(rng); + // Whether to omit all columns, i.e. INSERT INTO table_name VALUES (...) + let omit_column_list = rng.gen_bool(0.2); - let mut rows = Vec::with_capacity(self.rows); + let mut values_columns = vec![]; + if omit_column_list { + // If omit column list, then all columns are required in the values list + values_columns = self.table_ctx.columns.clone(); + } else { + for column in &self.table_ctx.columns { + let can_omit = column.is_nullable() || column.has_default_value(); + + // 50% chance to omit a column if it's not required + if !can_omit || rng.gen_bool(0.5) { + values_columns.push(column.clone()); + } + } + values_columns.shuffle(rng); + + // If all columns are omitted, pick a random column + if values_columns.is_empty() { + values_columns.push(self.table_ctx.columns.choose(rng).unwrap().clone()); + } + } + + let mut values_list = Vec::with_capacity(self.rows); for _ in 0..self.rows { - let mut row = Vec::with_capacity(columns.len()); - for column in &columns { - // TODO(weny): generates the special cases - row.push(generate_random_value( + let mut row = Vec::with_capacity(values_columns.len()); + for column in &values_columns { + if column.is_nullable() && rng.gen_bool(0.2) { + row.push(RowValue::Value(Value::Null)); + continue; + } + + if column.has_default_value() && rng.gen_bool(0.2) { + row.push(RowValue::Default); + continue; + } + + row.push(RowValue::Value(generate_random_value( rng, &column.column_type, Some(self.word_generator.as_ref()), - )); + ))); } - rows.push(row); + values_list.push(row); } Ok(InsertIntoExpr { table_name: self.table_ctx.name.to_string(), - columns, - rows, + columns: if omit_column_list { + vec![] + } else { + values_columns + }, + values_list, }) } } diff --git a/tests-fuzz/src/ir.rs b/tests-fuzz/src/ir.rs index d7acd503eb..d20df0fa33 100644 --- a/tests-fuzz/src/ir.rs +++ b/tests-fuzz/src/ir.rs @@ -22,10 +22,13 @@ pub(crate) mod select_expr; use core::fmt; pub use alter_expr::AlterTableExpr; +use common_time::{Date, DateTime, Timestamp}; pub use create_expr::CreateTableExpr; use datatypes::data_type::ConcreteDataType; +use datatypes::types::TimestampType; use datatypes::value::Value; use derive_builder::Builder; +pub use insert_expr::InsertIntoExpr; use lazy_static::lazy_static; use rand::seq::SliceRandom; use rand::Rng; @@ -91,14 +94,62 @@ pub fn generate_random_value( Some(random) => Value::from(random.gen(rng).value), None => Value::from(rng.gen::().to_string()), }, - ConcreteDataType::Date(_) => Value::from(rng.gen::()), - ConcreteDataType::DateTime(_) => Value::from(rng.gen::()), - &ConcreteDataType::Timestamp(_) => Value::from(rng.gen::()), + ConcreteDataType::Date(_) => generate_random_date(rng), + ConcreteDataType::DateTime(_) => generate_random_datetime(rng), + &ConcreteDataType::Timestamp(ts_type) => generate_random_timestamp(rng, ts_type), _ => unimplemented!("unsupported type: {datatype}"), } } +fn generate_random_timestamp(rng: &mut R, ts_type: TimestampType) -> Value { + let v = match ts_type { + TimestampType::Second(_) => { + let min = i64::from(Timestamp::MIN_SECOND); + let max = i64::from(Timestamp::MAX_SECOND); + let value = rng.gen_range(min..=max); + Timestamp::new_second(value) + } + TimestampType::Millisecond(_) => { + let min = i64::from(Timestamp::MIN_MILLISECOND); + let max = i64::from(Timestamp::MAX_MILLISECOND); + let value = rng.gen_range(min..=max); + Timestamp::new_millisecond(value) + } + TimestampType::Microsecond(_) => { + let min = i64::from(Timestamp::MIN_MICROSECOND); + let max = i64::from(Timestamp::MAX_MICROSECOND); + let value = rng.gen_range(min..=max); + Timestamp::new_microsecond(value) + } + TimestampType::Nanosecond(_) => { + let min = i64::from(Timestamp::MIN_NANOSECOND); + let max = i64::from(Timestamp::MAX_NANOSECOND); + let value = rng.gen_range(min..=max); + Timestamp::new_nanosecond(value) + } + }; + Value::from(v) +} + +fn generate_random_datetime(rng: &mut R) -> Value { + let min = i64::from(Timestamp::MIN_MILLISECOND); + let max = i64::from(Timestamp::MAX_MILLISECOND); + let value = rng.gen_range(min..=max); + let datetime = Timestamp::new_millisecond(value) + .to_chrono_datetime() + .unwrap(); + Value::from(DateTime::from(datetime)) +} + +fn generate_random_date(rng: &mut R) -> Value { + let min = i64::from(Timestamp::MIN_MILLISECOND); + let max = i64::from(Timestamp::MAX_MILLISECOND); + let value = rng.gen_range(min..=max); + let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap(); + Value::from(Date::from(date)) +} + /// An identifier. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)] pub struct Ident { @@ -181,6 +232,24 @@ impl Column { .iter() .any(|opt| opt == &ColumnOption::PrimaryKey) } + + /// Returns true if it's nullable. + pub fn is_nullable(&self) -> bool { + !self + .options + .iter() + .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex)) + } + + // Returns true if it has default value. + pub fn has_default_value(&self) -> bool { + self.options.iter().any(|opt| { + matches!( + opt, + ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_) + ) + }) + } } /// Returns droppable columns. i.e., non-primary key columns, non-ts columns. diff --git a/tests-fuzz/src/ir/insert_expr.rs b/tests-fuzz/src/ir/insert_expr.rs index 8fc4113384..c7476c5170 100644 --- a/tests-fuzz/src/ir/insert_expr.rs +++ b/tests-fuzz/src/ir/insert_expr.rs @@ -12,14 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Display; + use datatypes::value::Value; use crate::ir::Column; -pub type RowValue = Vec; - pub struct InsertIntoExpr { pub table_name: String, pub columns: Vec, - pub rows: Vec, + pub values_list: Vec, +} + +pub type RowValues = Vec; + +pub enum RowValue { + Value(Value), + Default, +} + +impl Display for RowValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RowValue::Value(v) => match v { + Value::Null => write!(f, "NULL"), + v @ (Value::String(_) + | Value::Timestamp(_) + | Value::DateTime(_) + | Value::Date(_)) => write!(f, "'{}'", v), + v => write!(f, "{}", v), + }, + RowValue::Default => write!(f, "DEFAULT"), + } + } } diff --git a/tests-fuzz/src/translator/mysql/insert_expr.rs b/tests-fuzz/src/translator/mysql/insert_expr.rs index 269df9e6ee..49ff192afb 100644 --- a/tests-fuzz/src/translator/mysql/insert_expr.rs +++ b/tests-fuzz/src/translator/mysql/insert_expr.rs @@ -22,33 +22,42 @@ impl DslTranslator for InsertIntoExprTranslator { type Error = Error; fn translate(&self, input: &InsertIntoExpr) -> Result { - let columns = input - .columns - .iter() - .map(|c| c.name.to_string()) - .collect::>() - .join(", ") - .to_string(); - Ok(format!( - "INSERT INTO {} ({})\nVALUES\n{};", + "INSERT INTO {} {} VALUES\n{};", input.table_name, - columns, + Self::format_columns(input), Self::format_values(input) )) } } impl InsertIntoExprTranslator { + fn format_columns(input: &InsertIntoExpr) -> String { + if input.columns.is_empty() { + "".to_string() + } else { + let list = input + .columns + .iter() + .map(|c| c.name.to_string()) + .collect::>() + .join(", ") + .to_string(); + + format!("({})", list) + } + } + fn format_values(input: &InsertIntoExpr) -> String { input - .rows + .values_list .iter() - .map(|row| { + .map(|value| { format!( "({})", - row.iter() - .map(|v| format!("'{v}'")) + value + .iter() + .map(|v| v.to_string()) .collect::>() .join(", ") ) @@ -64,7 +73,7 @@ mod tests { use rand::SeedableRng; - use super::InsertIntoExprTranslator; + use super::*; use crate::generator::insert_expr::InsertExprGeneratorBuilder; use crate::generator::Generator; use crate::test_utils; @@ -84,10 +93,23 @@ mod tests { let insert_expr = insert_expr_generator.generate(&mut rng).unwrap(); let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap(); - let expected = r#"INSERT INTO test (host, idc, memory_util, ts, cpu_util, disk_util) -VALUES -('adipisci', 'debitis', '0.5495312687894465', '15292064470292927036', '0.9354265029131291', '0.8037816422279636'), -('ut', 'sequi', '0.8807117723618908', '14214208091261382505', '0.5240550121500691', '0.350785883750684');"#; + let expected = r#"INSERT INTO test (ts, host, cpu_util) VALUES +('+199601-11-07 21:32:56.695+0000', 'corrupti', 0.051130243193075464), +('+40822-03-25 02:17:34.328+0000', NULL, 0.6552502332327004);"#; + assert_eq!(output, expected); + + let insert_expr = insert_expr_generator.generate(&mut rng).unwrap(); + let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap(); + let expected = r#"INSERT INTO test (cpu_util, disk_util, ts) VALUES +(0.7074194466620976, 0.661288102315126, '-47252-05-08 07:33:49.567+0000'), +(0.8266101224213618, 0.7947724277743285, '-224292-12-07 02:51:53.371+0000');"#; + assert_eq!(output, expected); + + let insert_expr = insert_expr_generator.generate(&mut rng).unwrap(); + let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap(); + let expected = r#"INSERT INTO test VALUES +('odio', NULL, 0.48809950435391647, 0.5228925709595407, 0.9091528874275897, '+241156-12-16 20:52:15.185+0000'), +('dignissimos', 'labore', NULL, 0.12983559048685023, 0.6362040919831425, '-30691-06-17 23:41:09.938+0000');"#; assert_eq!(output, expected); } } diff --git a/tests-fuzz/src/validator/column.rs b/tests-fuzz/src/validator/column.rs index 5b148f7f02..797834eec5 100644 --- a/tests-fuzz/src/validator/column.rs +++ b/tests-fuzz/src/validator/column.rs @@ -98,7 +98,7 @@ impl PartialEq for ColumnEntry { .iter() .any(|opt| matches!(opt, ColumnOption::NotNull)) { - debug!("ColumnOption::NotNull is not found"); + debug!("ColumnOption::NotNull is found"); return false; } } else { diff --git a/tests-fuzz/targets/fuzz_insert.rs b/tests-fuzz/targets/fuzz_insert.rs new file mode 100644 index 0000000000..7fc6d30a23 --- /dev/null +++ b/tests-fuzz/targets/fuzz_insert.rs @@ -0,0 +1,163 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![no_main] + +use std::sync::Arc; + +use common_telemetry::info; +use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured}; +use libfuzzer_sys::fuzz_target; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaChaRng; +use snafu::{ensure, ResultExt}; +use sqlx::{Executor, MySql, Pool}; +use tests_fuzz::context::{TableContext, TableContextRef}; +use tests_fuzz::error::{self, Result}; +use tests_fuzz::fake::{ + merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map, + MappedGenerator, WordGenerator, +}; +use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder; +use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder; +use tests_fuzz::generator::Generator; +use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr}; +use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; +use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator; +use tests_fuzz::translator::DslTranslator; +use tests_fuzz::utils::{init_greptime_connections, Connections}; + +struct FuzzContext { + greptime: Pool, +} + +impl FuzzContext { + async fn close(self) { + self.greptime.close().await; + } +} + +#[derive(Copy, Clone, Debug)] +struct FuzzInput { + seed: u64, + columns: usize, + rows: usize, +} + +impl Arbitrary<'_> for FuzzInput { + fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { + let seed = u.int_in_range(u64::MIN..=u64::MAX)?; + let mut rng = ChaChaRng::seed_from_u64(seed); + let columns = rng.gen_range(2..30); + let rows = rng.gen_range(1..4096); + Ok(FuzzInput { + columns, + rows, + seed, + }) + } +} + +fn generate_create_expr( + input: FuzzInput, + rng: &mut R, +) -> Result { + let create_table_generator = CreateTableExprGeneratorBuilder::default() + .name_generator(Box::new(MappedGenerator::new( + WordGenerator, + merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map), + ))) + .columns(input.columns) + .engine("mito") + .build() + .unwrap(); + create_table_generator.generate(rng) +} + +fn generate_insert_expr( + input: FuzzInput, + rng: &mut R, + table_ctx: TableContextRef, +) -> Result { + let insert_generator = InsertExprGeneratorBuilder::default() + .table_ctx(table_ctx) + .rows(input.rows) + .build() + .unwrap(); + insert_generator.generate(rng) +} + +async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> { + info!("input: {input:?}"); + let mut rng = ChaChaRng::seed_from_u64(input.seed); + + let create_expr = generate_create_expr(input, &mut rng)?; + let translator = CreateTableExprTranslator; + let sql = translator.translate(&create_expr)?; + let _result = sqlx::query(&sql) + .execute(&ctx.greptime) + .await + .context(error::ExecuteQuerySnafu { sql: &sql })?; + + let table_ctx = Arc::new(TableContext::from(&create_expr)); + let insert_expr = generate_insert_expr(input, &mut rng, table_ctx)?; + let translator = InsertIntoExprTranslator; + let sql = translator.translate(&insert_expr)?; + let result = ctx + .greptime + // unprepared query, see + .execute(sql.as_str()) + .await + .context(error::ExecuteQuerySnafu { sql: &sql })?; + + ensure!( + result.rows_affected() == input.rows as u64, + error::AssertSnafu { + reason: format!( + "expected rows affected: {}, actual: {}", + input.rows, + result.rows_affected(), + ) + } + ); + + // TODO: Validate inserted rows + + // Cleans up + let sql = format!("DROP TABLE {}", create_expr.table_name); + let result = sqlx::query(&sql) + .execute(&ctx.greptime) + .await + .context(error::ExecuteQuerySnafu { sql })?; + info!( + "Drop table: {}\n\nResult: {result:?}\n\n", + create_expr.table_name + ); + ctx.close().await; + + Ok(()) +} + +fuzz_target!(|input: FuzzInput| { + common_telemetry::init_default_ut_logging(); + common_runtime::block_on_write(async { + let Connections { mysql } = init_greptime_connections().await; + let ctx = FuzzContext { + greptime: mysql.expect("mysql connection init must be succeed"), + }; + execute_insert(ctx, input) + .await + .unwrap_or_else(|err| panic!("fuzz test must be succeed: {err:?}")); + }) +});