feat(fuzz): add insert target (#3499)

* fix(common-time): allow building nanos timestamp from parts split from i64::MIN

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(fuzz): add insert target

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: cleanup cargo.toml and polish comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2024-03-13 18:03:03 +08:00
committed by GitHub
parent fb4da05f25
commit b55905cf66
8 changed files with 360 additions and 46 deletions

1
Cargo.lock generated
View File

@@ -10219,6 +10219,7 @@ dependencies = [
"common-query",
"common-runtime",
"common-telemetry",
"common-time",
"datatypes",
"derive_builder 0.12.0",
"dotenv",

View File

@@ -18,6 +18,7 @@ common-macro = { workspace = true }
common-query = { workspace = true }
common-runtime = { workspace = true }
common-telemetry = { workspace = true }
common-time = { workspace = true }
datatypes = { workspace = true }
derive_builder = { workspace = true }
dotenv = "0.15"
@@ -39,13 +40,6 @@ sqlx = { version = "0.6", features = [
] }
[dev-dependencies]
dotenv = "0.15"
sqlx = { version = "0.6", features = [
"runtime-tokio-rustls",
"mysql",
"postgres",
"chrono",
] }
tokio = { workspace = true }
[[bin]]
@@ -54,3 +48,10 @@ path = "targets/fuzz_create_table.rs"
test = false
bench = false
doc = false
[[bin]]
name = "fuzz_insert"
path = "targets/fuzz_insert.rs"
test = false
bench = false
doc = false

View File

@@ -14,6 +14,7 @@
use std::marker::PhantomData;
use datatypes::value::Value;
use derive_builder::Builder;
use rand::seq::SliceRandom;
use rand::Rng;
@@ -22,7 +23,7 @@ use crate::context::TableContextRef;
use crate::error::{Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{Generator, Random};
use crate::ir::insert_expr::InsertIntoExpr;
use crate::ir::insert_expr::{InsertIntoExpr, RowValue};
use crate::ir::{generate_random_value, Ident};
/// Generates [InsertIntoExpr].
@@ -41,30 +42,64 @@ pub struct InsertExprGenerator<R: Rng + 'static> {
impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
type Error = Error;
/// Generates the [CreateTableExpr].
/// Generates the [InsertIntoExpr].
fn generate(&self, rng: &mut R) -> Result<InsertIntoExpr> {
let mut columns = self.table_ctx.columns.clone();
columns.shuffle(rng);
// Whether to omit all columns, i.e. INSERT INTO table_name VALUES (...)
let omit_column_list = rng.gen_bool(0.2);
let mut rows = Vec::with_capacity(self.rows);
let mut values_columns = vec![];
if omit_column_list {
// If omit column list, then all columns are required in the values list
values_columns = self.table_ctx.columns.clone();
} else {
for column in &self.table_ctx.columns {
let can_omit = column.is_nullable() || column.has_default_value();
// 50% chance to omit a column if it's not required
if !can_omit || rng.gen_bool(0.5) {
values_columns.push(column.clone());
}
}
values_columns.shuffle(rng);
// If all columns are omitted, pick a random column
if values_columns.is_empty() {
values_columns.push(self.table_ctx.columns.choose(rng).unwrap().clone());
}
}
let mut values_list = Vec::with_capacity(self.rows);
for _ in 0..self.rows {
let mut row = Vec::with_capacity(columns.len());
for column in &columns {
// TODO(weny): generates the special cases
row.push(generate_random_value(
let mut row = Vec::with_capacity(values_columns.len());
for column in &values_columns {
if column.is_nullable() && rng.gen_bool(0.2) {
row.push(RowValue::Value(Value::Null));
continue;
}
if column.has_default_value() && rng.gen_bool(0.2) {
row.push(RowValue::Default);
continue;
}
row.push(RowValue::Value(generate_random_value(
rng,
&column.column_type,
Some(self.word_generator.as_ref()),
));
)));
}
rows.push(row);
values_list.push(row);
}
Ok(InsertIntoExpr {
table_name: self.table_ctx.name.to_string(),
columns,
rows,
columns: if omit_column_list {
vec![]
} else {
values_columns
},
values_list,
})
}
}

View File

@@ -22,10 +22,13 @@ pub(crate) mod select_expr;
use core::fmt;
pub use alter_expr::AlterTableExpr;
use common_time::{Date, DateTime, Timestamp};
pub use create_expr::CreateTableExpr;
use datatypes::data_type::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use derive_builder::Builder;
pub use insert_expr::InsertIntoExpr;
use lazy_static::lazy_static;
use rand::seq::SliceRandom;
use rand::Rng;
@@ -91,14 +94,62 @@ pub fn generate_random_value<R: Rng>(
Some(random) => Value::from(random.gen(rng).value),
None => Value::from(rng.gen::<char>().to_string()),
},
ConcreteDataType::Date(_) => Value::from(rng.gen::<i32>()),
ConcreteDataType::DateTime(_) => Value::from(rng.gen::<i64>()),
&ConcreteDataType::Timestamp(_) => Value::from(rng.gen::<u64>()),
ConcreteDataType::Date(_) => generate_random_date(rng),
ConcreteDataType::DateTime(_) => generate_random_datetime(rng),
&ConcreteDataType::Timestamp(ts_type) => generate_random_timestamp(rng, ts_type),
_ => unimplemented!("unsupported type: {datatype}"),
}
}
/// Generates a random timestamp [`Value`] with the precision given by `ts_type`.
///
/// For every unit the raw value is drawn from the inclusive range spanned by the
/// matching `Timestamp::MIN_*` / `Timestamp::MAX_*` constants (converted to `i64`)
/// and wrapped with the corresponding `Timestamp::new_*` constructor.
fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    Value::from(match ts_type {
        TimestampType::Second(_) => Timestamp::new_second(
            rng.gen_range(i64::from(Timestamp::MIN_SECOND)..=i64::from(Timestamp::MAX_SECOND)),
        ),
        TimestampType::Millisecond(_) => Timestamp::new_millisecond(rng.gen_range(
            i64::from(Timestamp::MIN_MILLISECOND)..=i64::from(Timestamp::MAX_MILLISECOND),
        )),
        TimestampType::Microsecond(_) => Timestamp::new_microsecond(rng.gen_range(
            i64::from(Timestamp::MIN_MICROSECOND)..=i64::from(Timestamp::MAX_MICROSECOND),
        )),
        TimestampType::Nanosecond(_) => Timestamp::new_nanosecond(rng.gen_range(
            i64::from(Timestamp::MIN_NANOSECOND)..=i64::from(Timestamp::MAX_NANOSECOND),
        )),
    })
}
/// Generates a random `DateTime` [`Value`].
///
/// A millisecond timestamp is sampled from the inclusive range
/// `Timestamp::MIN_MILLISECOND..=Timestamp::MAX_MILLISECOND` and converted
/// through its chrono datetime representation.
///
/// # Panics
///
/// Panics if `to_chrono_datetime` fails for the sampled timestamp.
fn generate_random_datetime<R: Rng>(rng: &mut R) -> Value {
    let lower = i64::from(Timestamp::MIN_MILLISECOND);
    let upper = i64::from(Timestamp::MAX_MILLISECOND);
    let sampled = Timestamp::new_millisecond(rng.gen_range(lower..=upper));
    let chrono_datetime = sampled.to_chrono_datetime().unwrap();
    Value::from(DateTime::from(chrono_datetime))
}
/// Generates a random `Date` [`Value`].
///
/// A millisecond timestamp is sampled from the inclusive range
/// `Timestamp::MIN_MILLISECOND..=Timestamp::MAX_MILLISECOND` and converted
/// through its chrono date representation.
///
/// # Panics
///
/// Panics if `to_chrono_date` fails for the sampled timestamp.
fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
    let lower = i64::from(Timestamp::MIN_MILLISECOND);
    let upper = i64::from(Timestamp::MAX_MILLISECOND);
    let sampled = Timestamp::new_millisecond(rng.gen_range(lower..=upper));
    let chrono_date = sampled.to_chrono_date().unwrap();
    Value::from(Date::from(chrono_date))
}
/// An identifier.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct Ident {
@@ -181,6 +232,24 @@ impl Column {
.iter()
.any(|opt| opt == &ColumnOption::PrimaryKey)
}
/// Returns `true` when the column accepts `NULL`.
///
/// A column counts as nullable as long as none of its options is
/// `ColumnOption::NotNull` or `ColumnOption::TimeIndex`.
pub fn is_nullable(&self) -> bool {
    self.options
        .iter()
        .all(|opt| !matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
}
// Returns true if it has default value.
pub fn has_default_value(&self) -> bool {
self.options.iter().any(|opt| {
matches!(
opt,
ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
)
})
}
}
/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.

View File

@@ -12,14 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use datatypes::value::Value;
use crate::ir::Column;
pub type RowValue = Vec<Value>;
pub struct InsertIntoExpr {
pub table_name: String,
pub columns: Vec<Column>,
pub rows: Vec<RowValue>,
pub values_list: Vec<RowValues>,
}
pub type RowValues = Vec<RowValue>;
/// A single cell of an `INSERT` values row.
pub enum RowValue {
/// A concrete value (including `Value::Null`).
Value(Value),
/// The SQL `DEFAULT` keyword, written in place of a concrete value.
Default,
}
/// Renders a [`RowValue`] as the SQL text placed inside a `VALUES (...)` row.
///
/// * `Value::Null` is written as `NULL`.
/// * String, timestamp, datetime and date values are wrapped in single quotes;
///   any single quote inside the rendered text is doubled so it cannot end the
///   literal early.
/// * Every other value is written with its own `Display` output.
/// * `RowValue::Default` is written as `DEFAULT`.
impl Display for RowValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RowValue::Value(Value::Null) => write!(f, "NULL"),
            RowValue::Value(
                v @ (Value::String(_)
                | Value::Timestamp(_)
                | Value::DateTime(_)
                | Value::Date(_)),
            ) => {
                // `''` is the standard SQL escape for a quote inside a quoted literal.
                // NOTE(review): backslashes are passed through unchanged — confirm
                // whether the target dialect treats `\` as an escape character.
                let escaped = v.to_string().replace('\'', "''");
                write!(f, "'{}'", escaped)
            }
            RowValue::Value(v) => write!(f, "{}", v),
            RowValue::Default => write!(f, "DEFAULT"),
        }
    }
}

View File

@@ -22,33 +22,42 @@ impl DslTranslator<InsertIntoExpr, String> for InsertIntoExprTranslator {
type Error = Error;
fn translate(&self, input: &InsertIntoExpr) -> Result<String> {
let columns = input
.columns
.iter()
.map(|c| c.name.to_string())
.collect::<Vec<_>>()
.join(", ")
.to_string();
Ok(format!(
"INSERT INTO {} ({})\nVALUES\n{};",
"INSERT INTO {} {} VALUES\n{};",
input.table_name,
columns,
Self::format_columns(input),
Self::format_values(input)
))
}
}
impl InsertIntoExprTranslator {
/// Renders the column list part of the `INSERT` statement.
///
/// Returns an empty string when `input.columns` is empty; otherwise the column
/// names joined with `", "` and wrapped in parentheses.
fn format_columns(input: &InsertIntoExpr) -> String {
    if input.columns.is_empty() {
        return String::new();
    }

    let names: Vec<String> = input.columns.iter().map(|c| c.name.to_string()).collect();
    format!("({})", names.join(", "))
}
fn format_values(input: &InsertIntoExpr) -> String {
input
.rows
.values_list
.iter()
.map(|row| {
.map(|value| {
format!(
"({})",
row.iter()
.map(|v| format!("'{v}'"))
value
.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(", ")
)
@@ -64,7 +73,7 @@ mod tests {
use rand::SeedableRng;
use super::InsertIntoExprTranslator;
use super::*;
use crate::generator::insert_expr::InsertExprGeneratorBuilder;
use crate::generator::Generator;
use crate::test_utils;
@@ -84,10 +93,23 @@ mod tests {
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test (host, idc, memory_util, ts, cpu_util, disk_util)
VALUES
('adipisci', 'debitis', '0.5495312687894465', '15292064470292927036', '0.9354265029131291', '0.8037816422279636'),
('ut', 'sequi', '0.8807117723618908', '14214208091261382505', '0.5240550121500691', '0.350785883750684');"#;
let expected = r#"INSERT INTO test (ts, host, cpu_util) VALUES
('+199601-11-07 21:32:56.695+0000', 'corrupti', 0.051130243193075464),
('+40822-03-25 02:17:34.328+0000', NULL, 0.6552502332327004);"#;
assert_eq!(output, expected);
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test (cpu_util, disk_util, ts) VALUES
(0.7074194466620976, 0.661288102315126, '-47252-05-08 07:33:49.567+0000'),
(0.8266101224213618, 0.7947724277743285, '-224292-12-07 02:51:53.371+0000');"#;
assert_eq!(output, expected);
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test VALUES
('odio', NULL, 0.48809950435391647, 0.5228925709595407, 0.9091528874275897, '+241156-12-16 20:52:15.185+0000'),
('dignissimos', 'labore', NULL, 0.12983559048685023, 0.6362040919831425, '-30691-06-17 23:41:09.938+0000');"#;
assert_eq!(output, expected);
}
}

View File

@@ -98,7 +98,7 @@ impl PartialEq<Column> for ColumnEntry {
.iter()
.any(|opt| matches!(opt, ColumnOption::NotNull))
{
debug!("ColumnOption::NotNull is not found");
debug!("ColumnOption::NotNull is found");
return false;
}
} else {

View File

@@ -0,0 +1,163 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![no_main]
use std::sync::Arc;
use common_telemetry::info;
use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use snafu::{ensure, ResultExt};
use sqlx::{Executor, MySql, Pool};
use tests_fuzz::context::{TableContext, TableContextRef};
use tests_fuzz::error::{self, Result};
use tests_fuzz::fake::{
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
MappedGenerator, WordGenerator,
};
use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder;
use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder;
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr};
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections, Connections};
/// Connection state used by one run of the insert fuzz target.
struct FuzzContext {
/// MySQL connection pool that all statements of the run are executed on.
greptime: Pool<MySql>,
}
impl FuzzContext {
/// Consumes the context and closes the connection pool.
async fn close(self) {
self.greptime.close().await;
}
}
/// Parameters of one fuzz iteration.
#[derive(Copy, Clone, Debug)]
struct FuzzInput {
/// Seed for the `ChaChaRng` that drives expression generation.
seed: u64,
/// Number of columns requested for the generated table.
columns: usize,
/// Number of rows requested for the generated `INSERT` statement.
rows: usize,
}
impl Arbitrary<'_> for FuzzInput {
    /// Derives a [`FuzzInput`] from the fuzzer-provided bytes.
    ///
    /// Only the seed is read from `u`. `columns` (from `2..30`) and `rows`
    /// (from `1..4096`) are then drawn, in that order, from a `ChaChaRng`
    /// seeded with it.
    fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
        let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
        let mut seeded = ChaChaRng::seed_from_u64(seed);
        // Field initializers run in source order, so `columns` is sampled before `rows`.
        Ok(FuzzInput {
            seed,
            columns: seeded.gen_range(2..30),
            rows: seeded.gen_range(1..4096),
        })
    }
}
/// Builds a random `CREATE TABLE` expression for the fuzz run.
///
/// Names are produced by `WordGenerator` mapped through
/// `random_capitalize_map` merged with `uppercase_and_keyword_backtick_map`.
/// The generator is configured with `input.columns` columns and the `mito` engine.
///
/// # Panics
///
/// Panics if the generator builder fails to build.
fn generate_create_expr<R: Rng + 'static>(
    input: FuzzInput,
    rng: &mut R,
) -> Result<CreateTableExpr> {
    let name_mapper =
        merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map);

    CreateTableExprGeneratorBuilder::default()
        .name_generator(Box::new(MappedGenerator::new(WordGenerator, name_mapper)))
        .columns(input.columns)
        .engine("mito")
        .build()
        .unwrap()
        .generate(rng)
}
/// Builds a random `INSERT INTO` expression targeting the table described by `table_ctx`.
///
/// The generator is configured to produce `input.rows` rows.
///
/// # Panics
///
/// Panics if the generator builder fails to build.
fn generate_insert_expr<R: Rng + 'static>(
    input: FuzzInput,
    rng: &mut R,
    table_ctx: TableContextRef,
) -> Result<InsertIntoExpr> {
    InsertExprGeneratorBuilder::default()
        .table_ctx(table_ctx)
        .rows(input.rows)
        .build()
        .unwrap()
        .generate(rng)
}
/// Runs one fuzz iteration: creates a random table, inserts random rows into it,
/// checks the affected-row count, then drops the table and closes the pool.
///
/// Both the create and the insert expression are generated from one `ChaChaRng`
/// seeded with `input.seed`.
///
/// # Errors
///
/// Returns an error if generating or translating an expression fails, if any SQL
/// statement fails to execute, or if the affected-row count differs from
/// `input.rows`. On those early returns the table is not dropped and
/// `ctx.close()` is not called.
async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
info!("input: {input:?}");
let mut rng = ChaChaRng::seed_from_u64(input.seed);
// Step 1: create the table the rows will be inserted into.
let create_expr = generate_create_expr(input, &mut rng)?;
let translator = CreateTableExprTranslator;
let sql = translator.translate(&create_expr)?;
let _result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
// Step 2: generate the INSERT against the table that was just created.
let table_ctx = Arc::new(TableContext::from(&create_expr));
let insert_expr = generate_insert_expr(input, &mut rng, table_ctx)?;
let translator = InsertIntoExprTranslator;
let sql = translator.translate(&insert_expr)?;
let result = ctx
.greptime
// unprepared query, see <https://github.com/GreptimeTeam/greptimedb/issues/3500>
.execute(sql.as_str())
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
// Step 3: every generated row must be reported as affected.
ensure!(
result.rows_affected() == input.rows as u64,
error::AssertSnafu {
reason: format!(
"expected rows affected: {}, actual: {}",
input.rows,
result.rows_affected(),
)
}
);
// TODO: Validate inserted rows
// Cleans up
let sql = format!("DROP TABLE {}", create_expr.table_name);
let result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql })?;
info!(
"Drop table: {}\n\nResult: {result:?}\n\n",
create_expr.table_name
);
ctx.close().await;
Ok(())
}
// Fuzz entry point: initializes logging, obtains the MySQL connection and runs
// `execute_insert` for the given input on the `common_runtime` write runtime.
fuzz_target!(|input: FuzzInput| {
common_telemetry::init_default_ut_logging();
common_runtime::block_on_write(async {
let Connections { mysql } = init_greptime_connections().await;
let ctx = FuzzContext {
// The target cannot run without a MySQL connection, so its absence is a panic.
greptime: mysql.expect("mysql connection init must be succeed"),
};
// Any error returned by the iteration is turned into a panic.
execute_insert(ctx, input)
.await
.unwrap_or_else(|err| panic!("fuzz test must be succeed: {err:?}"));
})
});