feat(fuzz): add create logical table target (#3756)

* feat(fuzz): add create logical table target

* fix: drop physical table after fuzz test

* fix: remove backticks of table name in with clause

* fix: create physical and logical table properly

* chore: update comments

* chore(ci): add fuzz_create_logical_table ci cfg

* fix: create one logical table at a time

* fix: avoid possible duplicate table and column name

* feat: use hard-coded physical table

* chore: remove useless phantom

* refactor: create logical table with struct initialization

* chore: suggested changes and corresponding test changes

* chore: clean up
This commit is contained in:
Yohan Wal
2024-04-26 17:09:08 +08:00
committed by GitHub
parent d8ea7c5585
commit 934c7e3fef
5 changed files with 425 additions and 4 deletions

View File

@@ -130,7 +130,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ]
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table" ]
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3

View File

@@ -50,6 +50,13 @@ test = false
bench = false
doc = false
[[bin]]
name = "fuzz_create_logical_table"
path = "targets/fuzz_create_logical_table.rs"
test = false
bench = false
doc = false
[[bin]]
name = "fuzz_insert"
path = "targets/fuzz_insert.rs"

View File

@@ -14,6 +14,7 @@
use std::collections::HashMap;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use derive_builder::Builder;
use partition::partition::{PartitionBound, PartitionDef};
@@ -22,14 +23,16 @@ use rand::Rng;
use snafu::{ensure, ResultExt};
use super::Generator;
use crate::context::TableContextRef;
use crate::error::{self, Error, Result};
use crate::fake::{random_capitalize_map, MappedGenerator, WordGenerator};
use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Random};
use crate::ir::create_expr::{CreateDatabaseExprBuilder, CreateTableExprBuilder};
use crate::ir::create_expr::{ColumnOption, CreateDatabaseExprBuilder, CreateTableExprBuilder};
use crate::ir::{
column_options_generator, generate_columns, generate_random_value,
partible_column_options_generator, ts_column_options_generator, ColumnTypeGenerator,
CreateDatabaseExpr, CreateTableExpr, Ident, PartibleColumnTypeGenerator, TsColumnTypeGenerator,
partible_column_options_generator, primary_key_options_generator, ts_column_options_generator,
Column, ColumnTypeGenerator, CreateDatabaseExpr, CreateTableExpr, Ident,
PartibleColumnTypeGenerator, StringColumnTypeGenerator, TsColumnTypeGenerator,
};
#[derive(Builder)]
@@ -200,6 +203,107 @@ impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateTableExprGenerato
}
}
/// Generates a physical table with two fixed columns: `ts` of
/// `TimestampType::Millisecond` as the time index and `val` of `Float64Type`.
///
/// Built through the derived `CreatePhysicalTableExprGeneratorBuilder`
/// (owned builder pattern); both fields have builder defaults.
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct CreatePhysicalTableExprGenerator<R: Rng + 'static> {
/// Produces the table name; the builder default is [`WordGenerator`].
#[builder(default = "Box::new(WordGenerator)")]
name_generator: Box<dyn Random<Ident, R>>,
/// Copied into the generated expression's `if_not_exists` flag; the builder default is `false`.
#[builder(default = "false")]
if_not_exists: bool,
}
impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreatePhysicalTableExprGenerator<R> {
    type Error = Error;

    /// Builds the `CREATE TABLE` expression of a metric-engine physical table.
    ///
    /// The table name is the only value drawn from `rng`. The schema is fixed:
    /// a millisecond timestamp column `ts` flagged as the time index and a
    /// float64 column `val` without options. The expression carries the
    /// `physical_metric_table` option with an empty value and declares no
    /// partition and no primary keys.
    fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
        let table_name = self.name_generator.gen(rng);

        let time_index_column = Column {
            name: Ident::new("ts"),
            column_type: ConcreteDataType::timestamp_millisecond_datatype(),
            options: vec![ColumnOption::TimeIndex],
        };
        let value_column = Column {
            name: Ident::new("val"),
            column_type: ConcreteDataType::float64_datatype(),
            options: Vec::new(),
        };

        Ok(CreateTableExpr {
            table_name,
            columns: vec![time_index_column, value_column],
            if_not_exists: self.if_not_exists,
            partition: None,
            engine: "metric".to_owned(),
            options: [("physical_metric_table".to_string(), "".into())].into(),
            primary_keys: Vec::new(),
        })
    }
}
/// Generates a logical table based on an existing physical table.
///
/// Built through the derived `CreateLogicalTableExprGeneratorBuilder` (owned
/// builder pattern). `physical_table_ctx`, `labels` and `if_not_exists`
/// declare no builder default, so they have to be set before `build()`.
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct CreateLogicalTableExprGenerator<R: Rng + 'static> {
/// Context of the physical table the logical table is created on; its
/// columns are copied into the logical table and its name is written to the
/// `on_physical_table` option.
physical_table_ctx: TableContextRef,
/// Number of extra column names requested from `name_generator` for the
/// string primary-key columns.
labels: usize,
/// Copied into the generated expression's `if_not_exists` flag.
if_not_exists: bool,
/// Produces the table name and the label column names; the builder default
/// is [`WordGenerator`].
#[builder(default = "Box::new(WordGenerator)")]
name_generator: Box<dyn Random<Ident, R>>,
}
impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateLogicalTableExprGenerator<R> {
    type Error = Error;

    /// Builds the `CREATE TABLE` expression of a logical table that sits on
    /// top of the configured physical table.
    ///
    /// The result contains every column of the physical table followed by
    /// `labels` generated string columns, each carrying the `PrimaryKey`
    /// option. `primary_keys` holds the indices of all primary-key columns in
    /// shuffled order.
    ///
    /// # Errors
    ///
    /// Returns an `Unexpected` error when the physical table does not have
    /// exactly two columns.
    fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
        let physical = &self.physical_table_ctx;

        // Currently we mock the usage of GreptimeDB as Prometheus' backend, so the
        // physical table is required to consist of exactly two columns.
        ensure!(
            physical.columns.len() == 2,
            error::UnexpectedSnafu {
                violated: "The physical table must have two columns"
            }
        );

        // The random draws below keep a fixed order (table name, label names,
        // label columns, shuffle) so that a seeded RNG yields a stable result.
        let table_name = physical.generate_unique_table_name(rng, self.name_generator.as_ref());
        let label_names = self.name_generator.choose(rng, self.labels);

        // Start from the physical table's columns, then append the label columns.
        let mut columns = physical.columns.clone();
        columns.extend(generate_columns(
            rng,
            label_names,
            &StringColumnTypeGenerator,
            Box::new(primary_key_options_generator),
        ));

        // Only the `primary key` option is kept in the physical table at the moment,
        // so it is the only option the logical table carries in the fuzz test.
        let mut primary_keys: Vec<_> = columns
            .iter()
            .enumerate()
            .filter(|(_, column)| column.is_primary_key())
            .map(|(position, _)| position)
            .collect();
        primary_keys.shuffle(rng);

        Ok(CreateTableExpr {
            table_name,
            columns,
            if_not_exists: self.if_not_exists,
            partition: None,
            engine: "metric".to_string(),
            options: [(
                "on_physical_table".to_string(),
                physical.name.value.clone().into(),
            )]
            .into(),
            primary_keys,
        })
    }
}
#[derive(Builder)]
#[builder(default, pattern = "owned")]
pub struct CreateDatabaseExprGenerator<R: Rng + 'static> {
@@ -236,10 +340,14 @@ impl<R: Rng + 'static> Generator<CreateDatabaseExpr, R> for CreateDatabaseExprGe
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use rand::SeedableRng;
use super::*;
use crate::context::TableContext;
#[test]
fn test_float64() {
@@ -296,6 +404,95 @@ mod tests {
assert_eq!(expected, serialized);
}
/// Checks that a logical table generated on top of a generated physical table
/// uses the `metric` engine, has the two physical columns plus five label
/// columns, shares the physical table's time index column, and marks every
/// string column as a primary key.
#[test]
fn test_create_logical_table_expr_generator() {
    // Returns the name of the first column flagged as time index; panics if
    // the expression has no such column.
    let time_index_name = |expr: &CreateTableExpr| -> String {
        let position = expr
            .columns
            .iter()
            .position(|column| column.options.contains(&ColumnOption::TimeIndex));
        expr.columns[position.unwrap()].name.value.to_string()
    };

    let mut rng = rand::thread_rng();

    let physical_generator = CreatePhysicalTableExprGeneratorBuilder::default()
        .if_not_exists(false)
        .build()
        .unwrap();
    let physical_table_expr = physical_generator.generate(&mut rng).unwrap();
    assert_eq!(physical_table_expr.engine, "metric");
    assert_eq!(physical_table_expr.columns.len(), 2);
    let physical_ts_name = time_index_name(&physical_table_expr);

    let logical_generator = CreateLogicalTableExprGeneratorBuilder::default()
        .physical_table_ctx(Arc::new(TableContext::from(&physical_table_expr)))
        .labels(5)
        .if_not_exists(false)
        .build()
        .unwrap();
    let logical_table_expr = logical_generator.generate(&mut rng).unwrap();
    let logical_ts_name = time_index_name(&logical_table_expr);

    assert_eq!(logical_table_expr.engine, "metric");
    assert_eq!(logical_table_expr.columns.len(), 7);
    assert_eq!(logical_ts_name, physical_ts_name);

    // Every string-typed column has to carry the primary key option.
    let string_type = ConcreteDataType::string_datatype();
    assert!(logical_table_expr
        .columns
        .iter()
        .filter(|column| column.column_type == string_type)
        .all(|column| column.options.contains(&ColumnOption::PrimaryKey)));
}
/// Pins the exact output of both generators for a fixed seed by comparing the
/// JSON serialization of the generated expressions with stored fixtures.
///
/// The same RNG is threaded through both generators, so the logical table
/// fixture depends on the random draws made while generating the physical one.
#[test]
fn test_create_logical_table_expr_generator_deterministic() {
// Fixed seed so that every run produces the same sequence of random draws.
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
let physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default()
.if_not_exists(false)
.build()
.unwrap()
.generate(&mut rng)
.unwrap();
let physical_table_serialized = serde_json::to_string(&physical_table_expr).unwrap();
let physical_table_expected = r#"{"table_name":{"value":"expedita","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"physical_metric_table":{"String":""}},"primary_keys":[]}"#;
assert_eq!(physical_table_expected, physical_table_serialized);
// The logical table is generated from a context derived from the physical table expression.
let physical_table_ctx = Arc::new(TableContext::from(&physical_table_expr));
let logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default()
.physical_table_ctx(physical_table_ctx)
.labels(5)
.if_not_exists(false)
.build()
.unwrap()
.generate(&mut rng)
.unwrap();
let logical_table_serialized = serde_json::to_string(&logical_table_expr).unwrap();
// Expected: the two physical columns, five string primary-key columns, and the
// shuffled primary key index order [2,5,3,6,4].
let logical_table_expected = r#"{"table_name":{"value":"impedit","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]},{"name":{"value":"qui","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"totam","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"molestias","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"natus","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"cumque","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"on_physical_table":{"String":"expedita"}},"primary_keys":[2,5,3,6,4]}"#;
assert_eq!(logical_table_expected, logical_table_serialized);
}
#[test]
fn test_create_database_expr_generator() {
let mut rng = rand::thread_rng();

View File

@@ -63,6 +63,8 @@ lazy_static! {
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
];
pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
vec![ConcreteDataType::string_datatype()];
}
impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
@@ -72,10 +74,16 @@ impl_random!(
PartibleColumnTypeGenerator,
PARTIBLE_DATA_TYPES
);
// Registers `StringColumnTypeGenerator` with the `STRING_DATA_TYPES` list.
// NOTE(review): presumably `impl_random!` implements random selection of a
// `ConcreteDataType` from the given list — confirm against the macro definition.
impl_random!(
ConcreteDataType,
StringColumnTypeGenerator,
STRING_DATA_TYPES
);
/// Marker type passed to `impl_random!` together with `DATA_TYPES`.
pub struct ColumnTypeGenerator;
/// Marker type for time index column types.
/// NOTE(review): its `impl_random!` registration is not visible here — confirm the backing list.
pub struct TsColumnTypeGenerator;
/// Marker type passed to `impl_random!` together with `PARTIBLE_DATA_TYPES`.
pub struct PartibleColumnTypeGenerator;
/// Marker type passed to `impl_random!` together with `STRING_DATA_TYPES`.
pub struct StringColumnTypeGenerator;
/// Generates a random [Value].
pub fn generate_random_value<R: Rng>(
@@ -318,6 +326,20 @@ pub fn ts_column_options_generator<R: Rng + 'static>(
vec![ColumnOption::TimeIndex]
}
/// Column options generator that always yields `PrimaryKey` followed by `NotNull`.
///
/// Both the RNG and the column type are ignored; the parameters exist so the
/// function has the same signature as the other column options generators.
pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
    _: &mut R,
    _: &ConcreteDataType,
) -> Vec<ColumnOption> {
    Vec::from([ColumnOption::PrimaryKey, ColumnOption::NotNull])
}
/// Column options generator that always yields the single `PrimaryKey` option.
///
/// Both the RNG and the column type are ignored; the parameters exist so the
/// function has the same signature as the other column options generators.
pub fn primary_key_options_generator<R: Rng + 'static>(
    _: &mut R,
    _: &ConcreteDataType,
) -> Vec<ColumnOption> {
    Vec::from([ColumnOption::PrimaryKey])
}
/// Generates columns with given `names`.
pub fn generate_columns<R: Rng + 'static>(
rng: &mut R,

View File

@@ -0,0 +1,195 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![no_main]
use std::sync::Arc;
use common_telemetry::info;
use datatypes::data_type::ConcreteDataType;
use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use snafu::ResultExt;
use sqlx::{MySql, Pool};
use tests_fuzz::context::TableContext;
use tests_fuzz::error::{self, Result};
use tests_fuzz::fake::{
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
MappedGenerator, WordGenerator,
};
use tests_fuzz::generator::create_expr::{
CreateLogicalTableExprGeneratorBuilder, CreatePhysicalTableExprGeneratorBuilder,
};
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::{primary_key_and_not_null_column_options_generator, Column};
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections, Connections};
use tests_fuzz::validator;
/// Resources shared by one fuzz iteration.
struct FuzzContext {
// MySQL connection pool used for every statement sent to GreptimeDB.
greptime: Pool<MySql>,
}
impl FuzzContext {
    /// Consumes the context and closes its connection pool.
    async fn close(self) {
        let Self { greptime } = self;
        greptime.close().await;
    }
}
/// Input of one fuzz iteration.
#[derive(Clone, Debug)]
struct FuzzInput {
// Seed of the RNG that drives all random choices of the iteration.
seed: u64,
}
impl Arbitrary<'_> for FuzzInput {
    /// Derives the seed from the fuzzer-provided bytes, allowing the whole `u64` range.
    fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
        u.int_in_range(u64::MIN..=u64::MAX)
            .map(|seed| FuzzInput { seed })
    }
}
/// Runs one fuzz iteration: creates a physical metric table and a logical
/// table on top of it, validates the columns reported for both tables, then
/// drops both tables and closes the connection pool.
///
/// All random choices come from a `ChaChaRng` seeded with `input.seed`.
///
/// # Errors
///
/// Returns the first error raised by a generator, the translator, a query or
/// a column validation. On such an early return the tables created so far are
/// not dropped and `ctx.close()` is not called.
async fn execute_create_logic_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
info!("input: {input:?}");
let mut rng = ChaChaRng::seed_from_u64(input.seed);
// Create the physical table; `IF NOT EXISTS` is chosen with probability 0.5.
let physical_table_if_not_exists = rng.gen_bool(0.5);
let create_physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default()
.name_generator(Box::new(MappedGenerator::new(
WordGenerator,
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
)))
.if_not_exists(physical_table_if_not_exists)
.build()
.unwrap()
.generate(&mut rng)?;
let translator = CreateTableExprTranslator;
let sql = translator.translate(&create_physical_table_expr)?;
let result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
info!("Create physical table: {sql}, result: {result:?}");
// Build the column list expected on the physical table: the generated columns
// plus `__tsid` and `__table_id`, both primary key and not null.
// NOTE(review): presumably these two columns are added by the metric engine — confirm.
let mut physical_table_columns = create_physical_table_expr.columns.clone();
physical_table_columns.push({
let column_type = ConcreteDataType::uint64_datatype();
let options = primary_key_and_not_null_column_options_generator(&mut rng, &column_type);
Column {
name: "__tsid".into(),
column_type,
options,
}
});
physical_table_columns.push({
let column_type = ConcreteDataType::uint32_datatype();
let options = primary_key_and_not_null_column_options_generator(&mut rng, &column_type);
Column {
name: "__table_id".into(),
column_type,
options,
}
});
// Create the logical table with 1 to 5 label columns. The context is derived from
// the physical table expression, so it holds only the generated columns.
let physical_table_ctx = Arc::new(TableContext::from(&create_physical_table_expr));
let labels = rng.gen_range(1..=5);
let logical_table_if_not_exists = rng.gen_bool(0.5);
let create_logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default()
.name_generator(Box::new(MappedGenerator::new(
WordGenerator,
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
)))
.physical_table_ctx(physical_table_ctx)
.labels(labels)
.if_not_exists(logical_table_if_not_exists)
.build()
.unwrap()
.generate(&mut rng)?;
let sql = translator.translate(&create_logical_table_expr)?;
let result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
info!("Create logical table: {sql}, result: {result:?}");
// Validate the columns of the logical table; both sides are sorted by column name
// before the comparison.
let mut column_entries = validator::column::fetch_columns(
&ctx.greptime,
"public".into(),
create_logical_table_expr.table_name.clone(),
)
.await?;
column_entries.sort_by(|a, b| a.column_name.cmp(&b.column_name));
let mut columns = create_logical_table_expr.columns.clone();
columns.sort_by(|a, b| a.name.value.cmp(&b.name.value));
validator::column::assert_eq(&column_entries, &columns)?;
// Validate the columns of the physical table: after the logical table is created it
// is expected to also contain the logical table's string columns.
columns.retain(|column| column.column_type == ConcreteDataType::string_datatype());
physical_table_columns.append(&mut columns);
physical_table_columns.sort_by(|a, b| a.name.value.cmp(&b.name.value));
let mut column_entries = validator::column::fetch_columns(
&ctx.greptime,
"public".into(),
create_physical_table_expr.table_name.clone(),
)
.await?;
column_entries.sort_by(|a, b| a.column_name.cmp(&b.column_name));
validator::column::assert_eq(&column_entries, &physical_table_columns)?;
// Clean up: the logical table is dropped before the physical table.
let sql = format!("DROP TABLE {}", create_logical_table_expr.table_name);
let result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
info!(
"Drop table: {}, result: {result:?}",
create_logical_table_expr.table_name
);
// Clean up the physical table.
let sql = format!("DROP TABLE {}", create_physical_table_expr.table_name);
let result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql })?;
info!(
"Drop table: {}, result: {result:?}",
create_physical_table_expr.table_name
);
ctx.close().await;
Ok(())
}
// Fuzz entry point: initializes logging, connects to GreptimeDB over MySQL and
// runs one create-logical-table iteration, panicking on any error so that the
// fuzzer records the input as a failure.
fuzz_target!(|input: FuzzInput| {
    common_telemetry::init_default_ut_logging();
    common_runtime::block_on_write(async {
        let Connections { mysql } = init_greptime_connections().await;
        let greptime = mysql.expect("mysql connection init must be succeed");
        let outcome = execute_create_logic_table(FuzzContext { greptime }, input).await;
        if let Err(err) = outcome {
            panic!("fuzz test must be succeed: {err:?}");
        }
    })
});