1pub(crate) mod alter_expr;
18pub(crate) mod create_expr;
19pub(crate) mod insert_expr;
20pub(crate) mod partition_expr;
21pub(crate) mod repartition_expr;
22pub(crate) mod select_expr;
23
24use core::fmt;
25use std::collections::HashMap;
26use std::sync::{Arc, Mutex};
27use std::time::Duration;
28
29pub use alter_expr::{AlterTableExpr, AlterTableOption};
30use common_time::timestamp::TimeUnit;
31use common_time::{Date, Timestamp};
32pub use create_expr::{CreateDatabaseExpr, CreateTableExpr};
33use datatypes::data_type::ConcreteDataType;
34use datatypes::types::TimestampType;
35use datatypes::value::Value;
36use derive_builder::Builder;
37pub use insert_expr::InsertIntoExpr;
38use lazy_static::lazy_static;
39pub use partition_expr::SimplePartitions;
40use rand::Rng;
41use rand::seq::{IndexedRandom, SliceRandom};
42pub use repartition_expr::RepartitionExpr;
43use serde::{Deserialize, Serialize};
44
45use self::insert_expr::RowValues;
46use crate::context::TableContextRef;
47use crate::fake::WordGenerator;
48use crate::generator::{Random, TsValueGenerator};
49use crate::impl_random;
50use crate::ir::create_expr::ColumnOption;
51pub use crate::ir::insert_expr::RowValue;
52
53lazy_static! {
54 pub static ref DATA_TYPES: Vec<ConcreteDataType> = vec![
55 ConcreteDataType::boolean_datatype(),
56 ConcreteDataType::int16_datatype(),
57 ConcreteDataType::int32_datatype(),
58 ConcreteDataType::int64_datatype(),
59 ConcreteDataType::float32_datatype(),
60 ConcreteDataType::float64_datatype(),
61 ];
62 pub static ref TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
63 ConcreteDataType::timestamp_nanosecond_datatype(),
64 ConcreteDataType::timestamp_microsecond_datatype(),
65 ConcreteDataType::timestamp_millisecond_datatype(),
66 ConcreteDataType::timestamp_second_datatype(),
67 ];
68 pub static ref PARTIBLE_DATA_TYPES: Vec<ConcreteDataType> = vec![
69 ConcreteDataType::int16_datatype(),
70 ConcreteDataType::int32_datatype(),
71 ConcreteDataType::int64_datatype(),
72 ConcreteDataType::float32_datatype(),
73 ConcreteDataType::float64_datatype(),
74 ConcreteDataType::string_datatype(),
75 ];
76 pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
77 vec![ConcreteDataType::string_datatype()];
78 pub static ref MYSQL_TS_DATA_TYPES: Vec<ConcreteDataType> = vec![
79 ConcreteDataType::timestamp_microsecond_datatype(),
81 ConcreteDataType::timestamp_millisecond_datatype(),
82 ConcreteDataType::timestamp_second_datatype(),
83 ];
84}
85
86impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
87impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES);
88impl_random!(
89 ConcreteDataType,
90 MySQLTsColumnTypeGenerator,
91 MYSQL_TS_DATA_TYPES
92);
93impl_random!(
94 ConcreteDataType,
95 PartibleColumnTypeGenerator,
96 PARTIBLE_DATA_TYPES
97);
98impl_random!(
99 ConcreteDataType,
100 StringColumnTypeGenerator,
101 STRING_DATA_TYPES
102);
103
104pub struct ColumnTypeGenerator;
105pub struct TsColumnTypeGenerator;
106pub struct MySQLTsColumnTypeGenerator;
107pub struct PartibleColumnTypeGenerator;
108pub struct StringColumnTypeGenerator;
109
/// Expands to a `Vec<Value>` of `$bounds` evenly spaced values of the numeric
/// type `$data_type`.
///
/// With `step = <$data_type>::MAX / ($bounds + 1)`, the values are
/// `step, 2 * step, …, $bounds * step`. `$bounds` is cast to `$data_type`
/// with `as`, so it must fit that type.
macro_rules! generate_values {
    ($data_type:ty, $bounds:expr) => {{
        let origin = 0 as $data_type;
        let stride = <$data_type>::MAX / ($bounds as $data_type + 1 as $data_type);
        let mut values: Vec<Value> = Vec::new();
        for k in 1..=$bounds {
            values.push(Value::from(origin + stride * (k as $data_type)));
        }
        values
    }};
}
120
121pub fn generate_partition_bounds(datatype: &ConcreteDataType, bounds: usize) -> Vec<Value> {
123 match datatype {
124 ConcreteDataType::Int16(_) => generate_values!(i16, bounds),
125 ConcreteDataType::Int32(_) => generate_values!(i32, bounds),
126 ConcreteDataType::Int64(_) => generate_values!(i64, bounds),
127 ConcreteDataType::Float32(_) => generate_values!(f32, bounds),
128 ConcreteDataType::Float64(_) => generate_values!(f64, bounds),
129 ConcreteDataType::String(_) => {
130 let base = b'A';
131 let range = b'z' - b'A';
132 let step = range / (bounds as u8 + 1);
133 (1..=bounds)
134 .map(|i| {
135 Value::from(
136 char::from(base + step * i as u8)
137 .escape_default()
138 .to_string(),
139 )
140 })
141 .collect()
142 }
143 _ => unimplemented!("unsupported type: {datatype}"),
144 }
145}
146
147pub fn generate_random_value<R: Rng>(
149 rng: &mut R,
150 datatype: &ConcreteDataType,
151 random_str: Option<&dyn Random<Ident, R>>,
152) -> Value {
153 match datatype {
154 &ConcreteDataType::Boolean(_) => Value::from(rng.random::<bool>()),
155 ConcreteDataType::Int16(_) => Value::from(rng.random::<i16>()),
156 ConcreteDataType::Int32(_) => Value::from(rng.random::<i32>()),
157 ConcreteDataType::Int64(_) => Value::from(rng.random::<i64>()),
158 ConcreteDataType::Float32(_) => Value::from(rng.random::<f32>()),
159 ConcreteDataType::Float64(_) => Value::from(rng.random::<f64>()),
160 ConcreteDataType::String(_) => match random_str {
161 Some(random) => Value::from(random.generate(rng).value),
162 None => Value::from(rng.random::<char>().to_string()),
163 },
164 ConcreteDataType::Date(_) => generate_random_date(rng),
165
166 _ => unimplemented!("unsupported type: {datatype}"),
167 }
168}
169
170pub fn generate_unique_timestamp_for_mysql<R: Rng>(base: i64) -> TsValueGenerator<R> {
172 let base = Timestamp::new_millisecond(base);
173 generate_unique_timestamp_for_mysql_with_clock(Arc::new(Mutex::new(base)))
174}
175
176pub fn generate_unique_timestamp_for_mysql_with_clock<R: Rng>(
178 clock: Arc<Mutex<Timestamp>>,
179) -> TsValueGenerator<R> {
180 Box::new(move |_rng, ts_type| -> Value {
181 let mut clock = clock.lock().unwrap();
182 let ts = clock.add_duration(Duration::from_secs(1)).unwrap();
183 *clock = ts;
184
185 let v = match ts_type {
186 TimestampType::Second(_) => ts.convert_to(TimeUnit::Second).unwrap(),
187 TimestampType::Millisecond(_) => ts.convert_to(TimeUnit::Millisecond).unwrap(),
188 TimestampType::Microsecond(_) => ts.convert_to(TimeUnit::Microsecond).unwrap(),
189 TimestampType::Nanosecond(_) => ts.convert_to(TimeUnit::Nanosecond).unwrap(),
190 };
191 Value::from(v)
192 })
193}
194
195pub fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
197 let v = match ts_type {
198 TimestampType::Second(_) => {
199 let min = i64::from(Timestamp::MIN_SECOND);
200 let max = i64::from(Timestamp::MAX_SECOND);
201 let value = rng.random_range(min..=max);
202 Timestamp::new_second(value)
203 }
204 TimestampType::Millisecond(_) => {
205 let min = i64::from(Timestamp::MIN_MILLISECOND);
206 let max = i64::from(Timestamp::MAX_MILLISECOND);
207 let value = rng.random_range(min..=max);
208 Timestamp::new_millisecond(value)
209 }
210 TimestampType::Microsecond(_) => {
211 let min = i64::from(Timestamp::MIN_MICROSECOND);
212 let max = i64::from(Timestamp::MAX_MICROSECOND);
213 let value = rng.random_range(min..=max);
214 Timestamp::new_microsecond(value)
215 }
216 TimestampType::Nanosecond(_) => {
217 let min = i64::from(Timestamp::MIN_NANOSECOND);
218 let max = i64::from(Timestamp::MAX_NANOSECOND);
219 let value = rng.random_range(min..=max);
220 Timestamp::new_nanosecond(value)
221 }
222 };
223 Value::from(v)
224}
225
226pub fn generate_random_timestamp_for_mysql<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
228 let v = match ts_type {
229 TimestampType::Second(_) => {
230 let min = 1;
231 let max = 2_147_483_647;
232 let value = rng.random_range(min..=max);
233 Timestamp::new_second(value)
234 }
235 TimestampType::Millisecond(_) => {
236 let min = 1000;
237 let max = 2_147_483_647_499;
238 let value = rng.random_range(min..=max);
239 Timestamp::new_millisecond(value)
240 }
241 TimestampType::Microsecond(_) => {
242 let min = 1_000_000;
243 let max = 2_147_483_647_499_999;
244 let value = rng.random_range(min..=max);
245 Timestamp::new_microsecond(value)
246 }
247 TimestampType::Nanosecond(_) => {
248 let min = 1_000_000_000;
249 let max = 2_147_483_647_499_999_000;
250 let value = rng.random_range(min..=max);
251 Timestamp::new_nanosecond(value)
252 }
253 };
254 Value::from(v)
255}
256
257fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
258 let min = i64::from(Timestamp::MIN_MILLISECOND);
259 let max = i64::from(Timestamp::MAX_MILLISECOND);
260 let value = rng.random_range(min..=max);
261 let date = Timestamp::new_millisecond(value).to_chrono_date().unwrap();
262 Value::from(Date::from(date))
263}
264
265pub fn generate_partition_value<R: Rng + 'static>(
267 rng: &mut R,
268 column_type: &ConcreteDataType,
269 bounds: &[Value],
270 bound_idx: usize,
271) -> Value {
272 if bounds.is_empty() {
273 return generate_random_value(rng, column_type, None);
274 }
275 let first = bounds.first().unwrap();
276 let last = bounds.last().unwrap();
277 match column_type {
278 datatypes::data_type::ConcreteDataType::Int16(_) => {
279 let first_value = match first {
280 datatypes::value::Value::Int16(v) => *v,
281 _ => 0,
282 };
283 if bound_idx == 0 {
284 datatypes::value::Value::from(first_value.saturating_sub(1))
285 } else if bound_idx < bounds.len() {
286 bounds[bound_idx - 1].clone()
287 } else {
288 last.clone()
289 }
290 }
291 datatypes::data_type::ConcreteDataType::Int32(_) => {
292 let first_value = match first {
293 datatypes::value::Value::Int32(v) => *v,
294 _ => 0,
295 };
296 if bound_idx == 0 {
297 datatypes::value::Value::from(first_value.saturating_sub(1))
298 } else if bound_idx < bounds.len() {
299 bounds[bound_idx - 1].clone()
300 } else {
301 last.clone()
302 }
303 }
304 datatypes::data_type::ConcreteDataType::Int64(_) => {
305 let first_value = match first {
306 datatypes::value::Value::Int64(v) => *v,
307 _ => 0,
308 };
309 if bound_idx == 0 {
310 datatypes::value::Value::from(first_value.saturating_sub(1))
311 } else if bound_idx < bounds.len() {
312 bounds[bound_idx - 1].clone()
313 } else {
314 last.clone()
315 }
316 }
317 datatypes::data_type::ConcreteDataType::Float32(_) => {
318 let first_value = match first {
319 datatypes::value::Value::Float32(v) => v.0,
320 _ => 0.0,
321 };
322 if bound_idx == 0 {
323 datatypes::value::Value::from(first_value - 1.0)
324 } else if bound_idx < bounds.len() {
325 bounds[bound_idx - 1].clone()
326 } else {
327 last.clone()
328 }
329 }
330 datatypes::data_type::ConcreteDataType::Float64(_) => {
331 let first_value = match first {
332 datatypes::value::Value::Float64(v) => v.0,
333 _ => 0.0,
334 };
335 if bound_idx == 0 {
336 datatypes::value::Value::from(first_value - 1.0)
337 } else if bound_idx < bounds.len() {
338 bounds[bound_idx - 1].clone()
339 } else {
340 last.clone()
341 }
342 }
343 datatypes::data_type::ConcreteDataType::String(_) => {
344 let upper = match first {
345 datatypes::value::Value::String(v) => v.as_utf8(),
346 _ => "",
347 };
348 if bound_idx == 0 {
349 if upper <= "A" {
350 datatypes::value::Value::from("")
351 } else {
352 datatypes::value::Value::from("A")
353 }
354 } else if bound_idx < bounds.len() {
355 bounds[bound_idx - 1].clone()
356 } else {
357 last.clone()
358 }
359 }
360 _ => unimplemented!("unsupported partition column type: {column_type}"),
361 }
362}
363
364#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
366pub struct Ident {
367 pub value: String,
368 pub quote_style: Option<char>,
369}
370
371impl Ident {
372 pub fn new<S>(value: S) -> Self
374 where
375 S: Into<String>,
376 {
377 Ident {
378 value: value.into(),
379 quote_style: None,
380 }
381 }
382
383 pub fn with_quote<S>(quote: char, value: S) -> Self
385 where
386 S: Into<String>,
387 {
388 Ident {
389 value: value.into(),
390 quote_style: Some(quote),
391 }
392 }
393
394 pub fn is_empty(&self) -> bool {
395 self.value.is_empty()
396 }
397}
398
399impl From<&str> for Ident {
400 fn from(value: &str) -> Self {
401 Ident {
402 value: value.to_string(),
403 quote_style: None,
404 }
405 }
406}
407
408impl From<String> for Ident {
409 fn from(value: String) -> Self {
410 Ident {
411 value,
412 quote_style: None,
413 }
414 }
415}
416
417impl fmt::Display for Ident {
418 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
419 match self.quote_style {
420 Some(q) => write!(f, "{q}{}{q}", self.value),
421 None => f.write_str(&self.value),
422 }
423 }
424}
425
426#[derive(Debug, Builder, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
428pub struct Column {
429 #[builder(setter(into))]
430 pub name: Ident,
431 pub column_type: ConcreteDataType,
432 #[builder(default, setter(into))]
433 pub options: Vec<ColumnOption>,
434}
435
436impl Column {
437 pub fn timestamp_type(&self) -> Option<TimestampType> {
439 if let ConcreteDataType::Timestamp(ts_type) = self.column_type {
440 Some(ts_type)
441 } else {
442 None
443 }
444 }
445
446 pub fn is_time_index(&self) -> bool {
448 self.options
449 .iter()
450 .any(|opt| opt == &ColumnOption::TimeIndex)
451 }
452
453 pub fn is_primary_key(&self) -> bool {
455 self.options
456 .iter()
457 .any(|opt| opt == &ColumnOption::PrimaryKey)
458 }
459
460 pub fn is_nullable(&self) -> bool {
462 !self
463 .options
464 .iter()
465 .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
466 }
467
468 pub fn has_default_value(&self) -> bool {
470 self.options.iter().any(|opt| {
471 matches!(
472 opt,
473 ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
474 )
475 })
476 }
477
478 pub fn default_value(&self) -> Option<&Value> {
480 self.options.iter().find_map(|opt| match opt {
481 ColumnOption::DefaultValue(value) => Some(value),
482 _ => None,
483 })
484 }
485}
486
487pub fn droppable_columns(columns: &[Column]) -> Vec<&Column> {
489 columns
490 .iter()
491 .filter(|column| {
492 !column.options.iter().any(|option| {
493 option == &ColumnOption::PrimaryKey || option == &ColumnOption::TimeIndex
494 })
495 })
496 .collect::<Vec<_>>()
497}
498
499pub fn modifiable_columns(columns: &[Column]) -> Vec<&Column> {
501 columns
502 .iter()
503 .filter(|column| {
504 !column.options.iter().any(|option| {
505 option == &ColumnOption::PrimaryKey
506 || option == &ColumnOption::TimeIndex
507 || option == &ColumnOption::NotNull
508 })
509 })
510 .collect::<Vec<_>>()
511}
512
513pub fn column_options_generator<R: Rng>(
515 rng: &mut R,
516 column_type: &ConcreteDataType,
517) -> Vec<ColumnOption> {
518 let option_idx = rng.random_range(0..5);
524 match option_idx {
525 0 => vec![ColumnOption::Null],
526 1 => vec![ColumnOption::NotNull],
527 2 => vec![ColumnOption::DefaultValue(generate_random_value(
528 rng,
529 column_type,
530 None,
531 ))],
532 3 => vec![ColumnOption::PrimaryKey],
533 _ => vec![],
534 }
535}
536
537pub fn partible_column_options_generator<R: Rng + 'static>(
539 rng: &mut R,
540 column_type: &ConcreteDataType,
541) -> Vec<ColumnOption> {
542 let option_idx = rng.random_range(0..4);
547 match option_idx {
548 0 => vec![ColumnOption::PrimaryKey, ColumnOption::Null],
549 1 => vec![ColumnOption::PrimaryKey, ColumnOption::NotNull],
550 2 => vec![
551 ColumnOption::PrimaryKey,
552 ColumnOption::DefaultValue(generate_random_value(
553 rng,
554 column_type,
555 Some(&WordGenerator),
556 )),
557 ],
558 3 => vec![ColumnOption::PrimaryKey],
559 _ => unreachable!(),
560 }
561}
562
563pub fn ts_column_options_generator<R: Rng + 'static>(
565 _: &mut R,
566 _: &ConcreteDataType,
567) -> Vec<ColumnOption> {
568 vec![ColumnOption::TimeIndex]
569}
570
571pub fn primary_key_and_not_null_column_options_generator<R: Rng + 'static>(
572 _: &mut R,
573 _: &ConcreteDataType,
574) -> Vec<ColumnOption> {
575 vec![ColumnOption::PrimaryKey, ColumnOption::NotNull]
576}
577
578pub fn primary_key_options_generator<R: Rng + 'static>(
579 _: &mut R,
580 _: &ConcreteDataType,
581) -> Vec<ColumnOption> {
582 vec![ColumnOption::PrimaryKey]
583}
584
585pub fn generate_columns<R: Rng + 'static>(
587 rng: &mut R,
588 names: impl IntoIterator<Item = Ident>,
589 types: &(impl Random<ConcreteDataType, R> + ?Sized),
590 options: impl Fn(&mut R, &ConcreteDataType) -> Vec<ColumnOption>,
591) -> Vec<Column> {
592 names
593 .into_iter()
594 .map(|name| {
595 let column_type = types.generate(rng);
596 let options = options(rng, &column_type);
597 Column {
598 name,
599 options,
600 column_type,
601 }
602 })
603 .collect()
604}
605
606pub fn replace_default(
608 rows: &[RowValues],
609 table_ctx_ref: &TableContextRef,
610 insert_expr: &InsertIntoExpr,
611) -> Vec<RowValues> {
612 let index_map: HashMap<usize, usize> = insert_expr
613 .columns
614 .iter()
615 .enumerate()
616 .map(|(insert_idx, insert_column)| {
617 let create_idx = table_ctx_ref
618 .columns
619 .iter()
620 .position(|create_column| create_column.name == insert_column.name)
621 .expect("Column not found in create_expr");
622 (insert_idx, create_idx)
623 })
624 .collect();
625
626 let mut new_rows = Vec::new();
627 for row in rows {
628 let mut new_row = Vec::new();
629 for (idx, value) in row.iter().enumerate() {
630 if let RowValue::Default = value {
631 let column = &table_ctx_ref.columns[index_map[&idx]];
632 new_row.push(RowValue::Value(column.default_value().unwrap().clone()));
633 } else {
634 new_row.push(value.clone());
635 }
636 }
637 new_rows.push(new_row);
638 }
639 new_rows
640}
641
642pub fn sort_by_primary_keys(rows: &mut [RowValues], primary_keys_idx: Vec<usize>) {
644 rows.sort_by(|a, b| {
645 let a_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &a[i]).collect();
646 let b_keys: Vec<_> = primary_keys_idx.iter().map(|&i| &b[i]).collect();
647 for (a_key, b_key) in a_keys.iter().zip(b_keys.iter()) {
648 match a_key.cmp(b_key) {
649 Some(std::cmp::Ordering::Equal) => continue,
650 non_eq => return non_eq.unwrap(),
651 }
652 }
653 std::cmp::Ordering::Equal
654 });
655}
656
657pub fn format_columns(columns: &[Column]) -> String {
659 columns
660 .iter()
661 .map(|c| c.name.to_string())
662 .collect::<Vec<_>>()
663 .join(", ")
664}
665
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `uint64` column with the given name and options.
    fn uint64_column(name: &str, options: Vec<ColumnOption>) -> Column {
        Column {
            name: name.into(),
            column_type: ConcreteDataType::uint64_datatype(),
            options,
        }
    }

    #[test]
    fn test_droppable_columns() {
        // Primary-key and time-index columns can never be dropped.
        let pinned = vec![
            uint64_column("hi", vec![ColumnOption::PrimaryKey]),
            uint64_column("foo", vec![ColumnOption::TimeIndex]),
        ];
        assert!(droppable_columns(&pinned).is_empty());

        // Columns without options are all droppable.
        let plain = vec![uint64_column("hi", vec![]), uint64_column("foo", vec![])];
        assert_eq!(droppable_columns(&plain).len(), 2);
    }
}