1use api::v1::CreateTableExpr;
16use datafusion_common::tree_node::TreeNode;
17use datafusion_expr::LogicalPlan;
18use datatypes::prelude::ConcreteDataType;
19use datatypes::schema::ColumnSchema;
20use operator::expr_helper::column_schemas_to_defs;
21use snafu::ResultExt;
22
23use crate::Error;
24use crate::adapter::{AUTO_CREATED_PLACEHOLDER_TS_COL, AUTO_CREATED_UPDATE_AT_TS_COL};
25use crate::batching_mode::utils::FindGroupByFinalName;
26use crate::error::{ConvertColumnSchemaSnafu, DatafusionSnafu};
27
/// The kind of query a flow is defined with.
///
/// `create_table_with_expr` uses this to decide how the sink table's columns are derived
/// from the query plan.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum QueryType {
    /// The flow is defined by a TQL query: primary-key (tag) columns are created as strings
    /// and value columns as float64 in the sink table.
    Tql,
    /// The flow is defined by a SQL query: columns keep the types inferred from the plan
    /// and an auto-created "update at" timestamp column is appended.
    Sql,
}
35
36pub(super) fn create_table_with_expr(
39 plan: &LogicalPlan,
40 sink_table_name: &[String; 3],
41 query_type: &QueryType,
42) -> Result<CreateTableExpr, Error> {
43 let table_def = match query_type {
44 &QueryType::Sql => {
45 if let Some(def) = build_pk_from_aggr(plan)? {
46 def
47 } else {
48 build_by_sql_schema(plan)?
49 }
50 }
51 QueryType::Tql => {
52 if let Some(table_def) = build_pk_from_aggr(plan)? {
54 table_def
55 } else {
56 build_by_tql_schema(plan)?
57 }
58 }
59 };
60 let first_time_stamp = table_def.ts_col;
61 let primary_keys = table_def.pks;
62
63 let mut column_schemas = Vec::new();
64 for field in plan.schema().fields() {
65 let name = field.name();
66 let ty = ConcreteDataType::from_arrow_type(field.data_type());
67 let col_schema = if first_time_stamp == Some(name.clone()) {
68 ColumnSchema::new(name, ty, false).with_time_index(true)
69 } else {
70 ColumnSchema::new(name, ty, true)
71 };
72
73 match query_type {
74 QueryType::Sql => {
75 column_schemas.push(col_schema);
76 }
77 QueryType::Tql => {
78 let is_tag_column = primary_keys.contains(name);
81 let is_val_column = !is_tag_column && first_time_stamp.as_ref() != Some(name);
82 if is_val_column {
83 let col_schema =
84 ColumnSchema::new(name, ConcreteDataType::float64_datatype(), true);
85 column_schemas.push(col_schema);
86 } else if is_tag_column {
87 let col_schema =
88 ColumnSchema::new(name, ConcreteDataType::string_datatype(), true);
89 column_schemas.push(col_schema);
90 } else {
91 column_schemas.push(col_schema);
93 }
94 }
95 }
96 }
97
98 if query_type == &QueryType::Sql {
99 let update_at_schema = ColumnSchema::new(
100 AUTO_CREATED_UPDATE_AT_TS_COL,
101 ConcreteDataType::timestamp_millisecond_datatype(),
102 true,
103 );
104 column_schemas.push(update_at_schema);
105 }
106
107 let time_index = if let Some(time_index) = first_time_stamp {
108 time_index
109 } else {
110 column_schemas.push(
111 ColumnSchema::new(
112 AUTO_CREATED_PLACEHOLDER_TS_COL,
113 ConcreteDataType::timestamp_millisecond_datatype(),
114 false,
115 )
116 .with_time_index(true),
117 );
118 AUTO_CREATED_PLACEHOLDER_TS_COL.to_string()
119 };
120
121 let column_defs =
122 column_schemas_to_defs(column_schemas, &primary_keys).context(ConvertColumnSchemaSnafu)?;
123 Ok(CreateTableExpr {
124 catalog_name: sink_table_name[0].clone(),
125 schema_name: sink_table_name[1].clone(),
126 table_name: sink_table_name[2].clone(),
127 desc: "Auto created table by flow engine".to_string(),
128 column_defs,
129 time_index,
130 primary_keys,
131 create_if_not_exists: true,
132 table_options: Default::default(),
133 table_id: None,
134 engine: "mito".to_string(),
135 })
136}
137
138fn build_by_sql_schema(plan: &LogicalPlan) -> Result<TableDef, Error> {
140 let first_time_stamp = plan.schema().fields().iter().find_map(|f| {
141 if ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp() {
142 Some(f.name().clone())
143 } else {
144 None
145 }
146 });
147 Ok(TableDef {
148 ts_col: first_time_stamp,
149 pks: vec![],
150 })
151}
152
153fn build_by_tql_schema(plan: &LogicalPlan) -> Result<TableDef, Error> {
155 let first_time_stamp = plan.schema().fields().iter().find_map(|f| {
156 if ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp() {
157 Some(f.name().clone())
158 } else {
159 None
160 }
161 });
162 let string_columns = plan
163 .schema()
164 .fields()
165 .iter()
166 .filter_map(|f| {
167 if ConcreteDataType::from_arrow_type(f.data_type()).is_string() {
168 Some(f.name().clone())
169 } else {
170 None
171 }
172 })
173 .collect::<Vec<_>>();
174
175 Ok(TableDef {
176 ts_col: first_time_stamp,
177 pks: string_columns,
178 })
179}
180
/// Key layout inferred for an auto-created sink table.
struct TableDef {
    /// Name of the column to use as time index, or `None` when no suitable column was found.
    ts_col: Option<String>,
    /// Names of the primary-key columns; may be empty.
    pks: Vec<String>,
}
185
186fn build_pk_from_aggr(plan: &LogicalPlan) -> Result<Option<TableDef>, Error> {
195 let fields = plan.schema().fields();
196 let mut pk_names = FindGroupByFinalName::default();
197
198 plan.visit(&mut pk_names)
199 .with_context(|_| DatafusionSnafu {
200 context: format!("Can't find aggr expr in plan {plan:?}"),
201 })?;
202
203 let Some(pk_final_names) = pk_names.get_group_expr_names() else {
205 return Ok(None);
206 };
207 if pk_final_names.is_empty() {
208 let first_ts_col = fields
209 .iter()
210 .find(|f| ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp())
211 .map(|f| f.name().clone());
212 return Ok(Some(TableDef {
213 ts_col: first_ts_col,
214 pks: vec![],
215 }));
216 }
217
218 let all_pk_cols: Vec<_> = fields
219 .iter()
220 .filter(|f| pk_final_names.contains(f.name()))
221 .map(|f| f.name().clone())
222 .collect();
223 let first_time_stamp = fields
229 .iter()
230 .find(|f| {
231 all_pk_cols.contains(&f.name().clone())
232 && ConcreteDataType::from_arrow_type(f.data_type()).is_timestamp()
233 })
234 .map(|f| f.name().clone());
235
236 let all_pk_cols: Vec<_> = all_pk_cols
237 .into_iter()
238 .filter(|col| first_time_stamp.as_ref() != Some(col))
239 .collect();
240
241 Ok(Some(TableDef {
242 ts_col: first_time_stamp,
243 pks: all_pk_cols,
244 }))
245}
246
#[cfg(test)]
mod test {
    use api::v1::column_def::try_as_column_schema;
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use pretty_assertions::assert_eq;
    use session::context::QueryContext;

    use super::*;
    use crate::adapter::{AUTO_CREATED_PLACEHOLDER_TS_COL, AUTO_CREATED_UPDATE_AT_TS_COL};
    use crate::batching_mode::utils::sql_to_df_plan;
    use crate::test_utils::create_test_query_engine;

    /// Sink table name used by every case.
    const SINK_TABLE: &str = "new_table";

    /// A SQL query together with the sink table definition expected to be derived from it.
    struct TestCase {
        sql: &'static str,
        column_schemas: Vec<ColumnSchema>,
        primary_keys: Vec<String>,
        time_index: String,
    }

    /// A nullable, non-time-index column.
    fn nullable(name: &str, data_type: ConcreteDataType) -> ColumnSchema {
        ColumnSchema::new(name, data_type, true)
    }

    /// A non-null millisecond-timestamp column flagged as the time index.
    fn ts_index(name: &str) -> ColumnSchema {
        ColumnSchema::new(
            name,
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        )
        .with_time_index(true)
    }

    /// Checks the columns, primary keys and time index generated for SQL flows.
    #[tokio::test]
    async fn test_gen_create_table_sql() {
        let query_engine = create_test_query_engine();
        let ctx = QueryContext::arc();

        let update_at = nullable(
            AUTO_CREATED_UPDATE_AT_TS_COL,
            ConcreteDataType::timestamp_millisecond_datatype(),
        );
        let ts_placeholder = ts_index(AUTO_CREATED_PLACEHOLDER_TS_COL);
        let number = nullable("number", ConcreteDataType::uint32_datatype());

        let testcases = vec![
            // No aggregation: the first timestamp column is the time index.
            TestCase {
                sql: "SELECT number, ts FROM numbers_with_ts",
                column_schemas: vec![number.clone(), ts_index("ts"), update_at.clone()],
                primary_keys: vec![],
                time_index: "ts".to_string(),
            },
            // Aggregated timestamp is not grouped on: a placeholder time index is added.
            TestCase {
                sql: "SELECT number, max(ts) FROM numbers_with_ts GROUP BY number",
                column_schemas: vec![
                    number.clone(),
                    nullable(
                        "max(numbers_with_ts.ts)",
                        ConcreteDataType::timestamp_millisecond_datatype(),
                    ),
                    update_at.clone(),
                    ts_placeholder.clone(),
                ],
                primary_keys: vec!["number".to_string()],
                time_index: AUTO_CREATED_PLACEHOLDER_TS_COL.to_string(),
            },
            // Grouping only on the timestamp: it is the time index and there are no keys.
            TestCase {
                sql: "SELECT max(number), ts FROM numbers_with_ts GROUP BY ts",
                column_schemas: vec![
                    nullable(
                        "max(numbers_with_ts.number)",
                        ConcreteDataType::uint32_datatype(),
                    ),
                    ts_index("ts"),
                    update_at.clone(),
                ],
                primary_keys: vec![],
                time_index: "ts".to_string(),
            },
            // Grouping on both: timestamp is the time index, the other column is the key.
            TestCase {
                sql: "SELECT number, ts FROM numbers_with_ts GROUP BY ts, number",
                column_schemas: vec![number.clone(), ts_index("ts"), update_at.clone()],
                primary_keys: vec!["number".to_string()],
                time_index: "ts".to_string(),
            },
        ];

        for tc in testcases {
            let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), tc.sql, true)
                .await
                .unwrap();
            let sink_table_name = [
                "greptime".to_string(),
                "public".to_string(),
                SINK_TABLE.to_string(),
            ];
            let expr = create_table_with_expr(&plan, &sink_table_name, &QueryType::Sql).unwrap();

            let actual_columns: Vec<_> = expr
                .column_defs
                .iter()
                .map(|def| try_as_column_schema(def).unwrap())
                .collect();
            assert_eq!(tc.column_schemas, actual_columns, "{:?}", tc.sql);
            assert_eq!(tc.primary_keys, expr.primary_keys, "{:?}", tc.sql);
            assert_eq!(tc.time_index, expr.time_index, "{:?}", tc.sql);
        }
    }
}