Skip to main content

operator/
insert.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
18use api::v1::alter_table_expr::Kind;
19use api::v1::column_def::options_from_skipping;
20use api::v1::region::{
21    InsertRequest as RegionInsertRequest, InsertRequests as RegionInsertRequests,
22    RegionRequestHeader,
23};
24use api::v1::{
25    AlterTableExpr, ColumnDataType, ColumnSchema, CreateTableExpr, InsertRequests,
26    RowInsertRequest, RowInsertRequests, SemanticType,
27};
28use catalog::CatalogManagerRef;
29use client::{OutputData, OutputMeta};
30use common_catalog::consts::{
31    PARENT_SPAN_ID_COLUMN, SERVICE_NAME_COLUMN, TRACE_ID_COLUMN, TRACE_TABLE_NAME,
32    TRACE_TABLE_NAME_SESSION_KEY, default_engine, trace_operations_table_name,
33    trace_services_table_name,
34};
35use common_grpc_expr::util::ColumnExpr;
36use common_meta::cache::TableFlownodeSetCacheRef;
37use common_meta::node_manager::{AffectedRows, NodeManagerRef};
38use common_meta::peer::Peer;
39use common_query::Output;
40use common_query::prelude::{greptime_timestamp, greptime_value};
41use common_telemetry::tracing_context::TracingContext;
42use common_telemetry::{error, info, warn};
43use datatypes::schema::SkippingIndexOptions;
44use futures_util::future;
45use meter_macros::write_meter;
46use partition::manager::PartitionRuleManagerRef;
47use session::context::QueryContextRef;
48use snafu::ResultExt;
49use snafu::prelude::*;
50use sql::partition::partition_rule_for_hexstring;
51use sql::statements::create::Partitions;
52use sql::statements::insert::Insert;
53use store_api::metric_engine_consts::{
54    LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY,
55};
56use store_api::mito_engine_options::{
57    APPEND_MODE_KEY, COMPACTION_TYPE, COMPACTION_TYPE_TWCS, MERGE_MODE_KEY, TTL_KEY,
58    TWCS_TIME_WINDOW,
59};
60use store_api::storage::{RegionId, TableId};
61use table::TableRef;
62use table::metadata::TableInfo;
63use table::requests::{
64    AUTO_CREATE_TABLE_KEY, InsertRequest as TableInsertRequest, SEMANTIC_PER_TABLE_INDEX_KEY,
65    TABLE_DATA_MODEL, TABLE_DATA_MODEL_TRACE_V1, TRACE_TABLE_PARTITIONS_HINT_KEY,
66    VALID_TABLE_OPTION_KEYS, is_semantic_option_key, validate_semantic_option,
67};
68use table::table_reference::TableReference;
69
70use crate::error::{
71    CatalogSnafu, ColumnOptionsSnafu, CreatePartitionRulesSnafu, FindRegionLeaderSnafu,
72    InvalidInsertRequestSnafu, JoinTaskSnafu, RequestInsertsSnafu, Result, TableNotFoundSnafu,
73};
74use crate::expr_helper;
75use crate::region_req_factory::RegionRequestFactory;
76use crate::req_convert::common::preprocess_row_insert_requests;
77use crate::req_convert::insert::{
78    ColumnToRow, RowToRegion, StatementToRegion, TableToRegion, fill_reqs_with_impure_default,
79};
80use crate::statement::StatementExecutor;
81
82pub struct Inserter {
83    catalog_manager: CatalogManagerRef,
84    pub(crate) partition_manager: PartitionRuleManagerRef,
85    pub(crate) node_manager: NodeManagerRef,
86    pub(crate) table_flownode_set_cache: TableFlownodeSetCacheRef,
87    /// Server-side upper bound for auto table creation on write.
88    /// When `false`, missing tables are never auto-created regardless of the
89    /// per-request `auto_create_table` hint. When `true`, the hint still applies.
90    auto_create_table: bool,
91}
92
93pub type InserterRef = Arc<Inserter>;
94
/// Hint for the kind of table to create automatically when a write targets a
/// table that does not exist yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AutoCreateTableType {
    /// A logical table; the payload is the name of its physical table.
    Logical(String),
    /// A physical table.
    Physical,
    /// A log table, which is append-only.
    Log,
    /// A table that merges rows with the `last_non_null` strategy.
    LastNonNull,
    /// A trace table, created with indexes and default partition rules on
    /// the trace id.
    Trace,
}

impl AutoCreateTableType {
    /// Returns a stable, lowercase label for the variant.
    ///
    /// The payload of [`AutoCreateTableType::Logical`] is not part of the
    /// label, so the set of possible return values is fixed.
    pub fn as_str(&self) -> &'static str {
        match self {
            AutoCreateTableType::Logical(_) => "logical",
            AutoCreateTableType::Physical => "physical",
            AutoCreateTableType::Log => "log",
            AutoCreateTableType::LastNonNull => "last_non_null",
            AutoCreateTableType::Trace => "trace",
        }
    }
}
121
122/// Split insert requests into normal and instant requests.
123///
124/// Where instant requests are requests with ttl=instant,
125/// and normal requests are requests with ttl set to other values.
126///
127/// This is used to split requests for different processing.
128#[derive(Clone)]
129pub struct InstantAndNormalInsertRequests {
130    /// Requests with normal ttl.
131    pub normal_requests: RegionInsertRequests,
132    /// Requests with ttl=instant.
133    /// Will be discarded immediately at frontend, wouldn't even insert into memtable, and only sent to flow node if needed.
134    pub instant_requests: RegionInsertRequests,
135}
136
137impl Inserter {
138    pub fn new(
139        catalog_manager: CatalogManagerRef,
140        partition_manager: PartitionRuleManagerRef,
141        node_manager: NodeManagerRef,
142        table_flownode_set_cache: TableFlownodeSetCacheRef,
143        auto_create_table: bool,
144    ) -> Self {
145        Self {
146            catalog_manager,
147            partition_manager,
148            node_manager,
149            table_flownode_set_cache,
150            auto_create_table,
151        }
152    }
153
154    pub async fn handle_column_inserts(
155        &self,
156        requests: InsertRequests,
157        ctx: QueryContextRef,
158        statement_executor: &StatementExecutor,
159    ) -> Result<Output> {
160        let row_inserts = ColumnToRow::convert(requests)?;
161        self.handle_row_inserts(row_inserts, ctx, statement_executor, false, false)
162            .await
163    }
164
165    /// Handles row inserts request and creates a physical table on demand.
166    pub async fn handle_row_inserts(
167        &self,
168        mut requests: RowInsertRequests,
169        ctx: QueryContextRef,
170        statement_executor: &StatementExecutor,
171        accommodate_existing_schema: bool,
172        is_single_value: bool,
173    ) -> Result<Output> {
174        preprocess_row_insert_requests(&mut requests.inserts)?;
175        self.handle_row_inserts_with_create_type(
176            requests,
177            ctx,
178            statement_executor,
179            AutoCreateTableType::Physical,
180            accommodate_existing_schema,
181            is_single_value,
182        )
183        .await
184    }
185
186    /// Handles row inserts request and creates a log table on demand.
187    pub async fn handle_log_inserts(
188        &self,
189        requests: RowInsertRequests,
190        ctx: QueryContextRef,
191        statement_executor: &StatementExecutor,
192    ) -> Result<Output> {
193        self.handle_row_inserts_with_create_type(
194            requests,
195            ctx,
196            statement_executor,
197            AutoCreateTableType::Log,
198            false,
199            false,
200        )
201        .await
202    }
203
204    pub async fn handle_trace_inserts(
205        &self,
206        requests: RowInsertRequests,
207        ctx: QueryContextRef,
208        statement_executor: &StatementExecutor,
209    ) -> Result<Output> {
210        self.handle_row_inserts_with_create_type(
211            requests,
212            ctx,
213            statement_executor,
214            AutoCreateTableType::Trace,
215            false,
216            false,
217        )
218        .await
219    }
220
221    /// Handles row inserts request and creates a table with `last_non_null` merge mode on demand.
222    pub async fn handle_last_non_null_inserts(
223        &self,
224        requests: RowInsertRequests,
225        ctx: QueryContextRef,
226        statement_executor: &StatementExecutor,
227        accommodate_existing_schema: bool,
228        is_single_value: bool,
229    ) -> Result<Output> {
230        self.handle_row_inserts_with_create_type(
231            requests,
232            ctx,
233            statement_executor,
234            AutoCreateTableType::LastNonNull,
235            accommodate_existing_schema,
236            is_single_value,
237        )
238        .await
239    }
240
241    /// Handles row inserts request with specified [AutoCreateTableType].
242    async fn handle_row_inserts_with_create_type(
243        &self,
244        mut requests: RowInsertRequests,
245        ctx: QueryContextRef,
246        statement_executor: &StatementExecutor,
247        create_type: AutoCreateTableType,
248        accommodate_existing_schema: bool,
249        is_single_value: bool,
250    ) -> Result<Output> {
251        // remove empty requests
252        requests.inserts.retain(|req| {
253            req.rows
254                .as_ref()
255                .map(|r| !r.rows.is_empty())
256                .unwrap_or_default()
257        });
258        validate_column_count_match(&requests)?;
259
260        let CreateAlterTableResult {
261            instant_table_ids,
262            table_infos,
263        } = self
264            .create_or_alter_tables_on_demand(
265                &mut requests,
266                &ctx,
267                create_type,
268                statement_executor,
269                accommodate_existing_schema,
270                is_single_value,
271            )
272            .await?;
273
274        let name_to_info = table_infos
275            .values()
276            .map(|info| (info.name.clone(), info.clone()))
277            .collect::<HashMap<_, _>>();
278        let inserts = RowToRegion::new(
279            name_to_info,
280            instant_table_ids,
281            self.partition_manager.as_ref(),
282        )
283        .convert(requests)
284        .await?;
285
286        self.do_request(inserts, &table_infos, &ctx).await
287    }
288
289    /// Handles row inserts request with metric engine.
290    pub async fn handle_metric_row_inserts(
291        &self,
292        mut requests: RowInsertRequests,
293        ctx: QueryContextRef,
294        statement_executor: &StatementExecutor,
295        physical_table: String,
296    ) -> Result<Output> {
297        // remove empty requests
298        requests.inserts.retain(|req| {
299            req.rows
300                .as_ref()
301                .map(|r| !r.rows.is_empty())
302                .unwrap_or_default()
303        });
304        validate_column_count_match(&requests)?;
305
306        // check and create physical table
307        self.create_physical_table_on_demand(&ctx, physical_table.clone(), statement_executor)
308            .await?;
309
310        // check and create logical tables
311        let CreateAlterTableResult {
312            instant_table_ids,
313            table_infos,
314        } = self
315            .create_or_alter_tables_on_demand(
316                &mut requests,
317                &ctx,
318                AutoCreateTableType::Logical(physical_table.clone()),
319                statement_executor,
320                true,
321                true,
322            )
323            .await?;
324        let name_to_info = table_infos
325            .values()
326            .map(|info| (info.name.clone(), info.clone()))
327            .collect::<HashMap<_, _>>();
328        let inserts = RowToRegion::new(name_to_info, instant_table_ids, &self.partition_manager)
329            .convert(requests)
330            .await?;
331
332        self.do_request(inserts, &table_infos, &ctx).await
333    }
334
335    pub async fn handle_table_insert(
336        &self,
337        request: TableInsertRequest,
338        ctx: QueryContextRef,
339    ) -> Result<Output> {
340        let catalog = request.catalog_name.as_str();
341        let schema = request.schema_name.as_str();
342        let table_name = request.table_name.as_str();
343        let table = self.get_table(catalog, schema, table_name).await?;
344        let table = table.with_context(|| TableNotFoundSnafu {
345            table_name: common_catalog::format_full_table_name(catalog, schema, table_name),
346        })?;
347        let table_info = table.table_info();
348
349        let inserts = TableToRegion::new(&table_info, &self.partition_manager)
350            .convert(request)
351            .await?;
352
353        let table_infos = HashMap::from_iter([(table_info.table_id(), table_info.clone())]);
354
355        self.do_request(inserts, &table_infos, &ctx).await
356    }
357
358    pub async fn handle_statement_insert(
359        &self,
360        insert: &Insert,
361        ctx: &QueryContextRef,
362    ) -> Result<Output> {
363        let (inserts, table_info) =
364            StatementToRegion::new(self.catalog_manager.as_ref(), &self.partition_manager, ctx)
365                .convert(insert, ctx)
366                .await?;
367
368        let table_infos = HashMap::from_iter([(table_info.table_id(), table_info.clone())]);
369
370        self.do_request(inserts, &table_infos, ctx).await
371    }
372}
373
374impl Inserter {
375    async fn do_request(
376        &self,
377        requests: InstantAndNormalInsertRequests,
378        table_infos: &HashMap<TableId, Arc<TableInfo>>,
379        ctx: &QueryContextRef,
380    ) -> Result<Output> {
381        // Fill impure default values in the request
382        let requests = fill_reqs_with_impure_default(table_infos, requests)?;
383
384        let write_cost = write_meter!(
385            ctx.current_catalog(),
386            ctx.current_schema(),
387            requests,
388            ctx.channel() as u8
389        );
390        let request_factory = RegionRequestFactory::new(RegionRequestHeader {
391            tracing_context: TracingContext::from_current_span().to_w3c(),
392            dbname: ctx.get_db_string(),
393            ..Default::default()
394        });
395
396        let InstantAndNormalInsertRequests {
397            normal_requests,
398            instant_requests,
399        } = requests;
400
401        // Mirror requests for source table to flownode asynchronously
402        let flow_mirror_task = FlowMirrorTask::new(
403            &self.table_flownode_set_cache,
404            normal_requests
405                .requests
406                .iter()
407                .chain(instant_requests.requests.iter()),
408        )
409        .await?;
410        flow_mirror_task.detach(self.node_manager.clone())?;
411
412        // Write requests to datanode and wait for response
413        let write_tasks = self
414            .group_requests_by_peer(normal_requests)
415            .await?
416            .into_iter()
417            .map(|(peer, inserts)| {
418                let node_manager = self.node_manager.clone();
419                let request = request_factory.build_insert(inserts);
420                common_runtime::spawn_global(async move {
421                    node_manager
422                        .datanode(&peer)
423                        .await
424                        .handle(request)
425                        .await
426                        .context(RequestInsertsSnafu)
427                })
428            });
429        let results = future::try_join_all(write_tasks)
430            .await
431            .context(JoinTaskSnafu)?;
432        let affected_rows = results
433            .into_iter()
434            .map(|resp| resp.map(|r| r.affected_rows))
435            .sum::<Result<AffectedRows>>()?;
436        crate::metrics::DIST_INGEST_ROW_COUNT
437            .with_label_values(&[ctx.get_db_string().as_str()])
438            .inc_by(affected_rows as u64);
439        Ok(Output::new(
440            OutputData::AffectedRows(affected_rows),
441            OutputMeta::new_with_cost(write_cost as _),
442        ))
443    }
444
445    async fn group_requests_by_peer(
446        &self,
447        requests: RegionInsertRequests,
448    ) -> Result<HashMap<Peer, RegionInsertRequests>> {
449        // group by region ids first to reduce repeatedly call `find_region_leader`
450        // TODO(discord9): determine if a addition clone is worth it
451        let mut requests_per_region: HashMap<RegionId, RegionInsertRequests> = HashMap::new();
452        for req in requests.requests {
453            let region_id = RegionId::from_u64(req.region_id);
454            requests_per_region
455                .entry(region_id)
456                .or_default()
457                .requests
458                .push(req);
459        }
460
461        let mut inserts: HashMap<Peer, RegionInsertRequests> = HashMap::new();
462
463        for (region_id, reqs) in requests_per_region {
464            let peer = self
465                .partition_manager
466                .find_region_leader(region_id)
467                .await
468                .context(FindRegionLeaderSnafu)?;
469            inserts
470                .entry(peer)
471                .or_default()
472                .requests
473                .extend(reqs.requests);
474        }
475
476        Ok(inserts)
477    }
478
479    /// Returns `None` if auto table creation is allowed, or `Some(reason)` if
480    /// disabled by either the global config or the request hint. The reason tells
481    /// which one, for a clearer error.
482    fn auto_create_disabled_reason(&self, ctx: &QueryContextRef) -> Result<Option<&'static str>> {
483        let auto_create_table_hint = ctx
484            .extension(AUTO_CREATE_TABLE_KEY)
485            .map(|v| v.parse::<bool>())
486            .transpose()
487            .map_err(|_| {
488                InvalidInsertRequestSnafu {
489                    reason: "`auto_create_table` hint must be a boolean",
490                }
491                .build()
492            })?
493            .unwrap_or(true);
494        Ok(if !self.auto_create_table {
495            Some("auto-create table is disabled by frontend config")
496        } else if !auto_create_table_hint {
497            Some("`auto_create_table` hint is disabled")
498        } else {
499            None
500        })
501    }
502
503    /// Creates or alter tables on demand:
504    /// - if table does not exist, create table by inferred CreateExpr
505    /// - if table exist, check if schema matches. If any new column found, alter table by inferred `AlterExpr`
506    ///
507    /// Returns a mapping from table name to table id, where table name is the table name involved in the requests.
508    /// This mapping is used in the conversion of RowToRegion.
509    ///
510    /// `accommodate_existing_schema` is used to determine if the existing schema should override the new schema.
511    /// It only works for TIME_INDEX and single VALUE columns. This is for the case where the user creates a table with
512    /// custom schema, and then inserts data with endpoints that have default schema setting, like prometheus
513    /// remote write. This will modify the `RowInsertRequests` in place.
514    /// `is_single_value` indicates whether the default schema only contains single value column so we can accommodate it.
515    async fn create_or_alter_tables_on_demand(
516        &self,
517        requests: &mut RowInsertRequests,
518        ctx: &QueryContextRef,
519        auto_create_table_type: AutoCreateTableType,
520        statement_executor: &StatementExecutor,
521        accommodate_existing_schema: bool,
522        is_single_value: bool,
523    ) -> Result<CreateAlterTableResult> {
524        let _timer = crate::metrics::CREATE_ALTER_ON_DEMAND
525            .with_label_values(&[auto_create_table_type.as_str()])
526            .start_timer();
527
528        let catalog = ctx.current_catalog();
529        let schema = ctx.current_schema();
530
531        let mut table_infos = HashMap::new();
532        if let Some(disabled_reason) = self.auto_create_disabled_reason(ctx)? {
533            let mut instant_table_ids = HashSet::new();
534            for req in &requests.inserts {
535                let table = self
536                    .get_table(catalog, &schema, &req.table_name)
537                    .await?
538                    .context(InvalidInsertRequestSnafu {
539                        reason: format!(
540                            "Table `{}` does not exist, and {}",
541                            req.table_name, disabled_reason
542                        ),
543                    })?;
544                let table_info = table.table_info();
545                if table_info.is_ttl_instant_table() {
546                    instant_table_ids.insert(table_info.table_id());
547                }
548                table_infos.insert(table_info.table_id(), table.table_info());
549            }
550            let ret = CreateAlterTableResult {
551                instant_table_ids,
552                table_infos,
553            };
554            return Ok(ret);
555        }
556
557        let mut create_tables = vec![];
558        let mut alter_tables = vec![];
559        let mut need_refresh_table_infos = HashSet::new();
560        let mut instant_table_ids = HashSet::new();
561
562        for req in &mut requests.inserts {
563            match self.get_table(catalog, &schema, &req.table_name).await? {
564                Some(table) => {
565                    let table_info = table.table_info();
566                    if table_info.is_ttl_instant_table() {
567                        instant_table_ids.insert(table_info.table_id());
568                    }
569                    if let Some(alter_expr) = self.get_alter_table_expr_on_demand(
570                        req,
571                        &table,
572                        ctx,
573                        accommodate_existing_schema,
574                        is_single_value,
575                    )? {
576                        alter_tables.push(alter_expr);
577                        need_refresh_table_infos.insert((
578                            catalog.to_string(),
579                            schema.clone(),
580                            req.table_name.clone(),
581                        ));
582                    } else {
583                        table_infos.insert(table_info.table_id(), table.table_info());
584                    }
585                }
586                None => {
587                    let create_expr =
588                        self.get_create_table_expr_on_demand(req, &auto_create_table_type, ctx)?;
589                    create_tables.push(create_expr);
590                }
591            }
592        }
593
594        match auto_create_table_type {
595            AutoCreateTableType::Logical(_) => {
596                if !create_tables.is_empty() {
597                    // Creates logical tables in batch.
598                    let tables = self
599                        .create_logical_tables(create_tables, ctx, statement_executor)
600                        .await?;
601
602                    for table in tables {
603                        let table_info = table.table_info();
604                        if table_info.is_ttl_instant_table() {
605                            instant_table_ids.insert(table_info.table_id());
606                        }
607                        table_infos.insert(table_info.table_id(), table.table_info());
608                    }
609                }
610                if !alter_tables.is_empty() {
611                    // Alter logical tables in batch.
612                    statement_executor
613                        .alter_logical_tables(alter_tables, ctx.clone())
614                        .await?;
615                }
616            }
617            AutoCreateTableType::Physical
618            | AutoCreateTableType::Log
619            | AutoCreateTableType::LastNonNull => {
620                // note that auto create table shouldn't be ttl instant table
621                // for it's a very unexpected behavior and should be set by user explicitly
622                for create_table in create_tables {
623                    let table = self
624                        .create_physical_table(create_table, None, ctx, statement_executor)
625                        .await?;
626                    let table_info = table.table_info();
627                    if table_info.is_ttl_instant_table() {
628                        instant_table_ids.insert(table_info.table_id());
629                    }
630                    table_infos.insert(table_info.table_id(), table.table_info());
631                }
632                for alter_expr in alter_tables.into_iter() {
633                    statement_executor
634                        .alter_table_inner(alter_expr, ctx.clone())
635                        .await?;
636                }
637            }
638
639            AutoCreateTableType::Trace => {
640                let trace_table_name = ctx
641                    .extension(TRACE_TABLE_NAME_SESSION_KEY)
642                    .unwrap_or(TRACE_TABLE_NAME);
643
644                let trace_table_partitions = if let Some(trace_table_partitions) =
645                    ctx.extension(TRACE_TABLE_PARTITIONS_HINT_KEY)
646                {
647                    let p = trace_table_partitions.parse::<u32>().map_err(|_| {
648                        InvalidInsertRequestSnafu {
649                            reason: format!(
650                                "Failed to parse trace_table_partitions: {}",
651                                trace_table_partitions
652                            ),
653                        }
654                        .build()
655                    })?;
656                    Some(p)
657                } else {
658                    None
659                };
660
661                // note that auto create table shouldn't be ttl instant table
662                // for it's a very unexpected behavior and should be set by user explicitly
663                for mut create_table in create_tables {
664                    if create_table.table_name == trace_services_table_name(trace_table_name)
665                        || create_table.table_name == trace_operations_table_name(trace_table_name)
666                    {
667                        // Disable append mode for auxiliary tables (services/operations) since they require upsert behavior.
668                        create_table
669                            .table_options
670                            .insert(APPEND_MODE_KEY.to_string(), "false".to_string());
671                        // Remove `ttl` key from table options if it exists
672                        create_table.table_options.remove(TTL_KEY);
673
674                        let table = self
675                            .create_physical_table(create_table, None, ctx, statement_executor)
676                            .await?;
677                        let table_info = table.table_info();
678                        if table_info.is_ttl_instant_table() {
679                            instant_table_ids.insert(table_info.table_id());
680                        }
681                        table_infos.insert(table_info.table_id(), table.table_info());
682                    } else {
683                        // prebuilt partition rules for uuid data: see the function
684                        // for more information
685                        let partitions = if matches!(trace_table_partitions, Some(0) | Some(1)) {
686                            // disable partitions
687                            None
688                        } else {
689                            let p = partition_rule_for_hexstring(
690                                TRACE_ID_COLUMN,
691                                trace_table_partitions,
692                            )
693                            .context(CreatePartitionRulesSnafu)?;
694                            Some(p)
695                        };
696
697                        // add skip index to
698                        // - trace_id: when searching by trace id
699                        // - parent_span_id: when searching root span
700                        // - span_name: when searching certain types of span
701                        let index_columns =
702                            [TRACE_ID_COLUMN, PARENT_SPAN_ID_COLUMN, SERVICE_NAME_COLUMN];
703                        for index_column in index_columns {
704                            if let Some(col) = create_table
705                                .column_defs
706                                .iter_mut()
707                                .find(|c| c.name == index_column)
708                            {
709                                col.options =
710                                    options_from_skipping(&SkippingIndexOptions::default())
711                                        .context(ColumnOptionsSnafu)?;
712                            } else {
713                                warn!(
714                                    "Column {} not found when creating index for trace table: {}.",
715                                    index_column, create_table.table_name
716                                );
717                            }
718                        }
719
720                        // use table_options to mark table model version
721                        create_table.table_options.insert(
722                            TABLE_DATA_MODEL.to_string(),
723                            TABLE_DATA_MODEL_TRACE_V1.to_string(),
724                        );
725
726                        let table = self
727                            .create_physical_table(
728                                create_table,
729                                partitions,
730                                ctx,
731                                statement_executor,
732                            )
733                            .await?;
734                        let table_info = table.table_info();
735                        if table_info.is_ttl_instant_table() {
736                            instant_table_ids.insert(table_info.table_id());
737                        }
738                        table_infos.insert(table_info.table_id(), table.table_info());
739                    }
740                }
741                for alter_expr in alter_tables.into_iter() {
742                    statement_executor
743                        .alter_table_inner(alter_expr, ctx.clone())
744                        .await?;
745                }
746            }
747        }
748
749        // refresh table infos for altered tables
750        for (catalog, schema, table_name) in need_refresh_table_infos {
751            let table = self
752                .get_table(&catalog, &schema, &table_name)
753                .await?
754                .context(TableNotFoundSnafu {
755                    table_name: common_catalog::format_full_table_name(
756                        &catalog,
757                        &schema,
758                        &table_name,
759                    ),
760                })?;
761            let table_info = table.table_info();
762            table_infos.insert(table_info.table_id(), table.table_info());
763        }
764
765        Ok(CreateAlterTableResult {
766            instant_table_ids,
767            table_infos,
768        })
769    }
770
771    async fn create_physical_table_on_demand(
772        &self,
773        ctx: &QueryContextRef,
774        physical_table: String,
775        statement_executor: &StatementExecutor,
776    ) -> Result<()> {
777        let catalog_name = ctx.current_catalog();
778        let schema_name = ctx.current_schema();
779
780        // check if exist
781        if self
782            .get_table(catalog_name, &schema_name, &physical_table)
783            .await?
784            .is_some()
785        {
786            return Ok(());
787        }
788
789        // Gate here too, otherwise a disabled switch would still leak the physical table.
790        if let Some(disabled_reason) = self.auto_create_disabled_reason(ctx)? {
791            return InvalidInsertRequestSnafu {
792                reason: format!(
793                    "Physical table `{physical_table}` does not exist, and {disabled_reason}"
794                ),
795            }
796            .fail();
797        }
798
799        let table_reference = TableReference::full(catalog_name, &schema_name, &physical_table);
800        info!("Physical metric table `{table_reference}` does not exist, try creating table");
801
802        // schema with timestamp and field column
803        let default_schema = vec![
804            ColumnSchema {
805                column_name: greptime_timestamp().to_string(),
806                datatype: ColumnDataType::TimestampMillisecond as _,
807                semantic_type: SemanticType::Timestamp as _,
808                datatype_extension: None,
809                options: None,
810            },
811            ColumnSchema {
812                column_name: greptime_value().to_string(),
813                datatype: ColumnDataType::Float64 as _,
814                semantic_type: SemanticType::Field as _,
815                datatype_extension: None,
816                options: None,
817            },
818        ];
819        let create_table_expr =
820            &mut build_create_table_expr(&table_reference, &default_schema, default_engine())?;
821
822        create_table_expr.engine = METRIC_ENGINE_NAME.to_string();
823        create_table_expr
824            .table_options
825            .insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "true".to_string());
826
827        // create physical table
828        let res = statement_executor
829            .create_table_inner(create_table_expr, None, ctx.clone())
830            .await;
831
832        match res {
833            Ok(_) => {
834                info!("Successfully created table {table_reference}",);
835                Ok(())
836            }
837            Err(err) => {
838                error!(err; "Failed to create table {table_reference}");
839                Err(err)
840            }
841        }
842    }
843
844    async fn get_table(
845        &self,
846        catalog: &str,
847        schema: &str,
848        table: &str,
849    ) -> Result<Option<TableRef>> {
850        self.catalog_manager
851            .table(catalog, schema, table, None)
852            .await
853            .context(CatalogSnafu)
854    }
855
856    fn get_create_table_expr_on_demand(
857        &self,
858        req: &RowInsertRequest,
859        create_type: &AutoCreateTableType,
860        ctx: &QueryContextRef,
861    ) -> Result<CreateTableExpr> {
862        let mut table_options = std::collections::HashMap::with_capacity(4);
863        fill_table_options_for_create(&mut table_options, create_type, ctx);
864        apply_per_table_semantic_options(&mut table_options, ctx, &req.table_name);
865
866        let engine_name = if let AutoCreateTableType::Logical(_) = create_type {
867            // engine should be metric engine when creating logical tables.
868            METRIC_ENGINE_NAME
869        } else {
870            default_engine()
871        };
872
873        let schema = ctx.current_schema();
874        let table_ref = TableReference::full(ctx.current_catalog(), &schema, &req.table_name);
875        // SAFETY: `req.rows` is guaranteed to be `Some` by `handle_row_inserts_with_create_type()`.
876        let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
877        let mut create_table_expr =
878            build_create_table_expr(&table_ref, request_schema, engine_name)?;
879
880        info!("Table `{table_ref}` does not exist, try creating table");
881        create_table_expr.table_options.extend(table_options);
882        Ok(create_table_expr)
883    }
884
885    /// Returns an alter table expression if it finds new columns in the request.
886    /// When `accommodate_existing_schema` is false, it always adds columns if not exist.
887    /// When `accommodate_existing_schema` is true, it may modify the input `req` to
888    /// accommodate it with existing schema. See [`create_or_alter_tables_on_demand`](Self::create_or_alter_tables_on_demand)
889    /// for more details.
890    /// When `accommodate_existing_schema` is true and `is_single_value` is true, it also consider fields when modifying the
891    /// input `req`.
892    fn get_alter_table_expr_on_demand(
893        &self,
894        req: &mut RowInsertRequest,
895        table: &TableRef,
896        ctx: &QueryContextRef,
897        accommodate_existing_schema: bool,
898        is_single_value: bool,
899    ) -> Result<Option<AlterTableExpr>> {
900        let catalog_name = ctx.current_catalog();
901        let schema_name = ctx.current_schema();
902        let table_name = table.table_info().name.clone();
903
904        let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
905        let column_exprs = ColumnExpr::from_column_schemas(request_schema);
906        let add_columns = expr_helper::extract_add_columns_expr(&table.schema(), column_exprs)?;
907        let Some(mut add_columns) = add_columns else {
908            return Ok(None);
909        };
910
911        // If accommodate_existing_schema is true, update request schema for Timestamp/Field columns
912        if accommodate_existing_schema {
913            let table_schema = table.schema();
914            // Find timestamp column name
915            let ts_col_name = table_schema.timestamp_column().map(|c| c.name.clone());
916            // Find field column name if there is only one and `is_single_value` is true.
917            let mut field_col_name = None;
918            if is_single_value {
919                let mut multiple_field_cols = false;
920                table.field_columns().for_each(|col| {
921                    if field_col_name.is_none() {
922                        field_col_name = Some(col.name.clone());
923                    } else {
924                        multiple_field_cols = true;
925                    }
926                });
927                if multiple_field_cols {
928                    field_col_name = None;
929                }
930            }
931
932            // Update column name in request schema for Timestamp/Field columns
933            if let Some(rows) = req.rows.as_mut() {
934                for col in &mut rows.schema {
935                    match col.semantic_type {
936                        x if x == SemanticType::Timestamp as i32 => {
937                            if let Some(ref ts_name) = ts_col_name
938                                && col.column_name != *ts_name
939                            {
940                                col.column_name = ts_name.clone();
941                            }
942                        }
943                        x if x == SemanticType::Field as i32 => {
944                            if let Some(ref field_name) = field_col_name
945                                && col.column_name != *field_name
946                            {
947                                col.column_name = field_name.clone();
948                            }
949                        }
950                        _ => {}
951                    }
952                }
953            }
954
955            // Only keep columns that are tags or non-single field.
956            add_columns.add_columns.retain(|col| {
957                let def = col.column_def.as_ref().unwrap();
958                def.semantic_type == SemanticType::Tag as i32
959                    || (def.semantic_type == SemanticType::Field as i32 && field_col_name.is_none())
960            });
961
962            if add_columns.add_columns.is_empty() {
963                return Ok(None);
964            }
965        }
966
967        Ok(Some(AlterTableExpr {
968            catalog_name: catalog_name.to_string(),
969            schema_name: schema_name.clone(),
970            table_name: table_name.clone(),
971            kind: Some(Kind::AddColumns(add_columns)),
972        }))
973    }
974
975    /// Creates a table with options.
976    async fn create_physical_table(
977        &self,
978        mut create_table_expr: CreateTableExpr,
979        partitions: Option<Partitions>,
980        ctx: &QueryContextRef,
981        statement_executor: &StatementExecutor,
982    ) -> Result<TableRef> {
983        {
984            let table_ref = TableReference::full(
985                &create_table_expr.catalog_name,
986                &create_table_expr.schema_name,
987                &create_table_expr.table_name,
988            );
989
990            info!("Table `{table_ref}` does not exist, try creating table");
991        }
992        let res = statement_executor
993            .create_table_inner(&mut create_table_expr, partitions, ctx.clone())
994            .await;
995
996        let table_ref = TableReference::full(
997            &create_table_expr.catalog_name,
998            &create_table_expr.schema_name,
999            &create_table_expr.table_name,
1000        );
1001
1002        match res {
1003            Ok(table) => {
1004                info!(
1005                    "Successfully created table {} with options: {:?}",
1006                    table_ref, create_table_expr.table_options,
1007                );
1008                Ok(table)
1009            }
1010            Err(err) => {
1011                error!(err; "Failed to create table {}", table_ref);
1012                Err(err)
1013            }
1014        }
1015    }
1016
1017    async fn create_logical_tables(
1018        &self,
1019        create_table_exprs: Vec<CreateTableExpr>,
1020        ctx: &QueryContextRef,
1021        statement_executor: &StatementExecutor,
1022    ) -> Result<Vec<TableRef>> {
1023        let res = statement_executor
1024            .create_logical_tables(&create_table_exprs, ctx.clone())
1025            .await;
1026
1027        match res {
1028            Ok(res) => {
1029                info!("Successfully created logical tables");
1030                Ok(res)
1031            }
1032            Err(err) => {
1033                let failed_tables = create_table_exprs
1034                    .into_iter()
1035                    .map(|expr| {
1036                        format!(
1037                            "{}.{}.{}",
1038                            expr.catalog_name, expr.schema_name, expr.table_name
1039                        )
1040                    })
1041                    .collect::<Vec<_>>();
1042                error!(
1043                    err;
1044                    "Failed to create logical tables {:?}",
1045                    failed_tables
1046                );
1047                Err(err)
1048            }
1049        }
1050    }
1051
1052    pub fn node_manager(&self) -> &NodeManagerRef {
1053        &self.node_manager
1054    }
1055
1056    pub fn partition_manager(&self) -> &PartitionRuleManagerRef {
1057        &self.partition_manager
1058    }
1059}
1060
1061fn validate_column_count_match(requests: &RowInsertRequests) -> Result<()> {
1062    for request in &requests.inserts {
1063        let rows = request.rows.as_ref().unwrap();
1064        let column_count = rows.schema.len();
1065        rows.rows.iter().try_for_each(|r| {
1066            ensure!(
1067                r.values.len() == column_count,
1068                InvalidInsertRequestSnafu {
1069                    reason: format!(
1070                        "column count mismatch, columns: {}, values: {}",
1071                        column_count,
1072                        r.values.len()
1073                    )
1074                }
1075            );
1076            Ok(())
1077        })?;
1078    }
1079    Ok(())
1080}
1081
1082/// Fill table options for a new table by create type.
1083pub fn fill_table_options_for_create(
1084    table_options: &mut std::collections::HashMap<String, String>,
1085    create_type: &AutoCreateTableType,
1086    ctx: &QueryContextRef,
1087) {
1088    for key in VALID_TABLE_OPTION_KEYS {
1089        if let Some(value) = ctx.extension(key) {
1090            table_options.insert(key.to_string(), value.to_string());
1091        }
1092    }
1093
1094    // Semantic keys use their own vocabulary instead of the fixed option list.
1095    for (key, value) in ctx.extensions() {
1096        if is_semantic_option_key(&key) && validate_semantic_option(&key, &value) {
1097            table_options.insert(key, value);
1098        }
1099    }
1100
1101    match create_type {
1102        AutoCreateTableType::Logical(physical_table) => {
1103            table_options.insert(
1104                LOGICAL_TABLE_METADATA_KEY.to_string(),
1105                physical_table.clone(),
1106            );
1107        }
1108        AutoCreateTableType::Physical => {
1109            if let Some(append_mode) = ctx.extension(APPEND_MODE_KEY) {
1110                table_options.insert(APPEND_MODE_KEY.to_string(), append_mode.to_string());
1111            }
1112            if let Some(merge_mode) = ctx.extension(MERGE_MODE_KEY) {
1113                table_options.insert(MERGE_MODE_KEY.to_string(), merge_mode.to_string());
1114            }
1115            if let Some(time_window) = ctx.extension(TWCS_TIME_WINDOW) {
1116                table_options.insert(TWCS_TIME_WINDOW.to_string(), time_window.to_string());
1117                // We need to set the compaction type explicitly.
1118                table_options.insert(
1119                    COMPACTION_TYPE.to_string(),
1120                    COMPACTION_TYPE_TWCS.to_string(),
1121                );
1122            }
1123        }
1124        // Set append_mode to true for log table.
1125        // because log tables should keep rows with the same ts and tags.
1126        AutoCreateTableType::Log => {
1127            table_options.insert(APPEND_MODE_KEY.to_string(), "true".to_string());
1128        }
1129        AutoCreateTableType::LastNonNull => {
1130            if ctx
1131                .extension(APPEND_MODE_KEY)
1132                .is_some_and(|value| value.eq_ignore_ascii_case("true"))
1133            {
1134                table_options.insert(APPEND_MODE_KEY.to_string(), "true".to_string());
1135                table_options.insert(MERGE_MODE_KEY.to_string(), "last_row".to_string());
1136            } else if let Some(merge_mode) = ctx.extension(MERGE_MODE_KEY) {
1137                table_options.insert(MERGE_MODE_KEY.to_string(), merge_mode.to_string());
1138            } else {
1139                table_options.insert(MERGE_MODE_KEY.to_string(), "last_non_null".to_string());
1140            }
1141        }
1142        AutoCreateTableType::Trace => {
1143            table_options.insert(APPEND_MODE_KEY.to_string(), "true".to_string());
1144        }
1145    }
1146}
1147
1148/// Folds the semantic keys for `table_name` carried on the internal per-table
1149/// index extension into `table_options`.
1150///
1151/// The index is a `{table_name -> {semantic_key: value}}` JSON blob produced by
1152/// the OTLP metrics encode path (where one metric can fan out into several
1153/// tables with distinct keys). Common keys shared by every table in a request
1154/// travel as plain semantic extensions and are handled by
1155/// [`fill_table_options_for_create`]; this carries only the per-table tail.
1156/// Keys are re-checked against the vocabulary defensively. Ingestion paths
1157/// without a per-table index (logs, traces, Prom RW) carry no extension, so this
1158/// is a no-op for them.
1159fn apply_per_table_semantic_options(
1160    table_options: &mut std::collections::HashMap<String, String>,
1161    ctx: &QueryContextRef,
1162    table_name: &str,
1163) {
1164    let Some(raw) = ctx.extension(SEMANTIC_PER_TABLE_INDEX_KEY) else {
1165        return;
1166    };
1167    let Ok(index) = serde_json::from_str::<
1168        std::collections::BTreeMap<String, std::collections::BTreeMap<String, String>>,
1169    >(raw) else {
1170        warn!("failed to parse semantic per-table index, skipping per-table options");
1171        return;
1172    };
1173    let Some(entry) = index.get(table_name) else {
1174        return;
1175    };
1176    for (key, value) in entry {
1177        if is_semantic_option_key(key) && validate_semantic_option(key, value) {
1178            table_options.insert(key.clone(), value.clone());
1179        }
1180    }
1181}
1182
1183pub fn build_create_table_expr(
1184    table: &TableReference,
1185    request_schema: &[ColumnSchema],
1186    engine: &str,
1187) -> Result<CreateTableExpr> {
1188    expr_helper::create_table_expr_by_column_schemas(table, request_schema, engine, None)
1189}
1190
1191/// Result of `create_or_alter_tables_on_demand`.
1192struct CreateAlterTableResult {
1193    /// table ids of ttl=instant tables.
1194    instant_table_ids: HashSet<TableId>,
1195    /// Table Info of the created tables.
1196    table_infos: HashMap<TableId, Arc<TableInfo>>,
1197}
1198
1199struct FlowMirrorTask {
1200    requests: HashMap<Peer, RegionInsertRequests>,
1201    num_rows: usize,
1202}
1203
1204impl FlowMirrorTask {
1205    async fn new(
1206        cache: &TableFlownodeSetCacheRef,
1207        requests: impl Iterator<Item = &RegionInsertRequest>,
1208    ) -> Result<Self> {
1209        let mut src_table_reqs: HashMap<TableId, Option<(Vec<Peer>, RegionInsertRequests)>> =
1210            HashMap::new();
1211        let mut num_rows = 0;
1212
1213        for req in requests {
1214            let table_id = RegionId::from_u64(req.region_id).table_id();
1215            match src_table_reqs.get_mut(&table_id) {
1216                Some(Some((_peers, reqs))) => reqs.requests.push(req.clone()),
1217                // already know this is not source table
1218                Some(None) => continue,
1219                _ => {
1220                    // dedup peers
1221                    let peers = cache
1222                        .get(table_id)
1223                        .await
1224                        .context(RequestInsertsSnafu)?
1225                        .unwrap_or_default()
1226                        .values()
1227                        .cloned()
1228                        .collect::<HashSet<_>>()
1229                        .into_iter()
1230                        .collect::<Vec<_>>();
1231
1232                    if !peers.is_empty() {
1233                        let mut reqs = RegionInsertRequests::default();
1234                        reqs.requests.push(req.clone());
1235                        num_rows += reqs
1236                            .requests
1237                            .iter()
1238                            .map(|r| r.rows.as_ref().unwrap().rows.len())
1239                            .sum::<usize>();
1240                        src_table_reqs.insert(table_id, Some((peers, reqs)));
1241                    } else {
1242                        // insert a empty entry to avoid repeat query
1243                        src_table_reqs.insert(table_id, None);
1244                    }
1245                }
1246            }
1247        }
1248
1249        let mut inserts: HashMap<Peer, RegionInsertRequests> = HashMap::new();
1250
1251        for (_table_id, (peers, reqs)) in src_table_reqs
1252            .into_iter()
1253            .filter_map(|(k, v)| v.map(|v| (k, v)))
1254        {
1255            if peers.len() == 1 {
1256                // fast path, zero copy
1257                inserts
1258                    .entry(peers[0].clone())
1259                    .or_default()
1260                    .requests
1261                    .extend(reqs.requests);
1262                continue;
1263            } else {
1264                // TODO(discord9): need to split requests to multiple flownodes
1265                for flownode in peers {
1266                    inserts
1267                        .entry(flownode.clone())
1268                        .or_default()
1269                        .requests
1270                        .extend(reqs.requests.clone());
1271                }
1272            }
1273        }
1274
1275        Ok(Self {
1276            requests: inserts,
1277            num_rows,
1278        })
1279    }
1280
1281    fn detach(self, node_manager: NodeManagerRef) -> Result<()> {
1282        crate::metrics::DIST_MIRROR_PENDING_ROW_COUNT.add(self.num_rows as i64);
1283        for (peer, inserts) in self.requests {
1284            let node_manager = node_manager.clone();
1285            common_runtime::spawn_global(async move {
1286                let result = node_manager
1287                    .flownode(&peer)
1288                    .await
1289                    .handle_inserts(inserts)
1290                    .await
1291                    .context(RequestInsertsSnafu);
1292
1293                match result {
1294                    Ok(resp) => {
1295                        let affected_rows = resp.affected_rows;
1296                        crate::metrics::DIST_MIRROR_ROW_COUNT.inc_by(affected_rows);
1297                        crate::metrics::DIST_MIRROR_PENDING_ROW_COUNT.sub(affected_rows as _);
1298                    }
1299                    Err(err) => {
1300                        error!(err; "Failed to insert data into flownode {}", peer);
1301                    }
1302                }
1303            });
1304        }
1305
1306        Ok(())
1307    }
1308}
1309
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use api::v1::helper::{field_column_schema, time_index_column_schema};
    use api::v1::{RowInsertRequest, Rows, Value};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_meta::cache::new_table_flownode_set_cache;
    use common_meta::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
    use common_meta::test_util::MockDatanodeManager;
    use datatypes::data_type::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use moka::future::Cache;
    use session::context::QueryContext;
    use table::TableRef;
    use table::dist_table::DummyDataSource;
    use table::metadata::{TableInfoBuilder, TableMetaBuilder, TableType};

    use super::*;
    use crate::tests::{create_partition_rule_manager, prepare_mocked_backend};

    /// Option map type accepted by the option-filling functions under test.
    type TableOptions = std::collections::HashMap<String, String>;

    /// Builds a query context on the default catalog/schema with the given
    /// extensions set in order.
    fn ctx_with_extensions(extensions: &[(&str, &str)]) -> QueryContextRef {
        let mut ctx = QueryContext::with(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
        for (key, value) in extensions {
            ctx.set_extension(*key, *value);
        }
        Arc::new(ctx)
    }

    /// Runs `fill_table_options_for_create` on an empty option map.
    fn filled_options(create_type: &AutoCreateTableType, ctx: &QueryContextRef) -> TableOptions {
        let mut table_options = TableOptions::new();
        fill_table_options_for_create(&mut table_options, create_type, ctx);
        table_options
    }

    /// Reads an option as `&str` for comparison against string constants.
    fn option<'a>(table_options: &'a TableOptions, key: &str) -> Option<&'a str> {
        table_options.get(key).map(String::as_str)
    }

    /// Builds a table named `test_table` with one time index column and one
    /// nullable float field column.
    fn make_table_ref_with_schema(ts_name: &str, field_name: &str) -> TableRef {
        let ts_column = ColumnSchema::new(
            ts_name,
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        )
        .with_time_index(true);
        let field_column =
            ColumnSchema::new(field_name, ConcreteDataType::float64_datatype(), true);
        let schema =
            datatypes::schema::SchemaBuilder::try_from_columns(vec![ts_column, field_column])
                .unwrap()
                .build()
                .unwrap();

        let meta = TableMetaBuilder::empty()
            .schema(Arc::new(schema))
            .primary_key_indices(vec![])
            .value_indices(vec![1])
            .engine("mito")
            .next_column_id(0)
            .options(Default::default())
            .created_on(Default::default())
            .build()
            .unwrap();

        let info = TableInfoBuilder::default()
            .table_id(1)
            .table_version(0)
            .name("test_table")
            .schema_name(DEFAULT_SCHEMA_NAME)
            .catalog_name(DEFAULT_CATALOG_NAME)
            .desc(None)
            .table_type(TableType::Base)
            .meta(meta)
            .build()
            .unwrap();

        Arc::new(table::Table::new(
            Arc::new(info),
            table::metadata::FilterPushDownType::Unsupported,
            Arc::new(DummyDataSource),
        ))
    }

    #[tokio::test]
    async fn test_accommodate_existing_schema_logic() {
        let ts_name = "my_ts";
        let field_name = "my_field";
        let table = make_table_ref_with_schema(ts_name, field_name);

        // The request uses different names for timestamp and field columns
        let request_rows = Rows {
            schema: vec![
                time_index_column_schema("ts_wrong", ColumnDataType::TimestampMillisecond),
                field_column_schema("field_wrong", ColumnDataType::Float64),
            ],
            rows: vec![api::v1::Row {
                values: vec![Value::default(), Value::default()],
            }],
        };
        let mut req = RowInsertRequest {
            table_name: "test_table".to_string(),
            rows: Some(request_rows),
        };
        let ctx = ctx_with_extensions(&[]);

        let kv_backend = prepare_mocked_backend().await;
        let flownode_cache = Arc::new(new_table_flownode_set_cache(
            String::new(),
            Cache::new(100),
            kv_backend.clone(),
        ));
        let inserter = Inserter::new(
            catalog::memory::MemoryCatalogManager::new(),
            create_partition_rule_manager(kv_backend.clone()).await,
            Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)),
            flownode_cache,
            true,
        );

        let alter_expr = inserter
            .get_alter_table_expr_on_demand(&mut req, &table, &ctx, true, true)
            .unwrap();
        assert!(alter_expr.is_none());

        // The request's schema should have updated names for timestamp and field columns
        let req_schema = &req.rows.as_ref().unwrap().schema;
        assert_eq!(req_schema[0].column_name, ts_name);
        assert_eq!(req_schema[1].column_name, field_name);
    }

    #[test]
    fn test_last_non_null_create_options_preserve_default_without_append_mode() {
        let ctx = ctx_with_extensions(&[]);

        let table_options = filled_options(&AutoCreateTableType::LastNonNull, &ctx);

        assert_eq!(Some("last_non_null"), option(&table_options, MERGE_MODE_KEY));
        assert!(!table_options.contains_key(APPEND_MODE_KEY));
    }

    #[test]
    fn test_fill_table_options_copies_semantic_extensions() {
        use table::requests::{
            SEMANTIC_METRIC_TYPE, SEMANTIC_PER_TABLE_INDEX_KEY, SEMANTIC_SIGNAL_TYPE,
            SEMANTIC_SOURCE, SIGNAL_TYPE_METRIC, SOURCE_OPENTELEMETRY,
        };

        let ctx = ctx_with_extensions(&[
            (SEMANTIC_SIGNAL_TYPE, SIGNAL_TYPE_METRIC),
            (SEMANTIC_SOURCE, SOURCE_OPENTELEMETRY),
            (SEMANTIC_METRIC_TYPE, "bogus"),
            // The internal transport key must NOT be copied into table options.
            (SEMANTIC_PER_TABLE_INDEX_KEY, "{}"),
        ]);

        let table_options = filled_options(&AutoCreateTableType::Physical, &ctx);

        assert_eq!(
            Some(SIGNAL_TYPE_METRIC),
            option(&table_options, SEMANTIC_SIGNAL_TYPE)
        );
        assert_eq!(
            Some(SOURCE_OPENTELEMETRY),
            option(&table_options, SEMANTIC_SOURCE)
        );
        assert!(!table_options.contains_key(SEMANTIC_METRIC_TYPE));
        assert!(!table_options.contains_key(SEMANTIC_PER_TABLE_INDEX_KEY));
    }

    #[test]
    fn test_apply_per_table_semantic_options() {
        use table::requests::{
            SEMANTIC_METRIC_TYPE, SEMANTIC_METRIC_UNIT, SEMANTIC_PER_TABLE_INDEX_KEY,
        };

        let index = r#"{
            "http_requests_total": {
                "greptime.semantic.metric.type": "counter",
                "greptime.semantic.metric.unit": "By",
                "greptime.semantic.metric.type_BOGUS": "x"
            },
            "other_table": {
                "greptime.semantic.metric.type": "gauge"
            }
        }"#;
        let ctx = ctx_with_extensions(&[(SEMANTIC_PER_TABLE_INDEX_KEY, index)]);

        let mut table_options = TableOptions::new();
        apply_per_table_semantic_options(&mut table_options, &ctx, "http_requests_total");
        assert_eq!(option(&table_options, SEMANTIC_METRIC_TYPE), Some("counter"));
        assert_eq!(option(&table_options, SEMANTIC_METRIC_UNIT), Some("By"));
        // The unknown key is rejected by the vocabulary check; other tables' keys
        // never appear.
        assert!(!table_options.contains_key("greptime.semantic.metric.type_BOGUS"));
        assert_eq!(table_options.len(), 2);

        // A table missing from the index gets nothing.
        let mut empty = TableOptions::new();
        apply_per_table_semantic_options(&mut empty, &ctx, "not_in_index");
        assert!(empty.is_empty());

        // No extension at all is a no-op (e.g. logs / Prom RW).
        let bare = ctx_with_extensions(&[]);
        let mut opts = TableOptions::new();
        apply_per_table_semantic_options(&mut opts, &bare, "http_requests_total");
        assert!(opts.is_empty());
    }

    #[test]
    fn test_last_non_null_create_options_preserve_default_with_append_mode_false() {
        let ctx = ctx_with_extensions(&[(APPEND_MODE_KEY, "false")]);

        let table_options = filled_options(&AutoCreateTableType::LastNonNull, &ctx);

        assert!(!table_options.contains_key(APPEND_MODE_KEY));
        assert_eq!(Some("last_non_null"), option(&table_options, MERGE_MODE_KEY));
    }

    #[test]
    fn test_last_non_null_create_options_use_configured_merge_mode() {
        let ctx = ctx_with_extensions(&[(MERGE_MODE_KEY, "last_row")]);

        let table_options = filled_options(&AutoCreateTableType::LastNonNull, &ctx);

        assert_eq!(Some("last_row"), option(&table_options, MERGE_MODE_KEY));
        assert!(!table_options.contains_key(APPEND_MODE_KEY));
    }

    #[test]
    fn test_last_non_null_create_options_use_last_row_with_append_mode_true() {
        let ctx = ctx_with_extensions(&[(APPEND_MODE_KEY, "true")]);

        let table_options = filled_options(&AutoCreateTableType::LastNonNull, &ctx);

        assert_eq!(Some("true"), option(&table_options, APPEND_MODE_KEY));
        assert_eq!(Some("last_row"), option(&table_options, MERGE_MODE_KEY));
    }
}