Skip to main content

metric_engine/engine/
put.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16
17use api::helper::ColumnDataTypeWrapper;
18use api::v1::{
19    ColumnSchema, PrimaryKeyEncoding as PrimaryKeyEncodingProto, Row, Rows, SemanticType, Value,
20    WriteHint,
21};
22use common_telemetry::{error, info};
23use fxhash::FxHashMap;
24use snafu::{OptionExt, ResultExt, ensure};
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::ColumnMetadata;
27use store_api::region_request::{
28    AffectedRows, RegionDeleteRequest, RegionPutRequest, RegionRequest,
29};
30use store_api::storage::{RegionId, TableId};
31
32use crate::engine::MetricEngineInner;
33use crate::error::{
34    ColumnNotFoundSnafu, CreateDefaultSnafu, ForbiddenPhysicalWriteSnafu, InvalidRequestSnafu,
35    LogicalRegionNotFoundSnafu, PhysicalRegionNotFoundSnafu, Result, UnexpectedRequestSnafu,
36    UnsupportedRegionRequestSnafu,
37};
38use crate::metrics::{FORBIDDEN_OPERATION_COUNT, MITO_OPERATION_ELAPSED};
39use crate::row_modifier::{RowsIter, TableIdInput};
40use crate::utils::to_data_region_id;
41
42impl MetricEngineInner {
43    /// Dispatch region put request
44    pub async fn put_region(
45        &self,
46        region_id: RegionId,
47        request: RegionPutRequest,
48    ) -> Result<AffectedRows> {
49        let is_putting_physical_region =
50            self.state.read().unwrap().exist_physical_region(region_id);
51
52        if is_putting_physical_region {
53            info!(
54                "Metric region received put request {request:?} on physical region {region_id:?}"
55            );
56            FORBIDDEN_OPERATION_COUNT.inc();
57
58            ForbiddenPhysicalWriteSnafu.fail()
59        } else {
60            self.put_logical_region(region_id, request).await
61        }
62    }
63
64    /// Batch write multiple logical regions to the same physical region.
65    ///
66    /// Dispatch region put requests in batch.
67    ///
68    /// Requests may span multiple physical regions. We group them by physical
69    /// region and write sequentially. This method fails fast on validation or
70    /// preparation errors within a group and stops at the first failure.
71    /// Writes in earlier physical-region groups are not rolled back if a later
72    /// group fails.
73    pub async fn put_regions_batch(
74        &self,
75        requests: impl ExactSizeIterator<Item = (RegionId, RegionPutRequest)>,
76    ) -> Result<AffectedRows> {
77        let len = requests.len();
78
79        if len == 0 {
80            return Ok(0);
81        }
82
83        let _timer = MITO_OPERATION_ELAPSED
84            .with_label_values(&["put_batch"])
85            .start_timer();
86
87        // Fast path: single request, no batching overhead
88        if len == 1 {
89            let (region_id, req) = requests.into_iter().next().unwrap();
90            let is_putting_physical_region =
91                self.state.read().unwrap().exist_physical_region(region_id);
92            if is_putting_physical_region {
93                FORBIDDEN_OPERATION_COUNT.inc();
94                return ForbiddenPhysicalWriteSnafu.fail();
95            }
96
97            return self.put_logical_region(region_id, req).await;
98        }
99
100        let mut requests_per_physical: HashMap<RegionId, Vec<(RegionId, RegionPutRequest)>> =
101            HashMap::new();
102        for (region_id, request) in requests {
103            let is_putting_physical_region =
104                self.state.read().unwrap().exist_physical_region(region_id);
105            if is_putting_physical_region {
106                FORBIDDEN_OPERATION_COUNT.inc();
107                return ForbiddenPhysicalWriteSnafu.fail();
108            }
109            let physical_region_id = self.find_physical_region_id(region_id)?;
110            requests_per_physical
111                .entry(physical_region_id)
112                .or_default()
113                .push((region_id, request));
114        }
115
116        let mut total_affected_rows: AffectedRows = 0;
117        for (physical_region_id, requests) in requests_per_physical {
118            let affected_rows = self
119                .put_regions_batch_single_physical(physical_region_id, requests)
120                .await?;
121            total_affected_rows += affected_rows;
122        }
123
124        Ok(total_affected_rows)
125    }
126
127    /// Write a batch of requests that all belong to the same physical region.
128    ///
129    /// This function orchestrates the batch write process:
130    /// 1. Validates all requests
131    /// 2. Merges requests according to the encoding strategy (sparse or dense)
132    /// 3. Writes the merged batch to the physical region
133    async fn put_regions_batch_single_physical(
134        &self,
135        physical_region_id: RegionId,
136        mut requests: Vec<(RegionId, RegionPutRequest)>,
137    ) -> Result<AffectedRows> {
138        if requests.is_empty() {
139            return Ok(0);
140        }
141
142        let data_region_id = to_data_region_id(physical_region_id);
143        let primary_key_encoding = self.get_primary_key_encoding(data_region_id)?;
144
145        // Validate all requests
146        self.validate_batch_requests(physical_region_id, &mut requests)
147            .await?;
148
149        // Merge requests according to encoding strategy
150        let (merged_request, total_affected_rows) = match primary_key_encoding {
151            PrimaryKeyEncoding::Sparse => self.merge_sparse_batch(physical_region_id, requests)?,
152            PrimaryKeyEncoding::Dense => self.merge_dense_batch(data_region_id, requests)?,
153        };
154
155        // Write once to the physical region
156        self.data_region
157            .write_data(data_region_id, RegionRequest::Put(merged_request))
158            .await?;
159
160        Ok(total_affected_rows)
161    }
162
163    /// Get primary key encoding for a data region.
164    fn get_primary_key_encoding(&self, data_region_id: RegionId) -> Result<PrimaryKeyEncoding> {
165        let state = self.state.read().unwrap();
166        state
167            .get_primary_key_encoding(data_region_id)
168            .context(PhysicalRegionNotFoundSnafu {
169                region_id: data_region_id,
170            })
171    }
172
173    /// Validates all requests in a batch.
174    async fn validate_batch_requests(
175        &self,
176        physical_region_id: RegionId,
177        requests: &mut [(RegionId, RegionPutRequest)],
178    ) -> Result<()> {
179        for (logical_region_id, request) in requests {
180            self.verify_rows(
181                *logical_region_id,
182                physical_region_id,
183                &mut request.rows,
184                true,
185            )
186            .await?;
187        }
188        Ok(())
189    }
190
191    /// Merges multiple requests using sparse primary key encoding.
192    fn merge_sparse_batch(
193        &self,
194        physical_region_id: RegionId,
195        requests: Vec<(RegionId, RegionPutRequest)>,
196    ) -> Result<(RegionPutRequest, AffectedRows)> {
197        let total_rows: usize = requests.iter().map(|(_, req)| req.rows.rows.len()).sum();
198        let mut merged_rows = Vec::with_capacity(total_rows);
199        let mut total_affected_rows: AffectedRows = 0;
200        let mut output_schema: Option<Vec<ColumnSchema>> = None;
201        let mut merged_version: Option<u64> = None;
202
203        // Modify and collect rows from each request
204        for (logical_region_id, mut request) in requests {
205            if let Some(request_version) = request.partition_expr_version {
206                if let Some(merged_version) = merged_version {
207                    ensure!(
208                        merged_version == request_version,
209                        InvalidRequestSnafu {
210                            region_id: physical_region_id,
211                            reason: "inconsistent partition expr version in batch"
212                        }
213                    );
214                } else {
215                    merged_version = Some(request_version);
216                }
217            }
218            self.modify_rows(
219                physical_region_id,
220                logical_region_id.table_id(),
221                &mut request.rows,
222                PrimaryKeyEncoding::Sparse,
223            )?;
224
225            let row_count = request.rows.rows.len();
226            total_affected_rows += row_count as AffectedRows;
227
228            // Capture the output schema from the first modified request
229            if output_schema.is_none() {
230                output_schema = Some(request.rows.schema.clone());
231            }
232
233            merged_rows.extend(request.rows.rows);
234        }
235
236        // Safe to unwrap: requests is guaranteed non-empty by caller
237        let schema = output_schema.unwrap();
238
239        let merged_request = RegionPutRequest {
240            rows: Rows {
241                schema,
242                rows: merged_rows,
243            },
244            hint: Some(WriteHint {
245                primary_key_encoding: PrimaryKeyEncodingProto::Sparse.into(),
246            }),
247            partition_expr_version: merged_version,
248        };
249
250        Ok((merged_request, total_affected_rows))
251    }
252
253    /// Merges multiple requests using dense primary key encoding.
254    ///
255    /// In dense mode, different requests can have different columns.
256    /// We merge all schemas into a union schema, align each row to this schema,
257    /// then batch-modify all rows together (adding __table_id and __tsid).
258    fn merge_dense_batch(
259        &self,
260        data_region_id: RegionId,
261        requests: Vec<(RegionId, RegionPutRequest)>,
262    ) -> Result<(RegionPutRequest, AffectedRows)> {
263        // Build union schema from all requests
264        let merged_schema = Self::build_union_schema(&requests);
265
266        // Align all rows to the merged schema and collect table_ids
267        let (merged_rows, table_ids, merged_version) =
268            Self::align_requests_to_schema(requests, &merged_schema)?;
269
270        // Batch-modify all rows (add __table_id and __tsid columns)
271        let final_rows = {
272            let state = self.state.read().unwrap();
273            let physical_columns = state
274                .physical_region_states()
275                .get(&data_region_id)
276                .with_context(|| PhysicalRegionNotFoundSnafu {
277                    region_id: data_region_id,
278                })?
279                .physical_columns();
280
281            let iter = RowsIter::new(
282                Rows {
283                    schema: merged_schema,
284                    rows: merged_rows,
285                },
286                physical_columns,
287            );
288
289            self.row_modifier.modify_rows(
290                iter,
291                TableIdInput::Batch(&table_ids),
292                PrimaryKeyEncoding::Dense,
293            )?
294        };
295
296        let merged_request = RegionPutRequest {
297            rows: final_rows,
298            hint: None,
299            partition_expr_version: merged_version,
300        };
301
302        Ok((merged_request, table_ids.len() as AffectedRows))
303    }
304
305    /// Builds a union schema containing all columns from all requests.
306    fn build_union_schema(requests: &[(RegionId, RegionPutRequest)]) -> Vec<ColumnSchema> {
307        let mut schema_map: HashMap<&str, ColumnSchema> = HashMap::new();
308        for (_, request) in requests {
309            for col in &request.rows.schema {
310                schema_map
311                    .entry(col.column_name.as_str())
312                    .or_insert_with(|| col.clone());
313            }
314        }
315        schema_map.into_values().collect()
316    }
317
318    fn align_requests_to_schema(
319        requests: Vec<(RegionId, RegionPutRequest)>,
320        merged_schema: &[ColumnSchema],
321    ) -> Result<(Vec<Row>, Vec<TableId>, Option<u64>)> {
322        // Pre-calculate total capacity
323        let total_rows: usize = requests.iter().map(|(_, req)| req.rows.rows.len()).sum();
324        let mut merged_rows = Vec::with_capacity(total_rows);
325        let mut table_ids = Vec::with_capacity(total_rows);
326        let mut merged_version: Option<u64> = None;
327
328        let null_value = Value { value_data: None };
329
330        for (logical_region_id, request) in requests {
331            if let Some(request_version) = request.partition_expr_version {
332                if let Some(merged_version) = merged_version {
333                    ensure!(
334                        merged_version == request_version,
335                        InvalidRequestSnafu {
336                            region_id: logical_region_id,
337                            reason: "inconsistent partition expr version in batch"
338                        }
339                    );
340                } else {
341                    merged_version = Some(request_version);
342                }
343            }
344            let table_id = logical_region_id.table_id();
345
346            // Build column name to index mapping once per request
347            let col_name_to_idx: FxHashMap<&str, usize> = request
348                .rows
349                .schema
350                .iter()
351                .enumerate()
352                .map(|(idx, col)| (col.column_name.as_str(), idx))
353                .collect();
354
355            // Build column mapping array once per request
356            // col_mapping[i] = Some(idx) means merged_schema[i] is at request.schema[idx]
357            // col_mapping[i] = None means merged_schema[i] doesn't exist in request.schema
358            let col_mapping: Vec<Option<usize>> = merged_schema
359                .iter()
360                .map(|merged_col| {
361                    col_name_to_idx
362                        .get(merged_col.column_name.as_str())
363                        .copied()
364                })
365                .collect();
366
367            // Apply the mapping to all rows
368            for mut row in request.rows.rows {
369                let mut aligned_values = Vec::with_capacity(merged_schema.len());
370                for &opt_idx in &col_mapping {
371                    aligned_values.push(match opt_idx {
372                        Some(idx) => std::mem::take(&mut row.values[idx]),
373                        None => null_value.clone(),
374                    });
375                }
376                merged_rows.push(Row {
377                    values: aligned_values,
378                });
379                table_ids.push(table_id);
380            }
381        }
382
383        Ok((merged_rows, table_ids, merged_version))
384    }
385
386    /// Find the physical region id for a logical region.
387    fn find_physical_region_id(&self, logical_region_id: RegionId) -> Result<RegionId> {
388        let state = self.state.read().unwrap();
389        state
390            .logical_regions()
391            .get(&logical_region_id)
392            .copied()
393            .context(LogicalRegionNotFoundSnafu {
394                region_id: logical_region_id,
395            })
396    }
397
398    /// Dispatch region delete request
399    pub async fn delete_region(
400        &self,
401        region_id: RegionId,
402        request: RegionDeleteRequest,
403    ) -> Result<AffectedRows> {
404        if self.is_physical_region(region_id) {
405            info!(
406                "Metric region received delete request {request:?} on physical region {region_id:?}"
407            );
408            FORBIDDEN_OPERATION_COUNT.inc();
409
410            UnsupportedRegionRequestSnafu {
411                request: RegionRequest::Delete(request),
412            }
413            .fail()
414        } else {
415            self.delete_logical_region(region_id, request).await
416        }
417    }
418
419    async fn put_logical_region(
420        &self,
421        logical_region_id: RegionId,
422        mut request: RegionPutRequest,
423    ) -> Result<AffectedRows> {
424        let _timer = MITO_OPERATION_ELAPSED
425            .with_label_values(&["put"])
426            .start_timer();
427
428        let (physical_region_id, data_region_id, primary_key_encoding) =
429            self.find_data_region_meta(logical_region_id)?;
430
431        self.verify_rows(
432            logical_region_id,
433            physical_region_id,
434            &mut request.rows,
435            true,
436        )
437        .await?;
438
439        // write to data region
440        // TODO: retrieve table name
441        self.modify_rows(
442            physical_region_id,
443            logical_region_id.table_id(),
444            &mut request.rows,
445            primary_key_encoding,
446        )?;
447        if primary_key_encoding == PrimaryKeyEncoding::Sparse {
448            request.hint = Some(WriteHint {
449                primary_key_encoding: PrimaryKeyEncodingProto::Sparse.into(),
450            });
451        }
452        self.data_region
453            .write_data(data_region_id, RegionRequest::Put(request))
454            .await
455    }
456
457    async fn delete_logical_region(
458        &self,
459        logical_region_id: RegionId,
460        mut request: RegionDeleteRequest,
461    ) -> Result<AffectedRows> {
462        let _timer = MITO_OPERATION_ELAPSED
463            .with_label_values(&["delete"])
464            .start_timer();
465
466        let (physical_region_id, data_region_id, primary_key_encoding) =
467            self.find_data_region_meta(logical_region_id)?;
468
469        self.verify_rows(
470            logical_region_id,
471            physical_region_id,
472            &mut request.rows,
473            false,
474        )
475        .await?;
476
477        // write to data region
478        // TODO: retrieve table name
479        self.modify_rows(
480            physical_region_id,
481            logical_region_id.table_id(),
482            &mut request.rows,
483            primary_key_encoding,
484        )?;
485        if primary_key_encoding == PrimaryKeyEncoding::Sparse {
486            request.hint = Some(WriteHint {
487                primary_key_encoding: PrimaryKeyEncodingProto::Sparse.into(),
488            });
489        }
490        self.data_region
491            .write_data(data_region_id, RegionRequest::Delete(request))
492            .await
493    }
494
495    pub(crate) fn find_data_region_meta(
496        &self,
497        logical_region_id: RegionId,
498    ) -> Result<(RegionId, RegionId, PrimaryKeyEncoding)> {
499        let state = self.state.read().unwrap();
500        let physical_region_id = *state
501            .logical_regions()
502            .get(&logical_region_id)
503            .with_context(|| LogicalRegionNotFoundSnafu {
504                region_id: logical_region_id,
505            })?;
506        let data_region_id = to_data_region_id(physical_region_id);
507        let primary_key_encoding = state.get_primary_key_encoding(data_region_id).context(
508            PhysicalRegionNotFoundSnafu {
509                region_id: data_region_id,
510            },
511        )?;
512        Ok((physical_region_id, data_region_id, primary_key_encoding))
513    }
514
515    /// Verifies a request for a logical region against its corresponding metadata region.
516    ///
517    /// Includes:
518    /// - Check if the logical region exists
519    /// - Check if every column in the request exists in the physical region
520    /// - Check each column's datatype and semantic type match the physical region's schema
521    /// - Check the time index column is present
522    /// - When `check_fields` is true, check every physical field column is present.
523    ///   Set this to `false` for delete requests, which legitimately carry only
524    ///   the primary key + timestamp.
525    async fn verify_rows(
526        &self,
527        logical_region_id: RegionId,
528        physical_region_id: RegionId,
529        rows: &mut Rows,
530        check_fields: bool,
531    ) -> Result<()> {
532        // Check if the region exists
533        let data_region_id = to_data_region_id(physical_region_id);
534        let state = self.state.read().unwrap();
535        if !state.is_logical_region_exist(logical_region_id) {
536            error!("Trying to write to an nonexistent region {logical_region_id}");
537            return LogicalRegionNotFoundSnafu {
538                region_id: logical_region_id,
539            }
540            .fail();
541        }
542
543        // Type + semantic check on every column in the request schema.
544        let physical_state = state
545            .physical_region_states()
546            .get(&data_region_id)
547            .context(PhysicalRegionNotFoundSnafu {
548                region_id: data_region_id,
549            })?;
550        let physical_columns = physical_state.physical_columns();
551        for col in &rows.schema {
552            let info = physical_columns
553                .get(&col.column_name)
554                .context(ColumnNotFoundSnafu {
555                    name: &col.column_name,
556                    region_id: logical_region_id,
557                })?;
558
559            ensure!(
560                api::helper::is_column_type_value_eq(
561                    col.datatype,
562                    col.datatype_extension.clone(),
563                    &info.column_schema.data_type
564                ),
565                InvalidRequestSnafu {
566                    region_id: logical_region_id,
567                    reason: format!(
568                        "column {} expect type {:?}, given: {}({})",
569                        col.column_name,
570                        info.column_schema.data_type,
571                        api::v1::ColumnDataType::try_from(col.datatype)
572                            .map(|v| v.as_str_name())
573                            .unwrap_or("Unknown"),
574                        col.datatype,
575                    ),
576                }
577            );
578
579            ensure!(
580                api::helper::is_semantic_type_eq(col.semantic_type, info.semantic_type),
581                InvalidRequestSnafu {
582                    region_id: logical_region_id,
583                    reason: format!(
584                        "column {} expect semantic type {:?}, given: {}({})",
585                        col.column_name,
586                        info.semantic_type,
587                        api::v1::SemanticType::try_from(col.semantic_type)
588                            .map(|v| v.as_str_name())
589                            .unwrap_or("Unknown"),
590                        col.semantic_type,
591                    ),
592                }
593            );
594        }
595
596        let ts_name = physical_state.time_index_column_name();
597        ensure!(
598            rows.schema.iter().any(|col| col.column_name == ts_name),
599            InvalidRequestSnafu {
600                region_id: logical_region_id,
601                reason: format!("missing required time index column {ts_name}"),
602            }
603        );
604
605        if check_fields {
606            let field_name = physical_state.field_column_name();
607            if !rows.schema.iter().any(|col| col.column_name == field_name) {
608                let field_meta =
609                    physical_columns
610                        .get(field_name)
611                        .with_context(|| ColumnNotFoundSnafu {
612                            name: field_name,
613                            region_id: logical_region_id,
614                        })?;
615                Self::fill_missing_field_column(logical_region_id, field_name, field_meta, rows)?;
616            }
617        }
618
619        Ok(())
620    }
621
622    fn fill_missing_field_column(
623        logical_region_id: RegionId,
624        field_name: &str,
625        field_meta: &ColumnMetadata,
626        rows: &mut Rows,
627    ) -> Result<()> {
628        ensure!(
629            !field_meta.column_schema.is_default_impure(),
630            UnexpectedRequestSnafu {
631                reason: format!(
632                    "unexpected impure default value with region_id: {logical_region_id}, column: {field_name}, default_value: {:?}",
633                    field_meta.column_schema.default_constraint(),
634                ),
635            }
636        );
637
638        let default_value = field_meta
639            .column_schema
640            .create_default()
641            .context(CreateDefaultSnafu {
642                region_id: logical_region_id,
643                column: field_name,
644            })?
645            .with_context(|| InvalidRequestSnafu {
646                region_id: logical_region_id,
647                reason: format!("missing required field column {field_name}"),
648            })?;
649        let default_value = api::helper::to_grpc_value(default_value);
650        let (datatype, datatype_extension) =
651            ColumnDataTypeWrapper::try_from(field_meta.column_schema.data_type.clone())
652                .map_err(|e| {
653                    InvalidRequestSnafu {
654                        region_id: logical_region_id,
655                        reason: format!(
656                            "no protobuf type for field column {field_name} ({:?}): {e}",
657                            field_meta.column_schema.data_type
658                        ),
659                    }
660                    .build()
661                })?
662                .to_parts();
663
664        rows.schema.push(ColumnSchema {
665            column_name: field_name.to_string(),
666            datatype: datatype as i32,
667            semantic_type: SemanticType::Field as i32,
668            datatype_extension,
669            options: None,
670        });
671
672        for row in &mut rows.rows {
673            row.values.push(default_value.clone());
674        }
675
676        Ok(())
677    }
678
679    /// Perform metric engine specific logic to incoming rows.
680    /// - Add table_id column
681    /// - Generate tsid
682    fn modify_rows(
683        &self,
684        physical_region_id: RegionId,
685        table_id: TableId,
686        rows: &mut Rows,
687        encoding: PrimaryKeyEncoding,
688    ) -> Result<()> {
689        let input = std::mem::take(rows);
690        let iter = {
691            let state = self.state.read().unwrap();
692            let physical_columns = state
693                .physical_region_states()
694                .get(&physical_region_id)
695                .with_context(|| PhysicalRegionNotFoundSnafu {
696                    region_id: physical_region_id,
697                })?
698                .physical_columns();
699            RowsIter::new(input, physical_columns)
700        };
701        let output =
702            self.row_modifier
703                .modify_rows(iter, TableIdInput::Single(table_id), encoding)?;
704        *rows = output;
705        Ok(())
706    }
707}
708
709#[cfg(test)]
710mod tests {
711    use std::collections::HashSet;
712
713    use api::v1::value::ValueData;
714    use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema};
715    use common_error::ext::ErrorExt;
716    use common_error::status_code::StatusCode;
717    use common_function::utils::partition_expr_version;
718    use common_recordbatch::RecordBatches;
719    use datatypes::prelude::ConcreteDataType;
720    use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
721    use datatypes::value::Value as PartitionValue;
722    use partition::expr::col;
723    use store_api::metadata::ColumnMetadata;
724    use store_api::metric_engine_consts::{
725        DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME, PRIMARY_KEY_ENCODING,
726    };
727    use store_api::path_utils::table_dir;
728    use store_api::region_engine::RegionEngine;
729    use store_api::region_request::{
730        EnterStagingRequest, RegionRequest, StagingPartitionDirective,
731    };
732    use store_api::storage::ScanRequest;
733    use store_api::storage::consts::PRIMARY_KEY_COLUMN_NAME;
734
735    use super::*;
736    use crate::test_util::{self, TestEnv};
737
738    fn assert_merged_schema(rows: &Rows, expect_sparse: bool) {
739        let column_names: HashSet<String> = rows
740            .schema
741            .iter()
742            .map(|col| col.column_name.clone())
743            .collect();
744
745        if expect_sparse {
746            assert!(
747                column_names.contains(PRIMARY_KEY_COLUMN_NAME),
748                "sparse encoding should include primary key column"
749            );
750            assert!(
751                !column_names.contains(DATA_SCHEMA_TABLE_ID_COLUMN_NAME),
752                "sparse encoding should not include table id column"
753            );
754            assert!(
755                !column_names.contains(DATA_SCHEMA_TSID_COLUMN_NAME),
756                "sparse encoding should not include tsid column"
757            );
758            assert!(
759                !column_names.contains("job"),
760                "sparse encoding should not include tag columns"
761            );
762            assert!(
763                !column_names.contains("instance"),
764                "sparse encoding should not include tag columns"
765            );
766        } else {
767            assert!(
768                !column_names.contains(PRIMARY_KEY_COLUMN_NAME),
769                "dense encoding should not include primary key column"
770            );
771            assert!(
772                column_names.contains(DATA_SCHEMA_TABLE_ID_COLUMN_NAME),
773                "dense encoding should include table id column"
774            );
775            assert!(
776                column_names.contains(DATA_SCHEMA_TSID_COLUMN_NAME),
777                "dense encoding should include tsid column"
778            );
779            assert!(
780                column_names.contains("job"),
781                "dense encoding should keep tag columns"
782            );
783            assert!(
784                column_names.contains("instance"),
785                "dense encoding should keep tag columns"
786            );
787        }
788    }
789
790    fn job_partition_expr_json() -> String {
791        let expr = col("job")
792            .gt_eq(PartitionValue::String("job-0".into()))
793            .and(col("job").lt(PartitionValue::String("job-9".into())));
794        expr.as_json_str().unwrap()
795    }
796
797    async fn create_logical_region_with_tags(
798        env: &TestEnv,
799        physical_region_id: RegionId,
800        logical_region_id: RegionId,
801        tags: &[&str],
802    ) {
803        let region_create_request = test_util::create_logical_region_request(
804            tags,
805            physical_region_id,
806            &table_dir("test", logical_region_id.table_id()),
807        );
808        env.metric()
809            .handle_request(
810                logical_region_id,
811                RegionRequest::Create(region_create_request),
812            )
813            .await
814            .unwrap();
815    }
816
817    async fn run_batch_write_with_schema_variants(
818        env: &TestEnv,
819        physical_region_id: RegionId,
820        options: Vec<(String, String)>,
821        expect_sparse: bool,
822    ) {
823        env.create_physical_region(physical_region_id, &TestEnv::default_table_dir(), options)
824            .await;
825
826        let logical_region_1 = env.default_logical_region_id();
827        let logical_region_2 = RegionId::new(1024, 1);
828
829        create_logical_region_with_tags(env, physical_region_id, logical_region_1, &["job"]).await;
830        create_logical_region_with_tags(
831            env,
832            physical_region_id,
833            logical_region_2,
834            &["job", "instance"],
835        )
836        .await;
837
838        let schema_1 = test_util::row_schema_with_tags(&["job"]);
839        let schema_2 = test_util::row_schema_with_tags(&["job", "instance"]);
840
841        let data_region_id = RegionId::new(physical_region_id.table_id(), 2);
842        let primary_key_encoding = env
843            .metric()
844            .inner
845            .get_primary_key_encoding(data_region_id)
846            .unwrap();
847        assert_eq!(
848            primary_key_encoding,
849            if expect_sparse {
850                PrimaryKeyEncoding::Sparse
851            } else {
852                PrimaryKeyEncoding::Dense
853            }
854        );
855
856        let build_requests = || {
857            let rows_1 = test_util::build_rows(1, 3);
858            let rows_2 = test_util::build_rows(2, 2);
859
860            vec![
861                (
862                    logical_region_1,
863                    RegionPutRequest {
864                        rows: Rows {
865                            schema: schema_1.clone(),
866                            rows: rows_1,
867                        },
868                        hint: None,
869                        partition_expr_version: None,
870                    },
871                ),
872                (
873                    logical_region_2,
874                    RegionPutRequest {
875                        rows: Rows {
876                            schema: schema_2.clone(),
877                            rows: rows_2,
878                        },
879                        hint: None,
880                        partition_expr_version: None,
881                    },
882                ),
883            ]
884        };
885
886        let merged_request = if expect_sparse {
887            let (merged_request, _) = env
888                .metric()
889                .inner
890                .merge_sparse_batch(physical_region_id, build_requests())
891                .unwrap();
892            let hint = merged_request
893                .hint
894                .as_ref()
895                .expect("missing sparse write hint");
896            assert_eq!(
897                hint.primary_key_encoding,
898                PrimaryKeyEncodingProto::Sparse as i32
899            );
900            merged_request
901        } else {
902            let (merged_request, _) = env
903                .metric()
904                .inner
905                .merge_dense_batch(data_region_id, build_requests())
906                .unwrap();
907            assert!(merged_request.hint.is_none());
908            merged_request
909        };
910
911        assert_merged_schema(&merged_request.rows, expect_sparse);
912
913        let affected_rows = env
914            .metric()
915            .inner
916            .put_regions_batch(build_requests().into_iter())
917            .await
918            .unwrap();
919        assert_eq!(affected_rows, 5);
920
921        let request = ScanRequest::default();
922        let stream = env
923            .mito()
924            .scan_to_stream(data_region_id, request)
925            .await
926            .unwrap();
927        let batches = RecordBatches::try_collect(stream).await.unwrap();
928
929        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 5);
930    }
931
932    #[tokio::test]
933    async fn test_write_logical_region() {
934        let env = TestEnv::new().await;
935        env.init_metric_region().await;
936
937        // prepare data
938        let schema = test_util::row_schema_with_tags(&["job"]);
939        let rows = test_util::build_rows(1, 5);
940        let request = RegionRequest::Put(RegionPutRequest {
941            rows: Rows { schema, rows },
942            hint: None,
943            partition_expr_version: None,
944        });
945
946        // write data
947        let logical_region_id = env.default_logical_region_id();
948        let result = env
949            .metric()
950            .handle_request(logical_region_id, request)
951            .await
952            .unwrap();
953        assert_eq!(result.affected_rows, 5);
954
955        // read data from physical region
956        let physical_region_id = env.default_physical_region_id();
957        let request = ScanRequest::default();
958        let stream = env
959            .metric()
960            .scan_to_stream(physical_region_id, request)
961            .await
962            .unwrap();
963        let batches = RecordBatches::try_collect(stream).await.unwrap();
964        let expected = "\
965+-------------------------+----------------+------------+---------------------+-------+
966| greptime_timestamp      | greptime_value | __table_id | __tsid              | job   |
967+-------------------------+----------------+------------+---------------------+-------+
968| 1970-01-01T00:00:00     | 0.0            | 3          | 2955007454552897459 | tag_0 |
969| 1970-01-01T00:00:00.001 | 1.0            | 3          | 2955007454552897459 | tag_0 |
970| 1970-01-01T00:00:00.002 | 2.0            | 3          | 2955007454552897459 | tag_0 |
971| 1970-01-01T00:00:00.003 | 3.0            | 3          | 2955007454552897459 | tag_0 |
972| 1970-01-01T00:00:00.004 | 4.0            | 3          | 2955007454552897459 | tag_0 |
973+-------------------------+----------------+------------+---------------------+-------+";
974        assert_eq!(expected, batches.pretty_print().unwrap(), "physical region");
975
976        // read data from logical region
977        let request = ScanRequest::default();
978        let stream = env
979            .metric()
980            .scan_to_stream(logical_region_id, request)
981            .await
982            .unwrap();
983        let batches = RecordBatches::try_collect(stream).await.unwrap();
984        let expected = "\
985+-------------------------+----------------+-------+
986| greptime_timestamp      | greptime_value | job   |
987+-------------------------+----------------+-------+
988| 1970-01-01T00:00:00     | 0.0            | tag_0 |
989| 1970-01-01T00:00:00.001 | 1.0            | tag_0 |
990| 1970-01-01T00:00:00.002 | 2.0            | tag_0 |
991| 1970-01-01T00:00:00.003 | 3.0            | tag_0 |
992| 1970-01-01T00:00:00.004 | 4.0            | tag_0 |
993+-------------------------+----------------+-------+";
994        assert_eq!(expected, batches.pretty_print().unwrap(), "logical region");
995    }
996
997    #[tokio::test]
998    async fn test_write_logical_region_row_count() {
999        let env = TestEnv::new().await;
1000        env.init_metric_region().await;
1001        let engine = env.metric();
1002
1003        // add columns
1004        let logical_region_id = env.default_logical_region_id();
1005        let columns = &["odd", "even", "Ev_En"];
1006        let alter_request = test_util::alter_logical_region_add_tag_columns(123456, columns);
1007        engine
1008            .handle_request(logical_region_id, RegionRequest::Alter(alter_request))
1009            .await
1010            .unwrap();
1011
1012        // prepare data
1013        let schema = test_util::row_schema_with_tags(columns);
1014        let rows = test_util::build_rows(3, 100);
1015        let request = RegionRequest::Put(RegionPutRequest {
1016            rows: Rows { schema, rows },
1017            hint: None,
1018            partition_expr_version: None,
1019        });
1020
1021        // write data
1022        let result = engine
1023            .handle_request(logical_region_id, request)
1024            .await
1025            .unwrap();
1026        assert_eq!(100, result.affected_rows);
1027    }
1028
1029    #[tokio::test]
1030    async fn test_write_physical_region() {
1031        let env = TestEnv::new().await;
1032        env.init_metric_region().await;
1033        let engine = env.metric();
1034
1035        let physical_region_id = env.default_physical_region_id();
1036        let schema = test_util::row_schema_with_tags(&["abc"]);
1037        let rows = test_util::build_rows(1, 100);
1038        let request = RegionRequest::Put(RegionPutRequest {
1039            rows: Rows { schema, rows },
1040            hint: None,
1041            partition_expr_version: None,
1042        });
1043
1044        engine
1045            .handle_request(physical_region_id, request)
1046            .await
1047            .unwrap_err();
1048    }
1049
1050    #[tokio::test]
1051    async fn test_write_nonexist_logical_region() {
1052        let env = TestEnv::new().await;
1053        env.init_metric_region().await;
1054        let engine = env.metric();
1055
1056        let logical_region_id = RegionId::new(175, 8345);
1057        let schema = test_util::row_schema_with_tags(&["def"]);
1058        let rows = test_util::build_rows(1, 100);
1059        let request = RegionRequest::Put(RegionPutRequest {
1060            rows: Rows { schema, rows },
1061            hint: None,
1062            partition_expr_version: None,
1063        });
1064
1065        engine
1066            .handle_request(logical_region_id, request)
1067            .await
1068            .unwrap_err();
1069    }
1070
1071    #[tokio::test]
1072    async fn test_batch_write_multiple_logical_regions() {
1073        let env = TestEnv::new().await;
1074        env.init_metric_region().await;
1075        let engine = env.metric();
1076
1077        // Create two additional logical regions
1078        let physical_region_id = env.default_physical_region_id();
1079        let logical_region_1 = env.default_logical_region_id();
1080        let logical_region_2 = RegionId::new(1024, 1);
1081        let logical_region_3 = RegionId::new(1024, 2);
1082
1083        env.create_logical_region(physical_region_id, logical_region_2)
1084            .await;
1085        env.create_logical_region(physical_region_id, logical_region_3)
1086            .await;
1087
1088        // Prepare batch requests with non-overlapping timestamps
1089        let schema = test_util::row_schema_with_tags(&["job"]);
1090
1091        // Use build_rows_with_ts to create non-overlapping timestamps
1092        // logical_region_1: ts 0, 1, 2
1093        // logical_region_2: ts 10, 11  (offset to avoid overlap)
1094        // logical_region_3: ts 20, 21, 22, 23, 24  (offset to avoid overlap)
1095        let rows1 = test_util::build_rows(1, 3);
1096        let mut rows2 = test_util::build_rows(1, 2);
1097        let mut rows3 = test_util::build_rows(1, 5);
1098
1099        // Adjust timestamps to avoid conflicts
1100        use api::v1::value::ValueData;
1101        for (i, row) in rows2.iter_mut().enumerate() {
1102            if let Some(ValueData::TimestampMillisecondValue(ts)) =
1103                row.values.get_mut(0).and_then(|v| v.value_data.as_mut())
1104            {
1105                *ts = (10 + i) as i64;
1106            }
1107        }
1108        for (i, row) in rows3.iter_mut().enumerate() {
1109            if let Some(ValueData::TimestampMillisecondValue(ts)) =
1110                row.values.get_mut(0).and_then(|v| v.value_data.as_mut())
1111            {
1112                *ts = (20 + i) as i64;
1113            }
1114        }
1115
1116        let requests = vec![
1117            (
1118                logical_region_1,
1119                RegionPutRequest {
1120                    rows: Rows {
1121                        schema: schema.clone(),
1122                        rows: rows1,
1123                    },
1124                    hint: None,
1125                    partition_expr_version: None,
1126                },
1127            ),
1128            (
1129                logical_region_2,
1130                RegionPutRequest {
1131                    rows: Rows {
1132                        schema: schema.clone(),
1133                        rows: rows2,
1134                    },
1135                    hint: None,
1136                    partition_expr_version: None,
1137                },
1138            ),
1139            (
1140                logical_region_3,
1141                RegionPutRequest {
1142                    rows: Rows {
1143                        schema: schema.clone(),
1144                        rows: rows3,
1145                    },
1146                    hint: None,
1147                    partition_expr_version: None,
1148                },
1149            ),
1150        ];
1151
1152        // Batch write
1153        let affected_rows = engine
1154            .inner
1155            .put_regions_batch(requests.into_iter())
1156            .await
1157            .unwrap();
1158        assert_eq!(affected_rows, 10);
1159
1160        // Verify physical region contains data from all logical regions
1161        let request = ScanRequest::default();
1162        let stream = env
1163            .metric()
1164            .scan_to_stream(physical_region_id, request)
1165            .await
1166            .unwrap();
1167        let batches = RecordBatches::try_collect(stream).await.unwrap();
1168
1169        // Should have 3 + 2 + 5 = 10 rows total
1170        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 10);
1171    }
1172
1173    #[tokio::test]
1174    async fn test_batch_write_with_partial_failure() {
1175        let env = TestEnv::new().await;
1176        env.init_metric_region().await;
1177        let engine = env.metric();
1178
1179        let physical_region_id = env.default_physical_region_id();
1180        let logical_region_1 = env.default_logical_region_id();
1181        let logical_region_2 = RegionId::new(1024, 1);
1182        let nonexistent_region = RegionId::new(9999, 9999);
1183
1184        env.create_logical_region(physical_region_id, logical_region_2)
1185            .await;
1186
1187        // Prepare batch with one invalid region
1188        let schema = test_util::row_schema_with_tags(&["job"]);
1189        let requests = vec![
1190            (
1191                logical_region_1,
1192                RegionPutRequest {
1193                    rows: Rows {
1194                        schema: schema.clone(),
1195                        rows: test_util::build_rows(1, 3),
1196                    },
1197                    hint: None,
1198                    partition_expr_version: None,
1199                },
1200            ),
1201            (
1202                nonexistent_region,
1203                RegionPutRequest {
1204                    rows: Rows {
1205                        schema: schema.clone(),
1206                        rows: test_util::build_rows(1, 2),
1207                    },
1208                    hint: None,
1209                    partition_expr_version: None,
1210                },
1211            ),
1212            (
1213                logical_region_2,
1214                RegionPutRequest {
1215                    rows: Rows {
1216                        schema: schema.clone(),
1217                        rows: test_util::build_rows(1, 5),
1218                    },
1219                    hint: None,
1220                    partition_expr_version: None,
1221                },
1222            ),
1223        ];
1224
1225        // Batch write
1226        let result = engine.inner.put_regions_batch(requests.into_iter()).await;
1227        assert!(result.is_err());
1228
1229        // Invalid region is detected before any write, so the physical region remains empty.
1230        // Fail-fast is per physical-region group; cross-group partial success is possible.
1231        let request = ScanRequest::default();
1232        let stream = env
1233            .metric()
1234            .scan_to_stream(physical_region_id, request)
1235            .await
1236            .unwrap();
1237        let batches = RecordBatches::try_collect(stream).await.unwrap();
1238
1239        assert_eq!(batches.iter().map(|b| b.num_rows()).sum::<usize>(), 0);
1240    }
1241
1242    #[tokio::test]
1243    async fn test_batch_write_single_physical_region_forbidden() {
1244        let env = TestEnv::new().await;
1245        env.init_metric_region().await;
1246        let engine = env.metric();
1247
1248        let physical_region_id = env.default_physical_region_id();
1249        let schema = test_util::row_schema_with_tags(&["job"]);
1250        let requests = vec![(
1251            physical_region_id,
1252            RegionPutRequest {
1253                rows: Rows {
1254                    schema,
1255                    rows: test_util::build_rows(1, 1),
1256                },
1257                hint: None,
1258                partition_expr_version: None,
1259            },
1260        )];
1261
1262        let err = engine
1263            .inner
1264            .put_regions_batch(requests.into_iter())
1265            .await
1266            .unwrap_err();
1267
1268        assert!(matches!(
1269            err,
1270            crate::error::Error::ForbiddenPhysicalWrite { .. }
1271        ));
1272    }
1273
1274    #[tokio::test]
1275    async fn test_batch_write_physical_region_forbidden() {
1276        let env = TestEnv::new().await;
1277        env.init_metric_region().await;
1278        let engine = env.metric();
1279
1280        let physical_region_id = env.default_physical_region_id();
1281        let logical_region_id = env.default_logical_region_id();
1282        let schema = test_util::row_schema_with_tags(&["job"]);
1283        let requests = vec![
1284            (
1285                logical_region_id,
1286                RegionPutRequest {
1287                    rows: Rows {
1288                        schema: schema.clone(),
1289                        rows: test_util::build_rows(1, 1),
1290                    },
1291                    hint: None,
1292                    partition_expr_version: None,
1293                },
1294            ),
1295            (
1296                physical_region_id,
1297                RegionPutRequest {
1298                    rows: Rows {
1299                        schema,
1300                        rows: test_util::build_rows(1, 1),
1301                    },
1302                    hint: None,
1303                    partition_expr_version: None,
1304                },
1305            ),
1306        ];
1307
1308        let err = engine
1309            .inner
1310            .put_regions_batch(requests.into_iter())
1311            .await
1312            .unwrap_err();
1313
1314        assert!(matches!(
1315            err,
1316            crate::error::Error::ForbiddenPhysicalWrite { .. }
1317        ));
1318    }
1319
1320    #[tokio::test]
1321    async fn test_batch_write_single_request_fast_path() {
1322        let env = TestEnv::new().await;
1323        env.init_metric_region().await;
1324        let engine = env.metric();
1325
1326        let logical_region_id = env.default_logical_region_id();
1327        let schema = test_util::row_schema_with_tags(&["job"]);
1328
1329        // Single request should use fast path
1330        let requests = vec![(
1331            logical_region_id,
1332            RegionPutRequest {
1333                rows: Rows {
1334                    schema,
1335                    rows: test_util::build_rows(1, 5),
1336                },
1337                hint: None,
1338                partition_expr_version: None,
1339            },
1340        )];
1341
1342        let affected_rows = engine
1343            .inner
1344            .put_regions_batch(requests.into_iter())
1345            .await
1346            .unwrap();
1347        assert_eq!(affected_rows, 5);
1348    }
1349
1350    #[tokio::test]
1351    async fn test_batch_write_empty_requests() {
1352        let env = TestEnv::new().await;
1353        env.init_metric_region().await;
1354        let engine = env.metric();
1355
1356        // Empty batch should return zero affected rows
1357        let requests = vec![];
1358        let affected_rows = engine
1359            .inner
1360            .put_regions_batch(requests.into_iter())
1361            .await
1362            .unwrap();
1363
1364        assert_eq!(affected_rows, 0);
1365    }
1366
1367    #[tokio::test]
1368    async fn test_batch_write_sparse_encoding() {
1369        let env = TestEnv::new().await;
1370        let physical_region_id = env.default_physical_region_id();
1371
1372        run_batch_write_with_schema_variants(
1373            &env,
1374            physical_region_id,
1375            vec![(PRIMARY_KEY_ENCODING.to_string(), "sparse".to_string())],
1376            true,
1377        )
1378        .await;
1379    }
1380
1381    #[tokio::test]
1382    async fn test_batch_write_dense_encoding() {
1383        let env = TestEnv::new().await;
1384        let physical_region_id = env.default_physical_region_id();
1385
1386        run_batch_write_with_schema_variants(
1387            &env,
1388            physical_region_id,
1389            vec![(PRIMARY_KEY_ENCODING.to_string(), "dense".to_string())],
1390            false,
1391        )
1392        .await;
1393    }
1394
1395    #[tokio::test]
1396    async fn test_metric_put_rejects_bad_partition_expr_version() {
1397        let env = TestEnv::new().await;
1398        env.init_metric_region().await;
1399
1400        let logical_region_id = env.default_logical_region_id();
1401        let rows = Rows {
1402            schema: test_util::row_schema_with_tags(&["job"]),
1403            rows: test_util::build_rows(1, 3),
1404        };
1405
1406        let err = env
1407            .metric()
1408            .handle_request(
1409                logical_region_id,
1410                RegionRequest::Put(RegionPutRequest {
1411                    rows,
1412                    hint: None,
1413                    partition_expr_version: Some(1),
1414                }),
1415            )
1416            .await
1417            .unwrap_err();
1418
1419        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1420    }
1421
1422    #[tokio::test]
1423    async fn test_metric_put_respects_staging_partition_expr_version() {
1424        let env = TestEnv::new().await;
1425        env.init_metric_region().await;
1426
1427        let logical_region_id = env.default_logical_region_id();
1428        let physical_region_id = env.default_physical_region_id();
1429        let partition_expr = job_partition_expr_json();
1430        env.metric()
1431            .handle_request(
1432                physical_region_id,
1433                RegionRequest::EnterStaging(EnterStagingRequest {
1434                    partition_directive: StagingPartitionDirective::UpdatePartitionExpr(
1435                        partition_expr.clone(),
1436                    ),
1437                }),
1438            )
1439            .await
1440            .unwrap();
1441
1442        let expected_version = partition_expr_version(Some(&partition_expr));
1443        let rows = Rows {
1444            schema: test_util::row_schema_with_tags(&["job"]),
1445            rows: test_util::build_rows(1, 3),
1446        };
1447
1448        let err = env
1449            .metric()
1450            .handle_request(
1451                logical_region_id,
1452                RegionRequest::Put(RegionPutRequest {
1453                    rows: rows.clone(),
1454                    hint: None,
1455                    partition_expr_version: Some(expected_version.wrapping_add(1)),
1456                }),
1457            )
1458            .await
1459            .unwrap_err();
1460        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1461
1462        let response = env
1463            .metric()
1464            .handle_request(
1465                logical_region_id,
1466                RegionRequest::Put(RegionPutRequest {
1467                    rows: rows.clone(),
1468                    hint: None,
1469                    partition_expr_version: None,
1470                }),
1471            )
1472            .await
1473            .unwrap();
1474        assert_eq!(response.affected_rows, 3);
1475
1476        let response = env
1477            .metric()
1478            .handle_request(
1479                logical_region_id,
1480                RegionRequest::Put(RegionPutRequest {
1481                    rows,
1482                    hint: None,
1483                    partition_expr_version: Some(expected_version),
1484                }),
1485            )
1486            .await
1487            .unwrap();
1488        assert_eq!(response.affected_rows, 3);
1489    }
1490
1491    /// Regression test for issue #7990: the metric engine must reject a row
1492    /// whose timestamp column carries a non-timestamp datatype, rather than
1493    /// letting it panic inside mito's `ValueBuilder::push`.
1494    #[tokio::test]
1495    async fn test_verify_rows_rejects_wrong_type() {
1496        use api::v1::value::ValueData;
1497        use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, SemanticType};
1498        use common_query::prelude::{greptime_timestamp, greptime_value};
1499
1500        let env = TestEnv::new().await;
1501        env.init_metric_region().await;
1502
1503        let logical_region_id = env.default_logical_region_id();
1504
1505        // Timestamp column is declared as String — the very payload that
1506        // caused #7990. It should surface a typed error rather than panic.
1507        let schema = vec![
1508            PbColumnSchema {
1509                column_name: greptime_timestamp().to_string(),
1510                datatype: ColumnDataType::String as i32,
1511                semantic_type: SemanticType::Timestamp as _,
1512                datatype_extension: None,
1513                options: None,
1514            },
1515            PbColumnSchema {
1516                column_name: greptime_value().to_string(),
1517                datatype: ColumnDataType::Float64 as i32,
1518                semantic_type: SemanticType::Field as _,
1519                datatype_extension: None,
1520                options: None,
1521            },
1522            PbColumnSchema {
1523                column_name: "job".to_string(),
1524                datatype: ColumnDataType::String as i32,
1525                semantic_type: SemanticType::Tag as _,
1526                datatype_extension: None,
1527                options: None,
1528            },
1529        ];
1530        let rows = vec![Row {
1531            values: vec![
1532                Value {
1533                    value_data: Some(ValueData::StringValue("not-a-timestamp".to_string())),
1534                },
1535                Value {
1536                    value_data: Some(ValueData::F64Value(1.0)),
1537                },
1538                Value {
1539                    value_data: Some(ValueData::StringValue("tag_0".to_string())),
1540                },
1541            ],
1542        }];
1543
1544        let err = env
1545            .metric()
1546            .handle_request(
1547                logical_region_id,
1548                RegionRequest::Put(RegionPutRequest {
1549                    rows: Rows { schema, rows },
1550                    hint: None,
1551                    partition_expr_version: None,
1552                }),
1553            )
1554            .await
1555            .unwrap_err();
1556        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1557    }
1558
1559    /// The completeness check must reject requests that omit the time index
1560    /// column, since mito cannot default-fill a `TimeIndex` column and would
1561    /// previously panic on the empty builder.
1562    #[tokio::test]
1563    async fn test_verify_rows_rejects_missing_time_index() {
1564        use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, SemanticType};
1565        use common_query::prelude::greptime_value;
1566
1567        let env = TestEnv::new().await;
1568        env.init_metric_region().await;
1569
1570        let logical_region_id = env.default_logical_region_id();
1571
1572        // Payload only carries the field and a tag — no timestamp column.
1573        let schema = vec![
1574            PbColumnSchema {
1575                column_name: greptime_value().to_string(),
1576                datatype: ColumnDataType::Float64 as i32,
1577                semantic_type: SemanticType::Field as _,
1578                datatype_extension: None,
1579                options: None,
1580            },
1581            PbColumnSchema {
1582                column_name: "job".to_string(),
1583                datatype: ColumnDataType::String as i32,
1584                semantic_type: SemanticType::Tag as _,
1585                datatype_extension: None,
1586                options: None,
1587            },
1588        ];
1589        let rows = vec![Row {
1590            values: vec![
1591                Value {
1592                    value_data: Some(api::v1::value::ValueData::F64Value(1.0)),
1593                },
1594                Value {
1595                    value_data: Some(api::v1::value::ValueData::StringValue("tag_0".to_string())),
1596                },
1597            ],
1598        }];
1599
1600        let err = env
1601            .metric()
1602            .handle_request(
1603                logical_region_id,
1604                RegionRequest::Put(RegionPutRequest {
1605                    rows: Rows { schema, rows },
1606                    hint: None,
1607                    partition_expr_version: None,
1608                }),
1609            )
1610            .await
1611            .unwrap_err();
1612        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1613    }
1614
1615    #[tokio::test]
1616    async fn test_verify_rows_rejects_missing_field() {
1617        use api::v1::value::ValueData;
1618        use api::v1::{ColumnDataType, ColumnSchema as PbColumnSchema, SemanticType};
1619        use common_query::prelude::greptime_timestamp;
1620
1621        let env = TestEnv::new().await;
1622        env.init_metric_region().await;
1623
1624        let logical_region_id = env.default_logical_region_id();
1625
1626        // Schema has timestamp + tag but no field column.
1627        let schema = vec![
1628            PbColumnSchema {
1629                column_name: greptime_timestamp().to_string(),
1630                datatype: ColumnDataType::TimestampMillisecond as i32,
1631                semantic_type: SemanticType::Timestamp as _,
1632                datatype_extension: None,
1633                options: None,
1634            },
1635            PbColumnSchema {
1636                column_name: "job".to_string(),
1637                datatype: ColumnDataType::String as i32,
1638                semantic_type: SemanticType::Tag as _,
1639                datatype_extension: None,
1640                options: None,
1641            },
1642        ];
1643        let rows = vec![Row {
1644            values: vec![
1645                Value {
1646                    value_data: Some(ValueData::TimestampMillisecondValue(0)),
1647                },
1648                Value {
1649                    value_data: Some(ValueData::StringValue("tag_0".to_string())),
1650                },
1651            ],
1652        }];
1653
1654        let err = env
1655            .metric()
1656            .handle_request(
1657                logical_region_id,
1658                RegionRequest::Put(RegionPutRequest {
1659                    rows: Rows { schema, rows },
1660                    hint: None,
1661                    partition_expr_version: None,
1662                }),
1663            )
1664            .await
1665            .unwrap_err();
1666        let message = err.to_string();
1667        assert!(
1668            message.contains("missing required field column"),
1669            "expected field-completeness rejection, got: {message}"
1670        );
1671        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
1672    }
1673
/// Checks `fill_missing_field_column` for a nullable field column that has
/// no default constraint.
///
/// Starting from a request that only carries a timestamp column, the call
/// must succeed, append the field column to the schema, and append an empty
/// (`None`) value to the row.
#[test]
fn test_fill_missing_field_column_nullable_no_default() {
    // Third argument `true` marks the column nullable; no default is attached.
    let value_schema = ColumnSchema::new(
        "greptime_value".to_string(),
        ConcreteDataType::float64_datatype(),
        true,
    );
    let field_meta = ColumnMetadata {
        column_schema: value_schema,
        semantic_type: SemanticType::Field,
        column_id: 1,
    };

    // The incoming request has a single timestamp column and a single row.
    let ts_only_schema = vec![PbColumnSchema {
        column_name: "ts".to_string(),
        datatype: ColumnDataType::TimestampMillisecond as i32,
        semantic_type: SemanticType::Timestamp as _,
        datatype_extension: None,
        options: None,
    }];
    let ts_only_row = Row {
        values: vec![Value {
            value_data: Some(ValueData::TimestampMillisecondValue(0)),
        }],
    };
    let mut rows = Rows {
        schema: ts_only_schema,
        rows: vec![ts_only_row],
    };

    let region_id = RegionId::new(1, 1);
    MetricEngineInner::fill_missing_field_column(
        region_id,
        "greptime_value",
        &field_meta,
        &mut rows,
    )
    .unwrap();

    // The schema grows by exactly one column, placed after the timestamp.
    assert_eq!(rows.schema.len(), 2);
    let appended_column = &rows.schema[1];
    assert_eq!(appended_column.column_name, "greptime_value");

    // The row grows in step with the schema and the new slot holds no value.
    let filled_row = &rows.rows[0];
    assert_eq!(filled_row.values.len(), 2);
    assert!(
        filled_row.values[1].value_data.is_none(),
        "missing nullable field should be filled with null"
    );
}
1716
/// Checks that `fill_missing_field_column` returns an error when the missing
/// field column has a `now()` function as its default.
///
/// The error message must contain "impure default value".
#[test]
fn test_fill_missing_field_column_rejects_impure_default() {
    // Non-nullable timestamp field whose default is the `now()` function.
    let impure_default = ColumnDefaultConstraint::Function("now()".to_string());
    let column_schema = ColumnSchema::new(
        "greptime_value".to_string(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        false,
    )
    .with_default_constraint(Some(impure_default))
    .unwrap();
    let field_meta = ColumnMetadata {
        column_schema,
        semantic_type: SemanticType::Field,
        column_id: 1,
    };

    // The incoming request has a single timestamp column and a single row.
    let ts_only_schema = vec![PbColumnSchema {
        column_name: "ts".to_string(),
        datatype: api::v1::ColumnDataType::TimestampMillisecond as i32,
        semantic_type: SemanticType::Timestamp as _,
        datatype_extension: None,
        options: None,
    }];
    let ts_only_row = Row {
        values: vec![Value {
            value_data: Some(ValueData::TimestampMillisecondValue(0)),
        }],
    };
    let mut rows = Rows {
        schema: ts_only_schema,
        rows: vec![ts_only_row],
    };

    let region_id = RegionId::new(1, 1);
    let err = MetricEngineInner::fill_missing_field_column(
        region_id,
        "greptime_value",
        &field_meta,
        &mut rows,
    )
    .unwrap_err();

    assert!(
        err.to_string().contains("impure default value"),
        "expected impure-default rejection, got: {err}"
    );
}
1757}